// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// proto2 syntax: the messages in this file use explicit `optional` labels
// and per-field `default` values.
syntax = "proto2";

package mediapipe;

// Characteristics of the camera used to capture images and depth data.
// See developer.apple.com/documentation/avfoundation/avcameracalibrationdata
// for more information.
message AVCameraCalibrationData {
  // Row-major 3x3 matrix that relates the camera's internal properties to an
  // ideal pinhole-camera model.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881135-intrinsicmatrix
  // for detailed usage information.
  repeated float intrinsic_matrix = 1 [packed = true];

  // Image dimensions that the intrinsic_matrix values are relative to.
  optional float intrinsic_matrix_reference_dimension_width = 2;
  optional float intrinsic_matrix_reference_dimension_height = 3;

  // Row-major 3x4 matrix that relates the camera's position and orientation
  // to a world or scene coordinate system. The left-hand 3x3 part is a
  // unitless rotation matrix (R); the right-hand 3x1 column is a translation
  // vector (t) whose units are millimeters. For example:
  //
  //            |r1,1  r2,1  r3,1 | t1|
  //  [R | t] = |r1,2  r2,2  r3,2 | t2|
  //            |r1,3  r2,3  r3,3 | t3|
  //
  //  is stored as [r11, r21, r31, t1, r12, r22, r32, t2, ...]
  //
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881130-extrinsicmatrix?language=objc
  // for more information.
  repeated float extrinsic_matrix = 4 [packed = true];

  // Size of a single image pixel, in millimeters.
  optional float pixel_size = 5;

  // Floating-point values describing the radial distortions imparted by the
  // camera lens; used when rectifying camera images.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881129-lensdistortionlookuptable?language=objc
  // for more information.
  repeated float lens_distortion_lookup_values = 6 [packed = true];

  // Floating-point values describing radial distortions; used when reapplying
  // camera geometry to a rectified image.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881132-inverselensdistortionlookuptable?language=objc
  // for more information.
  repeated float inverse_lens_distortion_lookup_values = 7 [packed = true];

  // Offset of the lens distortion center, measured from the top-left corner
  // of the image.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881131-lensdistortioncenter?language=objc
  // for more information.
  optional float lens_distortion_center_x = 8;
  optional float lens_distortion_center_y = 9;
}

// Container for depth data information.
// See developer.apple.com/documentation/avfoundation/avdepthdata for more info.
message AVDepthData {
  // PNG representation of the grayscale depth data map. See the discussion of
  // depth_data_map_original_minimum_value, below, for how to interpret the
  // pixel values.
  optional bytes depth_data_map = 1;

  // Pixel format type of the original captured depth data.
  // See
  // developer.apple.com/documentation/corevideo/1563591-pixel_format_identifiers?language=objc
  // for the complete list of possible pixel format types. This value represents
  // a string for the associated OSType/FourCharCode.
  optional string depth_data_type = 2;

  // Indicates the general accuracy of the depth_data_map.
  // See developer.apple.com/documentation/avfoundation/avdepthdataaccuracy for
  // more information.
  enum Accuracy {
    UNDEFINED_ACCURACY = 0;
    // Values in the depth map are usable for foreground/background separation
    // but are not absolutely accurate in the physical world.
    RELATIVE = 1;
    // Values in the depth map are absolutely accurate in the physical world.
    ABSOLUTE = 2;
  }
  // Note that an unset field reads back as RELATIVE, not UNDEFINED_ACCURACY.
  optional Accuracy depth_data_accuracy = 3 [default = RELATIVE];

  // Indicates whether the depth_data_map contains temporally smoothed data.
  optional bool depth_data_filtered = 4;

  // Quality of the depth_data_map.
  enum Quality {
    UNDEFINED_QUALITY = 0;
    HIGH = 1;
    LOW = 2;
  }
  optional Quality depth_data_quality = 5;

  // Associated calibration data for the depth_data_map.
  optional AVCameraCalibrationData camera_calibration_data = 6;

  // The original range of values expressed by the depth_data_map, before
  // grayscale normalization. For example, if the minimum and maximum values
  // indicate a range of [0.5, 2.2], and the depth_data_type value indicates
  // it was a depth map, then white pixels (255, 255, 255) will map to 0.5 and
  // black pixels (0, 0, 0) will map to 2.2, with the grayscale range linearly
  // interpolated in between. Conversely, if the depth_data_type value
  // indicates it was a disparity map, then white pixels will map to 2.2 and
  // black pixels will map to 0.5.
  optional float depth_data_map_original_minimum_value = 7;
  optional float depth_data_map_original_maximum_value = 8;

  // The width of the depth buffer map.
  optional int32 depth_data_map_width = 9;

  // The height of the depth buffer map.
  optional int32 depth_data_map_height = 10;

  // The row-major flattened array of the depth buffer map pixels. This will be
  // either a float32 or float16 byte array, depending on 'depth_data_type'.
  optional bytes depth_data_map_raw_values = 11;
}

// Estimated scene lighting information associated with a captured video frame.
// See developer.apple.com/documentation/arkit/arlightestimate for more info.
message ARLightEstimate {
  // The estimated intensity, in lumens, of ambient light throughout the scene.
  optional double ambient_intensity = 1;

  // The estimated color temperature, in degrees Kelvin, of ambient light
  // throughout the scene.
  optional double ambient_color_temperature = 2;

  // Data describing the estimated lighting environment in all directions.
  // Second-level spherical harmonics in separate red, green, and blue data
  // planes. Thus, this buffer contains 3 sets of 9 coefficients, or a total of
  // 27 values.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928222-sphericalharmonicscoefficients?language=objc
  // for more information.
  repeated float spherical_harmonics_coefficients = 3 [packed = true];

  // The x, y, and z components of a direction; used by
  // primary_light_direction.
  message DirectionVector {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }
  // A vector indicating the orientation of the strongest directional light
  // source, normalized in the world-coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928221-primarylightdirection?language=objc
  // for more information.
  optional DirectionVector primary_light_direction = 4;

  // The estimated intensity, in lumens, of the strongest directional light
  // source in the scene.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928219-primarylightintensity?language=objc
  // for more information.
  optional float primary_light_intensity = 5;
}

// Camera position and imaging characteristics for a captured video frame.
// See developer.apple.com/documentation/arkit/arcamera for more information.
message ARCamera {
  // General quality of the position tracking that was available when the
  // camera captured a frame.
  enum TrackingState {
    UNDEFINED_TRACKING_STATE = 0;
    // Camera position tracking is not available.
    UNAVAILABLE = 1;
    // Tracking is available, but the quality of results is questionable.
    LIMITED = 2;
    // Camera position tracking is providing optimal results.
    NORMAL = 3;
  }
  optional TrackingState tracking_state = 1 [default = UNAVAILABLE];

  // Possible diagnosis for limited position tracking quality at the time the
  // frame was captured.
  enum TrackingStateReason {
    UNDEFINED_TRACKING_STATE_REASON = 0;
    // The current tracking state is not limited.
    NONE = 1;
    // Not yet enough camera or motion data to provide tracking information.
    INITIALIZING = 2;
    // The device is moving too fast for accurate image-based position tracking.
    EXCESSIVE_MOTION = 3;
    // Not enough distinguishable features for image-based position tracking.
    INSUFFICIENT_FEATURES = 4;
    // Tracking is limited due to a relocalization in progress.
    RELOCALIZING = 5;
  }
  optional TrackingStateReason tracking_state_reason = 2 [default = NONE];

  // Row-major 4x4 matrix giving the camera's position and orientation in world
  // coordinate space.
  // See developer.apple.com/documentation/arkit/arcamera/2866108-transform for
  // more information.
  repeated float transform = 3 [packed = true];

  // Camera orientation expressed as roll, pitch, and yaw values.
  message EulerAngles {
    optional float roll = 1;
    optional float pitch = 2;
    optional float yaw = 3;
  }
  optional EulerAngles euler_angles = 4;

  // Width and height of the captured camera image, in pixels.
  optional int32 image_resolution_width = 5;
  optional int32 image_resolution_height = 6;

  // Row-major 3x3 matrix that converts between the 2D camera plane and 3D
  // world coordinate space.
  // See developer.apple.com/documentation/arkit/arcamera/2875730-intrinsics for
  // usage information.
  repeated float intrinsics = 7 [packed = true];

  // Row-major 4x4 transform matrix appropriate for rendering 3D content so
  // that it matches the image captured by the camera.
  // See
  // developer.apple.com/documentation/arkit/arcamera/2887458-projectionmatrix
  // for usage information.
  repeated float projection_matrix = 8 [packed = true];

  // Row-major 4x4 transform matrix appropriate for converting from world
  // space to camera space. Relativized for the captured_image orientation
  // (i.e. UILandscapeOrientationRight).
  // See
  // https://developer.apple.com/documentation/arkit/arcamera/2921672-viewmatrixfororientation?language=objc
  // for more information.
  repeated float view_matrix = 9 [packed = true];
}

// Container for a 3D mesh that describes face topology.
message ARFaceGeometry {
  // A single 3D point of the face mesh, expressed in the face coordinate
  // space.
  // See developer.apple.com/documentation/arkit/arfacegeometry/2928201-vertices
  // for more information.
  message Vertex {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }
  repeated Vertex vertices = 1;

  // Number of elements in the vertices list.
  optional int32 vertex_count = 2;

  // UV texture coordinates for the vertex found at the same index in the
  // vertices buffer.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
  // for more information.
  message TextureCoordinate {
    optional float u = 1;
    optional float v = 2;
  }
  repeated TextureCoordinate texture_coordinates = 3;

  // Number of elements in the texture_coordinates list.
  optional int32 texture_coordinate_count = 4;

  // Ordered list in which every integer is an index into the vertices and
  // texture_coordinates lists. Each consecutive set of three indices
  // identifies the vertices of a single triangle in the mesh, so the number
  // of indices in the triangle_indices buffer is three times the
  // triangle_count value.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928199-triangleindices
  // for more information.
  repeated int32 triangle_indices = 5 [packed = true];

  // Number of triangles described by the triangle_indices buffer.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928207-trianglecount
  // for more information.
  optional int32 triangle_count = 6;
}

// List of blend shape entries; each entry maps one blend shape location to
// its associated coefficient.
message ARBlendShapeMap {
  message MapEntry {
    // Identifier of the specific facial feature.
    // See developer.apple.com/documentation/arkit/arblendshapelocation for a
    // complete list of identifiers.
    optional string blend_shape_location = 1;

    // Current position of the feature relative to its neutral configuration,
    // ranging from 0.0 (neutral) to 1.0 (maximum movement).
    optional float blend_shape_coefficient = 2;
  }
  repeated MapEntry entries = 1;
}

// Information about the pose, topology, and expression of a detected face.
// See developer.apple.com/documentation/arkit/arfaceanchor for more info.
message ARFaceAnchor {
  // A coarse triangle mesh representing the topology of the detected face.
  optional ARFaceGeometry geometry = 1;

  // A map of named coefficients representing the detected facial expression in
  // terms of the movement of specific facial features.
  optional ARBlendShapeMap blend_shapes = 2;

  // 4x4 row-major matrix encoding the position, orientation, and scale of the
  // anchor relative to the world coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform?language=objc
  // for more information.
  repeated float transform = 3 [packed = true];

  // Indicates whether the anchor's transform is valid. Frames that have a face
  // anchor with this value set to false should probably be ignored.
  optional bool is_tracked = 4;
}

// Container for a 3D mesh.
message ARPlaneGeometry {
  // A 3D point. Used for the entries of both the vertices and the
  // boundary_vertices lists.
  message Vertex {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // Each texture coordinate represents UV texture coordinates for the vertex at
  // the corresponding index in the vertices buffer.
  // See
  // https://developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
  // for more information.
  // NOTE(review): the link above is the ARFaceGeometry page; the
  // ARPlaneGeometry equivalent was presumably intended -- confirm.
  message TextureCoordinate {
    optional float u = 1;
    optional float v = 2;
  }

  // A buffer of vertex positions for each point in the plane mesh.
  repeated Vertex vertices = 1;

  // The number of elements in the vertices buffer.
  optional int32 vertex_count = 2;

  // A buffer of texture coordinate values for each point in the plane mesh.
  repeated TextureCoordinate texture_coordinates = 3;

  // The number of elements in the texture_coordinates buffer.
  optional int32 texture_coordinate_count = 4;

  // Each integer value in this ordered list represents an index into the
  // vertices and texture_coordinates lists. Each set of three indices
  // identifies the vertices comprising a single triangle in the mesh. Each set
  // of three indices forms a triangle, so the number of indices in the
  // triangle_indices buffer is three times the triangle_count value.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941051-triangleindices
  // for more information.
  repeated int32 triangle_indices = 5 [packed = true];

  // The number of triangles described by the triangle_indices buffer. Each
  // set of three indices forms a triangle, so the number of indices in the
  // triangle_indices buffer is three times the triangle_count value.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941058-trianglecount
  // for more information.
  optional int32 triangle_count = 6;

  // Each value in this buffer represents the position of a vertex along the
  // boundary polygon of the estimated plane. The owning plane anchor's
  // transform matrix defines the coordinate system for these points.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941052-boundaryvertices
  // for more information.
  repeated Vertex boundary_vertices = 7;

  // The number of elements in the boundary_vertices buffer.
  optional int32 boundary_vertex_count = 8;
}

// Information about the position and orientation of a real-world flat surface.
// See https://developer.apple.com/documentation/arkit/arplaneanchor for more
// information.
message ARPlaneAnchor {
  // The general orientation of a plane with respect to gravity.
  enum Alignment {
    UNDEFINED = 0;
    // The plane is perpendicular to gravity.
    HORIZONTAL = 1;
    // The plane is parallel to gravity.
    VERTICAL = 2;
  }

  // Wrapper for a 3D point / vector within the plane. See extent and center
  // values for more information.
  message PlaneVector {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // The kinds of real-world surface a plane anchor can be classified as.
  enum PlaneClassification {
    NONE = 0;
    WALL = 1;
    FLOOR = 2;
    CEILING = 3;
    TABLE = 4;
    SEAT = 5;
  }

  // The classification status for the plane.
  enum PlaneClassificationStatus {
    // The classification process for the plane anchor has completed but the
    // result is inconclusive.
    UNKNOWN = 0;
    // No classification information can be provided (set on error or if the
    // device does not support plane classification).
    UNAVAILABLE = 1;
    // The classification process has not completed.
    UNDETERMINED = 2;
    // The classification process for the plane anchor has completed.
    KNOWN = 3;
  }

  // The ID of the plane.
  optional string identifier = 1;

  // 4x4 row-major matrix encoding the position, orientation, and scale of the
  // anchor relative to the world coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform
  // for more information.
  repeated float transform = 2 [packed = true];

  // The general orientation of the detected plane with respect to gravity.
  optional Alignment alignment = 3;

  // A coarse triangle mesh representing the general shape of the detected
  // plane.
  optional ARPlaneGeometry geometry = 4;

  // The center point of the plane relative to its anchor position.
  // Although the type of this property is a 3D vector, a plane anchor is always
  // two-dimensional, and is always positioned in only the x and z directions
  // relative to its transform position. (That is, the y-component of this
  // vector is always zero.)
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2882056-center
  // for more information.
  optional PlaneVector center = 5;

  // The estimated width and length of the detected plane.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2882055-extent
  // for more information.
  optional PlaneVector extent = 6;

  // A Boolean value that indicates whether plane classification is available on
  // the current device. On devices without plane classification support, all
  // plane anchors report a classification value of NONE
  // and a classification_status value of UNAVAILABLE.
  optional bool classification_supported = 7;

  // A general characterization of what kind of real-world surface the plane
  // anchor represents.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2990936-classification
  // for more information.
  optional PlaneClassification classification = 8;

  // The current state of ARKit's process for classifying the plane anchor.
  // When this property's value is KNOWN, the classification property represents
  // ARKit's characterization of the real-world surface corresponding to the
  // plane anchor.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2990937-classificationstatus
  // for more information.
  optional PlaneClassificationStatus classification_status = 9;
}

// A collection of points in the world coordinate space.
// See https://developer.apple.com/documentation/arkit/arpointcloud for more
// information.
message ARPointCloud {
  // The x, y, and z coordinates of a single point in the cloud.
  message Point {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // The number of points in the cloud.
  optional int32 count = 1;

  // The list of detected points.
  repeated Point point = 2;

  // A list of unique identifiers corresponding to detected feature points.
  // Each identifier in this list corresponds to the entry at the same index
  // in the point list.
  repeated int64 identifier = 3 [packed = true];
}

// Video image and face position tracking information.
// See developer.apple.com/documentation/arkit/arframe for more information.
message ARFrame {
  // Timestamp of the frame.
  optional double timestamp = 1;

  // Depth data associated with the frame. Not every frame has depth data.
  optional AVDepthData depth_data = 2;

  // Timestamp of the depth data object associated with the frame. May differ
  // from the frame timestamp value. Only set when the frame has depth_data.
  optional double depth_data_timestamp = 3;

  // Camera information associated with the frame.
  optional ARCamera camera = 4;

  // Light information associated with the frame.
  optional ARLightEstimate light_estimate = 5;

  // Face anchor information associated with the frame. Not every frame has an
  // active face anchor.
  optional ARFaceAnchor face_anchor = 6;

  // Plane anchors associated with the frame. Not every frame has a plane
  // anchor. Plane anchors and face anchors are mutually exclusive.
  repeated ARPlaneAnchor plane_anchor = 7;

  // Current intermediate results of the scene analysis used to perform world
  // tracking.
  // See
  // https://developer.apple.com/documentation/arkit/arframe/2887449-rawfeaturepoints
  // for more information.
  optional ARPointCloud raw_feature_points = 8;
}