// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

// Info about the camera characteristics used to capture images and depth data.
// See developer.apple.com/documentation/avfoundation/avcameracalibrationdata
// for more information.
message AVCameraCalibrationData {
  // 3x3 row-major matrix relating a camera's internal properties to an ideal
  // pinhole-camera model.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881135-intrinsicmatrix
  // for detailed usage information.
  repeated float intrinsic_matrix = 1 [packed = true];

  // The image dimensions to which the intrinsic_matrix values are relative.
  optional float intrinsic_matrix_reference_dimension_width = 2;
  optional float intrinsic_matrix_reference_dimension_height = 3;

  // 3x4 row-major matrix relating a camera's position and orientation to a
  // world or scene coordinate system. Consists of a unitless 3x3 rotation
  // matrix (R) on the left and a translation (t) 3x1 vector on the right. The
  // translation vector's units are millimeters. For example:
  //
  //             |r1,1 r2,1 r3,1 | t1|
  //  [R | t] =  |r1,2 r2,2 r3,2 | t2|
  //             |r1,3 r2,3 r3,3 | t3|
  //
  //  is stored as [r11, r21, r31, t1, r12, r22, r32, t2, ...]
  //
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881130-extrinsicmatrix?language=objc
  // for more information.
  repeated float extrinsic_matrix = 4 [packed = true];

  // The size, in millimeters, of one image pixel.
  optional float pixel_size = 5;

  // A list of floating-point values describing radial distortions imparted by
  // the camera lens, for use in rectifying camera images.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881129-lensdistortionlookuptable?language=objc
  // for more information.
  repeated float lens_distortion_lookup_values = 6 [packed = true];

  // A list of floating-point values describing radial distortions for use in
  // reapplying camera geometry to a rectified image.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881132-inverselensdistortionlookuptable?language=objc
  // for more information.
  repeated float inverse_lens_distortion_lookup_values = 7 [packed = true];

  // The offset of the distortion center of the camera lens from the top-left
  // corner of the image.
  // See
  // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881131-lensdistortioncenter?language=objc
  // for more information.
  optional float lens_distortion_center_x = 8;
  optional float lens_distortion_center_y = 9;
}

// Container for depth data information.
// See developer.apple.com/documentation/avfoundation/avdepthdata for more info.
message AVDepthData {
  // PNG representation of the grayscale depth data map. See discussion about
  // depth_data_map_original_minimum_value, below, for information about how
  // to interpret the pixel values.
  optional bytes depth_data_map = 1;

  // Pixel format type of the original captured depth data.
  // See
  // developer.apple.com/documentation/corevideo/1563591-pixel_format_identifiers?language=objc
  // for the complete list of possible pixel format types. This value represents
  // a string for the associated OSType/FourCharCode.
  optional string depth_data_type = 2;

  // Indicates the general accuracy of the depth_data_map.
  // See developer.apple.com/documentation/avfoundation/avdepthdataaccuracy for
  // more information.
  enum Accuracy {
    UNDEFINED_ACCURACY = 0;
    // Values in the depth map are usable for foreground/background separation
    // but are not absolutely accurate in the physical world.
    RELATIVE = 1;
    // Values in the depth map are absolutely accurate in the physical world.
    ABSOLUTE = 2;
  }
  optional Accuracy depth_data_accuracy = 3 [default = RELATIVE];

  // Indicates whether the depth_data_map contains temporally smoothed data.
  optional bool depth_data_filtered = 4;

  // Quality of the depth_data_map.
  enum Quality {
    UNDEFINED_QUALITY = 0;
    HIGH = 1;
    LOW = 2;
  }
  optional Quality depth_data_quality = 5;

  // Associated calibration data for the depth_data_map.
  optional AVCameraCalibrationData camera_calibration_data = 6;

  // The original range of values expressed by the depth_data_map, before
  // grayscale normalization. For example, if the minimum and maximum values
  // indicate a range of [0.5, 2.2], and the depth_data_type value indicates
  // it was a depth map, then white pixels (255, 255, 255) will map to 0.5 and
  // black pixels (0, 0, 0) will map to 2.2 with the grayscale range linearly
  // interpolated in between. Conversely, if the depth_data_type value indicates
  // it was a disparity map, then white pixels will map to 2.2 and black pixels
  // will map to 0.5.
  optional float depth_data_map_original_minimum_value = 7;
  optional float depth_data_map_original_maximum_value = 8;

  // The width of the depth buffer map.
  optional int32 depth_data_map_width = 9;
  // The height of the depth buffer map.
  optional int32 depth_data_map_height = 10;

  // The row-major flattened array of the depth buffer map pixels. This will be
  // either a float32 or float16 byte array, depending on 'depth_data_type'.
  optional bytes depth_data_map_raw_values = 11;
}

// Estimated scene lighting information associated with a captured video frame.
// See developer.apple.com/documentation/arkit/arlightestimate for more info.
message ARLightEstimate {
  // The estimated intensity, in lumens, of ambient light throughout the scene.
  optional double ambient_intensity = 1;

  // The estimated color temperature, in degrees Kelvin, of ambient light
  // throughout the scene.
  optional double ambient_color_temperature = 2;

  // Data describing the estimated lighting environment in all directions.
  // Second-level spherical harmonics in separate red, green, and blue data
  // planes. Thus, this buffer contains 3 sets of 9 coefficients, or a total of
  // 27 values.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928222-sphericalharmonicscoefficients?language=objc
  // for more information.
  repeated float spherical_harmonics_coefficients = 3 [packed = true];

  // A unit-length 3D direction vector in world-coordinate space.
  message DirectionVector {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }
  // A vector indicating the orientation of the strongest directional light
  // source, normalized in the world-coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928221-primarylightdirection?language=objc
  // for more information.
  optional DirectionVector primary_light_direction = 4;

  // The estimated intensity, in lumens, of the strongest directional light
  // source in the scene.
  // See
  // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928219-primarylightintensity?language=objc
  // for more information.
  optional float primary_light_intensity = 5;
}

// Information about the camera position and imaging characteristics for a
// captured video frame.
// See developer.apple.com/documentation/arkit/arcamera for more information.
message ARCamera {
  // The general quality of position tracking available when the camera captured
  // a frame.
  enum TrackingState {
    UNDEFINED_TRACKING_STATE = 0;
    // Camera position tracking is not available.
    UNAVAILABLE = 1;
    // Tracking is available, but the quality of results is questionable.
    LIMITED = 2;
    // Camera position tracking is providing optimal results.
    NORMAL = 3;
  }
  optional TrackingState tracking_state = 1 [default = UNAVAILABLE];

  // A possible diagnosis for limited position tracking quality as of when the
  // frame was captured.
  enum TrackingStateReason {
    UNDEFINED_TRACKING_STATE_REASON = 0;
    // The current tracking state is not limited.
    NONE = 1;
    // Not yet enough camera or motion data to provide tracking information.
    INITIALIZING = 2;
    // The device is moving too fast for accurate image-based position tracking.
    EXCESSIVE_MOTION = 3;
    // Not enough distinguishable features for image-based position tracking.
    INSUFFICIENT_FEATURES = 4;
    // Tracking is limited due to a relocalization in progress.
    RELOCALIZING = 5;
  }
  optional TrackingStateReason tracking_state_reason = 2 [default = NONE];

  // 4x4 row-major matrix expressing position and orientation of the camera in
  // world coordinate space.
  // See developer.apple.com/documentation/arkit/arcamera/2866108-transform for
  // more information.
  repeated float transform = 3 [packed = true];

  // The orientation of the camera, expressed as roll, pitch, and yaw values.
  message EulerAngles {
    optional float roll = 1;
    optional float pitch = 2;
    optional float yaw = 3;
  }
  optional EulerAngles euler_angles = 4;

  // The width and height, in pixels, of the captured camera image.
  optional int32 image_resolution_width = 5;
  optional int32 image_resolution_height = 6;

  // 3x3 row-major matrix that converts between the 2D camera plane and 3D world
  // coordinate space.
  // See developer.apple.com/documentation/arkit/arcamera/2875730-intrinsics for
  // usage information.
  repeated float intrinsics = 7 [packed = true];

  // 4x4 row-major transform matrix appropriate for rendering 3D content to
  // match the image captured by the camera.
  // See
  // developer.apple.com/documentation/arkit/arcamera/2887458-projectionmatrix
  // for usage information.
  repeated float projection_matrix = 8 [packed = true];

  // 4x4 row-major transform matrix appropriate for converting from world-space
  // to camera space. Relativized for the captured_image orientation (i.e.
  // UILandscapeOrientationRight).
  // See
  // https://developer.apple.com/documentation/arkit/arcamera/2921672-viewmatrixfororientation?language=objc
  // for more information.
  repeated float view_matrix = 9 [packed = true];
}

// Container for a 3D mesh describing face topology.
message ARFaceGeometry {
  // Each vertex represents a 3D point in the face mesh, in the face coordinate
  // space.
  // See developer.apple.com/documentation/arkit/arfacegeometry/2928201-vertices
  // for more information.
  message Vertex {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }
  repeated Vertex vertices = 1;

  // The number of elements in the vertices list.
  optional int32 vertex_count = 2;

  // Each texture coordinate represents UV texture coordinates for the vertex at
  // the corresponding index in the vertices buffer.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
  // for more information.
  message TextureCoordinate {
    optional float u = 1;
    optional float v = 2;
  }
  repeated TextureCoordinate texture_coordinates = 3;

  // The number of elements in the texture_coordinates list.
  optional int32 texture_coordinate_count = 4;

  // Each integer value in this ordered list represents an index into the
  // vertices and texture_coordinates lists. Each set of three indices
  // identifies the vertices comprising a single triangle in the mesh. Each set
  // of three indices forms a triangle, so the number of indices in the
  // triangle_indices buffer is three times the triangle_count value.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928199-triangleindices
  // for more information.
  repeated int32 triangle_indices = 5 [packed = true];

  // The number of triangles described by the triangle_indices buffer.
  // See
  // developer.apple.com/documentation/arkit/arfacegeometry/2928207-trianglecount
  // for more information.
  optional int32 triangle_count = 6;
}

// Contains a list of blend shape entries wherein each item maps a specific
// blend shape location to its associated coefficient.
message ARBlendShapeMap {
  message MapEntry {
    // Identifier for the specific facial feature.
    // See developer.apple.com/documentation/arkit/arblendshapelocation for a
    // complete list of identifiers.
    optional string blend_shape_location = 1;

    // Indicates the current position of the feature relative to its neutral
    // configuration, ranging from 0.0 (neutral) to 1.0 (maximum movement).
    optional float blend_shape_coefficient = 2;
  }
  repeated MapEntry entries = 1;
}

// Information about the pose, topology, and expression of a detected face.
// See developer.apple.com/documentation/arkit/arfaceanchor for more info.
message ARFaceAnchor {
  // A coarse triangle mesh representing the topology of the detected face.
  optional ARFaceGeometry geometry = 1;

  // A map of named coefficients representing the detected facial expression in
  // terms of the movement of specific facial features.
  optional ARBlendShapeMap blend_shapes = 2;

  // 4x4 row-major matrix encoding the position, orientation, and scale of the
  // anchor relative to the world coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform?language=objc
  // for more information.
  repeated float transform = 3;

  // Indicates whether the anchor's transform is valid. Frames that have a face
  // anchor with this value set to NO should probably be ignored.
  optional bool is_tracked = 4;
}

// Container for a 3D mesh.
message ARPlaneGeometry {
  message Vertex {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // Each texture coordinate represents UV texture coordinates for the vertex at
  // the corresponding index in the vertices buffer.
  // See
  // https://developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
  // for more information.
  message TextureCoordinate {
    optional float u = 1;
    optional float v = 2;
  }

  // A buffer of vertex positions for each point in the plane mesh.
  repeated Vertex vertices = 1;

  // The number of elements in the vertices buffer.
  optional int32 vertex_count = 2;

  // A buffer of texture coordinate values for each point in the plane mesh.
  repeated TextureCoordinate texture_coordinates = 3;

  // The number of elements in the texture_coordinates buffer.
  optional int32 texture_coordinate_count = 4;

  // Each integer value in this ordered list represents an index into the
  // vertices and texture_coordinates lists. Each set of three indices
  // identifies the vertices comprising a single triangle in the mesh. Each set
  // of three indices forms a triangle, so the number of indices in the
  // triangle_indices buffer is three times the triangle_count value.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941051-triangleindices
  // for more information.
  repeated int32 triangle_indices = 5 [packed = true];

  // Each set of three indices forms a triangle, so the number of indices in the
  // triangle_indices buffer is three times the triangle_count value.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941058-trianglecount
  // for more information.
  optional int32 triangle_count = 6;

  // Each value in this buffer represents the position of a vertex along the
  // boundary polygon of the estimated plane. The owning plane anchor's
  // transform matrix defines the coordinate system for these points.
  // See
  // https://developer.apple.com/documentation/arkit/arplanegeometry/2941052-boundaryvertices
  // for more information.
  repeated Vertex boundary_vertices = 7;

  // The number of elements in the boundary_vertices buffer.
  optional int32 boundary_vertex_count = 8;
}

// Information about the position and orientation of a real-world flat surface.
// See https://developer.apple.com/documentation/arkit/arplaneanchor for more
// information.
message ARPlaneAnchor {
  enum Alignment {
    UNDEFINED = 0;
    // The plane is perpendicular to gravity.
    HORIZONTAL = 1;
    // The plane is parallel to gravity.
    VERTICAL = 2;
  }

  // Wrapper for a 3D point / vector within the plane. See extent and center
  // values for more information.
  message PlaneVector {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  enum PlaneClassification {
    NONE = 0;
    WALL = 1;
    FLOOR = 2;
    CEILING = 3;
    TABLE = 4;
    SEAT = 5;
  }

  // The classification status for the plane.
  enum PlaneClassificationStatus {
    // The classification process for the plane anchor has completed but the
    // result is inconclusive.
    UNKNOWN = 0;
    // No classification information can be provided (set on error or if the
    // device does not support plane classification).
    UNAVAILABLE = 1;
    // The classification process has not completed.
    UNDETERMINED = 2;
    // The classification process for the plane anchor has completed.
    KNOWN = 3;
  }

  // The ID of the plane.
  optional string identifier = 1;

  // 4x4 row-major matrix encoding the position, orientation, and scale of the
  // anchor relative to the world coordinate space.
  // See
  // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform
  // for more information.
  repeated float transform = 2;

  // The general orientation of the detected plane with respect to gravity.
  optional Alignment alignment = 3;

  // A coarse triangle mesh representing the general shape of the detected
  // plane.
  optional ARPlaneGeometry geometry = 4;

  // The center point of the plane relative to its anchor position.
  // Although the type of this property is a 3D vector, a plane anchor is always
  // two-dimensional, and is always positioned in only the x and z directions
  // relative to its transform position. (That is, the y-component of this
  // vector is always zero.)
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2882056-center
  // for more information.
  optional PlaneVector center = 5;

  // The estimated width and length of the detected plane.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2882055-extent
  // for more information.
  optional PlaneVector extent = 6;

  // A Boolean value that indicates whether plane classification is available on
  // the current device. On devices without plane classification support, all
  // plane anchors report a classification value of NONE
  // and a classification_status value of UNAVAILABLE.
  optional bool classification_supported = 7;

  // A general characterization of what kind of real-world surface the plane
  // anchor represents.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2990936-classification
  // for more information.
  optional PlaneClassification classification = 8;

  // The current state of ARKit's process for classifying the plane anchor.
  // When this property's value is KNOWN, the classification property represents
  // ARKit's characterization of the real-world surface corresponding to the
  // plane anchor.
  // See
  // https://developer.apple.com/documentation/arkit/arplaneanchor/2990937-classificationstatus
  // for more information.
  optional PlaneClassificationStatus classification_status = 9;
}

// A collection of points in the world coordinate space.
// See https://developer.apple.com/documentation/arkit/arpointcloud for more
// information.
message ARPointCloud {
  message Point {
    optional float x = 1;
    optional float y = 2;
    optional float z = 3;
  }

  // The number of points in the cloud.
  optional int32 count = 1;

  // The list of detected points.
  repeated Point point = 2;

  // A list of unique identifiers corresponding to detected feature points.
  // Each identifier in this list corresponds to the point at the same index
  // in the points array.
  repeated int64 identifier = 3 [packed = true];
}

// Video image and face position tracking information.
// See developer.apple.com/documentation/arkit/arframe for more information.
message ARFrame {
  // The timestamp for the frame.
  optional double timestamp = 1;

  // The depth data associated with the frame. Not all frames have depth data.
  optional AVDepthData depth_data = 2;

  // The depth data object timestamp associated with the frame. May differ from
  // the frame timestamp value. Is only set when the frame has depth_data.
  optional double depth_data_timestamp = 3;

  // Camera information associated with the frame.
  optional ARCamera camera = 4;

  // Light information associated with the frame.
  optional ARLightEstimate light_estimate = 5;

  // Face anchor information associated with the frame. Not all frames have an
  // active face anchor.
  optional ARFaceAnchor face_anchor = 6;

  // Plane anchors associated with the frame. Not all frames have a plane
  // anchor. Plane anchors and face anchors are mutually exclusive.
  repeated ARPlaneAnchor plane_anchor = 7;

  // The current intermediate results of the scene analysis used to perform
  // world tracking.
  // See
  // https://developer.apple.com/documentation/arkit/arframe/2887449-rawfeaturepoints
  // for more information.
  optional ARPointCloud raw_feature_points = 8;
}