diff --git a/examples/hello.rs b/examples/hello.rs index 26254e0..5f48af1 100644 --- a/examples/hello.rs +++ b/examples/hello.rs @@ -62,6 +62,7 @@ mod examples { imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?; opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal + println!("processing"); let data = detector.process(&flip_frame); println!("received {} landmarks", data.len()); @@ -109,6 +110,7 @@ mod examples { imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?; opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal + println!("processing"); let data = detector.process(&flip_frame); println!("received {} landmarks", data.len()); @@ -156,6 +158,7 @@ mod examples { imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?; opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal + println!("processing"); let data = detector.process(&rgb_frame); println!("received {} landmarks", data.len()); @@ -179,6 +182,6 @@ mod examples { fn main() { // examples::pose_estimation().unwrap() - examples::hand_tracking().unwrap() - // examples::face_mesh().unwrap() + // examples::hand_tracking().unwrap() + examples::face_mesh().unwrap() } diff --git a/mediapipe/modules/README.md b/mediapipe/modules/README.md new file mode 100644 index 0000000..12ec103 --- /dev/null +++ b/mediapipe/modules/README.md @@ -0,0 +1,18 @@ +# Modules + +Each module (represented as a subfolder) provides subgraphs and corresponding resources (e.g. tflite models) to perform domain-specific tasks (e.g. detect faces, detect face landmarks). + +*Modules listed below are already used in some of `mediapipe/graphs` and more graphs are being migrated to use existing and upcoming modules.* + +| Module | Description | +| :--- | :--- | +| [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. | +| [`face_geometry`](face_geometry/README.md) | Subgraphs to extract face geometry. | +| [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. | +| [`hand_landmark`](hand_landmark/README.md) | Subgraphs to detect and track hand landmarks. | +| [`holistic_landmark`](holistic_landmark/README.md) | Subgraphs to detect and track holistic pose which consists of pose, face and hand landmarks. | +| [`iris_landmark`](iris_landmark/README.md) | Subgraphs to detect iris landmarks. | +| [`palm_detection`](palm_detection/README.md) | Subgraphs to detect palms/hands. | +| [`pose_detection`](pose_detection/README.md) | Subgraphs to detect poses. | +| [`pose_landmark`](pose_landmark/README.md) | Subgraphs to detect and track pose landmarks. | +| [`objectron`](objectron/README.md) | Subgraphs to detect and track 3D objects. | diff --git a/mediapipe/modules/face_detection/BUILD b/mediapipe/modules/face_detection/BUILD new file mode 100644 index 0000000..b1cddeb --- /dev/null +++ b/mediapipe/modules/face_detection/BUILD @@ -0,0 +1,150 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_by_roi_cpu", + graph = "face_detection_short_range_by_roi_cpu.pbtxt", + register_as = "FaceDetectionShortRangeByRoiCpu", + deps = [ + ":face_detection_short_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_by_roi_gpu", + graph = "face_detection_short_range_by_roi_gpu.pbtxt", + register_as = "FaceDetectionShortRangeByRoiGpu", + deps = [ + ":face_detection_short_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_cpu", + graph = "face_detection_short_range_cpu.pbtxt", + register_as = "FaceDetectionShortRangeCpu", + deps = [ + ":face_detection_short_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_gpu", + graph = "face_detection_short_range_gpu.pbtxt", + register_as = "FaceDetectionShortRangeGpu", + deps = [ + ":face_detection_short_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_common", + graph = "face_detection_short_range_common.pbtxt", + register_as = "FaceDetectionShortRangeCommon", + deps = [ + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_projection_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_full_range_cpu", + graph = "face_detection_full_range_cpu.pbtxt", + register_as = "FaceDetectionFullRangeCpu", + deps = [ + ":face_detection_full_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_full_range_gpu", + graph = "face_detection_full_range_gpu.pbtxt", + register_as = "FaceDetectionFullRangeGpu", + deps = [ + ":face_detection_full_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_full_range_common", + graph = "face_detection_full_range_common.pbtxt", + register_as = "FaceDetectionFullRangeCommon", + deps = [ + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + 
"//mediapipe/calculators/util:detection_projection_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_image", + graph = "face_detection_short_range_image.pbtxt", + register_as = "FaceDetectionShortRangeImage", + deps = [ + ":face_detection_short_range_common", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_full_range_image", + graph = "face_detection_full_range_image.pbtxt", + register_as = "FaceDetectionFullRangeImage", + deps = [ + ":face_detection_full_range_common", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + ], +) + +exports_files( + srcs = [ + "face_detection_full_range.tflite", + "face_detection_full_range_sparse.tflite", + "face_detection_short_range.tflite", + ], +) diff --git a/mediapipe/modules/face_detection/README.md b/mediapipe/modules/face_detection/README.md new file mode 100644 index 0000000..17cf27b --- /dev/null +++ b/mediapipe/modules/face_detection/README.md @@ -0,0 +1,8 @@ +# face_detection + +Subgraphs|Details +:--- | :--- +[`FaceDetectionFullRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (CPU input, and inference is executed on CPU.) +[`FaceDetectionFullRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (GPU input, and inference is executed on GPU.) +[`FaceDetectionShortRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (CPU input, and inference is executed on CPU.) +[`FaceDetectionShortRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (GPU input, and inference is executed on GPU.) diff --git a/mediapipe/modules/face_detection/face_detection_full_range.tflite b/mediapipe/modules/face_detection/face_detection_full_range.tflite new file mode 100755 index 0000000..98c5c16 Binary files /dev/null and b/mediapipe/modules/face_detection/face_detection_full_range.tflite differ diff --git a/mediapipe/modules/face_detection/face_detection_full_range_common.pbtxt b/mediapipe/modules/face_detection/face_detection_full_range_common.pbtxt new file mode 100644 index 0000000..937e8be --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_full_range_common.pbtxt @@ -0,0 +1,102 @@ +# MediaPipe graph performing common processing to detect faces using +# face_detection_full_range_sparse.tflite model, currently consisting of tensor +# post processing. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionFullRangeCommon" +# input_stream: "TENSORS:detection_tensors" +# input_stream: "MATRIX:transform_matrix" +# output_stream: "DETECTIONS:detections" +# } + +type: "FaceDetectionShortRangeCommon" + +# Detection tensors. 
(std::vector) +input_stream: "TENSORS:detection_tensors" + +# A 4x4 row-major-order matrix that maps a point represented in the detection +# tensors to a desired coordinate system, e.g., in the original input image +# before scaling/cropping. (std::array) +input_stream: "MATRIX:transform_matrix" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 1 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 192 + input_size_width: 192 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 4 + aspect_ratios: 1.0 + fixed_anchor_size: true + interpolated_scale_aspect_ratio: 0.0 + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 2304 + num_coords: 16 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 6 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 192.0 + y_scale: 192.0 + h_scale: 192.0 + w_scale: 192.0 + min_score_thresh: 0.6 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Projects the detections from input tensor to the corresponding locations on +# the original image (input to the graph). +node { + calculator: "DetectionProjectionCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "PROJECTION_MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt b/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt new file mode 100644 index 0000000..2350401 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt @@ -0,0 +1,80 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_full_range_sparse.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionFullRangeCpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionFullRangeCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Detected faces. 
(std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input CPU image (ImageFrame) to the multi-backend image type +# (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 192x192 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite" + delegate { + xnnpack {} + } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionFullRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt b/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt new file mode 100644 index 0000000..703b717 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt @@ -0,0 +1,80 @@ +# MediaPipe graph to detect faces. (GPU input, and inference is executed on +# GPU.) +# +# It is required that "face_detection_full_range_sparse.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionFullRangeGpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionFullRangeGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input GPU image (GpuBuffer) to the multi-backend image type +# (Image). 
+node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite" + # + delegate: { gpu { use_advanced_gpu_api: true } } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionFullRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_full_range_image.pbtxt b/mediapipe/modules/face_detection/face_detection_full_range_image.pbtxt new file mode 100644 index 0000000..4e0bc0b --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_full_range_image.pbtxt @@ -0,0 +1,86 @@ +# MediaPipe graph to detect faces. (GPU/CPU input, and inference is executed on +# GPU.) +# +# It is required that "face_detection_full_range_sparse.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite" +# path during execution. + +type: "FaceDetectionFullRangeImage" + +# Image. (Image) +input_stream: "IMAGE:image" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + input_stream: "FINISHED:detections" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_image" + options: { + [mediapipe.FlowLimiterCalculatorOptions.ext] { + max_in_flight: 1 + max_in_queue: 1 + } + } +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. 
+node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:throttled_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: CONVENTIONAL + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +# TODO: Use GraphOptions to modify the delegate field to be +# `delegate { xnnpack {} }` for the CPU only use cases. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite" + # + delegate: { gpu { use_advanced_gpu_api: true } } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionFullRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite b/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite new file mode 100755 index 0000000..9575d8c Binary files /dev/null and b/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite differ diff --git a/mediapipe/modules/face_detection/face_detection_short_range.tflite b/mediapipe/modules/face_detection/face_detection_short_range.tflite new file mode 100755 index 0000000..659bce8 Binary files /dev/null and b/mediapipe/modules/face_detection/face_detection_short_range.tflite differ diff --git a/mediapipe/modules/face_detection/face_detection_short_range_by_roi_cpu.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_cpu.pbtxt new file mode 100644 index 0000000..b3adfeb --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_cpu.pbtxt @@ -0,0 +1,83 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeByRoiCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionShortRangeByRoiCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# ROI (region of interest) within the given image where faces should be +# detected. (NormalizedRect) +input_stream: "ROI:roi" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input CPU image (ImageFrame) to the multi-backend image type +# (Image). 
+node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms specified region of image into 128x128 tensor keeping aspect ratio +# (padding tensor if needed). +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite" + delegate { xnnpack {} } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_by_roi_gpu.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_gpu.pbtxt new file mode 100644 index 0000000..1bd08e9 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_gpu.pbtxt @@ -0,0 +1,83 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeByRoiGpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionShortRangeByRoiGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# ROI (region of interest) within the given image where faces should be +# detected. (NormalizedRect) +input_stream: "ROI:roi" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input GPU image (GpuBuffer) to the multi-backend image type +# (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms specified region of image into 128x128 tensor keeping aspect ratio +# (padding tensor if needed). 
+node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite" + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_common.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_common.pbtxt new file mode 100644 index 0000000..4a6a54f --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_common.pbtxt @@ -0,0 +1,103 @@ +# MediaPipe graph performing common processing to detect faces, currently +# consisting of tensor post processing. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeCommon" +# input_stream: "TENSORS:detection_tensors" +# input_stream: "MATRIX:transform_matrix" +# output_stream: "DETECTIONS:detections" +# } + +type: "FaceDetectionShortRangeCommon" + +# Detection tensors. (std::vector) +input_stream: "TENSORS:detection_tensors" + +# A 4x4 row-major-order matrix that maps a point represented in the detection +# tensors to a desired coordinate system, e.g., in the original input image +# before scaling/cropping. (std::array) +input_stream: "MATRIX:transform_matrix" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 128 + input_size_width: 128 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. 
+node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 896 + num_coords: 16 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 6 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Projects the detections from input tensor to the corresponding locations on +# the original image (input to the graph). +node { + calculator: "DetectionProjectionCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "PROJECTION_MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt new file mode 100644 index 0000000..0db2420 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt @@ -0,0 +1,78 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeCpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionShortRangeCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input CPU image (ImageFrame) to the multi-backend image type +# (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. 
+node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite" + delegate { xnnpack {} } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt new file mode 100644 index 0000000..d30644b --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt @@ -0,0 +1,78 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeGpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionShortRangeGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input GPU image (GpuBuffer) to the multi-backend image type +# (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite" + } + } +} + +# Performs tensor post processing to generate face detections. 
+node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_image.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_image.pbtxt new file mode 100644 index 0000000..a259041 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_image.pbtxt @@ -0,0 +1,94 @@ +# MediaPipe graph to detect faces. (GPU/CPU input, and inference is executed on +# GPU.) +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeCpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionShortRangeCpu" + +# Image. (Image) +input_stream: "IMAGE:image" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + input_stream: "FINISHED:detections" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_image" + options: { + [mediapipe.FlowLimiterCalculatorOptions.ext] { + max_in_flight: 1 + max_in_queue: 1 + } + } +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:throttled_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: CONVENTIONAL + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +# TODO: Use GraphOptions to modify the delegate field to be +# `delegate { xnnpack {} }` for the CPU only use cases. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite" + + # + delegate: { gpu { use_advanced_gpu_api: true } } + } + } +} + +# Performs tensor post processing to generate face detections. 
+node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_geometry/BUILD b/mediapipe/modules/face_geometry/BUILD new file mode 100644 index 0000000..c1f9967 --- /dev/null +++ b/mediapipe/modules/face_geometry/BUILD @@ -0,0 +1,137 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") +load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "face_geometry", + graph = "face_geometry.pbtxt", + register_as = "FaceGeometry", + deps = [ + ":geometry_pipeline_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_geometry_from_detection", + graph = "face_geometry_from_detection.pbtxt", + register_as = "FaceGeometryFromDetection", + deps = [ + ":geometry_pipeline_calculator", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/util:detection_to_landmarks_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_geometry_from_landmarks", + graph = "face_geometry_from_landmarks.pbtxt", + register_as = "FaceGeometryFromLandmarks", + deps = [ + ":geometry_pipeline_calculator", + ], +) + +mediapipe_proto_library( + name = "effect_renderer_calculator_proto", + srcs = ["effect_renderer_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + ], +) + +cc_library( + name = "effect_renderer_calculator", + srcs = ["effect_renderer_calculator.cc"], + deps = [ + ":effect_renderer_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgcodecs", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gpu_buffer", + "//mediapipe/modules/face_geometry/libs:effect_renderer", + "//mediapipe/modules/face_geometry/libs:validation_utils", + "//mediapipe/modules/face_geometry/protos:environment_cc_proto", + "//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto", + "//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto", + "//mediapipe/util:resource_util", + "@com_google_absl//absl/types:optional", + ], + alwayslink = 1, +) + +mediapipe_proto_library( + name = "env_generator_calculator_proto", + srcs = ["env_generator_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/modules/face_geometry/protos:environment_proto", + ], +) + +cc_library( + name = 
"env_generator_calculator", + srcs = ["env_generator_calculator.cc"], + deps = [ + ":env_generator_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:status", + "//mediapipe/modules/face_geometry/libs:validation_utils", + "//mediapipe/modules/face_geometry/protos:environment_cc_proto", + ], + alwayslink = 1, +) + +mediapipe_proto_library( + name = "geometry_pipeline_calculator_proto", + srcs = ["geometry_pipeline_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + ], +) + +cc_library( + name = "geometry_pipeline_calculator", + srcs = ["geometry_pipeline_calculator.cc"], + deps = [ + ":geometry_pipeline_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/modules/face_geometry/libs:geometry_pipeline", + "//mediapipe/modules/face_geometry/libs:validation_utils", + "//mediapipe/modules/face_geometry/protos:environment_cc_proto", + "//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto", + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto", + "//mediapipe/util:resource_util", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) diff --git a/mediapipe/modules/face_geometry/README.md b/mediapipe/modules/face_geometry/README.md new file mode 100644 index 0000000..8427ea6 --- /dev/null +++ b/mediapipe/modules/face_geometry/README.md @@ -0,0 +1,20 @@ +# face_geometry + +Protos|Details +:--- | :--- +[`face_geometry.Environment`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/environment.proto)| Describes an environment; includes the camera frame origin point location as well as virtual camera parameters. +[`face_geometry.GeometryPipelineMetadata`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.proto)| Describes metadata needed to estimate face geometry based on the face landmark module result. +[`face_geometry.FaceGeometry`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto)| Describes geometry data for a single face; includes a face mesh surface and a face pose in a given environment. +[`face_geometry.Mesh3d`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/mesh_3d.proto)| Describes a 3D mesh surface. + +Calculators|Details +:--- | :--- +[`FaceGeometryEnvGeneratorCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/env_generator_calculator.cc)| Generates an environment that describes a virtual scene. +[`FaceGeometryPipelineCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc)| Extracts face geometry for multiple faces from a vector of landmark lists. +[`FaceGeometryEffectRendererCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/effect_renderer_calculator.cc)| Renders a face effect. + +Subgraphs|Details +:--- | :--- +[`FaceGeometryFromDetection`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_detection.pbtxt)| Extracts geometry from face detection for multiple faces. 
+[`FaceGeometryFromLandmarks`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt)| Extracts geometry from face landmarks for multiple faces. +[`FaceGeometry`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry.pbtxt)| Extracts geometry from face landmarks for multiple faces. Deprecated, please use `FaceGeometryFromLandmarks` in the new code. diff --git a/mediapipe/modules/face_geometry/data/BUILD b/mediapipe/modules/face_geometry/data/BUILD new file mode 100644 index 0000000..1661a22 --- /dev/null +++ b/mediapipe/modules/face_geometry/data/BUILD @@ -0,0 +1,59 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +encode_binary_proto( + name = "geometry_pipeline_metadata_detection", + input = "geometry_pipeline_metadata_detection.pbtxt", + message_type = "mediapipe.face_geometry.GeometryPipelineMetadata", + output = "geometry_pipeline_metadata_detection.binarypb", + deps = [ + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto", + ], +) + +encode_binary_proto( + name = "geometry_pipeline_metadata_landmarks", + input = "geometry_pipeline_metadata_landmarks.pbtxt", + message_type = "mediapipe.face_geometry.GeometryPipelineMetadata", + output = "geometry_pipeline_metadata_landmarks.binarypb", + deps = [ + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto", + ], +) + +# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from +# the `geometry_pipeline_metadata_landmarks.pbtxt` definition. +encode_binary_proto( + name = "geometry_pipeline_metadata", + input = "geometry_pipeline_metadata_landmarks.pbtxt", + message_type = "mediapipe.face_geometry.GeometryPipelineMetadata", + output = "geometry_pipeline_metadata.binarypb", + deps = [ + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto", + ], +) + +# These canonical face model files are not meant to be used in runtime, but rather for asset +# creation and/or reference. 
+exports_files([ + "canonical_face_model.fbx", + "canonical_face_model.obj", + "canonical_face_model_uv_visualization.png", +]) diff --git a/mediapipe/modules/face_geometry/data/canonical_face_model.fbx b/mediapipe/modules/face_geometry/data/canonical_face_model.fbx new file mode 100644 index 0000000..8e9d24a Binary files /dev/null and b/mediapipe/modules/face_geometry/data/canonical_face_model.fbx differ diff --git a/mediapipe/modules/face_geometry/data/canonical_face_model.obj b/mediapipe/modules/face_geometry/data/canonical_face_model.obj new file mode 100644 index 0000000..0e666d1 --- /dev/null +++ b/mediapipe/modules/face_geometry/data/canonical_face_model.obj @@ -0,0 +1,1834 @@ +v 0.000000 -3.406404 5.979507 +v 0.000000 -1.126865 7.475604 +v 0.000000 -2.089024 6.058267 +v -0.463928 0.955357 6.633583 +v 0.000000 -0.463170 7.586580 +v 0.000000 0.365669 7.242870 +v 0.000000 2.473255 5.788627 +v -4.253081 2.577646 3.279702 +v 0.000000 4.019042 5.284764 +v 0.000000 4.885979 5.385258 +v 0.000000 8.261778 4.481535 +v 0.000000 -3.706811 5.864924 +v 0.000000 -3.918301 5.569430 +v 0.000000 -3.994436 5.219482 +v 0.000000 -4.542400 5.404754 +v 0.000000 -4.745577 5.529457 +v 0.000000 -5.019567 5.601448 +v 0.000000 -5.365123 5.535441 +v 0.000000 -6.149624 5.071372 +v 0.000000 -1.501095 7.112196 +v -0.416106 -1.466449 6.447657 +v -7.087960 5.434801 0.099620 +v -2.628639 2.035898 3.848121 +v -3.198363 1.985815 3.796952 +v -3.775151 2.039402 3.646194 +v -4.465819 2.422950 3.155168 +v -2.164289 2.189867 3.851822 +v -3.208229 3.223926 4.115822 +v -2.673803 3.205337 4.092203 +v -3.745193 3.165286 3.972409 +v -4.161018 3.059069 3.719554 +v -5.062006 1.934418 2.776093 +v -2.266659 -7.425768 4.389812 +v -4.445859 2.663991 3.173422 +v -7.214530 2.263009 0.073150 +v -5.799793 2.349546 2.204059 +v -2.844939 -0.720868 4.433130 +v -0.711452 -3.329355 5.877044 +v -0.606033 -3.924562 5.444923 +v -1.431615 -3.500953 5.496189 +v -1.914910 -3.803146 5.028930 +v -1.131043 -3.973937 5.189648 +v -1.563548 -4.082763 4.842263 +v -2.650112 -5.003649 4.188483 +v -0.427049 -1.094134 7.360529 +v -0.496396 -0.475659 7.440358 +v -5.253307 3.881582 3.363159 +v -1.718698 0.974609 4.558359 +v -1.608635 -0.942516 5.814193 +v -1.651267 -0.610868 5.581319 +v -4.765501 -0.701554 3.534632 +v -0.478306 0.295766 7.101013 +v -3.734964 4.508230 4.550454 +v -4.588603 4.302037 4.048484 +v -6.279331 6.615427 1.425850 +v -1.220941 4.142165 5.106035 +v -2.193489 3.100317 4.000575 +v -3.102642 -4.352984 4.095905 +v -6.719682 -4.788645 -1.745401 +v -1.193824 -1.306795 5.737747 +v -0.729766 -1.593712 5.833208 +v -2.456206 -4.342621 4.283884 +v -2.204823 -4.304508 4.162499 +v -4.985894 4.802461 3.751977 +v -1.592294 -1.257709 5.456949 +v -2.644548 4.524654 4.921559 +v -2.760292 5.100971 5.015990 +v -3.523964 8.005976 3.729163 +v -5.599763 5.715470 2.724259 +v -3.063932 6.566144 4.529981 +v -5.720968 4.254584 2.830852 +v -6.374393 4.785590 1.591691 +v -0.672728 -3.688016 5.737804 +v -1.262560 -3.787691 5.417779 +v -1.732553 -3.952767 5.000579 +v -1.043625 -1.464973 5.662455 +v -2.321234 -4.329069 4.258156 +v -2.056846 -4.477671 4.520883 +v -2.153084 -4.276322 4.038093 +v -0.946874 -1.035249 6.512274 +v -1.469132 -4.036351 4.604908 +v -1.024340 -3.989851 4.926693 +v -0.533422 -3.993222 5.138202 +v -0.769720 -6.095394 4.985883 +v -0.699606 -5.291850 5.448304 +v -0.669687 -4.949770 5.509612 +v -0.630947 -4.695101 5.449371 +v -0.583218 -4.517982 5.339869 +v -1.537170 -4.423206 4.745470 +v -1.615600 -4.475942 4.813632 +v -1.729053 -4.618680 4.854463 +v 
-1.838624 -4.828746 4.823737 +v -2.368250 -3.106237 4.868096 +v -7.542244 -1.049282 -2.431321 +v 0.000000 -1.724003 6.601390 +v -1.826614 -4.399531 4.399021 +v -1.929558 -4.411831 4.497052 +v -0.597442 -2.013686 5.866456 +v -1.405627 -1.714196 5.241087 +v -0.662449 -1.819321 5.863759 +v -2.342340 0.572222 4.294303 +v -3.327324 0.104863 4.113860 +v -1.726175 -0.919165 5.273355 +v -5.133204 7.485602 2.660442 +v -4.538641 6.319907 3.683424 +v -3.986562 5.109487 4.466315 +v -2.169681 -5.440433 4.455874 +v -1.395634 5.011963 5.316032 +v -1.619500 6.599217 4.921106 +v -1.891399 8.236377 4.274997 +v -4.195832 2.235205 3.375099 +v -5.733342 1.411738 2.431726 +v -1.859887 2.355757 3.843181 +v -4.988612 3.074654 3.083858 +v -1.303263 1.416453 4.831091 +v -1.305757 -0.672779 6.415959 +v -6.465170 0.937119 1.689873 +v -5.258659 0.945811 2.974312 +v -4.432338 0.722096 3.522615 +v -3.300681 0.861641 3.872784 +v -2.430178 1.131492 4.039035 +v -1.820731 1.467954 4.224124 +v -0.563221 2.307693 5.566789 +v -6.338145 -0.529279 1.881175 +v -5.587698 3.208071 2.687839 +v -0.242624 -1.462857 7.071491 +v -1.611251 0.339326 4.895421 +v -7.743095 2.364999 -2.005167 +v -1.391142 1.851048 4.448999 +v -1.785794 -0.978284 4.850470 +v -4.670959 2.664461 3.084075 +v -1.333970 -0.283761 6.097047 +v -7.270895 -2.890917 -2.252455 +v -1.856432 2.585245 3.757904 +v -0.923388 0.073076 6.671944 +v -5.000589 -6.135128 1.892523 +v -5.085276 -7.178590 0.714711 +v -7.159291 -0.811820 -0.072044 +v -5.843051 -5.248023 0.924091 +v -6.847258 3.662916 0.724695 +v -2.412942 -8.258853 4.119213 +v -0.179909 -1.689864 6.573301 +v -2.103655 -0.163946 4.566119 +v -6.407571 2.236021 1.560843 +v -3.670075 2.360153 3.635230 +v -3.177186 2.294265 3.775704 +v -2.196121 -4.598322 4.479786 +v -6.234883 -1.944430 1.663542 +v -1.292924 -9.295920 4.094063 +v -3.210651 -8.533278 2.802001 +v -4.068926 -7.993109 1.925119 +v 0.000000 6.545390 5.027311 +v 0.000000 -9.403378 4.264492 +v -2.724032 2.315802 3.777151 +v -2.288460 2.398891 3.697603 +v -1.998311 2.496547 3.689148 +v -6.130040 3.399261 2.038516 +v -2.288460 2.886504 3.775031 +v -2.724032 2.961810 3.871767 +v -3.177186 2.964136 3.876973 +v -3.670075 2.927714 3.724325 +v -4.018389 2.857357 3.482983 +v -7.555811 4.106811 -0.991917 +v -4.018389 2.483695 3.440898 +v 0.000000 -2.521945 5.932265 +v -1.776217 -2.683946 5.213116 +v -1.222237 -1.182444 5.952465 +v -0.731493 -2.536683 5.815343 +v 0.000000 3.271027 5.236015 +v -4.135272 -6.996638 2.671970 +v -3.311811 -7.660815 3.382963 +v -1.313701 -8.639995 4.702456 +v -5.940524 -6.223629 -0.631468 +v -1.998311 2.743838 3.744030 +v -0.901447 1.236992 5.754256 +v 0.000000 -8.765243 4.891441 +v -2.308977 -8.974196 3.609070 +v -6.954154 -2.439843 -0.131163 +v -1.098819 -4.458788 5.120727 +v -1.181124 -4.579996 5.189564 +v -1.255818 -4.787901 5.237051 +v -1.325085 -5.106507 5.205010 +v -1.546388 -5.819392 4.757893 +v -1.953754 -4.183892 4.431713 +v -2.117802 -4.137093 4.555096 +v -2.285339 -4.051196 4.582438 +v -2.850160 -3.665720 4.484994 +v -5.278538 -2.238942 2.861224 +v -0.946709 1.907628 5.196779 +v -1.314173 3.104912 4.231404 +v -1.780000 2.860000 3.881555 +v -1.845110 -4.098880 4.247264 +v -5.436187 -4.030482 2.109852 +v -0.766444 3.182131 4.861453 +v -1.938616 -6.614410 4.521085 +v 0.000000 1.059413 6.774605 +v -0.516573 1.583572 6.148363 +v 0.000000 1.728369 6.316750 +v -1.246815 0.230297 5.681036 +v 0.000000 -7.942194 5.181173 +v 0.000000 -6.991499 5.153478 +v -0.997827 -6.930921 4.979576 +v -3.288807 -5.382514 3.795752 +v -2.311631 -1.566237 
4.590085 +v -2.680250 -6.111567 4.096152 +v -3.832928 -1.537326 4.137731 +v -2.961860 -2.274215 4.440943 +v -4.386901 -2.683286 3.643886 +v -1.217295 -7.834465 4.969286 +v -1.542374 -0.136843 5.201008 +v -3.878377 -6.041764 3.311079 +v -3.084037 -6.809842 3.814195 +v -3.747321 -4.503545 3.726453 +v -6.094129 -3.205991 1.473482 +v -4.588995 -4.728726 2.983221 +v -6.583231 -3.941269 0.070268 +v -3.492580 -3.195820 4.130198 +v -1.255543 0.802341 5.307551 +v -1.126122 -0.933602 6.538785 +v -1.443109 -1.142774 5.905127 +v -0.923043 -0.529042 7.003423 +v -1.755386 3.529117 4.327696 +v -2.632589 3.713828 4.364629 +v -3.388062 3.721976 4.309028 +v -4.075766 3.675413 4.076063 +v -4.622910 3.474691 3.646321 +v -5.171755 2.535753 2.670867 +v -7.297331 0.763172 -0.048769 +v -4.706828 1.651000 3.109532 +v -4.071712 1.476821 3.476944 +v -3.269817 1.470659 3.731945 +v -2.527572 1.617311 3.865444 +v -1.970894 1.858505 3.961782 +v -1.579543 2.097941 4.084996 +v -7.664182 0.673132 -2.435867 +v -1.397041 -1.340139 5.630378 +v -0.884838 0.658740 6.233232 +v -0.767097 -0.968035 7.077932 +v -0.460213 -1.334106 6.787447 +v -0.748618 -1.067994 6.798303 +v -1.236408 -1.585568 5.480490 +v -0.387306 -1.409990 6.957705 +v -0.319925 -1.607931 6.508676 +v -1.639633 2.556298 3.863736 +v -1.255645 2.467144 4.203800 +v -1.031362 2.382663 4.615849 +v -4.253081 2.772296 3.315305 +v -4.530000 2.910000 3.339685 +v 0.463928 0.955357 6.633583 +v 4.253081 2.577646 3.279702 +v 0.416106 -1.466449 6.447657 +v 7.087960 5.434801 0.099620 +v 2.628639 2.035898 3.848121 +v 3.198363 1.985815 3.796952 +v 3.775151 2.039402 3.646194 +v 4.465819 2.422950 3.155168 +v 2.164289 2.189867 3.851822 +v 3.208229 3.223926 4.115822 +v 2.673803 3.205337 4.092203 +v 3.745193 3.165286 3.972409 +v 4.161018 3.059069 3.719554 +v 5.062006 1.934418 2.776093 +v 2.266659 -7.425768 4.389812 +v 4.445859 2.663991 3.173422 +v 7.214530 2.263009 0.073150 +v 5.799793 2.349546 2.204059 +v 2.844939 -0.720868 4.433130 +v 0.711452 -3.329355 5.877044 +v 0.606033 -3.924562 5.444923 +v 1.431615 -3.500953 5.496189 +v 1.914910 -3.803146 5.028930 +v 1.131043 -3.973937 5.189648 +v 1.563548 -4.082763 4.842263 +v 2.650112 -5.003649 4.188483 +v 0.427049 -1.094134 7.360529 +v 0.496396 -0.475659 7.440358 +v 5.253307 3.881582 3.363159 +v 1.718698 0.974609 4.558359 +v 1.608635 -0.942516 5.814193 +v 1.651267 -0.610868 5.581319 +v 4.765501 -0.701554 3.534632 +v 0.478306 0.295766 7.101013 +v 3.734964 4.508230 4.550454 +v 4.588603 4.302037 4.048484 +v 6.279331 6.615427 1.425850 +v 1.220941 4.142165 5.106035 +v 2.193489 3.100317 4.000575 +v 3.102642 -4.352984 4.095905 +v 6.719682 -4.788645 -1.745401 +v 1.193824 -1.306795 5.737747 +v 0.729766 -1.593712 5.833208 +v 2.456206 -4.342621 4.283884 +v 2.204823 -4.304508 4.162499 +v 4.985894 4.802461 3.751977 +v 1.592294 -1.257709 5.456949 +v 2.644548 4.524654 4.921559 +v 2.760292 5.100971 5.015990 +v 3.523964 8.005976 3.729163 +v 5.599763 5.715470 2.724259 +v 3.063932 6.566144 4.529981 +v 5.720968 4.254584 2.830852 +v 6.374393 4.785590 1.591691 +v 0.672728 -3.688016 5.737804 +v 1.262560 -3.787691 5.417779 +v 1.732553 -3.952767 5.000579 +v 1.043625 -1.464973 5.662455 +v 2.321234 -4.329069 4.258156 +v 2.056846 -4.477671 4.520883 +v 2.153084 -4.276322 4.038093 +v 0.946874 -1.035249 6.512274 +v 1.469132 -4.036351 4.604908 +v 1.024340 -3.989851 4.926693 +v 0.533422 -3.993222 5.138202 +v 0.769720 -6.095394 4.985883 +v 0.699606 -5.291850 5.448304 +v 0.669687 -4.949770 5.509612 +v 0.630947 -4.695101 5.449371 +v 0.583218 -4.517982 5.339869 +v 1.537170 
-4.423206 4.745470 +v 1.615600 -4.475942 4.813632 +v 1.729053 -4.618680 4.854463 +v 1.838624 -4.828746 4.823737 +v 2.368250 -3.106237 4.868096 +v 7.542244 -1.049282 -2.431321 +v 1.826614 -4.399531 4.399021 +v 1.929558 -4.411831 4.497052 +v 0.597442 -2.013686 5.866456 +v 1.405627 -1.714196 5.241087 +v 0.662449 -1.819321 5.863759 +v 2.342340 0.572222 4.294303 +v 3.327324 0.104863 4.113860 +v 1.726175 -0.919165 5.273355 +v 5.133204 7.485602 2.660442 +v 4.538641 6.319907 3.683424 +v 3.986562 5.109487 4.466315 +v 2.169681 -5.440433 4.455874 +v 1.395634 5.011963 5.316032 +v 1.619500 6.599217 4.921106 +v 1.891399 8.236377 4.274997 +v 4.195832 2.235205 3.375099 +v 5.733342 1.411738 2.431726 +v 1.859887 2.355757 3.843181 +v 4.988612 3.074654 3.083858 +v 1.303263 1.416453 4.831091 +v 1.305757 -0.672779 6.415959 +v 6.465170 0.937119 1.689873 +v 5.258659 0.945811 2.974312 +v 4.432338 0.722096 3.522615 +v 3.300681 0.861641 3.872784 +v 2.430178 1.131492 4.039035 +v 1.820731 1.467954 4.224124 +v 0.563221 2.307693 5.566789 +v 6.338145 -0.529279 1.881175 +v 5.587698 3.208071 2.687839 +v 0.242624 -1.462857 7.071491 +v 1.611251 0.339326 4.895421 +v 7.743095 2.364999 -2.005167 +v 1.391142 1.851048 4.448999 +v 1.785794 -0.978284 4.850470 +v 4.670959 2.664461 3.084075 +v 1.333970 -0.283761 6.097047 +v 7.270895 -2.890917 -2.252455 +v 1.856432 2.585245 3.757904 +v 0.923388 0.073076 6.671944 +v 5.000589 -6.135128 1.892523 +v 5.085276 -7.178590 0.714711 +v 7.159291 -0.811820 -0.072044 +v 5.843051 -5.248023 0.924091 +v 6.847258 3.662916 0.724695 +v 2.412942 -8.258853 4.119213 +v 0.179909 -1.689864 6.573301 +v 2.103655 -0.163946 4.566119 +v 6.407571 2.236021 1.560843 +v 3.670075 2.360153 3.635230 +v 3.177186 2.294265 3.775704 +v 2.196121 -4.598322 4.479786 +v 6.234883 -1.944430 1.663542 +v 1.292924 -9.295920 4.094063 +v 3.210651 -8.533278 2.802001 +v 4.068926 -7.993109 1.925119 +v 2.724032 2.315802 3.777151 +v 2.288460 2.398891 3.697603 +v 1.998311 2.496547 3.689148 +v 6.130040 3.399261 2.038516 +v 2.288460 2.886504 3.775031 +v 2.724032 2.961810 3.871767 +v 3.177186 2.964136 3.876973 +v 3.670075 2.927714 3.724325 +v 4.018389 2.857357 3.482983 +v 7.555811 4.106811 -0.991917 +v 4.018389 2.483695 3.440898 +v 1.776217 -2.683946 5.213116 +v 1.222237 -1.182444 5.952465 +v 0.731493 -2.536683 5.815343 +v 4.135272 -6.996638 2.671970 +v 3.311811 -7.660815 3.382963 +v 1.313701 -8.639995 4.702456 +v 5.940524 -6.223629 -0.631468 +v 1.998311 2.743838 3.744030 +v 0.901447 1.236992 5.754256 +v 2.308977 -8.974196 3.609070 +v 6.954154 -2.439843 -0.131163 +v 1.098819 -4.458788 5.120727 +v 1.181124 -4.579996 5.189564 +v 1.255818 -4.787901 5.237051 +v 1.325085 -5.106507 5.205010 +v 1.546388 -5.819392 4.757893 +v 1.953754 -4.183892 4.431713 +v 2.117802 -4.137093 4.555096 +v 2.285339 -4.051196 4.582438 +v 2.850160 -3.665720 4.484994 +v 5.278538 -2.238942 2.861224 +v 0.946709 1.907628 5.196779 +v 1.314173 3.104912 4.231404 +v 1.780000 2.860000 3.881555 +v 1.845110 -4.098880 4.247264 +v 5.436187 -4.030482 2.109852 +v 0.766444 3.182131 4.861453 +v 1.938616 -6.614410 4.521085 +v 0.516573 1.583572 6.148363 +v 1.246815 0.230297 5.681036 +v 0.997827 -6.930921 4.979576 +v 3.288807 -5.382514 3.795752 +v 2.311631 -1.566237 4.590085 +v 2.680250 -6.111567 4.096152 +v 3.832928 -1.537326 4.137731 +v 2.961860 -2.274215 4.440943 +v 4.386901 -2.683286 3.643886 +v 1.217295 -7.834465 4.969286 +v 1.542374 -0.136843 5.201008 +v 3.878377 -6.041764 3.311079 +v 3.084037 -6.809842 3.814195 +v 3.747321 -4.503545 3.726453 +v 6.094129 -3.205991 1.473482 +v 4.588995 
-4.728726 2.983221 +v 6.583231 -3.941269 0.070268 +v 3.492580 -3.195820 4.130198 +v 1.255543 0.802341 5.307551 +v 1.126122 -0.933602 6.538785 +v 1.443109 -1.142774 5.905127 +v 0.923043 -0.529042 7.003423 +v 1.755386 3.529117 4.327696 +v 2.632589 3.713828 4.364629 +v 3.388062 3.721976 4.309028 +v 4.075766 3.675413 4.076063 +v 4.622910 3.474691 3.646321 +v 5.171755 2.535753 2.670867 +v 7.297331 0.763172 -0.048769 +v 4.706828 1.651000 3.109532 +v 4.071712 1.476821 3.476944 +v 3.269817 1.470659 3.731945 +v 2.527572 1.617311 3.865444 +v 1.970894 1.858505 3.961782 +v 1.579543 2.097941 4.084996 +v 7.664182 0.673132 -2.435867 +v 1.397041 -1.340139 5.630378 +v 0.884838 0.658740 6.233232 +v 0.767097 -0.968035 7.077932 +v 0.460213 -1.334106 6.787447 +v 0.748618 -1.067994 6.798303 +v 1.236408 -1.585568 5.480490 +v 0.387306 -1.409990 6.957705 +v 0.319925 -1.607931 6.508676 +v 1.639633 2.556298 3.863736 +v 1.255645 2.467144 4.203800 +v 1.031362 2.382663 4.615849 +v 4.253081 2.772296 3.315305 +v 4.530000 2.910000 3.339685 +vt 0.427942 0.304722 +vt 0.526878 0.295374 +vt 0.444832 0.269206 +vt 0.607600 0.322297 +vt 0.377046 0.677222 +vt 0.473033 0.304722 +vt 0.526913 0.282143 +vt 0.447112 0.284192 +vt 0.599262 0.318931 +vt 0.414712 0.664780 +vt 0.473122 0.295374 +vt 0.527671 0.263774 +vt 0.448020 0.295368 +vt 0.593203 0.314324 +vt 0.467288 0.470075 +vt 0.473087 0.282143 +vt 0.534090 0.220859 +vt 0.448662 0.304722 +vt 0.569944 0.232965 +vt 0.437114 0.441104 +vt 0.472329 0.263774 +vt 0.524613 0.307634 +vt 0.114210 0.384978 +vt 0.555168 0.269206 +vt 0.455528 0.451377 +vt 0.465828 0.220810 +vt 0.547818 0.307634 +vt 0.375437 0.075808 +vt 0.552888 0.284192 +vt 0.429884 0.533478 +vt 0.475387 0.307634 +vt 0.568842 0.307634 +vt 0.499877 0.091010 +vt 0.551980 0.295368 +vt 0.336768 0.355267 +vt 0.452182 0.307634 +vt 0.539958 0.442861 +vt 0.455607 0.548199 +vt 0.551338 0.304722 +vt 0.133823 0.317299 +vt 0.431158 0.307634 +vt 0.596371 0.306047 +vt 0.408772 0.626106 +vt 0.885770 0.384971 +vt 0.279777 0.285342 +vt 0.460042 0.442861 +vt 0.596961 0.293460 +vt 0.128294 0.208059 +vt 0.624563 0.075808 +vt 0.189096 0.353700 +vt 0.403629 0.306047 +vt 0.611897 0.306039 +vt 0.440512 0.097581 +vt 0.544341 0.548416 +vt 0.324548 0.296007 +vt 0.403039 0.293460 +vt 0.554692 0.419934 +vt 0.335279 0.147180 +vt 0.591234 0.626106 +vt 0.354128 0.187447 +vt 0.388103 0.306039 +vt 0.577238 0.326110 +vt 0.288719 0.180054 +vt 0.871706 0.208059 +vt 0.445308 0.419934 +vt 0.553172 0.331473 +vt 0.499923 0.648476 +vt 0.559100 0.097368 +vt 0.422762 0.326110 +vt 0.527121 0.333802 +vt 0.465844 0.379359 +vt 0.664630 0.147129 +vt 0.446828 0.331473 +vt 0.826722 0.721245 +vt 0.445682 0.433923 +vt 0.711218 0.180025 +vt 0.472879 0.333802 +vt 0.770391 0.700444 +vt 0.415838 0.375804 +vt 0.534154 0.379360 +vt 0.173287 0.721252 +vt 0.635536 0.810751 +vt 0.499988 0.381566 +vt 0.554318 0.433923 +vt 0.229622 0.700459 +vt 0.770092 0.767979 +vt 0.301415 0.612551 +vt 0.584177 0.375893 +vt 0.364501 0.810886 +vt 0.668509 0.880086 +vt 0.058133 0.680924 +vt 0.698585 0.612551 +vt 0.229924 0.767997 +vt 0.616907 0.744114 +vt 0.301415 0.636844 +vt 0.941867 0.680924 +vt 0.331431 0.880286 +vt 0.614083 0.718613 +vt 0.318785 0.641660 +vt 0.698585 0.636844 +vt 0.383103 0.744160 +vt 0.577414 0.436833 +vt 0.343364 0.644643 +vt 0.681215 0.641660 +vt 0.385919 0.718636 +vt 0.722943 0.728037 +vt 0.365962 0.644029 +vt 0.656636 0.644643 +vt 0.422552 0.436767 +vt 0.607591 0.305797 +vt 0.388665 0.637716 +vt 0.634038 0.644029 +vt 0.277076 0.728068 +vt 0.618026 0.305289 +vt 0.194993 0.657898 
+vt 0.611335 0.637716 +vt 0.392389 0.305797 +vt 0.542902 0.415208 +vt 0.410373 0.608920 +vt 0.805016 0.657892 +vt 0.381974 0.305289 +vt 0.557261 0.427174 +vt 0.393207 0.604463 +vt 0.589660 0.608938 +vt 0.457098 0.415208 +vt 0.932695 0.269895 +vt 0.366170 0.601178 +vt 0.606793 0.604463 +vt 0.442739 0.427174 +vt 0.645429 0.303293 +vt 0.499977 0.045547 +vt 0.633830 0.601178 +vt 0.067305 0.269895 +vt 0.607610 0.646112 +vt 0.500023 0.809424 +vt 0.733752 0.130299 +vt 0.354490 0.303216 +vt 0.552386 0.697432 +vt 0.266248 0.130299 +vt 0.681008 0.101715 +vt 0.392390 0.646112 +vt 0.830705 0.806186 +vt 0.318993 0.101715 +vt 0.568013 0.055435 +vt 0.447580 0.697390 +vt 0.703624 0.706729 +vt 0.430987 0.055935 +vt 0.812086 0.411461 +vt 0.169295 0.806186 +vt 0.662801 0.717082 +vt 0.187885 0.411462 +vt 0.603900 0.289783 +vt 0.296392 0.706757 +vt 0.516446 0.500361 +vt 0.396100 0.289783 +vt 0.656636 0.599403 +vt 0.337212 0.717117 +vt 0.723330 0.636627 +vt 0.723087 0.467946 +vt 0.343364 0.599403 +vt 0.681215 0.603765 +vt 0.483370 0.500413 +vt 0.710288 0.631747 +vt 0.578632 0.466377 +vt 0.318785 0.603765 +vt 0.825608 0.602325 +vt 0.276896 0.467943 +vt 0.549756 0.600249 +vt 0.570338 0.451425 +vt 0.174399 0.602329 +vt 0.617942 0.491684 +vt 0.421352 0.466259 +vt 0.560698 0.604668 +vt 0.598631 0.545021 +vt 0.382385 0.491427 +vt 0.508953 0.420562 +vt 0.429819 0.451385 +vt 0.573595 0.610193 +vt 0.742247 0.685493 +vt 0.490967 0.420622 +vt 0.614074 0.116754 +vt 0.401223 0.544828 +vt 0.517472 0.422123 +vt 0.515097 0.472748 +vt 0.385764 0.116846 +vt 0.865595 0.666313 +vt 0.257765 0.685510 +vt 0.516311 0.436946 +vt 0.513050 0.452718 +vt 0.134410 0.666317 +vt 0.816351 0.259740 +vt 0.485301 0.472605 +vt 0.566036 0.417671 +vt 0.624852 0.271901 +vt 0.183610 0.259743 +vt 0.892441 0.459239 +vt 0.486717 0.452371 +vt 0.531529 0.444943 +vt 0.571228 0.317308 +vt 0.107550 0.459245 +vt 0.801779 0.168062 +vt 0.374971 0.272195 +vt 0.523913 0.436170 +vt 0.549626 0.319139 +vt 0.198221 0.168062 +vt 0.760966 0.220247 +vt 0.428771 0.317309 +vt 0.526564 0.453882 +vt 0.585384 0.333459 +vt 0.238979 0.220255 +vt 0.537728 0.494615 +vt 0.450374 0.319139 +vt 0.541366 0.521101 +vt 0.560215 0.342771 +vt 0.462783 0.494253 +vt 0.580985 0.612840 +vt 0.414617 0.333459 +vt 0.567192 0.430580 +vt 0.525850 0.319809 +vt 0.419054 0.612845 +vt 0.967686 0.355643 +vt 0.439785 0.342771 +vt 0.992440 0.519223 +vt 0.528249 0.349596 +vt 0.032314 0.355643 +vt 0.560611 0.480983 +vt 0.474155 0.319808 +vt 0.579658 0.590055 +vt 0.643998 0.465512 +vt 0.439121 0.481042 +vt 0.733530 0.623023 +vt 0.471751 0.349596 +vt 0.603876 0.583413 +vt 0.790082 0.608646 +vt 0.266470 0.623023 +vt 0.602995 0.451312 +vt 0.355808 0.465594 +vt 0.633505 0.573912 +vt 0.893693 0.600040 +vt 0.396993 0.451203 +vt 0.573500 0.580000 +vt 0.209925 0.608647 +vt 0.666525 0.566134 +vt 0.719902 0.624400 +vt 0.426243 0.579569 +vt 0.980531 0.598436 +vt 0.106310 0.600044 +vt 0.702114 0.566837 +vt 0.602918 0.157137 +vt 0.019469 0.598436 +vt 0.595293 0.514976 +vt 0.280098 0.624400 +vt 0.732392 0.575453 +vt 0.752212 0.589195 +vt 0.404670 0.514867 +vt 0.509127 0.437282 +vt 0.396889 0.157245 +vt 0.897013 0.531231 +vt 0.702097 0.646409 +vt 0.490726 0.437599 +vt 0.771046 0.651041 +vt 0.247792 0.589190 +vt 0.758757 0.617213 +vt 0.680678 0.652735 +vt 0.228962 0.651049 +vt 0.810748 0.476074 +vt 0.297903 0.646409 +vt 0.716482 0.666799 +vt 0.629906 0.653924 +vt 0.189241 0.476076 +vt 0.523481 0.594373 +vt 0.319322 0.652735 +vt 0.687132 0.677654 +vt 0.654766 0.655989 +vt 0.476410 0.594194 +vt 0.600862 0.567527 +vt 0.370094 
0.653924 +vt 0.655896 0.679837 +vt 0.606630 0.596295 +vt 0.398964 0.567345 +vt 0.631101 0.552846 +vt 0.345234 0.655989 +vt 0.622953 0.677221 +vt 0.725342 0.610869 +vt 0.368756 0.552793 +vt 0.667113 0.539327 +vt 0.393362 0.596294 +vt 0.585271 0.664823 +vt 0.688880 0.590540 +vt 0.332828 0.539288 +vt 0.713757 0.532373 +vt 0.274658 0.610869 +vt 0.531987 0.469860 +vt 0.661242 0.586975 +vt 0.286267 0.532325 +vt 0.752702 0.542818 +vt 0.311120 0.590540 +vt 0.562759 0.441215 +vt 0.634070 0.590424 +vt 0.247308 0.542806 +vt 0.821442 0.542444 +vt 0.313951 0.224692 +vt 0.338758 0.586975 +vt 0.544562 0.451624 +vt 0.895093 0.745859 +vt 0.178560 0.542446 +vt 0.551868 0.463430 +vt 0.410986 0.491277 +vt 0.365930 0.590424 +vt 0.570082 0.533674 +vt 0.526227 0.426090 +vt 0.448340 0.463064 +vt 0.572156 0.562348 +vt 0.447750 0.137523 +vt 0.104907 0.745859 +vt 0.663187 0.355403 +vt 0.710288 0.619236 +vt 0.427685 0.562039 +vt 0.742870 0.644554 +vt 0.295284 0.378419 +vt 0.473773 0.426090 +vt 0.866152 0.317295 +vt 0.517862 0.528052 +vt 0.257135 0.644560 +vt 0.587247 0.601068 +vt 0.357155 0.395730 +vt 0.499816 0.437019 +vt 0.720122 0.285333 +vt 0.276670 0.636627 +vt 0.412782 0.601030 +vt 0.781070 0.564595 +vt 0.319688 0.429262 +vt 0.499968 0.218629 +vt 0.810858 0.353695 +vt 0.289712 0.631747 +vt 0.218937 0.564589 +vt 0.711045 0.601048 +vt 0.374293 0.219815 +vt 0.499977 0.262981 +vt 0.675343 0.296022 +vt 0.450067 0.599566 +vt 0.288955 0.601048 +vt 0.588166 0.890956 +vt 0.378909 0.425990 +vt 0.499977 0.280615 +vt 0.645735 0.187360 +vt 0.438999 0.603505 +vt 0.412198 0.891099 +vt 0.570304 0.812129 +vt 0.344549 0.254561 +vt 0.499977 0.294066 +vt 0.685945 0.224643 +vt 0.426450 0.610201 +vt 0.429765 0.812166 +vt 0.558266 0.738328 +vt 0.456549 0.180799 +vt 0.499977 0.304722 +vt 0.589072 0.491363 +vt 0.482483 0.422151 +vt 0.441728 0.738324 +vt 0.600409 0.250995 +vt 0.499913 0.178271 +vt 0.500023 0.307652 +vt 0.552012 0.137408 +vt 0.483518 0.437016 +vt 0.399510 0.251079 +vt 0.672684 0.743419 +vt 0.499886 0.133083 +vt 0.500016 0.320776 +vt 0.704663 0.378470 +vt 0.433991 0.417638 +vt 0.327338 0.743473 +vt 0.709250 0.798492 +vt 0.432112 0.506411 +vt 0.500023 0.333766 +vt 0.642764 0.395662 +vt 0.468472 0.444943 +vt 0.290777 0.798554 +vt 0.757824 0.852324 +vt 0.499974 0.560363 +vt 0.500023 0.892950 +vt 0.680198 0.429281 +vt 0.476088 0.436170 +vt 0.242176 0.852324 +vt 0.588354 0.453138 +vt 0.479154 0.557346 +vt 0.499987 0.730081 +vt 0.625560 0.219688 +vt 0.473466 0.454256 +vt 0.411671 0.453035 +vt 0.665586 0.504049 +vt 0.499989 0.530175 +vt 0.499955 0.687602 +vt 0.621009 0.425982 +vt 0.458639 0.520911 +vt 0.334562 0.503927 +vt 0.627543 0.526648 +vt 0.411362 0.195673 +vt 0.289712 0.619236 +vt 0.655317 0.254485 +vt 0.432949 0.430482 +vt 0.372120 0.526586 +vt 0.536915 0.406214 +vt 0.468268 0.647329 +vt 0.499523 0.598938 +vt 0.543283 0.180745 +vt 0.007561 0.519223 +vt 0.463080 0.406216 +vt 0.577268 0.414065 +vt 0.228018 0.316428 +vt 0.499910 0.501747 +vt 0.567985 0.506521 +vt 0.420121 0.589772 +vt 0.422729 0.414015 +vt 0.531915 0.398463 +vt 0.413386 0.307634 +vt 0.500151 0.472844 +vt 0.520797 0.557435 +vt 0.396012 0.583304 +vt 0.468080 0.398465 +vt 0.590372 0.298177 +vt 0.416164 0.631286 +vt 0.482113 0.528021 +vt 0.588371 0.195559 +vt 0.366427 0.573884 +vt 0.409626 0.298177 +vt 0.586800 0.304600 +vt 0.436392 0.640113 +vt 0.499974 0.397628 +vt 0.531597 0.647517 +vt 0.333434 0.566122 +vt 0.413200 0.304600 +vt 0.986046 0.439966 +vt 0.452770 0.579150 +vt 0.500026 0.452513 +vt 0.771915 0.316422 +vt 0.297879 0.566824 +vt 0.499914 0.419853 +vt 
0.609945 0.360090 +vt 0.247923 0.398667 +vt 0.499977 0.347466 +vt 0.586614 0.307634 +vt 0.267612 0.575440 +vt 0.013954 0.439966 +vt 0.581691 0.279937 +vt 0.367856 0.336081 +vt 0.583841 0.631286 +vt 0.102986 0.531237 +vt 0.390095 0.360427 +vt 0.576838 0.288154 +vt 0.392400 0.322297 +vt 0.563544 0.640172 +vt 0.241246 0.617214 +vt 0.418309 0.279937 +vt 0.573521 0.296460 +vt 0.400738 0.318931 +vt 0.547226 0.579605 +vt 0.283526 0.666810 +vt 0.423162 0.288154 +vt 0.572058 0.304722 +vt 0.406787 0.314327 +vt 0.752033 0.398685 +vt 0.312876 0.677668 +vt 0.426479 0.296460 +vt 0.526967 0.304722 +vt 0.430012 0.233191 +vt 0.631938 0.336500 +vt 0.344108 0.679849 +f 174/43 156/119 134/220 +f 247/335 34/252 8/399 +f 383/124 399/59 363/216 +f 264/244 467/163 250/317 +f 309/42 416/442 325/427 +f 79/51 96/432 192/416 +f 357/246 390/96 265/239 +f 128/250 35/247 163/91 +f 369/186 265/239 390/96 +f 140/190 163/91 35/247 +f 268/224 1/441 303/70 +f 38/232 73/77 1/441 +f 12/375 303/70 1/441 +f 12/375 1/441 73/77 +f 350/281 452/238 351/276 +f 121/285 122/280 232/425 +f 453/233 351/276 452/238 +f 233/419 232/425 122/280 +f 268/224 303/70 270/214 +f 38/232 40/222 73/77 +f 304/66 270/214 303/70 +f 74/73 73/77 40/222 +f 358/241 344/313 351/276 +f 129/245 122/280 115/318 +f 278/174 351/276 344/313 +f 48/182 115/318 122/280 +f 351/276 453/233 358/241 +f 122/280 129/245 233/419 +f 454/228 358/241 453/233 +f 234/413 233/419 129/245 +f 300/82 334/373 298/90 +f 70/89 68/97 105/378 +f 333/379 298/90 334/373 +f 104/384 105/378 68/97 +f 176/33 153/131 397/68 +f 176/33 172/53 153/131 +f 378/144 397/68 153/131 +f 149/147 153/131 172/53 +f 382/128 385/116 383/124 +f 155/123 156/119 158/111 +f 399/59 383/124 385/116 +f 174/43 158/111 156/119 +f 281/159 348/291 331/391 +f 51/167 102/396 119/295 +f 349/286 331/391 348/291 +f 120/290 119/295 102/396 +f 270/214 304/66 271/209 +f 40/222 41/217 74/73 +f 305/62 271/209 304/66 +f 75/69 74/73 41/217 +f 10/387 337/355 152/135 +f 10/387 152/135 108/360 +f 338/349 152/135 337/355 +f 109/354 108/360 152/135 +f 345/307 279/169 361/226 +f 116/312 132/230 49/177 +f 280/164 361/226 279/169 +f 50/172 49/177 132/230 +f 263/249 432/346 419/424 +f 33/257 195/398 212/60 +f 425/388 419/424 432/346 +f 205/338 212/60 195/398 +f 305/62 409/9 271/209 +f 75/69 41/217 185/456 +f 410/4 271/209 409/9 +f 186/451 185/456 41/217 +f 273/199 311/32 408/14 +f 43/207 184/461 81/41 +f 416/442 408/14 311/32 +f 192/416 81/41 184/461 +f 323/439 271/209 411/467 +f 93/449 187/446 41/217 +f 410/4 411/467 271/209 +f 186/451 41/217 187/446 +f 348/291 450/248 349/286 +f 119/295 120/290 230/437 +f 451/243 349/286 450/248 +f 231/431 230/437 120/290 +f 435/328 433/340 431/352 +f 215/45 211/302 213/55 +f 423/400 431/352 433/340 +f 203/350 213/55 211/302 +f 314/17 315/12 19/333 +f 84/26 19/333 85/21 +f 18/339 19/333 315/12 +f 18/339 85/21 19/333 +f 308/47 376/152 307/52 +f 78/56 77/61 147/155 +f 292/114 307/52 376/152 +f 62/121 147/155 77/61 +f 260/264 388/104 261/259 +f 30/272 31/267 161/99 +f 389/100 261/259 388/104 +f 162/95 161/99 31/267 +f 287/134 415/447 385/116 +f 57/141 158/111 191/422 +f 399/59 385/116 415/447 +f 174/43 191/422 158/111 +f 419/424 425/388 407/19 +f 195/398 183/466 205/338 +f 336/361 407/19 425/388 +f 107/366 205/338 183/466 +f 368/191 417/436 365/206 +f 139/195 136/210 193/410 +f 435/328 365/206 417/436 +f 215/45 193/410 136/210 +f 392/88 424/394 328/409 +f 166/79 99/414 204/344 +f 359/236 328/409 424/394 +f 130/240 204/344 99/414 +f 299/86 302/74 285/142 +f 69/93 55/149 72/81 +f 252/305 285/142 302/74 +f 
22/315 72/81 55/149 +f 5/417 276/184 6/411 +f 5/417 6/411 46/192 +f 282/154 6/411 276/184 +f 52/162 46/192 6/411 +f 255/289 374/161 254/294 +f 25/297 24/303 145/165 +f 375/156 254/294 374/161 +f 146/160 145/165 24/303 +f 321/450 322/445 308/47 +f 91/459 78/56 92/454 +f 376/152 308/47 322/445 +f 147/155 92/454 78/56 +f 281/159 426/382 412/462 +f 51/167 188/440 206/332 +f 428/370 412/462 426/382 +f 208/320 206/332 188/440 +f 422/406 314/17 201/362 +f 202/356 201/362 84/26 +f 19/333 201/362 314/17 +f 19/333 84/26 201/362 +f 336/361 322/445 407/19 +f 107/366 183/466 92/454 +f 406/24 407/19 322/445 +f 182/3 92/454 183/466 +f 406/24 322/445 405/29 +f 182/3 181/8 92/454 +f 321/450 405/29 322/445 +f 91/459 92/454 181/8 +f 18/339 315/12 17/345 +f 18/339 17/345 85/21 +f 316/7 17/345 315/12 +f 86/16 85/21 17/345 +f 426/382 267/229 427/376 +f 206/332 207/326 37/237 +f 424/394 427/376 267/229 +f 204/344 37/237 207/326 +f 370/181 397/68 401/49 +f 141/185 177/28 172/53 +f 378/144 401/49 397/68 +f 149/147 172/53 177/28 +f 392/88 270/214 323/439 +f 166/79 93/449 40/222 +f 271/209 323/439 270/214 +f 41/217 40/222 93/449 +f 418/430 466/168 414/452 +f 194/404 190/428 246/341 +f 465/173 414/452 466/168 +f 245/347 246/341 190/428 +f 258/274 259/269 387/108 +f 28/282 160/103 29/277 +f 386/112 387/108 259/269 +f 159/107 29/277 160/103 +f 261/259 389/100 468/158 +f 31/267 248/329 162/95 +f 467/163 468/158 389/100 +f 247/335 162/95 248/329 +f 249/323 457/213 420/418 +f 4/423 197/386 237/395 +f 400/54 420/418 457/213 +f 175/38 237/395 197/386 +f 334/373 299/86 333/379 +f 105/378 104/384 69/93 +f 285/142 333/379 299/86 +f 55/149 69/93 104/384 +f 286/138 9/393 418/430 +f 56/145 194/404 9/393 +f 169/67 418/430 9/393 +f 169/67 9/393 194/404 +f 341/331 262/254 347/296 +f 112/336 118/300 32/262 +f 449/253 347/296 262/254 +f 229/443 32/262 118/300 +f 286/138 418/430 442/288 +f 56/145 222/10 194/404 +f 414/452 442/288 418/430 +f 190/428 194/404 222/10 +f 328/409 461/193 327/415 +f 99/414 98/420 241/371 +f 329/403 327/415 461/193 +f 100/408 241/371 98/420 +f 278/174 356/251 330/397 +f 48/182 101/402 127/255 +f 372/171 330/397 356/251 +f 143/175 127/255 101/402 +f 310/37 393/84 439/304 +f 80/46 219/25 167/75 +f 440/298 439/304 393/84 +f 220/20 167/75 219/25 +f 382/128 383/124 257/279 +f 155/123 27/287 156/119 +f 342/325 257/279 383/124 +f 113/330 156/119 27/287 +f 361/226 280/164 421/412 +f 132/230 199/374 50/172 +f 430/358 421/412 280/164 +f 210/308 50/172 199/374 +f 366/201 365/206 380/136 +f 137/205 151/139 136/210 +f 395/76 380/136 365/206 +f 170/63 136/210 151/139 +f 356/251 278/174 438/310 +f 127/255 218/30 48/182 +f 344/313 438/310 278/174 +f 115/318 48/182 218/30 +f 444/278 445/273 283/150 +f 224/468 53/157 225/463 +f 284/146 283/150 445/273 +f 54/153 225/463 53/157 +f 282/154 276/184 364/211 +f 52/162 135/215 46/192 +f 441/293 364/211 276/184 +f 221/15 46/192 135/215 +f 432/346 263/249 396/72 +f 212/60 171/58 33/257 +f 370/181 396/72 263/249 +f 141/185 33/257 171/58 +f 338/349 300/82 339/343 +f 109/354 110/348 70/89 +f 298/90 339/343 300/82 +f 68/97 70/89 110/348 +f 336/361 274/194 322/445 +f 107/366 92/454 44/202 +f 376/152 322/445 274/194 +f 147/155 44/202 92/454 +f 349/286 451/243 350/281 +f 120/290 121/285 231/431 +f 452/238 350/281 451/243 +f 232/425 231/431 121/285 +f 468/158 360/231 343/319 +f 248/329 114/324 131/235 +f 447/263 343/319 360/231 +f 227/453 131/235 114/324 +f 283/150 284/146 335/367 +f 53/157 106/372 54/153 +f 294/106 335/367 284/146 +f 64/113 54/153 106/372 +f 251/311 459/203 463/183 +f 21/321 
243/359 239/383 +f 462/188 463/183 459/203 +f 242/365 239/383 243/359 +f 277/179 354/261 301/78 +f 47/187 71/85 125/265 +f 384/120 301/78 354/261 +f 157/115 125/265 71/85 +f 326/421 293/110 325/427 +f 97/426 96/432 63/117 +f 309/42 325/427 293/110 +f 79/51 63/117 96/432 +f 284/146 277/179 294/106 +f 54/153 64/113 47/187 +f 301/78 294/106 277/179 +f 71/85 47/187 64/113 +f 448/258 265/239 346/301 +f 228/448 117/306 35/247 +f 373/166 346/301 265/239 +f 144/170 35/247 117/306 +f 353/266 346/301 347/296 +f 124/270 118/300 117/306 +f 341/331 347/296 346/301 +f 112/336 117/306 118/300 +f 2/435 20/327 275/189 +f 2/435 45/197 20/327 +f 355/256 275/189 20/327 +f 126/260 20/327 45/197 +f 249/323 282/154 457/213 +f 4/423 237/395 52/162 +f 364/211 457/213 282/154 +f 135/215 52/162 237/395 +f 426/382 427/376 428/370 +f 206/332 208/320 207/326 +f 437/316 428/370 427/376 +f 217/35 207/326 208/320 +f 381/132 382/128 253/299 +f 154/127 23/309 155/123 +f 257/279 253/299 382/128 +f 27/287 155/123 23/309 +f 392/88 394/80 270/214 +f 166/79 40/222 168/71 +f 268/224 270/214 394/80 +f 38/232 168/71 40/222 +f 200/368 429/364 201/362 +f 200/368 201/362 209/314 +f 422/406 201/362 429/364 +f 202/356 209/314 201/362 +f 331/391 330/397 267/229 +f 102/396 37/237 101/402 +f 372/171 267/229 330/397 +f 143/175 101/402 37/237 +f 423/400 433/340 274/194 +f 203/350 44/202 213/55 +f 288/130 274/194 433/340 +f 58/137 213/55 44/202 +f 291/118 251/311 329/403 +f 61/125 100/408 21/321 +f 463/183 329/403 251/311 +f 243/359 21/321 100/408 +f 259/269 287/134 386/112 +f 29/277 159/107 57/141 +f 385/116 386/112 287/134 +f 158/111 57/141 159/107 +f 343/319 447/263 354/261 +f 114/324 125/265 227/453 +f 266/234 354/261 447/263 +f 36/242 227/453 125/265 +f 258/274 387/108 260/264 +f 28/282 30/272 160/103 +f 388/104 260/264 387/108 +f 161/99 160/103 30/272 +f 431/352 423/400 432/346 +f 211/302 212/60 203/350 +f 425/388 432/346 423/400 +f 205/338 203/350 212/60 +f 446/268 343/319 277/179 +f 226/458 47/187 114/324 +f 354/261 277/179 343/319 +f 125/265 114/324 47/187 +f 425/388 423/400 336/361 +f 205/338 107/366 203/350 +f 274/194 336/361 423/400 +f 44/202 203/350 107/366 +f 307/52 293/110 308/47 +f 77/61 78/56 63/117 +f 326/421 308/47 293/110 +f 97/426 63/117 78/56 +f 367/196 448/258 353/266 +f 138/200 124/270 228/448 +f 346/301 353/266 448/258 +f 117/306 228/448 124/270 +f 303/70 269/219 304/66 +f 73/77 74/73 39/227 +f 272/204 304/66 269/219 +f 42/212 39/227 74/73 +f 372/171 359/236 267/229 +f 143/175 37/237 130/240 +f 424/394 267/229 359/236 +f 204/344 130/240 37/237 +f 328/409 295/102 461/193 +f 99/414 241/371 65/109 +f 456/218 461/193 295/102 +f 236/401 65/109 241/371 +f 295/102 332/385 279/169 +f 65/109 49/177 103/390 +f 280/164 279/169 332/385 +f 50/172 103/390 49/177 +f 304/66 272/204 305/62 +f 74/73 75/69 42/212 +f 273/199 305/62 272/204 +f 43/207 42/212 75/69 +f 428/370 437/316 435/328 +f 208/320 215/45 217/35 +f 433/340 435/328 437/316 +f 213/55 217/35 215/45 +f 305/62 273/199 409/9 +f 75/69 185/456 43/207 +f 408/14 409/9 273/199 +f 184/461 43/207 185/456 +f 395/76 431/352 396/72 +f 170/63 171/58 211/302 +f 432/346 396/72 431/352 +f 212/60 211/302 171/58 +f 396/72 370/181 379/140 +f 171/58 150/143 141/185 +f 401/49 379/140 370/181 +f 177/28 141/185 150/143 +f 297/94 335/367 300/82 +f 67/101 70/89 106/372 +f 334/373 300/82 335/367 +f 105/378 106/372 70/89 +f 418/430 169/67 352/271 +f 194/404 123/275 169/67 +f 7/405 352/271 169/67 +f 7/405 169/67 123/275 +f 281/159 412/462 353/266 +f 51/167 124/270 188/440 +f 377/148 353/266 412/462 +f 
148/151 188/440 124/270 +f 320/455 321/450 326/421 +f 90/464 97/426 91/459 +f 308/47 326/421 321/450 +f 78/56 91/459 97/426 +f 286/138 296/98 337/355 +f 56/145 108/360 66/105 +f 297/94 337/355 296/98 +f 67/101 66/105 108/360 +f 405/29 321/450 404/34 +f 181/8 180/13 91/459 +f 320/455 404/34 321/450 +f 90/464 91/459 180/13 +f 331/391 349/286 330/397 +f 102/396 101/402 120/290 +f 350/281 330/397 349/286 +f 121/285 120/290 101/402 +f 335/367 294/106 334/373 +f 106/372 105/378 64/113 +f 299/86 334/373 294/106 +f 69/93 64/113 105/378 +f 324/433 455/223 367/196 +f 94/444 138/200 235/407 +f 448/258 367/196 455/223 +f 228/448 235/407 138/200 +f 17/345 316/7 16/351 +f 17/345 16/351 86/16 +f 317/2 16/351 316/7 +f 87/11 86/16 16/351 +f 430/358 280/164 359/236 +f 210/308 130/240 50/172 +f 332/385 359/236 280/164 +f 103/390 50/172 130/240 +f 16/351 317/2 15/357 +f 16/351 15/357 87/11 +f 318/465 15/357 317/2 +f 88/6 87/11 15/357 +f 9/393 286/138 10/387 +f 9/393 10/387 56/145 +f 337/355 10/387 286/138 +f 108/360 56/145 10/387 +f 330/397 350/281 278/174 +f 101/402 48/182 121/285 +f 351/276 278/174 350/281 +f 122/280 121/285 48/182 +f 253/299 254/294 381/132 +f 23/309 154/127 24/303 +f 375/156 381/132 254/294 +f 146/160 24/303 154/127 +f 403/39 404/34 319/460 +f 179/18 89/1 180/13 +f 320/455 319/460 404/34 +f 90/464 180/13 89/1 +f 352/271 7/405 420/418 +f 123/275 197/386 7/405 +f 198/380 420/418 7/405 +f 198/380 7/405 197/386 +f 325/427 319/460 326/421 +f 96/432 97/426 89/1 +f 320/455 326/421 319/460 +f 90/464 89/1 97/426 +f 398/64 368/191 366/201 +f 173/48 137/205 139/195 +f 365/206 366/201 368/191 +f 136/210 139/195 137/205 +f 289/126 436/322 398/64 +f 59/133 173/48 216/40 +f 368/191 398/64 436/322 +f 139/195 216/40 173/48 +f 439/304 440/298 345/307 +f 219/25 116/312 220/20 +f 279/169 345/307 440/298 +f 49/177 220/20 116/312 +f 272/204 312/27 273/199 +f 42/212 43/207 82/36 +f 311/32 273/199 312/27 +f 81/41 82/36 43/207 +f 6/411 282/154 196/392 +f 6/411 196/392 52/162 +f 249/323 196/392 282/154 +f 4/423 52/162 196/392 +f 274/194 288/130 376/152 +f 44/202 147/155 58/137 +f 292/114 376/152 288/130 +f 62/121 58/137 147/155 +f 397/68 429/364 176/33 +f 172/53 176/33 209/314 +f 200/368 176/33 429/364 +f 200/368 209/314 176/33 +f 269/219 313/22 272/204 +f 39/227 42/212 83/31 +f 312/27 272/204 313/22 +f 82/36 83/31 42/212 +f 445/273 446/268 284/146 +f 225/463 54/153 226/458 +f 277/179 284/146 446/268 +f 47/187 226/458 54/153 +f 255/289 340/337 374/161 +f 25/297 145/165 111/342 +f 391/92 374/161 340/337 +f 164/87 111/342 145/165 +f 296/98 283/150 297/94 +f 66/105 67/101 53/157 +f 335/367 297/94 283/150 +f 106/372 53/157 67/101 +f 347/296 449/253 348/291 +f 118/300 119/295 229/443 +f 450/248 348/291 449/253 +f 230/437 229/443 119/295 +f 455/223 357/246 448/258 +f 235/407 228/448 128/250 +f 265/239 448/258 357/246 +f 35/247 128/250 228/448 +f 337/355 297/94 338/349 +f 108/360 109/354 67/101 +f 300/82 338/349 297/94 +f 70/89 67/101 109/354 +f 152/135 338/349 11/381 +f 152/135 11/381 109/354 +f 339/343 11/381 338/349 +f 110/348 109/354 11/381 +f 279/169 440/298 295/102 +f 49/177 65/109 220/20 +f 456/218 295/102 440/298 +f 236/401 220/20 65/109 +f 408/14 416/442 293/110 +f 184/461 63/117 192/416 +f 309/42 293/110 416/442 +f 79/51 192/416 63/117 +f 359/236 372/171 430/358 +f 130/240 210/308 143/175 +f 356/251 430/358 372/171 +f 127/255 143/175 210/308 +f 346/301 373/166 341/331 +f 117/306 112/336 144/170 +f 266/234 341/331 373/166 +f 36/242 144/170 112/336 +f 389/100 391/92 467/163 +f 162/95 247/335 164/87 +f 250/317 
467/163 391/92 +f 8/399 164/87 247/335 +f 353/266 347/296 281/159 +f 124/270 51/167 118/300 +f 348/291 281/159 347/296 +f 119/295 118/300 51/167 +f 296/98 443/283 283/150 +f 66/105 53/157 223/5 +f 444/278 283/150 443/283 +f 224/468 223/5 53/157 +f 20/327 95/438 355/256 +f 20/327 126/260 95/438 +f 371/176 355/256 95/438 +f 142/180 95/438 126/260 +f 296/98 286/138 443/283 +f 66/105 223/5 56/145 +f 442/288 443/283 286/138 +f 222/10 56/145 223/5 +f 420/418 198/380 249/323 +f 197/386 4/423 198/380 +f 196/392 249/323 198/380 +f 196/392 198/380 4/423 +f 360/231 264/244 256/284 +f 131/235 26/292 34/252 +f 250/317 256/284 264/244 +f 8/399 34/252 26/292 +f 276/184 275/189 441/293 +f 46/192 221/15 45/197 +f 458/208 441/293 275/189 +f 238/389 45/197 221/15 +f 301/78 384/120 302/74 +f 71/85 72/81 157/115 +f 369/186 302/74 384/120 +f 140/190 157/115 72/81 +f 418/430 352/271 466/168 +f 194/404 246/341 123/275 +f 413/457 466/168 352/271 +f 189/434 123/275 246/341 +f 467/163 264/244 468/158 +f 247/335 248/329 34/252 +f 360/231 468/158 264/244 +f 131/235 34/252 248/329 +f 390/96 252/305 369/186 +f 163/91 140/190 22/315 +f 302/74 369/186 252/305 +f 72/81 22/315 140/190 +f 375/156 387/108 381/132 +f 146/160 154/127 160/103 +f 386/112 381/132 387/108 +f 159/107 160/103 154/127 +f 380/136 395/76 379/140 +f 151/139 150/143 170/63 +f 396/72 379/140 395/76 +f 171/58 170/63 150/143 +f 352/271 420/418 413/457 +f 123/275 189/434 197/386 +f 400/54 413/457 420/418 +f 175/38 197/386 189/434 +f 427/376 323/439 437/316 +f 207/326 217/35 93/449 +f 411/467 437/316 323/439 +f 187/446 93/449 217/35 +f 388/104 374/161 389/100 +f 161/99 162/95 145/165 +f 391/92 389/100 374/161 +f 164/87 145/165 162/95 +f 394/80 327/415 165/83 +f 168/71 165/83 98/420 +f 3/429 165/83 327/415 +f 3/429 98/420 165/83 +f 355/256 371/176 462/188 +f 126/260 242/365 142/180 +f 463/183 462/188 371/176 +f 243/359 142/180 242/365 +f 1/441 268/224 165/83 +f 1/441 165/83 38/232 +f 394/80 165/83 268/224 +f 168/71 38/232 165/83 +f 12/375 13/369 303/70 +f 12/375 73/77 13/369 +f 269/219 303/70 13/369 +f 39/227 13/369 73/77 +f 387/108 375/156 388/104 +f 160/103 161/99 146/160 +f 374/161 388/104 375/156 +f 145/165 146/160 161/99 +f 13/369 14/363 269/219 +f 13/369 39/227 14/363 +f 313/22 269/219 14/363 +f 83/31 14/363 39/227 +f 294/106 301/78 299/86 +f 64/113 69/93 71/85 +f 302/74 299/86 301/78 +f 72/81 71/85 69/93 +f 341/331 266/234 262/254 +f 112/336 32/262 36/242 +f 447/263 262/254 266/234 +f 227/453 36/242 32/262 +f 381/132 386/112 382/128 +f 154/127 155/123 159/107 +f 385/116 382/128 386/112 +f 158/111 159/107 155/123 +f 281/159 331/391 426/382 +f 51/167 206/332 102/396 +f 267/229 426/382 331/391 +f 37/237 102/396 206/332 +f 424/394 392/88 427/376 +f 204/344 207/326 166/79 +f 323/439 427/376 392/88 +f 93/449 166/79 207/326 +f 430/358 356/251 421/412 +f 210/308 199/374 127/255 +f 438/310 421/412 356/251 +f 218/30 127/255 199/374 +f 392/88 328/409 394/80 +f 166/79 168/71 99/414 +f 327/415 394/80 328/409 +f 98/420 99/414 168/71 +f 458/208 439/304 441/293 +f 238/389 221/15 219/25 +f 345/307 441/293 439/304 +f 116/312 219/25 221/15 +f 383/124 363/216 342/325 +f 156/119 113/330 134/220 +f 464/178 342/325 363/216 +f 244/353 134/220 113/330 +f 458/208 462/188 460/198 +f 238/389 240/377 242/365 +f 459/203 460/198 462/188 +f 239/383 242/365 240/377 +f 435/328 431/352 365/206 +f 215/45 136/210 211/302 +f 395/76 365/206 431/352 +f 170/63 211/302 136/210 +f 415/447 464/178 399/59 +f 191/422 174/43 244/353 +f 363/216 399/59 464/178 +f 134/220 244/353 174/43 +f 263/249 
429/364 370/181 +f 33/257 141/185 209/314 +f 397/68 370/181 429/364 +f 172/53 209/314 141/185 +f 458/208 275/189 462/188 +f 238/389 242/365 45/197 +f 355/256 462/188 275/189 +f 126/260 45/197 242/365 +f 317/2 404/34 318/465 +f 87/11 88/6 180/13 +f 403/39 318/465 404/34 +f 179/18 180/13 88/6 +f 316/7 405/29 317/2 +f 86/16 87/11 181/8 +f 404/34 317/2 405/29 +f 180/13 181/8 87/11 +f 315/12 406/24 316/7 +f 85/21 86/16 182/3 +f 405/29 316/7 406/24 +f 181/8 182/3 86/16 +f 314/17 407/19 315/12 +f 84/26 85/21 183/466 +f 406/24 315/12 407/19 +f 182/3 183/466 85/21 +f 419/424 407/19 422/406 +f 195/398 202/356 183/466 +f 314/17 422/406 407/19 +f 84/26 183/466 202/356 +f 367/196 402/44 324/433 +f 138/200 94/444 178/23 +f 362/221 324/433 402/44 +f 133/225 178/23 94/444 +f 409/9 408/14 307/52 +f 185/456 77/61 184/461 +f 293/110 307/52 408/14 +f 63/117 184/461 77/61 +f 409/9 307/52 410/4 +f 185/456 186/451 77/61 +f 292/114 410/4 307/52 +f 62/121 77/61 186/451 +f 411/467 410/4 288/130 +f 187/446 58/137 186/451 +f 292/114 288/130 410/4 +f 62/121 186/451 58/137 +f 437/316 411/467 433/340 +f 217/35 213/55 187/446 +f 288/130 433/340 411/467 +f 58/137 187/446 213/55 +f 435/328 417/436 428/370 +f 215/45 208/320 193/410 +f 412/462 428/370 417/436 +f 188/440 193/410 208/320 +f 265/239 369/186 373/166 +f 35/247 144/170 140/190 +f 384/120 373/166 369/186 +f 157/115 140/190 144/170 +f 458/208 460/198 439/304 +f 238/389 219/25 240/377 +f 310/37 439/304 460/198 +f 80/46 240/377 219/25 +f 353/266 377/148 367/196 +f 124/270 138/200 148/151 +f 402/44 367/196 377/148 +f 178/23 148/151 138/200 +f 5/417 2/435 276/184 +f 5/417 46/192 2/435 +f 275/189 276/184 2/435 +f 45/197 2/435 46/192 +f 429/364 263/249 422/406 +f 209/314 202/356 33/257 +f 419/424 422/406 263/249 +f 195/398 33/257 202/356 +f 328/409 359/236 295/102 +f 99/414 65/109 130/240 +f 332/385 295/102 359/236 +f 103/390 130/240 65/109 +f 368/191 436/322 417/436 +f 139/195 193/410 216/40 +f 434/334 417/436 436/322 +f 214/50 216/40 193/410 +f 456/218 440/298 290/122 +f 236/401 60/129 220/20 +f 393/84 290/122 440/298 +f 167/75 220/20 60/129 +f 329/403 463/183 327/415 +f 100/408 98/420 243/359 +f 371/176 327/415 463/183 +f 142/180 243/359 98/420 +f 327/415 371/176 3/429 +f 98/420 3/429 142/180 +f 95/438 3/429 371/176 +f 95/438 142/180 3/429 +f 461/193 456/218 306/57 +f 241/371 76/65 236/401 +f 290/122 306/57 456/218 +f 60/129 236/401 76/65 +f 449/253 340/337 450/248 +f 229/443 230/437 111/342 +f 255/289 450/248 340/337 +f 25/297 111/342 230/437 +f 262/254 447/263 256/284 +f 32/262 26/292 227/453 +f 360/231 256/284 447/263 +f 131/235 227/453 26/292 +f 450/248 255/289 451/243 +f 230/437 231/431 25/297 +f 254/294 451/243 255/289 +f 24/303 25/297 231/431 +f 451/243 254/294 452/238 +f 231/431 232/425 24/303 +f 253/299 452/238 254/294 +f 23/309 24/303 232/425 +f 452/238 253/299 453/233 +f 232/425 233/419 23/309 +f 257/279 453/233 253/299 +f 27/287 23/309 233/419 +f 257/279 342/325 453/233 +f 27/287 233/419 113/330 +f 454/228 453/233 342/325 +f 234/413 113/330 233/419 +f 414/452 465/173 415/447 +f 190/428 191/422 245/347 +f 464/178 415/447 465/173 +f 244/353 245/347 191/422 +f 442/288 414/452 287/134 +f 222/10 57/141 190/428 +f 415/447 287/134 414/452 +f 191/422 190/428 57/141 +f 442/288 287/134 443/283 +f 222/10 223/5 57/141 +f 259/269 443/283 287/134 +f 29/277 57/141 223/5 +f 443/283 259/269 444/278 +f 223/5 224/468 29/277 +f 258/274 444/278 259/269 +f 28/282 29/277 224/468 +f 445/273 444/278 260/264 +f 225/463 30/272 224/468 +f 258/274 260/264 444/278 +f 28/282 224/468 
30/272 +f 260/264 261/259 445/273 +f 30/272 225/463 31/267 +f 446/268 445/273 261/259 +f 226/458 31/267 225/463 +f 261/259 468/158 446/268 +f 31/267 226/458 248/329 +f 343/319 446/268 468/158 +f 114/324 248/329 226/458 +f 251/311 310/37 459/203 +f 21/321 239/383 80/46 +f 460/198 459/203 310/37 +f 240/377 80/46 239/383 +f 291/118 306/57 393/84 +f 61/125 167/75 76/65 +f 290/122 393/84 306/57 +f 60/129 76/65 167/75 +f 461/193 306/57 329/403 +f 241/371 100/408 76/65 +f 291/118 329/403 306/57 +f 61/125 76/65 100/408 +f 377/148 434/334 402/44 +f 148/151 178/23 214/50 +f 436/322 402/44 434/334 +f 216/40 214/50 178/23 +f 251/311 291/118 310/37 +f 21/321 80/46 61/125 +f 393/84 310/37 291/118 +f 167/75 61/125 80/46 +f 412/462 417/436 377/148 +f 188/440 148/151 193/410 +f 434/334 377/148 417/436 +f 214/50 193/410 148/151 +f 342/325 464/178 454/228 +f 113/330 234/413 244/353 +f 465/173 454/228 464/178 +f 245/347 244/353 234/413 +f 454/228 465/173 358/241 +f 234/413 129/245 245/347 +f 466/168 358/241 465/173 +f 246/341 245/347 129/245 +f 413/457 344/313 466/168 +f 189/434 246/341 115/318 +f 358/241 466/168 344/313 +f 129/245 115/318 246/341 +f 438/310 344/313 400/54 +f 218/30 175/38 115/318 +f 413/457 400/54 344/313 +f 189/434 115/318 175/38 +f 364/211 441/293 361/226 +f 135/215 132/230 221/15 +f 345/307 361/226 441/293 +f 116/312 221/15 132/230 +f 457/213 421/412 400/54 +f 237/395 175/38 199/374 +f 438/310 400/54 421/412 +f 218/30 199/374 175/38 +f 457/213 364/211 421/412 +f 237/395 199/374 135/215 +f 361/226 421/412 364/211 +f 132/230 135/215 199/374 +f 362/221 402/44 289/126 +f 133/225 59/133 178/23 +f 436/322 289/126 402/44 +f 216/40 178/23 59/133 +f 354/261 266/234 384/120 +f 125/265 157/115 36/242 +f 373/166 384/120 266/234 +f 144/170 36/242 157/115 +f 256/284 250/317 340/337 +f 26/292 111/342 8/399 +f 391/92 340/337 250/317 +f 164/87 8/399 111/342 +f 262/254 256/284 449/253 +f 32/262 229/443 26/292 +f 340/337 449/253 256/284 +f 111/342 26/292 229/443 +f 15/357 318/465 14/363 +f 15/357 14/363 88/6 +f 313/22 14/363 318/465 +f 83/31 88/6 14/363 +f 318/465 403/39 313/22 +f 88/6 83/31 179/18 +f 312/27 313/22 403/39 +f 82/36 179/18 83/31 +f 403/39 319/460 312/27 +f 179/18 82/36 89/1 +f 311/32 312/27 319/460 +f 81/41 89/1 82/36 +f 319/460 325/427 311/32 +f 89/1 81/41 96/432 +f 416/442 311/32 325/427 +f 192/416 96/432 81/41 diff --git a/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png b/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png new file mode 100644 index 0000000..2acd991 Binary files /dev/null and b/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png differ diff --git a/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.pbtxt b/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.pbtxt new file mode 100644 index 0000000..c4389a6 --- /dev/null +++ b/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.pbtxt @@ -0,0 +1,78 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +input_source: FACE_DETECTION_PIPELINE +procrustes_landmark_basis { landmark_id: 0 weight: 1.0 } +procrustes_landmark_basis { landmark_id: 1 weight: 1.0 } +procrustes_landmark_basis { landmark_id: 2 weight: 1.0 } +procrustes_landmark_basis { landmark_id: 3 weight: 1.0 } +procrustes_landmark_basis { landmark_id: 4 weight: 1.0 } +procrustes_landmark_basis { landmark_id: 5 weight: 1.0 } +# NOTE: the triangular topology of the face meshes is only useful when derived +# from the 468 face landmarks, not from the 6 face detection landmarks +# (keypoints). The former don't cover the entire face and this mesh is +# defined here only to comply with the API. It should be considered as +# a placeholder and/or for debugging purposes. +# +# Use the face geometry derived from the face detection landmarks +# (keypoints) for the face pose transformation matrix, not the mesh. +canonical_mesh: { + vertex_type: VERTEX_PT + primitive_type: TRIANGLE + vertex_buffer: -3.1511454582214355 + vertex_buffer: 2.6246179342269897 + vertex_buffer: 3.4656630754470825 + vertex_buffer: 0.349575996398926 + vertex_buffer: 0.38137748837470997 + vertex_buffer: 3.1511454582214355 + vertex_buffer: 2.6246179342269897 + vertex_buffer: 3.4656630754470825 + vertex_buffer: 0.650443494319916 + vertex_buffer: 0.38137999176979054 + vertex_buffer: 0.0 + vertex_buffer: -1.126865029335022 + vertex_buffer: 7.475604057312012 + vertex_buffer: 0.500025987625122 + vertex_buffer: 0.547487020492554 + vertex_buffer: 0.0 + vertex_buffer: -4.304508209228516 + vertex_buffer: 4.162498950958252 + vertex_buffer: 0.499989986419678 + vertex_buffer: 0.694203019142151 + vertex_buffer: -7.664182186126709 + vertex_buffer: 0.673132002353668 + vertex_buffer: -2.435867071151733 + vertex_buffer: 0.007561000064015 + vertex_buffer: 0.480777025222778 + vertex_buffer: 7.664182186126709 + vertex_buffer: 0.673132002353668 + vertex_buffer: -2.435867071151733 + vertex_buffer: 0.992439985275269 + vertex_buffer: 0.480777025222778 + index_buffer: 0 + index_buffer: 1 + index_buffer: 2 + index_buffer: 1 + index_buffer: 5 + index_buffer: 2 + index_buffer: 4 + index_buffer: 0 + index_buffer: 2 + index_buffer: 4 + index_buffer: 2 + index_buffer: 3 + index_buffer: 2 + index_buffer: 5 + index_buffer: 3 +} diff --git a/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.pbtxt b/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.pbtxt new file mode 100644 index 0000000..8dfb463 --- /dev/null +++ b/mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.pbtxt @@ -0,0 +1,5086 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +input_source: FACE_LANDMARK_PIPELINE +procrustes_landmark_basis { landmark_id: 4 weight: 0.070909939706326 } +procrustes_landmark_basis { landmark_id: 6 weight: 0.032100144773722 } +procrustes_landmark_basis { landmark_id: 10 weight: 0.008446550928056 } +procrustes_landmark_basis { landmark_id: 33 weight: 0.058724168688059 } +procrustes_landmark_basis { landmark_id: 54 weight: 0.007667080033571 } +procrustes_landmark_basis { landmark_id: 67 weight: 0.009078059345484 } +procrustes_landmark_basis { landmark_id: 117 weight: 0.009791937656701 } +procrustes_landmark_basis { landmark_id: 119 weight: 0.014565368182957 } +procrustes_landmark_basis { landmark_id: 121 weight: 0.018591361120343 } +procrustes_landmark_basis { landmark_id: 127 weight: 0.005197994410992 } +procrustes_landmark_basis { landmark_id: 129 weight: 0.120625205338001 } +procrustes_landmark_basis { landmark_id: 132 weight: 0.005560018587857 } +procrustes_landmark_basis { landmark_id: 133 weight: 0.05328618362546 } +procrustes_landmark_basis { landmark_id: 136 weight: 0.066890455782413 } +procrustes_landmark_basis { landmark_id: 143 weight: 0.014816547743976 } +procrustes_landmark_basis { landmark_id: 147 weight: 0.014262833632529 } +procrustes_landmark_basis { landmark_id: 198 weight: 0.025462191551924 } +procrustes_landmark_basis { landmark_id: 205 weight: 0.047252278774977 } +procrustes_landmark_basis { landmark_id: 263 weight: 0.058724168688059 } +procrustes_landmark_basis { landmark_id: 284 weight: 0.007667080033571 } +procrustes_landmark_basis { landmark_id: 297 weight: 0.009078059345484 } +procrustes_landmark_basis { landmark_id: 346 weight: 0.009791937656701 } +procrustes_landmark_basis { landmark_id: 348 weight: 0.014565368182957 } +procrustes_landmark_basis { landmark_id: 350 weight: 0.018591361120343 } +procrustes_landmark_basis { landmark_id: 356 weight: 0.005197994410992 } +procrustes_landmark_basis { landmark_id: 358 weight: 0.120625205338001 } +procrustes_landmark_basis { landmark_id: 361 weight: 0.005560018587857 } +procrustes_landmark_basis { landmark_id: 362 weight: 0.05328618362546 } +procrustes_landmark_basis { landmark_id: 365 weight: 0.066890455782413 } +procrustes_landmark_basis { landmark_id: 372 weight: 0.014816547743976 } +procrustes_landmark_basis { landmark_id: 376 weight: 0.014262833632529 } +procrustes_landmark_basis { landmark_id: 420 weight: 0.025462191551924 } +procrustes_landmark_basis { landmark_id: 425 weight: 0.047252278774977 } +canonical_mesh: { + vertex_type: VERTEX_PT + primitive_type: TRIANGLE + vertex_buffer: 0.000000 + vertex_buffer: -3.406404 + vertex_buffer: 5.979507 + vertex_buffer: 0.499977 + vertex_buffer: 0.652534 + vertex_buffer: 0.000000 + vertex_buffer: -1.126865 + vertex_buffer: 7.475604 + vertex_buffer: 0.500026 + vertex_buffer: 0.547487 + vertex_buffer: 0.000000 + vertex_buffer: -2.089024 + vertex_buffer: 6.058267 + vertex_buffer: 0.499974 + vertex_buffer: 0.602372 + vertex_buffer: -0.463928 + vertex_buffer: 0.955357 + vertex_buffer: 6.633583 + vertex_buffer: 0.482113 + vertex_buffer: 0.471979 + vertex_buffer: 0.000000 + vertex_buffer: -0.463170 + vertex_buffer: 7.586580 + vertex_buffer: 0.500151 + vertex_buffer: 0.527156 + vertex_buffer: 0.000000 + vertex_buffer: 0.365669 + vertex_buffer: 7.242870 + vertex_buffer: 0.499910 + vertex_buffer: 0.498253 + vertex_buffer: 0.000000 + vertex_buffer: 2.473255 + vertex_buffer: 5.788627 + vertex_buffer: 0.499523 + vertex_buffer: 0.401062 + vertex_buffer: -4.253081 + vertex_buffer: 2.577646 + vertex_buffer: 3.279702 + vertex_buffer: 
0.289712 + vertex_buffer: 0.380764 + vertex_buffer: 0.000000 + vertex_buffer: 4.019042 + vertex_buffer: 5.284764 + vertex_buffer: 0.499955 + vertex_buffer: 0.312398 + vertex_buffer: 0.000000 + vertex_buffer: 4.885979 + vertex_buffer: 5.385258 + vertex_buffer: 0.499987 + vertex_buffer: 0.269919 + vertex_buffer: 0.000000 + vertex_buffer: 8.261778 + vertex_buffer: 4.481535 + vertex_buffer: 0.500023 + vertex_buffer: 0.107050 + vertex_buffer: 0.000000 + vertex_buffer: -3.706811 + vertex_buffer: 5.864924 + vertex_buffer: 0.500023 + vertex_buffer: 0.666234 + vertex_buffer: 0.000000 + vertex_buffer: -3.918301 + vertex_buffer: 5.569430 + vertex_buffer: 0.500016 + vertex_buffer: 0.679224 + vertex_buffer: 0.000000 + vertex_buffer: -3.994436 + vertex_buffer: 5.219482 + vertex_buffer: 0.500023 + vertex_buffer: 0.692348 + vertex_buffer: 0.000000 + vertex_buffer: -4.542400 + vertex_buffer: 5.404754 + vertex_buffer: 0.499977 + vertex_buffer: 0.695278 + vertex_buffer: 0.000000 + vertex_buffer: -4.745577 + vertex_buffer: 5.529457 + vertex_buffer: 0.499977 + vertex_buffer: 0.705934 + vertex_buffer: 0.000000 + vertex_buffer: -5.019567 + vertex_buffer: 5.601448 + vertex_buffer: 0.499977 + vertex_buffer: 0.719385 + vertex_buffer: 0.000000 + vertex_buffer: -5.365123 + vertex_buffer: 5.535441 + vertex_buffer: 0.499977 + vertex_buffer: 0.737019 + vertex_buffer: 0.000000 + vertex_buffer: -6.149624 + vertex_buffer: 5.071372 + vertex_buffer: 0.499968 + vertex_buffer: 0.781371 + vertex_buffer: 0.000000 + vertex_buffer: -1.501095 + vertex_buffer: 7.112196 + vertex_buffer: 0.499816 + vertex_buffer: 0.562981 + vertex_buffer: -0.416106 + vertex_buffer: -1.466449 + vertex_buffer: 6.447657 + vertex_buffer: 0.473773 + vertex_buffer: 0.573910 + vertex_buffer: -7.087960 + vertex_buffer: 5.434801 + vertex_buffer: 0.099620 + vertex_buffer: 0.104907 + vertex_buffer: 0.254141 + vertex_buffer: -2.628639 + vertex_buffer: 2.035898 + vertex_buffer: 3.848121 + vertex_buffer: 0.365930 + vertex_buffer: 0.409576 + vertex_buffer: -3.198363 + vertex_buffer: 1.985815 + vertex_buffer: 3.796952 + vertex_buffer: 0.338758 + vertex_buffer: 0.413025 + vertex_buffer: -3.775151 + vertex_buffer: 2.039402 + vertex_buffer: 3.646194 + vertex_buffer: 0.311120 + vertex_buffer: 0.409460 + vertex_buffer: -4.465819 + vertex_buffer: 2.422950 + vertex_buffer: 3.155168 + vertex_buffer: 0.274658 + vertex_buffer: 0.389131 + vertex_buffer: -2.164289 + vertex_buffer: 2.189867 + vertex_buffer: 3.851822 + vertex_buffer: 0.393362 + vertex_buffer: 0.403706 + vertex_buffer: -3.208229 + vertex_buffer: 3.223926 + vertex_buffer: 4.115822 + vertex_buffer: 0.345234 + vertex_buffer: 0.344011 + vertex_buffer: -2.673803 + vertex_buffer: 3.205337 + vertex_buffer: 4.092203 + vertex_buffer: 0.370094 + vertex_buffer: 0.346076 + vertex_buffer: -3.745193 + vertex_buffer: 3.165286 + vertex_buffer: 3.972409 + vertex_buffer: 0.319322 + vertex_buffer: 0.347265 + vertex_buffer: -4.161018 + vertex_buffer: 3.059069 + vertex_buffer: 3.719554 + vertex_buffer: 0.297903 + vertex_buffer: 0.353591 + vertex_buffer: -5.062006 + vertex_buffer: 1.934418 + vertex_buffer: 2.776093 + vertex_buffer: 0.247792 + vertex_buffer: 0.410810 + vertex_buffer: -2.266659 + vertex_buffer: -7.425768 + vertex_buffer: 4.389812 + vertex_buffer: 0.396889 + vertex_buffer: 0.842755 + vertex_buffer: -4.445859 + vertex_buffer: 2.663991 + vertex_buffer: 3.173422 + vertex_buffer: 0.280098 + vertex_buffer: 0.375600 + vertex_buffer: -7.214530 + vertex_buffer: 2.263009 + vertex_buffer: 0.073150 + vertex_buffer: 0.106310 + 
vertex_buffer: 0.399956 + vertex_buffer: -5.799793 + vertex_buffer: 2.349546 + vertex_buffer: 2.204059 + vertex_buffer: 0.209925 + vertex_buffer: 0.391353 + vertex_buffer: -2.844939 + vertex_buffer: -0.720868 + vertex_buffer: 4.433130 + vertex_buffer: 0.355808 + vertex_buffer: 0.534406 + vertex_buffer: -0.711452 + vertex_buffer: -3.329355 + vertex_buffer: 5.877044 + vertex_buffer: 0.471751 + vertex_buffer: 0.650404 + vertex_buffer: -0.606033 + vertex_buffer: -3.924562 + vertex_buffer: 5.444923 + vertex_buffer: 0.474155 + vertex_buffer: 0.680192 + vertex_buffer: -1.431615 + vertex_buffer: -3.500953 + vertex_buffer: 5.496189 + vertex_buffer: 0.439785 + vertex_buffer: 0.657229 + vertex_buffer: -1.914910 + vertex_buffer: -3.803146 + vertex_buffer: 5.028930 + vertex_buffer: 0.414617 + vertex_buffer: 0.666541 + vertex_buffer: -1.131043 + vertex_buffer: -3.973937 + vertex_buffer: 5.189648 + vertex_buffer: 0.450374 + vertex_buffer: 0.680861 + vertex_buffer: -1.563548 + vertex_buffer: -4.082763 + vertex_buffer: 4.842263 + vertex_buffer: 0.428771 + vertex_buffer: 0.682691 + vertex_buffer: -2.650112 + vertex_buffer: -5.003649 + vertex_buffer: 4.188483 + vertex_buffer: 0.374971 + vertex_buffer: 0.727805 + vertex_buffer: -0.427049 + vertex_buffer: -1.094134 + vertex_buffer: 7.360529 + vertex_buffer: 0.486717 + vertex_buffer: 0.547629 + vertex_buffer: -0.496396 + vertex_buffer: -0.475659 + vertex_buffer: 7.440358 + vertex_buffer: 0.485301 + vertex_buffer: 0.527395 + vertex_buffer: -5.253307 + vertex_buffer: 3.881582 + vertex_buffer: 3.363159 + vertex_buffer: 0.257765 + vertex_buffer: 0.314490 + vertex_buffer: -1.718698 + vertex_buffer: 0.974609 + vertex_buffer: 4.558359 + vertex_buffer: 0.401223 + vertex_buffer: 0.455172 + vertex_buffer: -1.608635 + vertex_buffer: -0.942516 + vertex_buffer: 5.814193 + vertex_buffer: 0.429819 + vertex_buffer: 0.548615 + vertex_buffer: -1.651267 + vertex_buffer: -0.610868 + vertex_buffer: 5.581319 + vertex_buffer: 0.421352 + vertex_buffer: 0.533741 + vertex_buffer: -4.765501 + vertex_buffer: -0.701554 + vertex_buffer: 3.534632 + vertex_buffer: 0.276896 + vertex_buffer: 0.532057 + vertex_buffer: -0.478306 + vertex_buffer: 0.295766 + vertex_buffer: 7.101013 + vertex_buffer: 0.483370 + vertex_buffer: 0.499587 + vertex_buffer: -3.734964 + vertex_buffer: 4.508230 + vertex_buffer: 4.550454 + vertex_buffer: 0.337212 + vertex_buffer: 0.282883 + vertex_buffer: -4.588603 + vertex_buffer: 4.302037 + vertex_buffer: 4.048484 + vertex_buffer: 0.296392 + vertex_buffer: 0.293243 + vertex_buffer: -6.279331 + vertex_buffer: 6.615427 + vertex_buffer: 1.425850 + vertex_buffer: 0.169295 + vertex_buffer: 0.193814 + vertex_buffer: -1.220941 + vertex_buffer: 4.142165 + vertex_buffer: 5.106035 + vertex_buffer: 0.447580 + vertex_buffer: 0.302610 + vertex_buffer: -2.193489 + vertex_buffer: 3.100317 + vertex_buffer: 4.000575 + vertex_buffer: 0.392390 + vertex_buffer: 0.353888 + vertex_buffer: -3.102642 + vertex_buffer: -4.352984 + vertex_buffer: 4.095905 + vertex_buffer: 0.354490 + vertex_buffer: 0.696784 + vertex_buffer: -6.719682 + vertex_buffer: -4.788645 + vertex_buffer: -1.745401 + vertex_buffer: 0.067305 + vertex_buffer: 0.730105 + vertex_buffer: -1.193824 + vertex_buffer: -1.306795 + vertex_buffer: 5.737747 + vertex_buffer: 0.442739 + vertex_buffer: 0.572826 + vertex_buffer: -0.729766 + vertex_buffer: -1.593712 + vertex_buffer: 5.833208 + vertex_buffer: 0.457098 + vertex_buffer: 0.584792 + vertex_buffer: -2.456206 + vertex_buffer: -4.342621 + vertex_buffer: 4.283884 + vertex_buffer: 0.381974 
+ vertex_buffer: 0.694711 + vertex_buffer: -2.204823 + vertex_buffer: -4.304508 + vertex_buffer: 4.162499 + vertex_buffer: 0.392389 + vertex_buffer: 0.694203 + vertex_buffer: -4.985894 + vertex_buffer: 4.802461 + vertex_buffer: 3.751977 + vertex_buffer: 0.277076 + vertex_buffer: 0.271932 + vertex_buffer: -1.592294 + vertex_buffer: -1.257709 + vertex_buffer: 5.456949 + vertex_buffer: 0.422552 + vertex_buffer: 0.563233 + vertex_buffer: -2.644548 + vertex_buffer: 4.524654 + vertex_buffer: 4.921559 + vertex_buffer: 0.385919 + vertex_buffer: 0.281364 + vertex_buffer: -2.760292 + vertex_buffer: 5.100971 + vertex_buffer: 5.015990 + vertex_buffer: 0.383103 + vertex_buffer: 0.255840 + vertex_buffer: -3.523964 + vertex_buffer: 8.005976 + vertex_buffer: 3.729163 + vertex_buffer: 0.331431 + vertex_buffer: 0.119714 + vertex_buffer: -5.599763 + vertex_buffer: 5.715470 + vertex_buffer: 2.724259 + vertex_buffer: 0.229924 + vertex_buffer: 0.232003 + vertex_buffer: -3.063932 + vertex_buffer: 6.566144 + vertex_buffer: 4.529981 + vertex_buffer: 0.364501 + vertex_buffer: 0.189114 + vertex_buffer: -5.720968 + vertex_buffer: 4.254584 + vertex_buffer: 2.830852 + vertex_buffer: 0.229622 + vertex_buffer: 0.299541 + vertex_buffer: -6.374393 + vertex_buffer: 4.785590 + vertex_buffer: 1.591691 + vertex_buffer: 0.173287 + vertex_buffer: 0.278748 + vertex_buffer: -0.672728 + vertex_buffer: -3.688016 + vertex_buffer: 5.737804 + vertex_buffer: 0.472879 + vertex_buffer: 0.666198 + vertex_buffer: -1.262560 + vertex_buffer: -3.787691 + vertex_buffer: 5.417779 + vertex_buffer: 0.446828 + vertex_buffer: 0.668527 + vertex_buffer: -1.732553 + vertex_buffer: -3.952767 + vertex_buffer: 5.000579 + vertex_buffer: 0.422762 + vertex_buffer: 0.673890 + vertex_buffer: -1.043625 + vertex_buffer: -1.464973 + vertex_buffer: 5.662455 + vertex_buffer: 0.445308 + vertex_buffer: 0.580066 + vertex_buffer: -2.321234 + vertex_buffer: -4.329069 + vertex_buffer: 4.258156 + vertex_buffer: 0.388103 + vertex_buffer: 0.693961 + vertex_buffer: -2.056846 + vertex_buffer: -4.477671 + vertex_buffer: 4.520883 + vertex_buffer: 0.403039 + vertex_buffer: 0.706540 + vertex_buffer: -2.153084 + vertex_buffer: -4.276322 + vertex_buffer: 4.038093 + vertex_buffer: 0.403629 + vertex_buffer: 0.693953 + vertex_buffer: -0.946874 + vertex_buffer: -1.035249 + vertex_buffer: 6.512274 + vertex_buffer: 0.460042 + vertex_buffer: 0.557139 + vertex_buffer: -1.469132 + vertex_buffer: -4.036351 + vertex_buffer: 4.604908 + vertex_buffer: 0.431158 + vertex_buffer: 0.692366 + vertex_buffer: -1.024340 + vertex_buffer: -3.989851 + vertex_buffer: 4.926693 + vertex_buffer: 0.452182 + vertex_buffer: 0.692366 + vertex_buffer: -0.533422 + vertex_buffer: -3.993222 + vertex_buffer: 5.138202 + vertex_buffer: 0.475387 + vertex_buffer: 0.692366 + vertex_buffer: -0.769720 + vertex_buffer: -6.095394 + vertex_buffer: 4.985883 + vertex_buffer: 0.465828 + vertex_buffer: 0.779190 + vertex_buffer: -0.699606 + vertex_buffer: -5.291850 + vertex_buffer: 5.448304 + vertex_buffer: 0.472329 + vertex_buffer: 0.736226 + vertex_buffer: -0.669687 + vertex_buffer: -4.949770 + vertex_buffer: 5.509612 + vertex_buffer: 0.473087 + vertex_buffer: 0.717857 + vertex_buffer: -0.630947 + vertex_buffer: -4.695101 + vertex_buffer: 5.449371 + vertex_buffer: 0.473122 + vertex_buffer: 0.704626 + vertex_buffer: -0.583218 + vertex_buffer: -4.517982 + vertex_buffer: 5.339869 + vertex_buffer: 0.473033 + vertex_buffer: 0.695278 + vertex_buffer: -1.537170 + vertex_buffer: -4.423206 + vertex_buffer: 4.745470 + vertex_buffer: 
0.427942 + vertex_buffer: 0.695278 + vertex_buffer: -1.615600 + vertex_buffer: -4.475942 + vertex_buffer: 4.813632 + vertex_buffer: 0.426479 + vertex_buffer: 0.703540 + vertex_buffer: -1.729053 + vertex_buffer: -4.618680 + vertex_buffer: 4.854463 + vertex_buffer: 0.423162 + vertex_buffer: 0.711846 + vertex_buffer: -1.838624 + vertex_buffer: -4.828746 + vertex_buffer: 4.823737 + vertex_buffer: 0.418309 + vertex_buffer: 0.720063 + vertex_buffer: -2.368250 + vertex_buffer: -3.106237 + vertex_buffer: 4.868096 + vertex_buffer: 0.390095 + vertex_buffer: 0.639573 + vertex_buffer: -7.542244 + vertex_buffer: -1.049282 + vertex_buffer: -2.431321 + vertex_buffer: 0.013954 + vertex_buffer: 0.560034 + vertex_buffer: 0.000000 + vertex_buffer: -1.724003 + vertex_buffer: 6.601390 + vertex_buffer: 0.499914 + vertex_buffer: 0.580147 + vertex_buffer: -1.826614 + vertex_buffer: -4.399531 + vertex_buffer: 4.399021 + vertex_buffer: 0.413200 + vertex_buffer: 0.695400 + vertex_buffer: -1.929558 + vertex_buffer: -4.411831 + vertex_buffer: 4.497052 + vertex_buffer: 0.409626 + vertex_buffer: 0.701823 + vertex_buffer: -0.597442 + vertex_buffer: -2.013686 + vertex_buffer: 5.866456 + vertex_buffer: 0.468080 + vertex_buffer: 0.601535 + vertex_buffer: -1.405627 + vertex_buffer: -1.714196 + vertex_buffer: 5.241087 + vertex_buffer: 0.422729 + vertex_buffer: 0.585985 + vertex_buffer: -0.662449 + vertex_buffer: -1.819321 + vertex_buffer: 5.863759 + vertex_buffer: 0.463080 + vertex_buffer: 0.593784 + vertex_buffer: -2.342340 + vertex_buffer: 0.572222 + vertex_buffer: 4.294303 + vertex_buffer: 0.372120 + vertex_buffer: 0.473414 + vertex_buffer: -3.327324 + vertex_buffer: 0.104863 + vertex_buffer: 4.113860 + vertex_buffer: 0.334562 + vertex_buffer: 0.496073 + vertex_buffer: -1.726175 + vertex_buffer: -0.919165 + vertex_buffer: 5.273355 + vertex_buffer: 0.411671 + vertex_buffer: 0.546965 + vertex_buffer: -5.133204 + vertex_buffer: 7.485602 + vertex_buffer: 2.660442 + vertex_buffer: 0.242176 + vertex_buffer: 0.147676 + vertex_buffer: -4.538641 + vertex_buffer: 6.319907 + vertex_buffer: 3.683424 + vertex_buffer: 0.290777 + vertex_buffer: 0.201446 + vertex_buffer: -3.986562 + vertex_buffer: 5.109487 + vertex_buffer: 4.466315 + vertex_buffer: 0.327338 + vertex_buffer: 0.256527 + vertex_buffer: -2.169681 + vertex_buffer: -5.440433 + vertex_buffer: 4.455874 + vertex_buffer: 0.399510 + vertex_buffer: 0.748921 + vertex_buffer: -1.395634 + vertex_buffer: 5.011963 + vertex_buffer: 5.316032 + vertex_buffer: 0.441728 + vertex_buffer: 0.261676 + vertex_buffer: -1.619500 + vertex_buffer: 6.599217 + vertex_buffer: 4.921106 + vertex_buffer: 0.429765 + vertex_buffer: 0.187834 + vertex_buffer: -1.891399 + vertex_buffer: 8.236377 + vertex_buffer: 4.274997 + vertex_buffer: 0.412198 + vertex_buffer: 0.108901 + vertex_buffer: -4.195832 + vertex_buffer: 2.235205 + vertex_buffer: 3.375099 + vertex_buffer: 0.288955 + vertex_buffer: 0.398952 + vertex_buffer: -5.733342 + vertex_buffer: 1.411738 + vertex_buffer: 2.431726 + vertex_buffer: 0.218937 + vertex_buffer: 0.435411 + vertex_buffer: -1.859887 + vertex_buffer: 2.355757 + vertex_buffer: 3.843181 + vertex_buffer: 0.412782 + vertex_buffer: 0.398970 + vertex_buffer: -4.988612 + vertex_buffer: 3.074654 + vertex_buffer: 3.083858 + vertex_buffer: 0.257135 + vertex_buffer: 0.355440 + vertex_buffer: -1.303263 + vertex_buffer: 1.416453 + vertex_buffer: 4.831091 + vertex_buffer: 0.427685 + vertex_buffer: 0.437961 + vertex_buffer: -1.305757 + vertex_buffer: -0.672779 + vertex_buffer: 6.415959 + vertex_buffer: 
0.448340 + vertex_buffer: 0.536936 + vertex_buffer: -6.465170 + vertex_buffer: 0.937119 + vertex_buffer: 1.689873 + vertex_buffer: 0.178560 + vertex_buffer: 0.457554 + vertex_buffer: -5.258659 + vertex_buffer: 0.945811 + vertex_buffer: 2.974312 + vertex_buffer: 0.247308 + vertex_buffer: 0.457194 + vertex_buffer: -4.432338 + vertex_buffer: 0.722096 + vertex_buffer: 3.522615 + vertex_buffer: 0.286267 + vertex_buffer: 0.467675 + vertex_buffer: -3.300681 + vertex_buffer: 0.861641 + vertex_buffer: 3.872784 + vertex_buffer: 0.332828 + vertex_buffer: 0.460712 + vertex_buffer: -2.430178 + vertex_buffer: 1.131492 + vertex_buffer: 4.039035 + vertex_buffer: 0.368756 + vertex_buffer: 0.447207 + vertex_buffer: -1.820731 + vertex_buffer: 1.467954 + vertex_buffer: 4.224124 + vertex_buffer: 0.398964 + vertex_buffer: 0.432655 + vertex_buffer: -0.563221 + vertex_buffer: 2.307693 + vertex_buffer: 5.566789 + vertex_buffer: 0.476410 + vertex_buffer: 0.405806 + vertex_buffer: -6.338145 + vertex_buffer: -0.529279 + vertex_buffer: 1.881175 + vertex_buffer: 0.189241 + vertex_buffer: 0.523924 + vertex_buffer: -5.587698 + vertex_buffer: 3.208071 + vertex_buffer: 2.687839 + vertex_buffer: 0.228962 + vertex_buffer: 0.348951 + vertex_buffer: -0.242624 + vertex_buffer: -1.462857 + vertex_buffer: 7.071491 + vertex_buffer: 0.490726 + vertex_buffer: 0.562401 + vertex_buffer: -1.611251 + vertex_buffer: 0.339326 + vertex_buffer: 4.895421 + vertex_buffer: 0.404670 + vertex_buffer: 0.485133 + vertex_buffer: -7.743095 + vertex_buffer: 2.364999 + vertex_buffer: -2.005167 + vertex_buffer: 0.019469 + vertex_buffer: 0.401564 + vertex_buffer: -1.391142 + vertex_buffer: 1.851048 + vertex_buffer: 4.448999 + vertex_buffer: 0.426243 + vertex_buffer: 0.420431 + vertex_buffer: -1.785794 + vertex_buffer: -0.978284 + vertex_buffer: 4.850470 + vertex_buffer: 0.396993 + vertex_buffer: 0.548797 + vertex_buffer: -4.670959 + vertex_buffer: 2.664461 + vertex_buffer: 3.084075 + vertex_buffer: 0.266470 + vertex_buffer: 0.376977 + vertex_buffer: -1.333970 + vertex_buffer: -0.283761 + vertex_buffer: 6.097047 + vertex_buffer: 0.439121 + vertex_buffer: 0.518958 + vertex_buffer: -7.270895 + vertex_buffer: -2.890917 + vertex_buffer: -2.252455 + vertex_buffer: 0.032314 + vertex_buffer: 0.644357 + vertex_buffer: -1.856432 + vertex_buffer: 2.585245 + vertex_buffer: 3.757904 + vertex_buffer: 0.419054 + vertex_buffer: 0.387155 + vertex_buffer: -0.923388 + vertex_buffer: 0.073076 + vertex_buffer: 6.671944 + vertex_buffer: 0.462783 + vertex_buffer: 0.505747 + vertex_buffer: -5.000589 + vertex_buffer: -6.135128 + vertex_buffer: 1.892523 + vertex_buffer: 0.238979 + vertex_buffer: 0.779745 + vertex_buffer: -5.085276 + vertex_buffer: -7.178590 + vertex_buffer: 0.714711 + vertex_buffer: 0.198221 + vertex_buffer: 0.831938 + vertex_buffer: -7.159291 + vertex_buffer: -0.811820 + vertex_buffer: -0.072044 + vertex_buffer: 0.107550 + vertex_buffer: 0.540755 + vertex_buffer: -5.843051 + vertex_buffer: -5.248023 + vertex_buffer: 0.924091 + vertex_buffer: 0.183610 + vertex_buffer: 0.740257 + vertex_buffer: -6.847258 + vertex_buffer: 3.662916 + vertex_buffer: 0.724695 + vertex_buffer: 0.134410 + vertex_buffer: 0.333683 + vertex_buffer: -2.412942 + vertex_buffer: -8.258853 + vertex_buffer: 4.119213 + vertex_buffer: 0.385764 + vertex_buffer: 0.883154 + vertex_buffer: -0.179909 + vertex_buffer: -1.689864 + vertex_buffer: 6.573301 + vertex_buffer: 0.490967 + vertex_buffer: 0.579378 + vertex_buffer: -2.103655 + vertex_buffer: -0.163946 + vertex_buffer: 4.566119 + vertex_buffer: 
0.382385 + vertex_buffer: 0.508573 + vertex_buffer: -6.407571 + vertex_buffer: 2.236021 + vertex_buffer: 1.560843 + vertex_buffer: 0.174399 + vertex_buffer: 0.397671 + vertex_buffer: -3.670075 + vertex_buffer: 2.360153 + vertex_buffer: 3.635230 + vertex_buffer: 0.318785 + vertex_buffer: 0.396235 + vertex_buffer: -3.177186 + vertex_buffer: 2.294265 + vertex_buffer: 3.775704 + vertex_buffer: 0.343364 + vertex_buffer: 0.400597 + vertex_buffer: -2.196121 + vertex_buffer: -4.598322 + vertex_buffer: 4.479786 + vertex_buffer: 0.396100 + vertex_buffer: 0.710217 + vertex_buffer: -6.234883 + vertex_buffer: -1.944430 + vertex_buffer: 1.663542 + vertex_buffer: 0.187885 + vertex_buffer: 0.588538 + vertex_buffer: -1.292924 + vertex_buffer: -9.295920 + vertex_buffer: 4.094063 + vertex_buffer: 0.430987 + vertex_buffer: 0.944065 + vertex_buffer: -3.210651 + vertex_buffer: -8.533278 + vertex_buffer: 2.802001 + vertex_buffer: 0.318993 + vertex_buffer: 0.898285 + vertex_buffer: -4.068926 + vertex_buffer: -7.993109 + vertex_buffer: 1.925119 + vertex_buffer: 0.266248 + vertex_buffer: 0.869701 + vertex_buffer: 0.000000 + vertex_buffer: 6.545390 + vertex_buffer: 5.027311 + vertex_buffer: 0.500023 + vertex_buffer: 0.190576 + vertex_buffer: 0.000000 + vertex_buffer: -9.403378 + vertex_buffer: 4.264492 + vertex_buffer: 0.499977 + vertex_buffer: 0.954453 + vertex_buffer: -2.724032 + vertex_buffer: 2.315802 + vertex_buffer: 3.777151 + vertex_buffer: 0.366170 + vertex_buffer: 0.398822 + vertex_buffer: -2.288460 + vertex_buffer: 2.398891 + vertex_buffer: 3.697603 + vertex_buffer: 0.393207 + vertex_buffer: 0.395537 + vertex_buffer: -1.998311 + vertex_buffer: 2.496547 + vertex_buffer: 3.689148 + vertex_buffer: 0.410373 + vertex_buffer: 0.391080 + vertex_buffer: -6.130040 + vertex_buffer: 3.399261 + vertex_buffer: 2.038516 + vertex_buffer: 0.194993 + vertex_buffer: 0.342102 + vertex_buffer: -2.288460 + vertex_buffer: 2.886504 + vertex_buffer: 3.775031 + vertex_buffer: 0.388665 + vertex_buffer: 0.362284 + vertex_buffer: -2.724032 + vertex_buffer: 2.961810 + vertex_buffer: 3.871767 + vertex_buffer: 0.365962 + vertex_buffer: 0.355971 + vertex_buffer: -3.177186 + vertex_buffer: 2.964136 + vertex_buffer: 3.876973 + vertex_buffer: 0.343364 + vertex_buffer: 0.355357 + vertex_buffer: -3.670075 + vertex_buffer: 2.927714 + vertex_buffer: 3.724325 + vertex_buffer: 0.318785 + vertex_buffer: 0.358340 + vertex_buffer: -4.018389 + vertex_buffer: 2.857357 + vertex_buffer: 3.482983 + vertex_buffer: 0.301415 + vertex_buffer: 0.363156 + vertex_buffer: -7.555811 + vertex_buffer: 4.106811 + vertex_buffer: -0.991917 + vertex_buffer: 0.058133 + vertex_buffer: 0.319076 + vertex_buffer: -4.018389 + vertex_buffer: 2.483695 + vertex_buffer: 3.440898 + vertex_buffer: 0.301415 + vertex_buffer: 0.387449 + vertex_buffer: 0.000000 + vertex_buffer: -2.521945 + vertex_buffer: 5.932265 + vertex_buffer: 0.499988 + vertex_buffer: 0.618434 + vertex_buffer: -1.776217 + vertex_buffer: -2.683946 + vertex_buffer: 5.213116 + vertex_buffer: 0.415838 + vertex_buffer: 0.624196 + vertex_buffer: -1.222237 + vertex_buffer: -1.182444 + vertex_buffer: 5.952465 + vertex_buffer: 0.445682 + vertex_buffer: 0.566077 + vertex_buffer: -0.731493 + vertex_buffer: -2.536683 + vertex_buffer: 5.815343 + vertex_buffer: 0.465844 + vertex_buffer: 0.620641 + vertex_buffer: 0.000000 + vertex_buffer: 3.271027 + vertex_buffer: 5.236015 + vertex_buffer: 0.499923 + vertex_buffer: 0.351524 + vertex_buffer: -4.135272 + vertex_buffer: -6.996638 + vertex_buffer: 2.671970 + vertex_buffer: 0.288719 
+ vertex_buffer: 0.819946 + vertex_buffer: -3.311811 + vertex_buffer: -7.660815 + vertex_buffer: 3.382963 + vertex_buffer: 0.335279 + vertex_buffer: 0.852820 + vertex_buffer: -1.313701 + vertex_buffer: -8.639995 + vertex_buffer: 4.702456 + vertex_buffer: 0.440512 + vertex_buffer: 0.902419 + vertex_buffer: -5.940524 + vertex_buffer: -6.223629 + vertex_buffer: -0.631468 + vertex_buffer: 0.128294 + vertex_buffer: 0.791941 + vertex_buffer: -1.998311 + vertex_buffer: 2.743838 + vertex_buffer: 3.744030 + vertex_buffer: 0.408772 + vertex_buffer: 0.373894 + vertex_buffer: -0.901447 + vertex_buffer: 1.236992 + vertex_buffer: 5.754256 + vertex_buffer: 0.455607 + vertex_buffer: 0.451801 + vertex_buffer: 0.000000 + vertex_buffer: -8.765243 + vertex_buffer: 4.891441 + vertex_buffer: 0.499877 + vertex_buffer: 0.908990 + vertex_buffer: -2.308977 + vertex_buffer: -8.974196 + vertex_buffer: 3.609070 + vertex_buffer: 0.375437 + vertex_buffer: 0.924192 + vertex_buffer: -6.954154 + vertex_buffer: -2.439843 + vertex_buffer: -0.131163 + vertex_buffer: 0.114210 + vertex_buffer: 0.615022 + vertex_buffer: -1.098819 + vertex_buffer: -4.458788 + vertex_buffer: 5.120727 + vertex_buffer: 0.448662 + vertex_buffer: 0.695278 + vertex_buffer: -1.181124 + vertex_buffer: -4.579996 + vertex_buffer: 5.189564 + vertex_buffer: 0.448020 + vertex_buffer: 0.704632 + vertex_buffer: -1.255818 + vertex_buffer: -4.787901 + vertex_buffer: 5.237051 + vertex_buffer: 0.447112 + vertex_buffer: 0.715808 + vertex_buffer: -1.325085 + vertex_buffer: -5.106507 + vertex_buffer: 5.205010 + vertex_buffer: 0.444832 + vertex_buffer: 0.730794 + vertex_buffer: -1.546388 + vertex_buffer: -5.819392 + vertex_buffer: 4.757893 + vertex_buffer: 0.430012 + vertex_buffer: 0.766809 + vertex_buffer: -1.953754 + vertex_buffer: -4.183892 + vertex_buffer: 4.431713 + vertex_buffer: 0.406787 + vertex_buffer: 0.685673 + vertex_buffer: -2.117802 + vertex_buffer: -4.137093 + vertex_buffer: 4.555096 + vertex_buffer: 0.400738 + vertex_buffer: 0.681069 + vertex_buffer: -2.285339 + vertex_buffer: -4.051196 + vertex_buffer: 4.582438 + vertex_buffer: 0.392400 + vertex_buffer: 0.677703 + vertex_buffer: -2.850160 + vertex_buffer: -3.665720 + vertex_buffer: 4.484994 + vertex_buffer: 0.367856 + vertex_buffer: 0.663919 + vertex_buffer: -5.278538 + vertex_buffer: -2.238942 + vertex_buffer: 2.861224 + vertex_buffer: 0.247923 + vertex_buffer: 0.601333 + vertex_buffer: -0.946709 + vertex_buffer: 1.907628 + vertex_buffer: 5.196779 + vertex_buffer: 0.452770 + vertex_buffer: 0.420850 + vertex_buffer: -1.314173 + vertex_buffer: 3.104912 + vertex_buffer: 4.231404 + vertex_buffer: 0.436392 + vertex_buffer: 0.359887 + vertex_buffer: -1.780000 + vertex_buffer: 2.860000 + vertex_buffer: 3.881555 + vertex_buffer: 0.416164 + vertex_buffer: 0.368714 + vertex_buffer: -1.845110 + vertex_buffer: -4.098880 + vertex_buffer: 4.247264 + vertex_buffer: 0.413386 + vertex_buffer: 0.692366 + vertex_buffer: -5.436187 + vertex_buffer: -4.030482 + vertex_buffer: 2.109852 + vertex_buffer: 0.228018 + vertex_buffer: 0.683572 + vertex_buffer: -0.766444 + vertex_buffer: 3.182131 + vertex_buffer: 4.861453 + vertex_buffer: 0.468268 + vertex_buffer: 0.352671 + vertex_buffer: -1.938616 + vertex_buffer: -6.614410 + vertex_buffer: 4.521085 + vertex_buffer: 0.411362 + vertex_buffer: 0.804327 + vertex_buffer: 0.000000 + vertex_buffer: 1.059413 + vertex_buffer: 6.774605 + vertex_buffer: 0.499989 + vertex_buffer: 0.469825 + vertex_buffer: -0.516573 + vertex_buffer: 1.583572 + vertex_buffer: 6.148363 + vertex_buffer: 
0.479154 + vertex_buffer: 0.442654 + vertex_buffer: 0.000000 + vertex_buffer: 1.728369 + vertex_buffer: 6.316750 + vertex_buffer: 0.499974 + vertex_buffer: 0.439637 + vertex_buffer: -1.246815 + vertex_buffer: 0.230297 + vertex_buffer: 5.681036 + vertex_buffer: 0.432112 + vertex_buffer: 0.493589 + vertex_buffer: 0.000000 + vertex_buffer: -7.942194 + vertex_buffer: 5.181173 + vertex_buffer: 0.499886 + vertex_buffer: 0.866917 + vertex_buffer: 0.000000 + vertex_buffer: -6.991499 + vertex_buffer: 5.153478 + vertex_buffer: 0.499913 + vertex_buffer: 0.821729 + vertex_buffer: -0.997827 + vertex_buffer: -6.930921 + vertex_buffer: 4.979576 + vertex_buffer: 0.456549 + vertex_buffer: 0.819201 + vertex_buffer: -3.288807 + vertex_buffer: -5.382514 + vertex_buffer: 3.795752 + vertex_buffer: 0.344549 + vertex_buffer: 0.745439 + vertex_buffer: -2.311631 + vertex_buffer: -1.566237 + vertex_buffer: 4.590085 + vertex_buffer: 0.378909 + vertex_buffer: 0.574010 + vertex_buffer: -2.680250 + vertex_buffer: -6.111567 + vertex_buffer: 4.096152 + vertex_buffer: 0.374293 + vertex_buffer: 0.780185 + vertex_buffer: -3.832928 + vertex_buffer: -1.537326 + vertex_buffer: 4.137731 + vertex_buffer: 0.319688 + vertex_buffer: 0.570738 + vertex_buffer: -2.961860 + vertex_buffer: -2.274215 + vertex_buffer: 4.440943 + vertex_buffer: 0.357155 + vertex_buffer: 0.604270 + vertex_buffer: -4.386901 + vertex_buffer: -2.683286 + vertex_buffer: 3.643886 + vertex_buffer: 0.295284 + vertex_buffer: 0.621581 + vertex_buffer: -1.217295 + vertex_buffer: -7.834465 + vertex_buffer: 4.969286 + vertex_buffer: 0.447750 + vertex_buffer: 0.862477 + vertex_buffer: -1.542374 + vertex_buffer: -0.136843 + vertex_buffer: 5.201008 + vertex_buffer: 0.410986 + vertex_buffer: 0.508723 + vertex_buffer: -3.878377 + vertex_buffer: -6.041764 + vertex_buffer: 3.311079 + vertex_buffer: 0.313951 + vertex_buffer: 0.775308 + vertex_buffer: -3.084037 + vertex_buffer: -6.809842 + vertex_buffer: 3.814195 + vertex_buffer: 0.354128 + vertex_buffer: 0.812553 + vertex_buffer: -3.747321 + vertex_buffer: -4.503545 + vertex_buffer: 3.726453 + vertex_buffer: 0.324548 + vertex_buffer: 0.703993 + vertex_buffer: -6.094129 + vertex_buffer: -3.205991 + vertex_buffer: 1.473482 + vertex_buffer: 0.189096 + vertex_buffer: 0.646300 + vertex_buffer: -4.588995 + vertex_buffer: -4.728726 + vertex_buffer: 2.983221 + vertex_buffer: 0.279777 + vertex_buffer: 0.714658 + vertex_buffer: -6.583231 + vertex_buffer: -3.941269 + vertex_buffer: 0.070268 + vertex_buffer: 0.133823 + vertex_buffer: 0.682701 + vertex_buffer: -3.492580 + vertex_buffer: -3.195820 + vertex_buffer: 4.130198 + vertex_buffer: 0.336768 + vertex_buffer: 0.644733 + vertex_buffer: -1.255543 + vertex_buffer: 0.802341 + vertex_buffer: 5.307551 + vertex_buffer: 0.429884 + vertex_buffer: 0.466522 + vertex_buffer: -1.126122 + vertex_buffer: -0.933602 + vertex_buffer: 6.538785 + vertex_buffer: 0.455528 + vertex_buffer: 0.548623 + vertex_buffer: -1.443109 + vertex_buffer: -1.142774 + vertex_buffer: 5.905127 + vertex_buffer: 0.437114 + vertex_buffer: 0.558896 + vertex_buffer: -0.923043 + vertex_buffer: -0.529042 + vertex_buffer: 7.003423 + vertex_buffer: 0.467288 + vertex_buffer: 0.529925 + vertex_buffer: -1.755386 + vertex_buffer: 3.529117 + vertex_buffer: 4.327696 + vertex_buffer: 0.414712 + vertex_buffer: 0.335220 + vertex_buffer: -2.632589 + vertex_buffer: 3.713828 + vertex_buffer: 4.364629 + vertex_buffer: 0.377046 + vertex_buffer: 0.322778 + vertex_buffer: -3.388062 + vertex_buffer: 3.721976 + vertex_buffer: 4.309028 + vertex_buffer: 
0.344108 + vertex_buffer: 0.320151 + vertex_buffer: -4.075766 + vertex_buffer: 3.675413 + vertex_buffer: 4.076063 + vertex_buffer: 0.312876 + vertex_buffer: 0.322332 + vertex_buffer: -4.622910 + vertex_buffer: 3.474691 + vertex_buffer: 3.646321 + vertex_buffer: 0.283526 + vertex_buffer: 0.333190 + vertex_buffer: -5.171755 + vertex_buffer: 2.535753 + vertex_buffer: 2.670867 + vertex_buffer: 0.241246 + vertex_buffer: 0.382786 + vertex_buffer: -7.297331 + vertex_buffer: 0.763172 + vertex_buffer: -0.048769 + vertex_buffer: 0.102986 + vertex_buffer: 0.468763 + vertex_buffer: -4.706828 + vertex_buffer: 1.651000 + vertex_buffer: 3.109532 + vertex_buffer: 0.267612 + vertex_buffer: 0.424560 + vertex_buffer: -4.071712 + vertex_buffer: 1.476821 + vertex_buffer: 3.476944 + vertex_buffer: 0.297879 + vertex_buffer: 0.433176 + vertex_buffer: -3.269817 + vertex_buffer: 1.470659 + vertex_buffer: 3.731945 + vertex_buffer: 0.333434 + vertex_buffer: 0.433878 + vertex_buffer: -2.527572 + vertex_buffer: 1.617311 + vertex_buffer: 3.865444 + vertex_buffer: 0.366427 + vertex_buffer: 0.426116 + vertex_buffer: -1.970894 + vertex_buffer: 1.858505 + vertex_buffer: 3.961782 + vertex_buffer: 0.396012 + vertex_buffer: 0.416696 + vertex_buffer: -1.579543 + vertex_buffer: 2.097941 + vertex_buffer: 4.084996 + vertex_buffer: 0.420121 + vertex_buffer: 0.410228 + vertex_buffer: -7.664182 + vertex_buffer: 0.673132 + vertex_buffer: -2.435867 + vertex_buffer: 0.007561 + vertex_buffer: 0.480777 + vertex_buffer: -1.397041 + vertex_buffer: -1.340139 + vertex_buffer: 5.630378 + vertex_buffer: 0.432949 + vertex_buffer: 0.569518 + vertex_buffer: -0.884838 + vertex_buffer: 0.658740 + vertex_buffer: 6.233232 + vertex_buffer: 0.458639 + vertex_buffer: 0.479089 + vertex_buffer: -0.767097 + vertex_buffer: -0.968035 + vertex_buffer: 7.077932 + vertex_buffer: 0.473466 + vertex_buffer: 0.545744 + vertex_buffer: -0.460213 + vertex_buffer: -1.334106 + vertex_buffer: 6.787447 + vertex_buffer: 0.476088 + vertex_buffer: 0.563830 + vertex_buffer: -0.748618 + vertex_buffer: -1.067994 + vertex_buffer: 6.798303 + vertex_buffer: 0.468472 + vertex_buffer: 0.555057 + vertex_buffer: -1.236408 + vertex_buffer: -1.585568 + vertex_buffer: 5.480490 + vertex_buffer: 0.433991 + vertex_buffer: 0.582362 + vertex_buffer: -0.387306 + vertex_buffer: -1.409990 + vertex_buffer: 6.957705 + vertex_buffer: 0.483518 + vertex_buffer: 0.562984 + vertex_buffer: -0.319925 + vertex_buffer: -1.607931 + vertex_buffer: 6.508676 + vertex_buffer: 0.482483 + vertex_buffer: 0.577849 + vertex_buffer: -1.639633 + vertex_buffer: 2.556298 + vertex_buffer: 3.863736 + vertex_buffer: 0.426450 + vertex_buffer: 0.389799 + vertex_buffer: -1.255645 + vertex_buffer: 2.467144 + vertex_buffer: 4.203800 + vertex_buffer: 0.438999 + vertex_buffer: 0.396495 + vertex_buffer: -1.031362 + vertex_buffer: 2.382663 + vertex_buffer: 4.615849 + vertex_buffer: 0.450067 + vertex_buffer: 0.400434 + vertex_buffer: -4.253081 + vertex_buffer: 2.772296 + vertex_buffer: 3.315305 + vertex_buffer: 0.289712 + vertex_buffer: 0.368253 + vertex_buffer: -4.530000 + vertex_buffer: 2.910000 + vertex_buffer: 3.339685 + vertex_buffer: 0.276670 + vertex_buffer: 0.363373 + vertex_buffer: 0.463928 + vertex_buffer: 0.955357 + vertex_buffer: 6.633583 + vertex_buffer: 0.517862 + vertex_buffer: 0.471948 + vertex_buffer: 4.253081 + vertex_buffer: 2.577646 + vertex_buffer: 3.279702 + vertex_buffer: 0.710288 + vertex_buffer: 0.380764 + vertex_buffer: 0.416106 + vertex_buffer: -1.466449 + vertex_buffer: 6.447657 + vertex_buffer: 0.526227 + 
vertex_buffer: 0.573910 + vertex_buffer: 7.087960 + vertex_buffer: 5.434801 + vertex_buffer: 0.099620 + vertex_buffer: 0.895093 + vertex_buffer: 0.254141 + vertex_buffer: 2.628639 + vertex_buffer: 2.035898 + vertex_buffer: 3.848121 + vertex_buffer: 0.634070 + vertex_buffer: 0.409576 + vertex_buffer: 3.198363 + vertex_buffer: 1.985815 + vertex_buffer: 3.796952 + vertex_buffer: 0.661242 + vertex_buffer: 0.413025 + vertex_buffer: 3.775151 + vertex_buffer: 2.039402 + vertex_buffer: 3.646194 + vertex_buffer: 0.688880 + vertex_buffer: 0.409460 + vertex_buffer: 4.465819 + vertex_buffer: 2.422950 + vertex_buffer: 3.155168 + vertex_buffer: 0.725342 + vertex_buffer: 0.389131 + vertex_buffer: 2.164289 + vertex_buffer: 2.189867 + vertex_buffer: 3.851822 + vertex_buffer: 0.606630 + vertex_buffer: 0.403705 + vertex_buffer: 3.208229 + vertex_buffer: 3.223926 + vertex_buffer: 4.115822 + vertex_buffer: 0.654766 + vertex_buffer: 0.344011 + vertex_buffer: 2.673803 + vertex_buffer: 3.205337 + vertex_buffer: 4.092203 + vertex_buffer: 0.629906 + vertex_buffer: 0.346076 + vertex_buffer: 3.745193 + vertex_buffer: 3.165286 + vertex_buffer: 3.972409 + vertex_buffer: 0.680678 + vertex_buffer: 0.347265 + vertex_buffer: 4.161018 + vertex_buffer: 3.059069 + vertex_buffer: 3.719554 + vertex_buffer: 0.702097 + vertex_buffer: 0.353591 + vertex_buffer: 5.062006 + vertex_buffer: 1.934418 + vertex_buffer: 2.776093 + vertex_buffer: 0.752212 + vertex_buffer: 0.410805 + vertex_buffer: 2.266659 + vertex_buffer: -7.425768 + vertex_buffer: 4.389812 + vertex_buffer: 0.602918 + vertex_buffer: 0.842863 + vertex_buffer: 4.445859 + vertex_buffer: 2.663991 + vertex_buffer: 3.173422 + vertex_buffer: 0.719902 + vertex_buffer: 0.375600 + vertex_buffer: 7.214530 + vertex_buffer: 2.263009 + vertex_buffer: 0.073150 + vertex_buffer: 0.893693 + vertex_buffer: 0.399960 + vertex_buffer: 5.799793 + vertex_buffer: 2.349546 + vertex_buffer: 2.204059 + vertex_buffer: 0.790082 + vertex_buffer: 0.391354 + vertex_buffer: 2.844939 + vertex_buffer: -0.720868 + vertex_buffer: 4.433130 + vertex_buffer: 0.643998 + vertex_buffer: 0.534488 + vertex_buffer: 0.711452 + vertex_buffer: -3.329355 + vertex_buffer: 5.877044 + vertex_buffer: 0.528249 + vertex_buffer: 0.650404 + vertex_buffer: 0.606033 + vertex_buffer: -3.924562 + vertex_buffer: 5.444923 + vertex_buffer: 0.525850 + vertex_buffer: 0.680191 + vertex_buffer: 1.431615 + vertex_buffer: -3.500953 + vertex_buffer: 5.496189 + vertex_buffer: 0.560215 + vertex_buffer: 0.657229 + vertex_buffer: 1.914910 + vertex_buffer: -3.803146 + vertex_buffer: 5.028930 + vertex_buffer: 0.585384 + vertex_buffer: 0.666541 + vertex_buffer: 1.131043 + vertex_buffer: -3.973937 + vertex_buffer: 5.189648 + vertex_buffer: 0.549626 + vertex_buffer: 0.680861 + vertex_buffer: 1.563548 + vertex_buffer: -4.082763 + vertex_buffer: 4.842263 + vertex_buffer: 0.571228 + vertex_buffer: 0.682692 + vertex_buffer: 2.650112 + vertex_buffer: -5.003649 + vertex_buffer: 4.188483 + vertex_buffer: 0.624852 + vertex_buffer: 0.728099 + vertex_buffer: 0.427049 + vertex_buffer: -1.094134 + vertex_buffer: 7.360529 + vertex_buffer: 0.513050 + vertex_buffer: 0.547282 + vertex_buffer: 0.496396 + vertex_buffer: -0.475659 + vertex_buffer: 7.440358 + vertex_buffer: 0.515097 + vertex_buffer: 0.527252 + vertex_buffer: 5.253307 + vertex_buffer: 3.881582 + vertex_buffer: 3.363159 + vertex_buffer: 0.742247 + vertex_buffer: 0.314507 + vertex_buffer: 1.718698 + vertex_buffer: 0.974609 + vertex_buffer: 4.558359 + vertex_buffer: 0.598631 + vertex_buffer: 0.454979 + 
vertex_buffer: 1.608635 + vertex_buffer: -0.942516 + vertex_buffer: 5.814193 + vertex_buffer: 0.570338 + vertex_buffer: 0.548575 + vertex_buffer: 1.651267 + vertex_buffer: -0.610868 + vertex_buffer: 5.581319 + vertex_buffer: 0.578632 + vertex_buffer: 0.533623 + vertex_buffer: 4.765501 + vertex_buffer: -0.701554 + vertex_buffer: 3.534632 + vertex_buffer: 0.723087 + vertex_buffer: 0.532054 + vertex_buffer: 0.478306 + vertex_buffer: 0.295766 + vertex_buffer: 7.101013 + vertex_buffer: 0.516446 + vertex_buffer: 0.499639 + vertex_buffer: 3.734964 + vertex_buffer: 4.508230 + vertex_buffer: 4.550454 + vertex_buffer: 0.662801 + vertex_buffer: 0.282918 + vertex_buffer: 4.588603 + vertex_buffer: 4.302037 + vertex_buffer: 4.048484 + vertex_buffer: 0.703624 + vertex_buffer: 0.293271 + vertex_buffer: 6.279331 + vertex_buffer: 6.615427 + vertex_buffer: 1.425850 + vertex_buffer: 0.830705 + vertex_buffer: 0.193814 + vertex_buffer: 1.220941 + vertex_buffer: 4.142165 + vertex_buffer: 5.106035 + vertex_buffer: 0.552386 + vertex_buffer: 0.302568 + vertex_buffer: 2.193489 + vertex_buffer: 3.100317 + vertex_buffer: 4.000575 + vertex_buffer: 0.607610 + vertex_buffer: 0.353888 + vertex_buffer: 3.102642 + vertex_buffer: -4.352984 + vertex_buffer: 4.095905 + vertex_buffer: 0.645429 + vertex_buffer: 0.696707 + vertex_buffer: 6.719682 + vertex_buffer: -4.788645 + vertex_buffer: -1.745401 + vertex_buffer: 0.932695 + vertex_buffer: 0.730105 + vertex_buffer: 1.193824 + vertex_buffer: -1.306795 + vertex_buffer: 5.737747 + vertex_buffer: 0.557261 + vertex_buffer: 0.572826 + vertex_buffer: 0.729766 + vertex_buffer: -1.593712 + vertex_buffer: 5.833208 + vertex_buffer: 0.542902 + vertex_buffer: 0.584792 + vertex_buffer: 2.456206 + vertex_buffer: -4.342621 + vertex_buffer: 4.283884 + vertex_buffer: 0.618026 + vertex_buffer: 0.694711 + vertex_buffer: 2.204823 + vertex_buffer: -4.304508 + vertex_buffer: 4.162499 + vertex_buffer: 0.607591 + vertex_buffer: 0.694203 + vertex_buffer: 4.985894 + vertex_buffer: 4.802461 + vertex_buffer: 3.751977 + vertex_buffer: 0.722943 + vertex_buffer: 0.271963 + vertex_buffer: 1.592294 + vertex_buffer: -1.257709 + vertex_buffer: 5.456949 + vertex_buffer: 0.577414 + vertex_buffer: 0.563167 + vertex_buffer: 2.644548 + vertex_buffer: 4.524654 + vertex_buffer: 4.921559 + vertex_buffer: 0.614083 + vertex_buffer: 0.281387 + vertex_buffer: 2.760292 + vertex_buffer: 5.100971 + vertex_buffer: 5.015990 + vertex_buffer: 0.616907 + vertex_buffer: 0.255886 + vertex_buffer: 3.523964 + vertex_buffer: 8.005976 + vertex_buffer: 3.729163 + vertex_buffer: 0.668509 + vertex_buffer: 0.119914 + vertex_buffer: 5.599763 + vertex_buffer: 5.715470 + vertex_buffer: 2.724259 + vertex_buffer: 0.770092 + vertex_buffer: 0.232021 + vertex_buffer: 3.063932 + vertex_buffer: 6.566144 + vertex_buffer: 4.529981 + vertex_buffer: 0.635536 + vertex_buffer: 0.189249 + vertex_buffer: 5.720968 + vertex_buffer: 4.254584 + vertex_buffer: 2.830852 + vertex_buffer: 0.770391 + vertex_buffer: 0.299556 + vertex_buffer: 6.374393 + vertex_buffer: 4.785590 + vertex_buffer: 1.591691 + vertex_buffer: 0.826722 + vertex_buffer: 0.278755 + vertex_buffer: 0.672728 + vertex_buffer: -3.688016 + vertex_buffer: 5.737804 + vertex_buffer: 0.527121 + vertex_buffer: 0.666198 + vertex_buffer: 1.262560 + vertex_buffer: -3.787691 + vertex_buffer: 5.417779 + vertex_buffer: 0.553172 + vertex_buffer: 0.668527 + vertex_buffer: 1.732553 + vertex_buffer: -3.952767 + vertex_buffer: 5.000579 + vertex_buffer: 0.577238 + vertex_buffer: 0.673890 + vertex_buffer: 1.043625 + 
vertex_buffer: -1.464973 + vertex_buffer: 5.662455 + vertex_buffer: 0.554692 + vertex_buffer: 0.580066 + vertex_buffer: 2.321234 + vertex_buffer: -4.329069 + vertex_buffer: 4.258156 + vertex_buffer: 0.611897 + vertex_buffer: 0.693961 + vertex_buffer: 2.056846 + vertex_buffer: -4.477671 + vertex_buffer: 4.520883 + vertex_buffer: 0.596961 + vertex_buffer: 0.706540 + vertex_buffer: 2.153084 + vertex_buffer: -4.276322 + vertex_buffer: 4.038093 + vertex_buffer: 0.596371 + vertex_buffer: 0.693953 + vertex_buffer: 0.946874 + vertex_buffer: -1.035249 + vertex_buffer: 6.512274 + vertex_buffer: 0.539958 + vertex_buffer: 0.557139 + vertex_buffer: 1.469132 + vertex_buffer: -4.036351 + vertex_buffer: 4.604908 + vertex_buffer: 0.568842 + vertex_buffer: 0.692366 + vertex_buffer: 1.024340 + vertex_buffer: -3.989851 + vertex_buffer: 4.926693 + vertex_buffer: 0.547818 + vertex_buffer: 0.692366 + vertex_buffer: 0.533422 + vertex_buffer: -3.993222 + vertex_buffer: 5.138202 + vertex_buffer: 0.524613 + vertex_buffer: 0.692366 + vertex_buffer: 0.769720 + vertex_buffer: -6.095394 + vertex_buffer: 4.985883 + vertex_buffer: 0.534090 + vertex_buffer: 0.779141 + vertex_buffer: 0.699606 + vertex_buffer: -5.291850 + vertex_buffer: 5.448304 + vertex_buffer: 0.527671 + vertex_buffer: 0.736226 + vertex_buffer: 0.669687 + vertex_buffer: -4.949770 + vertex_buffer: 5.509612 + vertex_buffer: 0.526913 + vertex_buffer: 0.717857 + vertex_buffer: 0.630947 + vertex_buffer: -4.695101 + vertex_buffer: 5.449371 + vertex_buffer: 0.526878 + vertex_buffer: 0.704626 + vertex_buffer: 0.583218 + vertex_buffer: -4.517982 + vertex_buffer: 5.339869 + vertex_buffer: 0.526967 + vertex_buffer: 0.695278 + vertex_buffer: 1.537170 + vertex_buffer: -4.423206 + vertex_buffer: 4.745470 + vertex_buffer: 0.572058 + vertex_buffer: 0.695278 + vertex_buffer: 1.615600 + vertex_buffer: -4.475942 + vertex_buffer: 4.813632 + vertex_buffer: 0.573521 + vertex_buffer: 0.703540 + vertex_buffer: 1.729053 + vertex_buffer: -4.618680 + vertex_buffer: 4.854463 + vertex_buffer: 0.576838 + vertex_buffer: 0.711846 + vertex_buffer: 1.838624 + vertex_buffer: -4.828746 + vertex_buffer: 4.823737 + vertex_buffer: 0.581691 + vertex_buffer: 0.720063 + vertex_buffer: 2.368250 + vertex_buffer: -3.106237 + vertex_buffer: 4.868096 + vertex_buffer: 0.609945 + vertex_buffer: 0.639910 + vertex_buffer: 7.542244 + vertex_buffer: -1.049282 + vertex_buffer: -2.431321 + vertex_buffer: 0.986046 + vertex_buffer: 0.560034 + vertex_buffer: 1.826614 + vertex_buffer: -4.399531 + vertex_buffer: 4.399021 + vertex_buffer: 0.586800 + vertex_buffer: 0.695400 + vertex_buffer: 1.929558 + vertex_buffer: -4.411831 + vertex_buffer: 4.497052 + vertex_buffer: 0.590372 + vertex_buffer: 0.701823 + vertex_buffer: 0.597442 + vertex_buffer: -2.013686 + vertex_buffer: 5.866456 + vertex_buffer: 0.531915 + vertex_buffer: 0.601537 + vertex_buffer: 1.405627 + vertex_buffer: -1.714196 + vertex_buffer: 5.241087 + vertex_buffer: 0.577268 + vertex_buffer: 0.585935 + vertex_buffer: 0.662449 + vertex_buffer: -1.819321 + vertex_buffer: 5.863759 + vertex_buffer: 0.536915 + vertex_buffer: 0.593786 + vertex_buffer: 2.342340 + vertex_buffer: 0.572222 + vertex_buffer: 4.294303 + vertex_buffer: 0.627543 + vertex_buffer: 0.473352 + vertex_buffer: 3.327324 + vertex_buffer: 0.104863 + vertex_buffer: 4.113860 + vertex_buffer: 0.665586 + vertex_buffer: 0.495951 + vertex_buffer: 1.726175 + vertex_buffer: -0.919165 + vertex_buffer: 5.273355 + vertex_buffer: 0.588354 + vertex_buffer: 0.546862 + vertex_buffer: 5.133204 + vertex_buffer: 
7.485602 + vertex_buffer: 2.660442 + vertex_buffer: 0.757824 + vertex_buffer: 0.147676 + vertex_buffer: 4.538641 + vertex_buffer: 6.319907 + vertex_buffer: 3.683424 + vertex_buffer: 0.709250 + vertex_buffer: 0.201508 + vertex_buffer: 3.986562 + vertex_buffer: 5.109487 + vertex_buffer: 4.466315 + vertex_buffer: 0.672684 + vertex_buffer: 0.256581 + vertex_buffer: 2.169681 + vertex_buffer: -5.440433 + vertex_buffer: 4.455874 + vertex_buffer: 0.600409 + vertex_buffer: 0.749005 + vertex_buffer: 1.395634 + vertex_buffer: 5.011963 + vertex_buffer: 5.316032 + vertex_buffer: 0.558266 + vertex_buffer: 0.261672 + vertex_buffer: 1.619500 + vertex_buffer: 6.599217 + vertex_buffer: 4.921106 + vertex_buffer: 0.570304 + vertex_buffer: 0.187871 + vertex_buffer: 1.891399 + vertex_buffer: 8.236377 + vertex_buffer: 4.274997 + vertex_buffer: 0.588166 + vertex_buffer: 0.109044 + vertex_buffer: 4.195832 + vertex_buffer: 2.235205 + vertex_buffer: 3.375099 + vertex_buffer: 0.711045 + vertex_buffer: 0.398952 + vertex_buffer: 5.733342 + vertex_buffer: 1.411738 + vertex_buffer: 2.431726 + vertex_buffer: 0.781070 + vertex_buffer: 0.435405 + vertex_buffer: 1.859887 + vertex_buffer: 2.355757 + vertex_buffer: 3.843181 + vertex_buffer: 0.587247 + vertex_buffer: 0.398932 + vertex_buffer: 4.988612 + vertex_buffer: 3.074654 + vertex_buffer: 3.083858 + vertex_buffer: 0.742870 + vertex_buffer: 0.355446 + vertex_buffer: 1.303263 + vertex_buffer: 1.416453 + vertex_buffer: 4.831091 + vertex_buffer: 0.572156 + vertex_buffer: 0.437652 + vertex_buffer: 1.305757 + vertex_buffer: -0.672779 + vertex_buffer: 6.415959 + vertex_buffer: 0.551868 + vertex_buffer: 0.536570 + vertex_buffer: 6.465170 + vertex_buffer: 0.937119 + vertex_buffer: 1.689873 + vertex_buffer: 0.821442 + vertex_buffer: 0.457556 + vertex_buffer: 5.258659 + vertex_buffer: 0.945811 + vertex_buffer: 2.974312 + vertex_buffer: 0.752702 + vertex_buffer: 0.457182 + vertex_buffer: 4.432338 + vertex_buffer: 0.722096 + vertex_buffer: 3.522615 + vertex_buffer: 0.713757 + vertex_buffer: 0.467627 + vertex_buffer: 3.300681 + vertex_buffer: 0.861641 + vertex_buffer: 3.872784 + vertex_buffer: 0.667113 + vertex_buffer: 0.460673 + vertex_buffer: 2.430178 + vertex_buffer: 1.131492 + vertex_buffer: 4.039035 + vertex_buffer: 0.631101 + vertex_buffer: 0.447154 + vertex_buffer: 1.820731 + vertex_buffer: 1.467954 + vertex_buffer: 4.224124 + vertex_buffer: 0.600862 + vertex_buffer: 0.432473 + vertex_buffer: 0.563221 + vertex_buffer: 2.307693 + vertex_buffer: 5.566789 + vertex_buffer: 0.523481 + vertex_buffer: 0.405627 + vertex_buffer: 6.338145 + vertex_buffer: -0.529279 + vertex_buffer: 1.881175 + vertex_buffer: 0.810748 + vertex_buffer: 0.523926 + vertex_buffer: 5.587698 + vertex_buffer: 3.208071 + vertex_buffer: 2.687839 + vertex_buffer: 0.771046 + vertex_buffer: 0.348959 + vertex_buffer: 0.242624 + vertex_buffer: -1.462857 + vertex_buffer: 7.071491 + vertex_buffer: 0.509127 + vertex_buffer: 0.562718 + vertex_buffer: 1.611251 + vertex_buffer: 0.339326 + vertex_buffer: 4.895421 + vertex_buffer: 0.595293 + vertex_buffer: 0.485024 + vertex_buffer: 7.743095 + vertex_buffer: 2.364999 + vertex_buffer: -2.005167 + vertex_buffer: 0.980531 + vertex_buffer: 0.401564 + vertex_buffer: 1.391142 + vertex_buffer: 1.851048 + vertex_buffer: 4.448999 + vertex_buffer: 0.573500 + vertex_buffer: 0.420000 + vertex_buffer: 1.785794 + vertex_buffer: -0.978284 + vertex_buffer: 4.850470 + vertex_buffer: 0.602995 + vertex_buffer: 0.548688 + vertex_buffer: 4.670959 + vertex_buffer: 2.664461 + vertex_buffer: 3.084075 + 
vertex_buffer: 0.733530 + vertex_buffer: 0.376977 + vertex_buffer: 1.333970 + vertex_buffer: -0.283761 + vertex_buffer: 6.097047 + vertex_buffer: 0.560611 + vertex_buffer: 0.519017 + vertex_buffer: 7.270895 + vertex_buffer: -2.890917 + vertex_buffer: -2.252455 + vertex_buffer: 0.967686 + vertex_buffer: 0.644357 + vertex_buffer: 1.856432 + vertex_buffer: 2.585245 + vertex_buffer: 3.757904 + vertex_buffer: 0.580985 + vertex_buffer: 0.387160 + vertex_buffer: 0.923388 + vertex_buffer: 0.073076 + vertex_buffer: 6.671944 + vertex_buffer: 0.537728 + vertex_buffer: 0.505385 + vertex_buffer: 5.000589 + vertex_buffer: -6.135128 + vertex_buffer: 1.892523 + vertex_buffer: 0.760966 + vertex_buffer: 0.779753 + vertex_buffer: 5.085276 + vertex_buffer: -7.178590 + vertex_buffer: 0.714711 + vertex_buffer: 0.801779 + vertex_buffer: 0.831938 + vertex_buffer: 7.159291 + vertex_buffer: -0.811820 + vertex_buffer: -0.072044 + vertex_buffer: 0.892441 + vertex_buffer: 0.540761 + vertex_buffer: 5.843051 + vertex_buffer: -5.248023 + vertex_buffer: 0.924091 + vertex_buffer: 0.816351 + vertex_buffer: 0.740260 + vertex_buffer: 6.847258 + vertex_buffer: 3.662916 + vertex_buffer: 0.724695 + vertex_buffer: 0.865595 + vertex_buffer: 0.333687 + vertex_buffer: 2.412942 + vertex_buffer: -8.258853 + vertex_buffer: 4.119213 + vertex_buffer: 0.614074 + vertex_buffer: 0.883246 + vertex_buffer: 0.179909 + vertex_buffer: -1.689864 + vertex_buffer: 6.573301 + vertex_buffer: 0.508953 + vertex_buffer: 0.579438 + vertex_buffer: 2.103655 + vertex_buffer: -0.163946 + vertex_buffer: 4.566119 + vertex_buffer: 0.617942 + vertex_buffer: 0.508316 + vertex_buffer: 6.407571 + vertex_buffer: 2.236021 + vertex_buffer: 1.560843 + vertex_buffer: 0.825608 + vertex_buffer: 0.397675 + vertex_buffer: 3.670075 + vertex_buffer: 2.360153 + vertex_buffer: 3.635230 + vertex_buffer: 0.681215 + vertex_buffer: 0.396235 + vertex_buffer: 3.177186 + vertex_buffer: 2.294265 + vertex_buffer: 3.775704 + vertex_buffer: 0.656636 + vertex_buffer: 0.400597 + vertex_buffer: 2.196121 + vertex_buffer: -4.598322 + vertex_buffer: 4.479786 + vertex_buffer: 0.603900 + vertex_buffer: 0.710217 + vertex_buffer: 6.234883 + vertex_buffer: -1.944430 + vertex_buffer: 1.663542 + vertex_buffer: 0.812086 + vertex_buffer: 0.588539 + vertex_buffer: 1.292924 + vertex_buffer: -9.295920 + vertex_buffer: 4.094063 + vertex_buffer: 0.568013 + vertex_buffer: 0.944565 + vertex_buffer: 3.210651 + vertex_buffer: -8.533278 + vertex_buffer: 2.802001 + vertex_buffer: 0.681008 + vertex_buffer: 0.898285 + vertex_buffer: 4.068926 + vertex_buffer: -7.993109 + vertex_buffer: 1.925119 + vertex_buffer: 0.733752 + vertex_buffer: 0.869701 + vertex_buffer: 2.724032 + vertex_buffer: 2.315802 + vertex_buffer: 3.777151 + vertex_buffer: 0.633830 + vertex_buffer: 0.398822 + vertex_buffer: 2.288460 + vertex_buffer: 2.398891 + vertex_buffer: 3.697603 + vertex_buffer: 0.606793 + vertex_buffer: 0.395537 + vertex_buffer: 1.998311 + vertex_buffer: 2.496547 + vertex_buffer: 3.689148 + vertex_buffer: 0.589660 + vertex_buffer: 0.391062 + vertex_buffer: 6.130040 + vertex_buffer: 3.399261 + vertex_buffer: 2.038516 + vertex_buffer: 0.805016 + vertex_buffer: 0.342108 + vertex_buffer: 2.288460 + vertex_buffer: 2.886504 + vertex_buffer: 3.775031 + vertex_buffer: 0.611335 + vertex_buffer: 0.362284 + vertex_buffer: 2.724032 + vertex_buffer: 2.961810 + vertex_buffer: 3.871767 + vertex_buffer: 0.634038 + vertex_buffer: 0.355971 + vertex_buffer: 3.177186 + vertex_buffer: 2.964136 + vertex_buffer: 3.876973 + vertex_buffer: 0.656636 + 
vertex_buffer: 0.355357 + vertex_buffer: 3.670075 + vertex_buffer: 2.927714 + vertex_buffer: 3.724325 + vertex_buffer: 0.681215 + vertex_buffer: 0.358340 + vertex_buffer: 4.018389 + vertex_buffer: 2.857357 + vertex_buffer: 3.482983 + vertex_buffer: 0.698585 + vertex_buffer: 0.363156 + vertex_buffer: 7.555811 + vertex_buffer: 4.106811 + vertex_buffer: -0.991917 + vertex_buffer: 0.941867 + vertex_buffer: 0.319076 + vertex_buffer: 4.018389 + vertex_buffer: 2.483695 + vertex_buffer: 3.440898 + vertex_buffer: 0.698585 + vertex_buffer: 0.387449 + vertex_buffer: 1.776217 + vertex_buffer: -2.683946 + vertex_buffer: 5.213116 + vertex_buffer: 0.584177 + vertex_buffer: 0.624107 + vertex_buffer: 1.222237 + vertex_buffer: -1.182444 + vertex_buffer: 5.952465 + vertex_buffer: 0.554318 + vertex_buffer: 0.566077 + vertex_buffer: 0.731493 + vertex_buffer: -2.536683 + vertex_buffer: 5.815343 + vertex_buffer: 0.534154 + vertex_buffer: 0.620640 + vertex_buffer: 4.135272 + vertex_buffer: -6.996638 + vertex_buffer: 2.671970 + vertex_buffer: 0.711218 + vertex_buffer: 0.819975 + vertex_buffer: 3.311811 + vertex_buffer: -7.660815 + vertex_buffer: 3.382963 + vertex_buffer: 0.664630 + vertex_buffer: 0.852871 + vertex_buffer: 1.313701 + vertex_buffer: -8.639995 + vertex_buffer: 4.702456 + vertex_buffer: 0.559100 + vertex_buffer: 0.902632 + vertex_buffer: 5.940524 + vertex_buffer: -6.223629 + vertex_buffer: -0.631468 + vertex_buffer: 0.871706 + vertex_buffer: 0.791941 + vertex_buffer: 1.998311 + vertex_buffer: 2.743838 + vertex_buffer: 3.744030 + vertex_buffer: 0.591234 + vertex_buffer: 0.373894 + vertex_buffer: 0.901447 + vertex_buffer: 1.236992 + vertex_buffer: 5.754256 + vertex_buffer: 0.544341 + vertex_buffer: 0.451584 + vertex_buffer: 2.308977 + vertex_buffer: -8.974196 + vertex_buffer: 3.609070 + vertex_buffer: 0.624563 + vertex_buffer: 0.924192 + vertex_buffer: 6.954154 + vertex_buffer: -2.439843 + vertex_buffer: -0.131163 + vertex_buffer: 0.885770 + vertex_buffer: 0.615029 + vertex_buffer: 1.098819 + vertex_buffer: -4.458788 + vertex_buffer: 5.120727 + vertex_buffer: 0.551338 + vertex_buffer: 0.695278 + vertex_buffer: 1.181124 + vertex_buffer: -4.579996 + vertex_buffer: 5.189564 + vertex_buffer: 0.551980 + vertex_buffer: 0.704632 + vertex_buffer: 1.255818 + vertex_buffer: -4.787901 + vertex_buffer: 5.237051 + vertex_buffer: 0.552888 + vertex_buffer: 0.715808 + vertex_buffer: 1.325085 + vertex_buffer: -5.106507 + vertex_buffer: 5.205010 + vertex_buffer: 0.555168 + vertex_buffer: 0.730794 + vertex_buffer: 1.546388 + vertex_buffer: -5.819392 + vertex_buffer: 4.757893 + vertex_buffer: 0.569944 + vertex_buffer: 0.767035 + vertex_buffer: 1.953754 + vertex_buffer: -4.183892 + vertex_buffer: 4.431713 + vertex_buffer: 0.593203 + vertex_buffer: 0.685676 + vertex_buffer: 2.117802 + vertex_buffer: -4.137093 + vertex_buffer: 4.555096 + vertex_buffer: 0.599262 + vertex_buffer: 0.681069 + vertex_buffer: 2.285339 + vertex_buffer: -4.051196 + vertex_buffer: 4.582438 + vertex_buffer: 0.607600 + vertex_buffer: 0.677703 + vertex_buffer: 2.850160 + vertex_buffer: -3.665720 + vertex_buffer: 4.484994 + vertex_buffer: 0.631938 + vertex_buffer: 0.663500 + vertex_buffer: 5.278538 + vertex_buffer: -2.238942 + vertex_buffer: 2.861224 + vertex_buffer: 0.752033 + vertex_buffer: 0.601315 + vertex_buffer: 0.946709 + vertex_buffer: 1.907628 + vertex_buffer: 5.196779 + vertex_buffer: 0.547226 + vertex_buffer: 0.420395 + vertex_buffer: 1.314173 + vertex_buffer: 3.104912 + vertex_buffer: 4.231404 + vertex_buffer: 0.563544 + vertex_buffer: 
0.359828 + vertex_buffer: 1.780000 + vertex_buffer: 2.860000 + vertex_buffer: 3.881555 + vertex_buffer: 0.583841 + vertex_buffer: 0.368714 + vertex_buffer: 1.845110 + vertex_buffer: -4.098880 + vertex_buffer: 4.247264 + vertex_buffer: 0.586614 + vertex_buffer: 0.692366 + vertex_buffer: 5.436187 + vertex_buffer: -4.030482 + vertex_buffer: 2.109852 + vertex_buffer: 0.771915 + vertex_buffer: 0.683578 + vertex_buffer: 0.766444 + vertex_buffer: 3.182131 + vertex_buffer: 4.861453 + vertex_buffer: 0.531597 + vertex_buffer: 0.352483 + vertex_buffer: 1.938616 + vertex_buffer: -6.614410 + vertex_buffer: 4.521085 + vertex_buffer: 0.588371 + vertex_buffer: 0.804441 + vertex_buffer: 0.516573 + vertex_buffer: 1.583572 + vertex_buffer: 6.148363 + vertex_buffer: 0.520797 + vertex_buffer: 0.442565 + vertex_buffer: 1.246815 + vertex_buffer: 0.230297 + vertex_buffer: 5.681036 + vertex_buffer: 0.567985 + vertex_buffer: 0.493479 + vertex_buffer: 0.997827 + vertex_buffer: -6.930921 + vertex_buffer: 4.979576 + vertex_buffer: 0.543283 + vertex_buffer: 0.819255 + vertex_buffer: 3.288807 + vertex_buffer: -5.382514 + vertex_buffer: 3.795752 + vertex_buffer: 0.655317 + vertex_buffer: 0.745515 + vertex_buffer: 2.311631 + vertex_buffer: -1.566237 + vertex_buffer: 4.590085 + vertex_buffer: 0.621009 + vertex_buffer: 0.574018 + vertex_buffer: 2.680250 + vertex_buffer: -6.111567 + vertex_buffer: 4.096152 + vertex_buffer: 0.625560 + vertex_buffer: 0.780312 + vertex_buffer: 3.832928 + vertex_buffer: -1.537326 + vertex_buffer: 4.137731 + vertex_buffer: 0.680198 + vertex_buffer: 0.570719 + vertex_buffer: 2.961860 + vertex_buffer: -2.274215 + vertex_buffer: 4.440943 + vertex_buffer: 0.642764 + vertex_buffer: 0.604338 + vertex_buffer: 4.386901 + vertex_buffer: -2.683286 + vertex_buffer: 3.643886 + vertex_buffer: 0.704663 + vertex_buffer: 0.621530 + vertex_buffer: 1.217295 + vertex_buffer: -7.834465 + vertex_buffer: 4.969286 + vertex_buffer: 0.552012 + vertex_buffer: 0.862592 + vertex_buffer: 1.542374 + vertex_buffer: -0.136843 + vertex_buffer: 5.201008 + vertex_buffer: 0.589072 + vertex_buffer: 0.508637 + vertex_buffer: 3.878377 + vertex_buffer: -6.041764 + vertex_buffer: 3.311079 + vertex_buffer: 0.685945 + vertex_buffer: 0.775357 + vertex_buffer: 3.084037 + vertex_buffer: -6.809842 + vertex_buffer: 3.814195 + vertex_buffer: 0.645735 + vertex_buffer: 0.812640 + vertex_buffer: 3.747321 + vertex_buffer: -4.503545 + vertex_buffer: 3.726453 + vertex_buffer: 0.675343 + vertex_buffer: 0.703978 + vertex_buffer: 6.094129 + vertex_buffer: -3.205991 + vertex_buffer: 1.473482 + vertex_buffer: 0.810858 + vertex_buffer: 0.646305 + vertex_buffer: 4.588995 + vertex_buffer: -4.728726 + vertex_buffer: 2.983221 + vertex_buffer: 0.720122 + vertex_buffer: 0.714667 + vertex_buffer: 6.583231 + vertex_buffer: -3.941269 + vertex_buffer: 0.070268 + vertex_buffer: 0.866152 + vertex_buffer: 0.682705 + vertex_buffer: 3.492580 + vertex_buffer: -3.195820 + vertex_buffer: 4.130198 + vertex_buffer: 0.663187 + vertex_buffer: 0.644597 + vertex_buffer: 1.255543 + vertex_buffer: 0.802341 + vertex_buffer: 5.307551 + vertex_buffer: 0.570082 + vertex_buffer: 0.466326 + vertex_buffer: 1.126122 + vertex_buffer: -0.933602 + vertex_buffer: 6.538785 + vertex_buffer: 0.544562 + vertex_buffer: 0.548376 + vertex_buffer: 1.443109 + vertex_buffer: -1.142774 + vertex_buffer: 5.905127 + vertex_buffer: 0.562759 + vertex_buffer: 0.558785 + vertex_buffer: 0.923043 + vertex_buffer: -0.529042 + vertex_buffer: 7.003423 + vertex_buffer: 0.531987 + vertex_buffer: 0.530140 + 
vertex_buffer: 1.755386 + vertex_buffer: 3.529117 + vertex_buffer: 4.327696 + vertex_buffer: 0.585271 + vertex_buffer: 0.335177 + vertex_buffer: 2.632589 + vertex_buffer: 3.713828 + vertex_buffer: 4.364629 + vertex_buffer: 0.622953 + vertex_buffer: 0.322779 + vertex_buffer: 3.388062 + vertex_buffer: 3.721976 + vertex_buffer: 4.309028 + vertex_buffer: 0.655896 + vertex_buffer: 0.320163 + vertex_buffer: 4.075766 + vertex_buffer: 3.675413 + vertex_buffer: 4.076063 + vertex_buffer: 0.687132 + vertex_buffer: 0.322346 + vertex_buffer: 4.622910 + vertex_buffer: 3.474691 + vertex_buffer: 3.646321 + vertex_buffer: 0.716482 + vertex_buffer: 0.333201 + vertex_buffer: 5.171755 + vertex_buffer: 2.535753 + vertex_buffer: 2.670867 + vertex_buffer: 0.758757 + vertex_buffer: 0.382787 + vertex_buffer: 7.297331 + vertex_buffer: 0.763172 + vertex_buffer: -0.048769 + vertex_buffer: 0.897013 + vertex_buffer: 0.468769 + vertex_buffer: 4.706828 + vertex_buffer: 1.651000 + vertex_buffer: 3.109532 + vertex_buffer: 0.732392 + vertex_buffer: 0.424547 + vertex_buffer: 4.071712 + vertex_buffer: 1.476821 + vertex_buffer: 3.476944 + vertex_buffer: 0.702114 + vertex_buffer: 0.433163 + vertex_buffer: 3.269817 + vertex_buffer: 1.470659 + vertex_buffer: 3.731945 + vertex_buffer: 0.666525 + vertex_buffer: 0.433866 + vertex_buffer: 2.527572 + vertex_buffer: 1.617311 + vertex_buffer: 3.865444 + vertex_buffer: 0.633505 + vertex_buffer: 0.426088 + vertex_buffer: 1.970894 + vertex_buffer: 1.858505 + vertex_buffer: 3.961782 + vertex_buffer: 0.603876 + vertex_buffer: 0.416587 + vertex_buffer: 1.579543 + vertex_buffer: 2.097941 + vertex_buffer: 4.084996 + vertex_buffer: 0.579658 + vertex_buffer: 0.409945 + vertex_buffer: 7.664182 + vertex_buffer: 0.673132 + vertex_buffer: -2.435867 + vertex_buffer: 0.992440 + vertex_buffer: 0.480777 + vertex_buffer: 1.397041 + vertex_buffer: -1.340139 + vertex_buffer: 5.630378 + vertex_buffer: 0.567192 + vertex_buffer: 0.569420 + vertex_buffer: 0.884838 + vertex_buffer: 0.658740 + vertex_buffer: 6.233232 + vertex_buffer: 0.541366 + vertex_buffer: 0.478899 + vertex_buffer: 0.767097 + vertex_buffer: -0.968035 + vertex_buffer: 7.077932 + vertex_buffer: 0.526564 + vertex_buffer: 0.546118 + vertex_buffer: 0.460213 + vertex_buffer: -1.334106 + vertex_buffer: 6.787447 + vertex_buffer: 0.523913 + vertex_buffer: 0.563830 + vertex_buffer: 0.748618 + vertex_buffer: -1.067994 + vertex_buffer: 6.798303 + vertex_buffer: 0.531529 + vertex_buffer: 0.555057 + vertex_buffer: 1.236408 + vertex_buffer: -1.585568 + vertex_buffer: 5.480490 + vertex_buffer: 0.566036 + vertex_buffer: 0.582329 + vertex_buffer: 0.387306 + vertex_buffer: -1.409990 + vertex_buffer: 6.957705 + vertex_buffer: 0.516311 + vertex_buffer: 0.563054 + vertex_buffer: 0.319925 + vertex_buffer: -1.607931 + vertex_buffer: 6.508676 + vertex_buffer: 0.517472 + vertex_buffer: 0.577877 + vertex_buffer: 1.639633 + vertex_buffer: 2.556298 + vertex_buffer: 3.863736 + vertex_buffer: 0.573595 + vertex_buffer: 0.389807 + vertex_buffer: 1.255645 + vertex_buffer: 2.467144 + vertex_buffer: 4.203800 + vertex_buffer: 0.560698 + vertex_buffer: 0.395332 + vertex_buffer: 1.031362 + vertex_buffer: 2.382663 + vertex_buffer: 4.615849 + vertex_buffer: 0.549756 + vertex_buffer: 0.399751 + vertex_buffer: 4.253081 + vertex_buffer: 2.772296 + vertex_buffer: 3.315305 + vertex_buffer: 0.710288 + vertex_buffer: 0.368253 + vertex_buffer: 4.530000 + vertex_buffer: 2.910000 + vertex_buffer: 3.339685 + vertex_buffer: 0.723330 + vertex_buffer: 0.363373 + index_buffer: 173 + index_buffer: 
155 + index_buffer: 133 + index_buffer: 246 + index_buffer: 33 + index_buffer: 7 + index_buffer: 382 + index_buffer: 398 + index_buffer: 362 + index_buffer: 263 + index_buffer: 466 + index_buffer: 249 + index_buffer: 308 + index_buffer: 415 + index_buffer: 324 + index_buffer: 78 + index_buffer: 95 + index_buffer: 191 + index_buffer: 356 + index_buffer: 389 + index_buffer: 264 + index_buffer: 127 + index_buffer: 34 + index_buffer: 162 + index_buffer: 368 + index_buffer: 264 + index_buffer: 389 + index_buffer: 139 + index_buffer: 162 + index_buffer: 34 + index_buffer: 267 + index_buffer: 0 + index_buffer: 302 + index_buffer: 37 + index_buffer: 72 + index_buffer: 0 + index_buffer: 11 + index_buffer: 302 + index_buffer: 0 + index_buffer: 11 + index_buffer: 0 + index_buffer: 72 + index_buffer: 349 + index_buffer: 451 + index_buffer: 350 + index_buffer: 120 + index_buffer: 121 + index_buffer: 231 + index_buffer: 452 + index_buffer: 350 + index_buffer: 451 + index_buffer: 232 + index_buffer: 231 + index_buffer: 121 + index_buffer: 267 + index_buffer: 302 + index_buffer: 269 + index_buffer: 37 + index_buffer: 39 + index_buffer: 72 + index_buffer: 303 + index_buffer: 269 + index_buffer: 302 + index_buffer: 73 + index_buffer: 72 + index_buffer: 39 + index_buffer: 357 + index_buffer: 343 + index_buffer: 350 + index_buffer: 128 + index_buffer: 121 + index_buffer: 114 + index_buffer: 277 + index_buffer: 350 + index_buffer: 343 + index_buffer: 47 + index_buffer: 114 + index_buffer: 121 + index_buffer: 350 + index_buffer: 452 + index_buffer: 357 + index_buffer: 121 + index_buffer: 128 + index_buffer: 232 + index_buffer: 453 + index_buffer: 357 + index_buffer: 452 + index_buffer: 233 + index_buffer: 232 + index_buffer: 128 + index_buffer: 299 + index_buffer: 333 + index_buffer: 297 + index_buffer: 69 + index_buffer: 67 + index_buffer: 104 + index_buffer: 332 + index_buffer: 297 + index_buffer: 333 + index_buffer: 103 + index_buffer: 104 + index_buffer: 67 + index_buffer: 175 + index_buffer: 152 + index_buffer: 396 + index_buffer: 175 + index_buffer: 171 + index_buffer: 152 + index_buffer: 377 + index_buffer: 396 + index_buffer: 152 + index_buffer: 148 + index_buffer: 152 + index_buffer: 171 + index_buffer: 381 + index_buffer: 384 + index_buffer: 382 + index_buffer: 154 + index_buffer: 155 + index_buffer: 157 + index_buffer: 398 + index_buffer: 382 + index_buffer: 384 + index_buffer: 173 + index_buffer: 157 + index_buffer: 155 + index_buffer: 280 + index_buffer: 347 + index_buffer: 330 + index_buffer: 50 + index_buffer: 101 + index_buffer: 118 + index_buffer: 348 + index_buffer: 330 + index_buffer: 347 + index_buffer: 119 + index_buffer: 118 + index_buffer: 101 + index_buffer: 269 + index_buffer: 303 + index_buffer: 270 + index_buffer: 39 + index_buffer: 40 + index_buffer: 73 + index_buffer: 304 + index_buffer: 270 + index_buffer: 303 + index_buffer: 74 + index_buffer: 73 + index_buffer: 40 + index_buffer: 9 + index_buffer: 336 + index_buffer: 151 + index_buffer: 9 + index_buffer: 151 + index_buffer: 107 + index_buffer: 337 + index_buffer: 151 + index_buffer: 336 + index_buffer: 108 + index_buffer: 107 + index_buffer: 151 + index_buffer: 344 + index_buffer: 278 + index_buffer: 360 + index_buffer: 115 + index_buffer: 131 + index_buffer: 48 + index_buffer: 279 + index_buffer: 360 + index_buffer: 278 + index_buffer: 49 + index_buffer: 48 + index_buffer: 131 + index_buffer: 262 + index_buffer: 431 + index_buffer: 418 + index_buffer: 32 + index_buffer: 194 + index_buffer: 211 + index_buffer: 424 + index_buffer: 
418 + index_buffer: 431 + index_buffer: 204 + index_buffer: 211 + index_buffer: 194 + index_buffer: 304 + index_buffer: 408 + index_buffer: 270 + index_buffer: 74 + index_buffer: 40 + index_buffer: 184 + index_buffer: 409 + index_buffer: 270 + index_buffer: 408 + index_buffer: 185 + index_buffer: 184 + index_buffer: 40 + index_buffer: 272 + index_buffer: 310 + index_buffer: 407 + index_buffer: 42 + index_buffer: 183 + index_buffer: 80 + index_buffer: 415 + index_buffer: 407 + index_buffer: 310 + index_buffer: 191 + index_buffer: 80 + index_buffer: 183 + index_buffer: 322 + index_buffer: 270 + index_buffer: 410 + index_buffer: 92 + index_buffer: 186 + index_buffer: 40 + index_buffer: 409 + index_buffer: 410 + index_buffer: 270 + index_buffer: 185 + index_buffer: 40 + index_buffer: 186 + index_buffer: 347 + index_buffer: 449 + index_buffer: 348 + index_buffer: 118 + index_buffer: 119 + index_buffer: 229 + index_buffer: 450 + index_buffer: 348 + index_buffer: 449 + index_buffer: 230 + index_buffer: 229 + index_buffer: 119 + index_buffer: 434 + index_buffer: 432 + index_buffer: 430 + index_buffer: 214 + index_buffer: 210 + index_buffer: 212 + index_buffer: 422 + index_buffer: 430 + index_buffer: 432 + index_buffer: 202 + index_buffer: 212 + index_buffer: 210 + index_buffer: 313 + index_buffer: 314 + index_buffer: 18 + index_buffer: 83 + index_buffer: 18 + index_buffer: 84 + index_buffer: 17 + index_buffer: 18 + index_buffer: 314 + index_buffer: 17 + index_buffer: 84 + index_buffer: 18 + index_buffer: 307 + index_buffer: 375 + index_buffer: 306 + index_buffer: 77 + index_buffer: 76 + index_buffer: 146 + index_buffer: 291 + index_buffer: 306 + index_buffer: 375 + index_buffer: 61 + index_buffer: 146 + index_buffer: 76 + index_buffer: 259 + index_buffer: 387 + index_buffer: 260 + index_buffer: 29 + index_buffer: 30 + index_buffer: 160 + index_buffer: 388 + index_buffer: 260 + index_buffer: 387 + index_buffer: 161 + index_buffer: 160 + index_buffer: 30 + index_buffer: 286 + index_buffer: 414 + index_buffer: 384 + index_buffer: 56 + index_buffer: 157 + index_buffer: 190 + index_buffer: 398 + index_buffer: 384 + index_buffer: 414 + index_buffer: 173 + index_buffer: 190 + index_buffer: 157 + index_buffer: 418 + index_buffer: 424 + index_buffer: 406 + index_buffer: 194 + index_buffer: 182 + index_buffer: 204 + index_buffer: 335 + index_buffer: 406 + index_buffer: 424 + index_buffer: 106 + index_buffer: 204 + index_buffer: 182 + index_buffer: 367 + index_buffer: 416 + index_buffer: 364 + index_buffer: 138 + index_buffer: 135 + index_buffer: 192 + index_buffer: 434 + index_buffer: 364 + index_buffer: 416 + index_buffer: 214 + index_buffer: 192 + index_buffer: 135 + index_buffer: 391 + index_buffer: 423 + index_buffer: 327 + index_buffer: 165 + index_buffer: 98 + index_buffer: 203 + index_buffer: 358 + index_buffer: 327 + index_buffer: 423 + index_buffer: 129 + index_buffer: 203 + index_buffer: 98 + index_buffer: 298 + index_buffer: 301 + index_buffer: 284 + index_buffer: 68 + index_buffer: 54 + index_buffer: 71 + index_buffer: 251 + index_buffer: 284 + index_buffer: 301 + index_buffer: 21 + index_buffer: 71 + index_buffer: 54 + index_buffer: 4 + index_buffer: 275 + index_buffer: 5 + index_buffer: 4 + index_buffer: 5 + index_buffer: 45 + index_buffer: 281 + index_buffer: 5 + index_buffer: 275 + index_buffer: 51 + index_buffer: 45 + index_buffer: 5 + index_buffer: 254 + index_buffer: 373 + index_buffer: 253 + index_buffer: 24 + index_buffer: 23 + index_buffer: 144 + index_buffer: 374 + index_buffer: 253 + 
index_buffer: 373
+ index_buffer: 145
+ index_buffer: 144
[... the remaining face-mesh `index_buffer` triangle-index entries of this data file are omitted here as a placeholder; the full list continues uninterrupted in the original diff ...]
+ index_buffer: 310
+ index_buffer: 324
+ index_buffer: 191
+ index_buffer: 95
+ index_buffer: 80
+}
diff --git
a/mediapipe/modules/face_geometry/effect_renderer_calculator.cc b/mediapipe/modules/face_geometry/effect_renderer_calculator.cc new file mode 100644 index 0000000..f353b8f --- /dev/null +++ b/mediapipe/modules/face_geometry/effect_renderer_calculator.cc @@ -0,0 +1,284 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "absl/types/optional.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/opencv_core_inc.h" // NOTYPO +#include "mediapipe/framework/port/opencv_imgcodecs_inc.h" // NOTYPO +#include "mediapipe/framework/port/opencv_imgproc_inc.h" // NOTYPO +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/gpu/gl_calculator_helper.h" +#include "mediapipe/gpu/gpu_buffer.h" +#include "mediapipe/modules/face_geometry/effect_renderer_calculator.pb.h" +#include "mediapipe/modules/face_geometry/libs/effect_renderer.h" +#include "mediapipe/modules/face_geometry/libs/validation_utils.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" +#include "mediapipe/util/resource_util.h" + +namespace mediapipe { +namespace { + +static constexpr char kEnvironmentTag[] = "ENVIRONMENT"; +static constexpr char kImageGpuTag[] = "IMAGE_GPU"; +static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY"; + +// A calculator that renders a visual effect for multiple faces. +// +// Inputs: +// IMAGE_GPU (`GpuBuffer`, required): +// A buffer containing input image. +// +// MULTI_FACE_GEOMETRY (`std::vector`, optional): +// A vector of face geometry data. +// +// If absent, the input GPU buffer is copied over into the output GPU buffer +// without any effect being rendered. +// +// Input side packets: +// ENVIRONMENT (`face_geometry::Environment`, required) +// Describes an environment; includes the camera frame origin point location +// as well as virtual camera parameters. +// +// Output: +// IMAGE_GPU (`GpuBuffer`, required): +// A buffer with a visual effect being rendered for multiple faces. +// +// Options: +// effect_texture_path (`string`, required): +// Defines a path for the visual effect texture file. The effect texture is +// later rendered on top of the effect mesh. +// +// The texture file format must be supported by the OpenCV image decoder. It +// must also define either an RGB or an RGBA texture. +// +// effect_mesh_3d_path (`string`, optional): +// Defines a path for the visual effect mesh 3D file. The effect mesh is +// later "attached" to the face and is driven by the face pose +// transformation matrix. 
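+//
+// Example node config (an illustrative sketch only; the stream names
+// "input_image", "multi_face_geometry", "output_image" and the two file
+// paths are placeholders, not names defined by this change):
+//
+// node {
+//   calculator: "FaceGeometryEffectRendererCalculator"
+//   input_side_packet: "ENVIRONMENT:environment"
+//   input_stream: "IMAGE_GPU:input_image"
+//   input_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
+//   output_stream: "IMAGE_GPU:output_image"
+//   options: {
+//     [mediapipe.FaceGeometryEffectRendererCalculatorOptions.ext] {
+//       effect_texture_path: "path/to/effect_texture.png"
+//       effect_mesh_3d_path: "path/to/effect_mesh_3d.binarypb"
+//     }
+//   }
+// }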
+// +// The mesh 3D file format must be the binary `face_geometry.Mesh3d` proto. +// +// If is not present, the runtime face mesh will be used as the effect mesh +// - this mode is handy for facepaint effects. +// +class EffectRendererCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc) { + MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)) + << "Failed to update contract for the GPU helper!"; + + cc->InputSidePackets() + .Tag(kEnvironmentTag) + .Set(); + cc->Inputs().Tag(kImageGpuTag).Set(); + cc->Inputs() + .Tag(kMultiFaceGeometryTag) + .Set>(); + cc->Outputs().Tag(kImageGpuTag).Set(); + + return mediapipe::GlCalculatorHelper::UpdateContract(cc); + } + + absl::Status Open(CalculatorContext* cc) override { + cc->SetOffset(mediapipe::TimestampDiff(0)); + + MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)) + << "Failed to open the GPU helper!"; + return gpu_helper_.RunInGlContext([&]() -> absl::Status { + const auto& options = + cc->Options(); + + const auto& environment = cc->InputSidePackets() + .Tag(kEnvironmentTag) + .Get(); + + MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment)) + << "Invalid environment!"; + + absl::optional effect_mesh_3d; + if (options.has_effect_mesh_3d_path()) { + ASSIGN_OR_RETURN(effect_mesh_3d, + ReadMesh3dFromFile(options.effect_mesh_3d_path()), + _ << "Failed to read the effect 3D mesh from file!"); + + MP_RETURN_IF_ERROR(face_geometry::ValidateMesh3d(*effect_mesh_3d)) + << "Invalid effect 3D mesh!"; + } + + ASSIGN_OR_RETURN(ImageFrame effect_texture, + ReadTextureFromFile(options.effect_texture_path()), + _ << "Failed to read the effect texture from file!"); + + ASSIGN_OR_RETURN(effect_renderer_, + CreateEffectRenderer(environment, effect_mesh_3d, + std::move(effect_texture)), + _ << "Failed to create the effect renderer!"); + + return absl::OkStatus(); + }); + } + + absl::Status Process(CalculatorContext* cc) override { + // The `IMAGE_GPU` stream is required to have a non-empty packet. In case + // this requirement is not met, there's nothing to be processed at the + // current timestamp. + if (cc->Inputs().Tag(kImageGpuTag).IsEmpty()) { + return absl::OkStatus(); + } + + return gpu_helper_.RunInGlContext([this, cc]() -> absl::Status { + const auto& input_gpu_buffer = + cc->Inputs().Tag(kImageGpuTag).Get(); + + GlTexture input_gl_texture = + gpu_helper_.CreateSourceTexture(input_gpu_buffer); + + GlTexture output_gl_texture = gpu_helper_.CreateDestinationTexture( + input_gl_texture.width(), input_gl_texture.height()); + + std::vector empty_multi_face_geometry; + const auto& multi_face_geometry = + cc->Inputs().Tag(kMultiFaceGeometryTag).IsEmpty() + ? empty_multi_face_geometry + : cc->Inputs() + .Tag(kMultiFaceGeometryTag) + .Get>(); + + // Validate input multi face geometry data. 
+ for (const face_geometry::FaceGeometry& face_geometry : + multi_face_geometry) { + MP_RETURN_IF_ERROR(face_geometry::ValidateFaceGeometry(face_geometry)) + << "Invalid face geometry!"; + } + + MP_RETURN_IF_ERROR(effect_renderer_->RenderEffect( + multi_face_geometry, input_gl_texture.width(), + input_gl_texture.height(), input_gl_texture.target(), + input_gl_texture.name(), output_gl_texture.target(), + output_gl_texture.name())) + << "Failed to render the effect!"; + + std::unique_ptr output_gpu_buffer = + output_gl_texture.GetFrame(); + + cc->Outputs() + .Tag(kImageGpuTag) + .AddPacket(mediapipe::Adopt(output_gpu_buffer.release()) + .At(cc->InputTimestamp())); + + output_gl_texture.Release(); + input_gl_texture.Release(); + + return absl::OkStatus(); + }); + } + + ~EffectRendererCalculator() { + gpu_helper_.RunInGlContext([this]() { effect_renderer_.reset(); }); + } + + private: + static absl::StatusOr ReadTextureFromFile( + const std::string& texture_path) { + ASSIGN_OR_RETURN(std::string texture_blob, + ReadContentBlobFromFile(texture_path), + _ << "Failed to read texture blob from file!"); + + // Use OpenCV image decoding functionality to finish reading the texture. + std::vector texture_blob_vector(texture_blob.begin(), + texture_blob.end()); + cv::Mat decoded_mat = + cv::imdecode(texture_blob_vector, cv::IMREAD_UNCHANGED); + + RET_CHECK(decoded_mat.type() == CV_8UC3 || decoded_mat.type() == CV_8UC4) + << "Texture must have `char` as the underlying type and " + "must have either 3 or 4 channels!"; + + ImageFormat::Format image_format = ImageFormat::UNKNOWN; + cv::Mat output_mat; + switch (decoded_mat.channels()) { + case 3: + image_format = ImageFormat::SRGB; + cv::cvtColor(decoded_mat, output_mat, cv::COLOR_BGR2RGB); + break; + + case 4: + image_format = ImageFormat::SRGBA; + cv::cvtColor(decoded_mat, output_mat, cv::COLOR_BGRA2RGBA); + break; + + default: + RET_CHECK_FAIL() + << "Unexpected number of channels; expected 3 or 4, got " + << decoded_mat.channels() << "!"; + } + + ImageFrame output_image_frame(image_format, output_mat.size().width, + output_mat.size().height, + ImageFrame::kGlDefaultAlignmentBoundary); + + output_mat.copyTo(formats::MatView(&output_image_frame)); + + return output_image_frame; + } + + static absl::StatusOr ReadMesh3dFromFile( + const std::string& mesh_3d_path) { + ASSIGN_OR_RETURN(std::string mesh_3d_blob, + ReadContentBlobFromFile(mesh_3d_path), + _ << "Failed to read mesh 3D blob from file!"); + + face_geometry::Mesh3d mesh_3d; + RET_CHECK(mesh_3d.ParseFromString(mesh_3d_blob)) + << "Failed to parse a mesh 3D proto from a binary blob!"; + + return mesh_3d; + } + + static absl::StatusOr ReadContentBlobFromFile( + const std::string& unresolved_path) { + ASSIGN_OR_RETURN(std::string resolved_path, + mediapipe::PathToResourceAsFile(unresolved_path), + _ << "Failed to resolve path! Path = " << unresolved_path); + + std::string content_blob; + MP_RETURN_IF_ERROR( + mediapipe::GetResourceContents(resolved_path, &content_blob)) + << "Failed to read content blob! 
Resolved path = " << resolved_path; + + return content_blob; + } + + mediapipe::GlCalculatorHelper gpu_helper_; + std::unique_ptr effect_renderer_; +}; + +} // namespace + +using FaceGeometryEffectRendererCalculator = EffectRendererCalculator; + +REGISTER_CALCULATOR(FaceGeometryEffectRendererCalculator); + +} // namespace mediapipe diff --git a/mediapipe/modules/face_geometry/effect_renderer_calculator.proto b/mediapipe/modules/face_geometry/effect_renderer_calculator.proto new file mode 100644 index 0000000..6c23903 --- /dev/null +++ b/mediapipe/modules/face_geometry/effect_renderer_calculator.proto @@ -0,0 +1,46 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator_options.proto"; + +message FaceGeometryEffectRendererCalculatorOptions { + extend CalculatorOptions { + optional FaceGeometryEffectRendererCalculatorOptions ext = 323693808; + } + + // Defines a path for the visual effect texture file. The effect texture is + // later rendered on top of the effect mesh. + // + // Please be aware about the difference between the CPU texture memory layout + // and the GPU texture sampler coordinate space. This renderer follows + // conventions discussed here: https://open.gl/textures + // + // The texture file format must be supported by the OpenCV image decoder. It + // must also define either an RGB or an RGBA texture. + optional string effect_texture_path = 1; + + // Defines a path for the visual effect mesh 3D file. The effect mesh is later + // "attached" to the face and is driven by the face pose transformation + // matrix. + // + // The mesh 3D file format must be the binary `face_system.Mesh3d` proto. + // + // If is not present, the runtime face mesh will be used as the effect mesh + // - this mode is handy for facepaint effects. + optional string effect_mesh_3d_path = 2; +} diff --git a/mediapipe/modules/face_geometry/env_generator_calculator.cc b/mediapipe/modules/face_geometry/env_generator_calculator.cc new file mode 100644 index 0000000..2e95a66 --- /dev/null +++ b/mediapipe/modules/face_geometry/env_generator_calculator.cc @@ -0,0 +1,81 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
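+
+// Illustrative usage sketch for the calculator defined below. The side-packet
+// name "environment" is a placeholder, and the fields of the
+// `face_geometry.Environment` message are defined in
+// mediapipe/modules/face_geometry/protos/environment.proto rather than shown
+// here.
+//
+// node {
+//   calculator: "FaceGeometryEnvGeneratorCalculator"
+//   output_side_packet: "ENVIRONMENT:environment"
+//   options: {
+//     [mediapipe.FaceGeometryEnvGeneratorCalculatorOptions.ext] {
+//       environment: {
+//         # Origin-point and virtual-camera parameters go here; see
+//         # protos/environment.proto for the available fields.
+//       }
+//     }
+//   }
+// }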
+ +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/modules/face_geometry/env_generator_calculator.pb.h" +#include "mediapipe/modules/face_geometry/libs/validation_utils.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" + +namespace mediapipe { +namespace { + +static constexpr char kEnvironmentTag[] = "ENVIRONMENT"; + +// A calculator that generates an environment, which describes a virtual scene. +// +// Output side packets: +// ENVIRONMENT (`face_geometry::Environment`, required) +// Describes an environment; includes the camera frame origin point location +// as well as virtual camera parameters. +// +// Options: +// environment (`face_geometry.Environment`, required): +// Defines an environment to be packed as the output side packet. +// +// Must be valid (for details, please refer to the proto message definition +// comments and/or `modules/face_geometry/libs/validation_utils.h/cc`) +// +class EnvGeneratorCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc) { + cc->OutputSidePackets() + .Tag(kEnvironmentTag) + .Set(); + return absl::OkStatus(); + } + + absl::Status Open(CalculatorContext* cc) override { + cc->SetOffset(mediapipe::TimestampDiff(0)); + + const face_geometry::Environment& environment = + cc->Options().environment(); + + MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment)) + << "Invalid environment!"; + + cc->OutputSidePackets() + .Tag(kEnvironmentTag) + .Set(mediapipe::MakePacket(environment)); + + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) override { + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) override { + return absl::OkStatus(); + } +}; + +} // namespace + +using FaceGeometryEnvGeneratorCalculator = EnvGeneratorCalculator; + +REGISTER_CALCULATOR(FaceGeometryEnvGeneratorCalculator); + +} // namespace mediapipe diff --git a/mediapipe/modules/face_geometry/env_generator_calculator.proto b/mediapipe/modules/face_geometry/env_generator_calculator.proto new file mode 100644 index 0000000..dea2ae0 --- /dev/null +++ b/mediapipe/modules/face_geometry/env_generator_calculator.proto @@ -0,0 +1,32 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator_options.proto"; +import "mediapipe/modules/face_geometry/protos/environment.proto"; + +message FaceGeometryEnvGeneratorCalculatorOptions { + extend CalculatorOptions { + optional FaceGeometryEnvGeneratorCalculatorOptions ext = 323693810; + } + + // Defines an environment to be packed as the output side packet. 
+ // + // Must be valid (for details, please refer to the proto message definition + // comments and/or `modules/face_geometry/libs/validation_utils.h/cc`) + optional face_geometry.Environment environment = 1; +} diff --git a/mediapipe/modules/face_geometry/face_geometry.pbtxt b/mediapipe/modules/face_geometry/face_geometry.pbtxt new file mode 100644 index 0000000..76228d4 --- /dev/null +++ b/mediapipe/modules/face_geometry/face_geometry.pbtxt @@ -0,0 +1,48 @@ +# MediaPipe graph to extract geometry from face landmarks for multiple faces. +# +# It is required that "geometry_pipeline_metadata.binarypb" is available at +# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata.binarypb" +# path during execution. +# +# This is a deprecated subgraph kept for backward-compatibility reasons. Please, +# be explicit and use the `FaceGeometryFromLandmarks` subgraph in the new code +# to enable the same runtime behaviour. + +type: "FaceGeometry" + +# The size of the input frame. The first element of the pair is the frame width; +# the other one is the frame height. +# +# The face landmarks should have been detected on a frame with the same +# ratio. If used as-is, the resulting face geometry visualization should be +# happening on a frame with the same ratio as well. +# +# (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# Collection of detected/predicted faces, each represented as a list of face +# landmarks. (std::vector) +input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks" + +# Environment that describes the current virtual scene. +# (face_geometry::Environment) +input_side_packet: "ENVIRONMENT:environment" + +# A list of geometry data for each detected face. +# (std::vector) +output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" + +# Extracts face geometry for multiple faces from a vector of face landmark +# lists. +node { + calculator: "FaceGeometryPipelineCalculator" + input_side_packet: "ENVIRONMENT:environment" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks" + output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" + options: { + [mediapipe.FaceGeometryPipelineCalculatorOptions.ext] { + metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata.binarypb" + } + } +} diff --git a/mediapipe/modules/face_geometry/face_geometry_from_detection.pbtxt b/mediapipe/modules/face_geometry/face_geometry_from_detection.pbtxt new file mode 100644 index 0000000..f570286 --- /dev/null +++ b/mediapipe/modules/face_geometry/face_geometry_from_detection.pbtxt @@ -0,0 +1,87 @@ +# MediaPipe graph to extract geometry from face detection for multiple faces. +# +# It is required that "geometry_pipeline_metadata_detection.binarypb" is +# available at +# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.binarypb" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceGeometryFromDetection" +# input_stream: "IMAGE_SIZE:image_size" +# input_stream: "MULTI_FACE_DETECTION:multi_face_detection" +# input_side_packet: "ENVIRONMENT:environment" +# output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" +# } + +type: "FaceGeometryFromDetection" + +# The size of the input frame. The first element of the pair is the frame width; +# the other one is the frame height. +# +# The face landmarks should have been detected on a frame with the same +# ratio. If used as-is, the resulting face geometry visualization should be +# happening on a frame with the same ratio as well. 
+# +# (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# Collection of detected/predicted faces, each represented as a detection. +# (std::vector) +input_stream: "MULTI_FACE_DETECTION:multi_face_detection" + +# Environment that describes the current virtual scene. +# (face_geometry::Environment) +input_side_packet: "ENVIRONMENT:environment" + +# A list of geometry data for each detected face. +# (std::vector) +# +# NOTE: the triangular topology of the face meshes is only useful when derived +# from the 468 face landmarks, not from the 6 face detection landmarks +# (keypoints). The former don't cover the entire face and this mesh is +# defined here only to comply with the API. It should be considered as +# a placeholder and/or for debugging purposes. +# +# Use the face geometry derived from the face detection landmarks +# (keypoints) for the face pose transformation matrix, not the mesh. +output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" + +# Begin iterating over a vector of the face detections. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:multi_face_detection" + output_stream: "ITEM:face_detection" + output_stream: "BATCH_END:detection_timestamp" +} + +# Extracts face detection keypoints as a normalized landmarks. +node { + calculator: "DetectionToLandmarksCalculator" + input_stream: "DETECTION:face_detection" + output_stream: "LANDMARKS:face_landmarks" +} + +# End iterating over a vector of the face detections and receive a vector of +# face landmark lists as a result. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:detection_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Extracts face geometry for multiple faces from a vector of face detection +# landmark lists. +node { + calculator: "FaceGeometryPipelineCalculator" + input_side_packet: "ENVIRONMENT:environment" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks" + output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" + options: { + [mediapipe.FaceGeometryPipelineCalculatorOptions.ext] { + metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.binarypb" + } + } +} diff --git a/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt b/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt new file mode 100644 index 0000000..3291476 --- /dev/null +++ b/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt @@ -0,0 +1,54 @@ +# MediaPipe graph to extract geometry from face landmarks for multiple faces. +# +# It is required that "geometry_pipeline_metadata_from_landmark.binarypb" is +# available at +# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_from_landmarks.binarypb" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceGeometryFromLandmarks" +# input_stream: "IMAGE_SIZE:image_size" +# input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks" +# input_side_packet: "ENVIRONMENT:environment" +# output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" +# } + +type: "FaceGeometryFromLandmarks" + +# The size of the input frame. The first element of the pair is the frame width; +# the other one is the frame height. +# +# The face landmarks should have been detected on a frame with the same +# ratio. If used as-is, the resulting face geometry visualization should be +# happening on a frame with the same ratio as well. 
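Conceptually, the `DetectionToLandmarksCalculator` node above only lifts the detection's relative keypoints into a `NormalizedLandmarkList`. The stand-alone sketch below shows the general idea; it is not the calculator's actual implementation:

#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"

mediapipe::NormalizedLandmarkList KeypointsToLandmarks(
    const mediapipe::Detection& detection) {
  mediapipe::NormalizedLandmarkList landmarks;
  // The face detector exposes a handful of keypoints (the pbtxt above refers
  // to 6 of them), each with x/y normalized to [0, 1] relative to the image.
  for (const auto& keypoint :
       detection.location_data().relative_keypoints()) {
    mediapipe::NormalizedLandmark* landmark = landmarks.add_landmark();
    landmark->set_x(keypoint.x());
    landmark->set_y(keypoint.y());
    // No depth is available from the detector, so z keeps its default value.
  }
  return landmarks;
}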
+# +# (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# Collection of detected/predicted faces, each represented as a list of face +# landmarks. (std::vector) +input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks" + +# Environment that describes the current virtual scene. +# (face_geometry::Environment) +input_side_packet: "ENVIRONMENT:environment" + +# A list of geometry data for each detected face. +# (std::vector) +output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" + +# Extracts face geometry for multiple faces from a vector of face landmark +# lists. +node { + calculator: "FaceGeometryPipelineCalculator" + input_side_packet: "ENVIRONMENT:environment" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks" + output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry" + options: { + [mediapipe.FaceGeometryPipelineCalculatorOptions.ext] { + metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.binarypb" + } + } +} diff --git a/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc b/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc new file mode 100644 index 0000000..87e710e --- /dev/null +++ b/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc @@ -0,0 +1,197 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/modules/face_geometry/geometry_pipeline_calculator.pb.h" +#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h" +#include "mediapipe/modules/face_geometry/libs/validation_utils.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h" +#include "mediapipe/util/resource_util.h" + +namespace mediapipe { +namespace { + +static constexpr char kEnvironmentTag[] = "ENVIRONMENT"; +static constexpr char kImageSizeTag[] = "IMAGE_SIZE"; +static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY"; +static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS"; + +// A calculator that renders a visual effect for multiple faces. +// +// Inputs: +// IMAGE_SIZE (`std::pair`, required): +// The size of the current frame. The first element of the pair is the frame +// width; the other one is the frame height. +// +// The face landmarks should have been detected on a frame with the same +// ratio. If used as-is, the resulting face geometry visualization should be +// happening on a frame with the same ratio as well. 
+// +// MULTI_FACE_LANDMARKS (`std::vector`, required): +// A vector of face landmark lists. +// +// Input side packets: +// ENVIRONMENT (`face_geometry::Environment`, required) +// Describes an environment; includes the camera frame origin point location +// as well as virtual camera parameters. +// +// Output: +// MULTI_FACE_GEOMETRY (`std::vector`, required): +// A vector of face geometry data. +// +// Options: +// metadata_path (`string`, optional): +// Defines a path for the geometry pipeline metadata file. +// +// The geometry pipeline metadata file format must be the binary +// `face_geometry.GeometryPipelineMetadata` proto. +// +class GeometryPipelineCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets() + .Tag(kEnvironmentTag) + .Set(); + cc->Inputs().Tag(kImageSizeTag).Set>(); + cc->Inputs() + .Tag(kMultiFaceLandmarksTag) + .Set>(); + cc->Outputs() + .Tag(kMultiFaceGeometryTag) + .Set>(); + + return absl::OkStatus(); + } + + absl::Status Open(CalculatorContext* cc) override { + cc->SetOffset(mediapipe::TimestampDiff(0)); + + const auto& options = cc->Options(); + + ASSIGN_OR_RETURN( + face_geometry::GeometryPipelineMetadata metadata, + ReadMetadataFromFile(options.metadata_path()), + _ << "Failed to read the geometry pipeline metadata from file!"); + + MP_RETURN_IF_ERROR( + face_geometry::ValidateGeometryPipelineMetadata(metadata)) + << "Invalid geometry pipeline metadata!"; + + const face_geometry::Environment& environment = + cc->InputSidePackets() + .Tag(kEnvironmentTag) + .Get(); + + MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment)) + << "Invalid environment!"; + + ASSIGN_OR_RETURN( + geometry_pipeline_, + face_geometry::CreateGeometryPipeline(environment, metadata), + _ << "Failed to create a geometry pipeline!"); + + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) override { + // Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required + // to have a non-empty packet. In case this requirement is not met, there's + // nothing to be processed at the current timestamp. 
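The `Open()` method above leans on MediaPipe's status macros. A small illustrative sketch of the same idiom outside a calculator follows; both helper functions are hypothetical and exist only for the example:

#include <string>

#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"

// Hypothetical helpers, declared only to illustrate the macros below.
absl::StatusOr<std::string> ReadFileContents(const std::string& path);
absl::Status ValidateNotEmpty(const std::string& blob);

absl::Status LoadBlob(const std::string& path) {
  // ASSIGN_OR_RETURN unwraps the StatusOr on success and returns early on
  // failure; the trailing `_ << ...` appends context to the returned status.
  ASSIGN_OR_RETURN(std::string blob, ReadFileContents(path),
                   _ << "Failed to read contents of " << path);
  // MP_RETURN_IF_ERROR does the same for plain absl::Status expressions.
  MP_RETURN_IF_ERROR(ValidateNotEmpty(blob)) << "Blob is empty!";
  return absl::OkStatus();
}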
+ if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() || + cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) { + return absl::OkStatus(); + } + + const auto& image_size = + cc->Inputs().Tag(kImageSizeTag).Get>(); + const auto& multi_face_landmarks = + cc->Inputs() + .Tag(kMultiFaceLandmarksTag) + .Get>(); + + auto multi_face_geometry = + absl::make_unique>(); + + ASSIGN_OR_RETURN( + *multi_face_geometry, + geometry_pipeline_->EstimateFaceGeometry( + multi_face_landmarks, // + /*frame_width*/ image_size.first, + /*frame_height*/ image_size.second), + _ << "Failed to estimate face geometry for multiple faces!"); + + cc->Outputs() + .Tag(kMultiFaceGeometryTag) + .AddPacket(mediapipe::Adopt>( + multi_face_geometry.release()) + .At(cc->InputTimestamp())); + + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) override { + return absl::OkStatus(); + } + + private: + static absl::StatusOr + ReadMetadataFromFile(const std::string& metadata_path) { + ASSIGN_OR_RETURN(std::string metadata_blob, + ReadContentBlobFromFile(metadata_path), + _ << "Failed to read a metadata blob from file!"); + + face_geometry::GeometryPipelineMetadata metadata; + RET_CHECK(metadata.ParseFromString(metadata_blob)) + << "Failed to parse a metadata proto from a binary blob!"; + + return metadata; + } + + static absl::StatusOr ReadContentBlobFromFile( + const std::string& unresolved_path) { + ASSIGN_OR_RETURN(std::string resolved_path, + mediapipe::PathToResourceAsFile(unresolved_path), + _ << "Failed to resolve path! Path = " << unresolved_path); + + std::string content_blob; + MP_RETURN_IF_ERROR( + mediapipe::GetResourceContents(resolved_path, &content_blob)) + << "Failed to read content blob! Resolved path = " << resolved_path; + + return content_blob; + } + + std::unique_ptr geometry_pipeline_; +}; + +} // namespace + +using FaceGeometryPipelineCalculator = GeometryPipelineCalculator; + +REGISTER_CALCULATOR(FaceGeometryPipelineCalculator); + +} // namespace mediapipe diff --git a/mediapipe/modules/face_geometry/geometry_pipeline_calculator.proto b/mediapipe/modules/face_geometry/geometry_pipeline_calculator.proto new file mode 100644 index 0000000..638bb45 --- /dev/null +++ b/mediapipe/modules/face_geometry/geometry_pipeline_calculator.proto @@ -0,0 +1,27 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator_options.proto"; + +message FaceGeometryPipelineCalculatorOptions { + extend CalculatorOptions { + optional FaceGeometryPipelineCalculatorOptions ext = 323693812; + } + + optional string metadata_path = 1; +} diff --git a/mediapipe/modules/face_geometry/libs/BUILD b/mediapipe/modules/face_geometry/libs/BUILD new file mode 100644 index 0000000..35dc451 --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/BUILD @@ -0,0 +1,103 @@ +# Copyright 2020 The MediaPipe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "effect_renderer", + srcs = ["effect_renderer.cc"], + hdrs = ["effect_renderer.h"], + deps = [ + ":mesh_3d_utils", + ":validation_utils", + "//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:matrix_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/gpu:gl_base", + "//mediapipe/gpu:shader_util", + "//mediapipe/modules/face_geometry/protos:environment_cc_proto", + "//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto", + "//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:optional", + ], +) + +cc_library( + name = "geometry_pipeline", + srcs = ["geometry_pipeline.cc"], + hdrs = ["geometry_pipeline.h"], + deps = [ + ":mesh_3d_utils", + ":procrustes_solver", + ":validation_utils", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/formats:matrix_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/modules/face_geometry/protos:environment_cc_proto", + "//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto", + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto", + "//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto", + "@com_google_absl//absl/memory", + "@eigen_archive//:eigen3", + ], +) + +cc_library( + name = "mesh_3d_utils", + srcs = ["mesh_3d_utils.cc"], + hdrs = ["mesh_3d_utils.h"], + deps = [ + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:statusor", + "//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto", + ], +) + +cc_library( + name = "procrustes_solver", + srcs = ["procrustes_solver.cc"], + hdrs = ["procrustes_solver.h"], + deps = [ + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "@com_google_absl//absl/memory", + "@eigen_archive//:eigen3", + ], +) + +cc_library( + name = "validation_utils", + srcs = ["validation_utils.cc"], + hdrs = ["validation_utils.h"], + deps = [ + ":mesh_3d_utils", + "//mediapipe/framework/formats:matrix_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/modules/face_geometry/protos:environment_cc_proto", + "//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto", + "//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto", + "//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto", + ], +) diff --git a/mediapipe/modules/face_geometry/libs/effect_renderer.cc b/mediapipe/modules/face_geometry/libs/effect_renderer.cc new file mode 100644 index 
0000000..27a54e0 --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/effect_renderer.cc @@ -0,0 +1,733 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/face_geometry/libs/effect_renderer.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/memory/memory.h" +#include "absl/types/optional.h" +#include "mediapipe/framework/formats/image_format.pb.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/matrix_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/gpu/gl_base.h" +#include "mediapipe/gpu/shader_util.h" +#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h" +#include "mediapipe/modules/face_geometry/libs/validation_utils.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { +namespace { + +struct RenderableMesh3d { + static absl::StatusOr CreateFromProtoMesh3d( + const Mesh3d& proto_mesh_3d) { + Mesh3d::VertexType vertex_type = proto_mesh_3d.vertex_type(); + + RenderableMesh3d renderable_mesh_3d; + renderable_mesh_3d.vertex_size = GetVertexSize(vertex_type); + ASSIGN_OR_RETURN( + renderable_mesh_3d.vertex_position_size, + GetVertexComponentSize(vertex_type, VertexComponent::POSITION), + _ << "Failed to get the position vertex size!"); + ASSIGN_OR_RETURN( + renderable_mesh_3d.tex_coord_position_size, + GetVertexComponentSize(vertex_type, VertexComponent::TEX_COORD), + _ << "Failed to get the tex coord vertex size!"); + ASSIGN_OR_RETURN( + renderable_mesh_3d.vertex_position_offset, + GetVertexComponentOffset(vertex_type, VertexComponent::POSITION), + _ << "Failed to get the position vertex offset!"); + ASSIGN_OR_RETURN( + renderable_mesh_3d.tex_coord_position_offset, + GetVertexComponentOffset(vertex_type, VertexComponent::TEX_COORD), + _ << "Failed to get the tex coord vertex offset!"); + + switch (proto_mesh_3d.primitive_type()) { + case Mesh3d::TRIANGLE: + renderable_mesh_3d.primitive_type = GL_TRIANGLES; + break; + + default: + RET_CHECK_FAIL() << "Only triangle primitive types are supported!"; + } + + renderable_mesh_3d.vertex_buffer.reserve( + proto_mesh_3d.vertex_buffer_size()); + for (float vertex_element : proto_mesh_3d.vertex_buffer()) { + renderable_mesh_3d.vertex_buffer.push_back(vertex_element); + } + + renderable_mesh_3d.index_buffer.reserve(proto_mesh_3d.index_buffer_size()); + for (uint32_t index_element : proto_mesh_3d.index_buffer()) { + RET_CHECK_LE(index_element, std::numeric_limits::max()) + << "Index buffer elements must fit into the `uint16` type in order " + "to be renderable!"; + + renderable_mesh_3d.index_buffer.push_back( + 
static_cast(index_element)); + } + + return renderable_mesh_3d; + } + + uint32_t vertex_size; + uint32_t vertex_position_size; + uint32_t tex_coord_position_size; + uint32_t vertex_position_offset; + uint32_t tex_coord_position_offset; + uint32_t primitive_type; + + std::vector vertex_buffer; + std::vector index_buffer; +}; + +class Texture { + public: + static absl::StatusOr> WrapExternalTexture( + GLuint handle, GLenum target, int width, int height) { + RET_CHECK(handle) << "External texture must have a non-null handle!"; + return absl::WrapUnique(new Texture(handle, target, width, height, + /*is_owned*/ false)); + } + + static absl::StatusOr> CreateFromImageFrame( + const ImageFrame& image_frame) { + RET_CHECK(image_frame.IsAligned(ImageFrame::kGlDefaultAlignmentBoundary)) + << "Image frame memory must be aligned for GL usage!"; + + RET_CHECK(image_frame.Width() > 0 && image_frame.Height() > 0) + << "Image frame must have positive dimensions!"; + + RET_CHECK(image_frame.Format() == ImageFormat::SRGB || + image_frame.Format() == ImageFormat::SRGBA) + << "Image frame format must be either SRGB or SRGBA!"; + + GLint image_format; + switch (image_frame.NumberOfChannels()) { + case 3: + image_format = GL_RGB; + break; + case 4: + image_format = GL_RGBA; + break; + default: + RET_CHECK_FAIL() + << "Unexpected number of channels; expected 3 or 4, got " + << image_frame.NumberOfChannels() << "!"; + } + + GLuint handle; + glGenTextures(1, &handle); + RET_CHECK(handle) << "Failed to initialize an OpenGL texture!"; + + glBindTexture(GL_TEXTURE_2D, handle); + glTexParameteri(GL_TEXTURE_2D, GL_NEAREST_MIPMAP_LINEAR, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, image_format, image_frame.Width(), + image_frame.Height(), 0, image_format, GL_UNSIGNED_BYTE, + image_frame.PixelData()); + glGenerateMipmap(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, 0); + + return absl::WrapUnique(new Texture( + handle, GL_TEXTURE_2D, image_frame.Width(), image_frame.Height(), + /*is_owned*/ true)); + } + + ~Texture() { + if (is_owned_) { + glDeleteProgram(handle_); + } + } + + GLuint handle() const { return handle_; } + GLenum target() const { return target_; } + int width() const { return width_; } + int height() const { return height_; } + + private: + Texture(GLuint handle, GLenum target, int width, int height, bool is_owned) + : handle_(handle), + target_(target), + width_(width), + height_(height), + is_owned_(is_owned) {} + + GLuint handle_; + GLenum target_; + int width_; + int height_; + bool is_owned_; +}; + +class RenderTarget { + public: + static absl::StatusOr> Create() { + GLuint framebuffer_handle; + glGenFramebuffers(1, &framebuffer_handle); + RET_CHECK(framebuffer_handle) + << "Failed to initialize an OpenGL framebuffer!"; + + return absl::WrapUnique(new RenderTarget(framebuffer_handle)); + } + + ~RenderTarget() { + glDeleteFramebuffers(1, &framebuffer_handle_); + // Renderbuffer handle might have never been created if this render target + // is destroyed before `SetColorbuffer()` is called for the first time. 
+ if (renderbuffer_handle_) { + glDeleteFramebuffers(1, &renderbuffer_handle_); + } + } + + absl::Status SetColorbuffer(const Texture& colorbuffer_texture) { + glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_handle_); + glViewport(0, 0, colorbuffer_texture.width(), colorbuffer_texture.height()); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(colorbuffer_texture.target(), colorbuffer_texture.handle()); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + colorbuffer_texture.target(), + colorbuffer_texture.handle(), + /*level*/ 0); + glBindTexture(colorbuffer_texture.target(), 0); + + // If the existing depth buffer has different dimensions, delete it. + if (renderbuffer_handle_ && + (viewport_width_ != colorbuffer_texture.width() || + viewport_height_ != colorbuffer_texture.height())) { + glDeleteRenderbuffers(1, &renderbuffer_handle_); + renderbuffer_handle_ = 0; + } + + // If there is no depth buffer, create one. + if (!renderbuffer_handle_) { + glGenRenderbuffers(1, &renderbuffer_handle_); + RET_CHECK(renderbuffer_handle_) + << "Failed to initialize an OpenGL renderbuffer!"; + glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_handle_); + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16, + colorbuffer_texture.width(), + colorbuffer_texture.height()); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, renderbuffer_handle_); + glBindRenderbuffer(GL_RENDERBUFFER, 0); + } + + viewport_width_ = colorbuffer_texture.width(); + viewport_height_ = colorbuffer_texture.height(); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glFlush(); + + return absl::OkStatus(); + } + + void Bind() const { + glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_handle_); + glViewport(0, 0, viewport_width_, viewport_height_); + } + + void Unbind() const { glBindFramebuffer(GL_FRAMEBUFFER, 0); } + + void Clear() const { + Bind(); + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + + glClearColor(0.f, 0.f, 0.f, 0.f); + glClearDepthf(1.f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + + Unbind(); + glFlush(); + } + + private: + explicit RenderTarget(GLuint framebuffer_handle) + : framebuffer_handle_(framebuffer_handle), + renderbuffer_handle_(0), + viewport_width_(-1), + viewport_height_(-1) {} + + GLuint framebuffer_handle_; + GLuint renderbuffer_handle_; + int viewport_width_; + int viewport_height_; +}; + +class Renderer { + public: + enum class RenderMode { OPAQUE, OVERDRAW, OCCLUSION }; + + static absl::StatusOr> Create() { + static const GLint kAttrLocation[NUM_ATTRIBUTES] = { + ATTRIB_VERTEX, + ATTRIB_TEXTURE_POSITION, + }; + static const GLchar* kAttrName[NUM_ATTRIBUTES] = { + "position", + "tex_coord", + }; + + static const GLchar* kVertSrc = R"( + uniform mat4 projection_mat; + uniform mat4 model_mat; + + attribute vec4 position; + attribute vec4 tex_coord; + + varying vec2 v_tex_coord; + + void main() { + v_tex_coord = tex_coord.xy; + gl_Position = projection_mat * model_mat * position; + } + )"; + + static const GLchar* kFragSrc = R"( + precision mediump float; + + varying vec2 v_tex_coord; + uniform sampler2D texture; + + void main() { + gl_FragColor = texture2D(texture, v_tex_coord); + } + )"; + + GLuint program_handle = 0; + GlhCreateProgram(kVertSrc, kFragSrc, NUM_ATTRIBUTES, + (const GLchar**)&kAttrName[0], kAttrLocation, + &program_handle); + RET_CHECK(program_handle) << "Problem initializing the texture program!"; + GLint projection_mat_uniform = + glGetUniformLocation(program_handle, 
"projection_mat"); + GLint model_mat_uniform = glGetUniformLocation(program_handle, "model_mat"); + GLint texture_uniform = glGetUniformLocation(program_handle, "texture"); + + RET_CHECK_NE(projection_mat_uniform, -1) + << "Failed to find `projection_mat` uniform!"; + RET_CHECK_NE(model_mat_uniform, -1) + << "Failed to find `model_mat` uniform!"; + RET_CHECK_NE(texture_uniform, -1) << "Failed to find `texture` uniform!"; + + return absl::WrapUnique(new Renderer(program_handle, projection_mat_uniform, + model_mat_uniform, texture_uniform)); + } + + ~Renderer() { glDeleteProgram(program_handle_); } + + absl::Status Render(const RenderTarget& render_target, const Texture& texture, + const RenderableMesh3d& mesh_3d, + const std::array& projection_mat, + const std::array& model_mat, + RenderMode render_mode) const { + glUseProgram(program_handle_); + // Set up the GL state. + glEnable(GL_BLEND); + glFrontFace(GL_CCW); + switch (render_mode) { + case RenderMode::OPAQUE: + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + break; + + case RenderMode::OVERDRAW: + glBlendFunc(GL_ONE, GL_ZERO); + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + break; + + case RenderMode::OCCLUSION: + glBlendFunc(GL_ZERO, GL_ONE); + glEnable(GL_DEPTH_TEST); + glDepthMask(GL_TRUE); + break; + } + + render_target.Bind(); + // Set up vertex attributes. + glVertexAttribPointer( + ATTRIB_VERTEX, mesh_3d.vertex_position_size, GL_FLOAT, 0, + mesh_3d.vertex_size * sizeof(float), + mesh_3d.vertex_buffer.data() + mesh_3d.vertex_position_offset); + glEnableVertexAttribArray(ATTRIB_VERTEX); + glVertexAttribPointer( + ATTRIB_TEXTURE_POSITION, mesh_3d.tex_coord_position_size, GL_FLOAT, 0, + mesh_3d.vertex_size * sizeof(float), + mesh_3d.vertex_buffer.data() + mesh_3d.tex_coord_position_offset); + glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION); + // Set up textures and uniforms. + glActiveTexture(GL_TEXTURE1); + glBindTexture(texture.target(), texture.handle()); + glUniform1i(texture_uniform_, 1); + glUniformMatrix4fv(projection_mat_uniform_, 1, GL_FALSE, + projection_mat.data()); + glUniformMatrix4fv(model_mat_uniform_, 1, GL_FALSE, model_mat.data()); + // Draw the mesh. + glDrawElements(mesh_3d.primitive_type, mesh_3d.index_buffer.size(), + GL_UNSIGNED_SHORT, mesh_3d.index_buffer.data()); + // Unbind textures and uniforms. + glActiveTexture(GL_TEXTURE1); + glBindTexture(texture.target(), 0); + render_target.Unbind(); + // Unbind vertex attributes. + glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION); + glDisableVertexAttribArray(ATTRIB_VERTEX); + // Restore the GL state. 
+ glDepthMask(GL_FALSE); + glDisable(GL_DEPTH_TEST); + glDisable(GL_BLEND); + + glUseProgram(0); + glFlush(); + + return absl::OkStatus(); + } + + private: + enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES }; + + Renderer(GLuint program_handle, GLint projection_mat_uniform, + GLint model_mat_uniform, GLint texture_uniform) + : program_handle_(program_handle), + projection_mat_uniform_(projection_mat_uniform), + model_mat_uniform_(model_mat_uniform), + texture_uniform_(texture_uniform) {} + + GLuint program_handle_; + GLint projection_mat_uniform_; + GLint model_mat_uniform_; + GLint texture_uniform_; +}; + +class EffectRendererImpl : public EffectRenderer { + public: + EffectRendererImpl( + const Environment& environment, + std::unique_ptr render_target, + std::unique_ptr renderer, + RenderableMesh3d&& renderable_quad_mesh_3d, + absl::optional&& renderable_effect_mesh_3d, + std::unique_ptr empty_color_texture, + std::unique_ptr effect_texture) + : environment_(environment), + render_target_(std::move(render_target)), + renderer_(std::move(renderer)), + renderable_quad_mesh_3d_(std::move(renderable_quad_mesh_3d)), + renderable_effect_mesh_3d_(std::move(renderable_effect_mesh_3d)), + empty_color_texture_(std::move(empty_color_texture)), + effect_texture_(std::move(effect_texture)), + identity_matrix_(Create4x4IdentityMatrix()) {} + + absl::Status RenderEffect( + const std::vector& multi_face_geometry, + int frame_width, // + int frame_height, // + GLenum src_texture_target, // + GLuint src_texture_name, // + GLenum dst_texture_target, // + GLuint dst_texture_name) { + // Validate input arguments. + MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height)) + << "Invalid frame dimensions!"; + RET_CHECK(src_texture_name > 0 && dst_texture_name > 0) + << "Both source and destination texture names must be non-null!"; + RET_CHECK_NE(src_texture_name, dst_texture_name) + << "Source and destination texture names must be different!"; + + // Validate all input face geometries. + for (const FaceGeometry& face_geometry : multi_face_geometry) { + MP_RETURN_IF_ERROR(ValidateFaceGeometry(face_geometry)) + << "Invalid face geometry!"; + } + + // Wrap both source and destination textures. + ASSIGN_OR_RETURN( + std::unique_ptr src_texture, + Texture::WrapExternalTexture(src_texture_name, src_texture_target, + frame_width, frame_height), + _ << "Failed to wrap the external source texture"); + ASSIGN_OR_RETURN( + std::unique_ptr dst_texture, + Texture::WrapExternalTexture(dst_texture_name, dst_texture_target, + frame_width, frame_height), + _ << "Failed to wrap the external destination texture"); + + // Set the destination texture as the color buffer. Then, clear both the + // color and the depth buffers for the render target. + MP_RETURN_IF_ERROR(render_target_->SetColorbuffer(*dst_texture)) + << "Failed to set the destination texture as the colorbuffer!"; + render_target_->Clear(); + + // Render the source texture on top of the quad mesh (i.e. make a copy) + // into the render target. 
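+    // (The quad mesh from CreateQuadMesh3d() spans the full clip-space square,
+    // so with identity projection and model matrices this pass simply copies
+    // the source frame into the destination texture.)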
+ MP_RETURN_IF_ERROR(renderer_->Render( + *render_target_, *src_texture, renderable_quad_mesh_3d_, + identity_matrix_, identity_matrix_, Renderer::RenderMode::OVERDRAW)) + << "Failed to render the source texture on top of the quad mesh!"; + + // Extract pose transform matrices and meshes from the face geometry data; + const int num_faces = multi_face_geometry.size(); + + std::vector> face_pose_transform_matrices(num_faces); + std::vector renderable_face_meshes(num_faces); + for (int i = 0; i < num_faces; ++i) { + const FaceGeometry& face_geometry = multi_face_geometry[i]; + + // Extract the face pose transformation matrix. + ASSIGN_OR_RETURN( + face_pose_transform_matrices[i], + Convert4x4MatrixDataToArrayFormat( + face_geometry.pose_transform_matrix()), + _ << "Failed to extract the face pose transformation matrix!"); + + // Extract the face mesh as a renderable. + ASSIGN_OR_RETURN( + renderable_face_meshes[i], + RenderableMesh3d::CreateFromProtoMesh3d(face_geometry.mesh()), + _ << "Failed to extract a renderable face mesh!"); + } + + // Create a perspective matrix using the frame aspect ratio. + std::array perspective_matrix = CreatePerspectiveMatrix( + /*aspect_ratio*/ static_cast(frame_width) / frame_height); + + // Render a face mesh occluder for each face. + for (int i = 0; i < num_faces; ++i) { + const std::array& face_pose_transform_matrix = + face_pose_transform_matrices[i]; + const RenderableMesh3d& renderable_face_mesh = renderable_face_meshes[i]; + + // Render the face mesh using the empty color texture, i.e. the face + // mesh occluder. + // + // For occlusion, the pose transformation is moved ~1mm away from camera + // in order to allow the face mesh texture to be rendered without + // failing the depth test. + std::array occlusion_face_pose_transform_matrix = + face_pose_transform_matrix; + occlusion_face_pose_transform_matrix[14] -= 0.1f; // ~ 1mm + MP_RETURN_IF_ERROR(renderer_->Render( + *render_target_, *empty_color_texture_, renderable_face_mesh, + perspective_matrix, occlusion_face_pose_transform_matrix, + Renderer::RenderMode::OCCLUSION)) + << "Failed to render the face mesh occluder!"; + } + + // Render the main face mesh effect component for each face. + for (int i = 0; i < num_faces; ++i) { + const std::array& face_pose_transform_matrix = + face_pose_transform_matrices[i]; + + // If there is no effect 3D mesh provided, then the face mesh itself is + // used as a topology for rendering (for example, this can be used for + // facepaint effects or AR makeup). + const RenderableMesh3d& main_effect_mesh_3d = + renderable_effect_mesh_3d_ ? *renderable_effect_mesh_3d_ + : renderable_face_meshes[i]; + + MP_RETURN_IF_ERROR(renderer_->Render( + *render_target_, *effect_texture_, main_effect_mesh_3d, + perspective_matrix, face_pose_transform_matrix, + Renderer::RenderMode::OPAQUE)) + << "Failed to render the main effect pass!"; + } + + // At this point in the code, the destination texture must contain the + // correctly renderer effect, so we should just return. + return absl::OkStatus(); + } + + private: + std::array CreatePerspectiveMatrix(float aspect_ratio) const { + static constexpr float kDegreesToRadians = M_PI / 180.f; + + std::array perspective_matrix; + perspective_matrix.fill(0.f); + + const auto& env_camera = environment_.perspective_camera(); + // Standard perspective projection matrix calculations. 
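+    // For reference, with f = 1 / tan(vertical_fov / 2), a = aspect_ratio,
+    // n = near and F = far, the code below assembles (in column-major order)
+    // the standard OpenGL perspective projection matrix:
+    //
+    //   | f/a  0        0             0        |
+    //   |  0   f        0             0        |
+    //   |  0   0   (n+F)/(n-F)   2*F*n/(n-F)   |
+    //   |  0   0       -1             0        |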
+ const float f = 1.0f / std::tan(kDegreesToRadians * + env_camera.vertical_fov_degrees() / 2.f); + + const float denom = 1.0f / (env_camera.near() - env_camera.far()); + perspective_matrix[0] = f / aspect_ratio; + perspective_matrix[5] = f; + perspective_matrix[10] = (env_camera.near() + env_camera.far()) * denom; + perspective_matrix[11] = -1.f; + perspective_matrix[14] = 2.f * env_camera.far() * env_camera.near() * denom; + + // If the environment's origin point location is in the top left corner, + // then skip additional flip along Y-axis is required to render correctly. + if (environment_.origin_point_location() == + OriginPointLocation::TOP_LEFT_CORNER) { + perspective_matrix[5] *= -1.f; + } + + return perspective_matrix; + } + + static std::array Create4x4IdentityMatrix() { + return {1.f, 0.f, 0.f, 0.f, // + 0.f, 1.f, 0.f, 0.f, // + 0.f, 0.f, 1.f, 0.f, // + 0.f, 0.f, 0.f, 1.f}; + } + + static absl::StatusOr> + Convert4x4MatrixDataToArrayFormat(const MatrixData& matrix_data) { + RET_CHECK(matrix_data.rows() == 4 && // + matrix_data.cols() == 4 && // + matrix_data.packed_data_size() == 16) + << "The matrix data must define a 4x4 matrix!"; + + std::array matrix_array; + for (int i = 0; i < 16; i++) { + matrix_array[i] = matrix_data.packed_data(i); + } + + // Matrix array must be in the OpenGL-friendly column-major order. If + // `matrix_data` is in the row-major order, then transpose. + if (matrix_data.layout() == MatrixData::ROW_MAJOR) { + std::swap(matrix_array[1], matrix_array[4]); + std::swap(matrix_array[2], matrix_array[8]); + std::swap(matrix_array[3], matrix_array[12]); + std::swap(matrix_array[6], matrix_array[9]); + std::swap(matrix_array[7], matrix_array[13]); + std::swap(matrix_array[11], matrix_array[14]); + } + + return matrix_array; + } + + Environment environment_; + + std::unique_ptr render_target_; + std::unique_ptr renderer_; + + RenderableMesh3d renderable_quad_mesh_3d_; + absl::optional renderable_effect_mesh_3d_; + + std::unique_ptr empty_color_texture_; + std::unique_ptr effect_texture_; + + std::array identity_matrix_; +}; + +Mesh3d CreateQuadMesh3d() { + static constexpr float kQuadMesh3dVertexBuffer[] = { + -1.f, -1.f, 0.f, 0.f, 0.f, // + 1.f, -1.f, 0.f, 1.f, 0.f, // + -1.f, 1.f, 0.f, 0.f, 1.f, // + 1.f, 1.f, 0.f, 1.f, 1.f, // + }; + static constexpr uint16_t kQuadMesh3dIndexBuffer[] = {0, 1, 2, 1, 3, 2}; + + static constexpr int kQuadMesh3dVertexBufferSize = + sizeof(kQuadMesh3dVertexBuffer) / sizeof(float); + static constexpr int kQuadMesh3dIndexBufferSize = + sizeof(kQuadMesh3dIndexBuffer) / sizeof(uint16_t); + + Mesh3d quad_mesh_3d; + quad_mesh_3d.set_vertex_type(Mesh3d::VERTEX_PT); + quad_mesh_3d.set_primitive_type(Mesh3d::TRIANGLE); + for (int i = 0; i < kQuadMesh3dVertexBufferSize; ++i) { + quad_mesh_3d.add_vertex_buffer(kQuadMesh3dVertexBuffer[i]); + } + for (int i = 0; i < kQuadMesh3dIndexBufferSize; ++i) { + quad_mesh_3d.add_index_buffer(kQuadMesh3dIndexBuffer[i]); + } + + return quad_mesh_3d; +} + +ImageFrame CreateEmptyColorTexture() { + static constexpr ImageFormat::Format kEmptyColorTextureFormat = + ImageFormat::SRGBA; + static constexpr int kEmptyColorTextureWidth = 1; + static constexpr int kEmptyColorTextureHeight = 1; + + ImageFrame empty_color_texture( + kEmptyColorTextureFormat, kEmptyColorTextureWidth, + kEmptyColorTextureHeight, ImageFrame::kGlDefaultAlignmentBoundary); + empty_color_texture.SetToZero(); + + return empty_color_texture; +} + +} // namespace + +absl::StatusOr> CreateEffectRenderer( + const Environment& environment, // + 
const absl::optional& effect_mesh_3d, // + ImageFrame&& effect_texture) { + MP_RETURN_IF_ERROR(ValidateEnvironment(environment)) + << "Invalid environment!"; + if (effect_mesh_3d) { + MP_RETURN_IF_ERROR(ValidateMesh3d(*effect_mesh_3d)) + << "Invalid effect 3D mesh!"; + } + + ASSIGN_OR_RETURN(std::unique_ptr render_target, + RenderTarget::Create(), + _ << "Failed to create a render target!"); + ASSIGN_OR_RETURN(std::unique_ptr renderer, Renderer::Create(), + _ << "Failed to create a renderer!"); + ASSIGN_OR_RETURN(RenderableMesh3d renderable_quad_mesh_3d, + RenderableMesh3d::CreateFromProtoMesh3d(CreateQuadMesh3d()), + _ << "Failed to create a renderable quad mesh!"); + absl::optional renderable_effect_mesh_3d; + if (effect_mesh_3d) { + ASSIGN_OR_RETURN(renderable_effect_mesh_3d, + RenderableMesh3d::CreateFromProtoMesh3d(*effect_mesh_3d), + _ << "Failed to create a renderable effect mesh!"); + } + ASSIGN_OR_RETURN(std::unique_ptr empty_color_gl_texture, + Texture::CreateFromImageFrame(CreateEmptyColorTexture()), + _ << "Failed to create an empty color texture!"); + ASSIGN_OR_RETURN(std::unique_ptr effect_gl_texture, + Texture::CreateFromImageFrame(effect_texture), + _ << "Failed to create an effect texture!"); + + std::unique_ptr result = + absl::make_unique( + environment, std::move(render_target), std::move(renderer), + std::move(renderable_quad_mesh_3d), + std::move(renderable_effect_mesh_3d), + std::move(empty_color_gl_texture), std::move(effect_gl_texture)); + + return result; +} + +} // namespace mediapipe::face_geometry diff --git a/mediapipe/modules/face_geometry/libs/effect_renderer.h b/mediapipe/modules/face_geometry/libs/effect_renderer.h new file mode 100644 index 0000000..71330e7 --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/effect_renderer.h @@ -0,0 +1,92 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_ +#define MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/gpu/gl_base.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { + +// Encapsulates a stateful face effect renderer. +class EffectRenderer { + public: + virtual ~EffectRenderer() = default; + + // Renders a face effect based on the multiple facial geometries. + // + // Must be called in the same GL context as was used upon initialization. + // + // Each of the `multi_face_geometry` must be valid (for details, please refer + // to the proto message definition comments and/or `validation_utils.h/cc`). 
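+  // A hedged usage sketch (texture names, targets and frame dimensions are
+  // assumptions for illustration; `effect_renderer` is an instance obtained
+  // from CreateEffectRenderer() documented further below):
+  //
+  //   MP_RETURN_IF_ERROR(effect_renderer->RenderEffect(
+  //       multi_face_geometry, frame_width, frame_height,
+  //       GL_TEXTURE_2D, src_texture_name,
+  //       GL_TEXTURE_2D, dst_texture_name));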
+ // Additionally, all face mesh index buffer elements must fit into the + // `uint16` type in order to be renderable. + // + // Both `frame_width` and `frame_height` must be positive. + // + // Both `src_texture_name` and `dst_texture_name` must be positive and + // reference existing OpenGL textures in the current context. They should also + // reference different textures as the in-place effect rendering is not yet + // supported. + virtual absl::Status RenderEffect( + const std::vector& multi_face_geometry, + int frame_width, // + int frame_height, // + GLenum src_texture_target, // + GLuint src_texture_name, // + GLenum dst_texture_target, // + GLuint dst_texture_name) = 0; +}; + +// Creates an instance of `EffectRenderer`. +// +// `effect_mesh_3d` defines a rigid 3d mesh which is "attached" to the face and +// is driven by the face pose transformation matrix. If is not present, the +// runtime face mesh will be used as the effect mesh - this mode is handy for +// facepaint effects. In both rendering modes, the face mesh is first rendered +// as an occluder straight into the depth buffer. This step helps to create a +// more believable effect via hiding invisible elements behind the face surface. +// +// `effect_texture` defines the color texture to be rendered on top of the +// effect mesh. Please be aware about the difference between the CPU texture +// memory layout and the GPU texture sampler coordinate space. This renderer +// follows conventions discussed here: https://open.gl/textures +// +// Must be called in the same GL context as will be used for rendering. +// +// Both `environment` and `effect_mesh_3d` (is present) must be valid (for +// details, please refer to the proto message definition comments and/or +// `validation_utils.h/cc`). Additionally, `effect_mesh_3d`s index buffer +// elements must fit into the `uint16` type in order to be renderable. +// +// `effect_texture` must have positive dimensions. Its format must be either +// `SRGB` or `SRGBA`. Its memory must be aligned for GL usage. +absl::StatusOr> CreateEffectRenderer( + const Environment& environment, // + const absl::optional& effect_mesh_3d, // + ImageFrame&& effect_texture); + +} // namespace mediapipe::face_geometry + +#endif // MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_ diff --git a/mediapipe/modules/face_geometry/libs/geometry_pipeline.cc b/mediapipe/modules/face_geometry/libs/geometry_pipeline.cc new file mode 100644 index 0000000..bcfce7c --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/geometry_pipeline.cc @@ -0,0 +1,466 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h" + +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "absl/memory/memory.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/formats/matrix_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h" +#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h" +#include "mediapipe/modules/face_geometry/libs/validation_utils.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { +namespace { + +struct PerspectiveCameraFrustum { + // NOTE: all arguments must be validated prior to calling this constructor. + PerspectiveCameraFrustum(const PerspectiveCamera& perspective_camera, + int frame_width, int frame_height) { + static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f; + + const float height_at_near = + 2.f * perspective_camera.near() * + std::tan(0.5f * kDegreesToRadians * + perspective_camera.vertical_fov_degrees()); + + const float width_at_near = frame_width * height_at_near / frame_height; + + left = -0.5f * width_at_near; + right = 0.5f * width_at_near; + bottom = -0.5f * height_at_near; + top = 0.5f * height_at_near; + near = perspective_camera.near(); + far = perspective_camera.far(); + } + + float left; + float right; + float bottom; + float top; + float near; + float far; +}; + +class ScreenToMetricSpaceConverter { + public: + ScreenToMetricSpaceConverter( + OriginPointLocation origin_point_location, // + InputSource input_source, // + Eigen::Matrix3Xf&& canonical_metric_landmarks, // + Eigen::VectorXf&& landmark_weights, // + std::unique_ptr procrustes_solver) + : origin_point_location_(origin_point_location), + input_source_(input_source), + canonical_metric_landmarks_(std::move(canonical_metric_landmarks)), + landmark_weights_(std::move(landmark_weights)), + procrustes_solver_(std::move(procrustes_solver)) {} + + // Converts `screen_landmark_list` into `metric_landmark_list` and estimates + // the `pose_transform_mat`. + // + // Here's the algorithm summary: + // + // (1) Project X- and Y- screen landmark coordinates at the Z near plane. + // + // (2) Estimate a canonical-to-runtime landmark set scale by running the + // Procrustes solver using the screen runtime landmarks. + // + // On this iteration, screen landmarks are used instead of unprojected + // metric landmarks as it is not safe to unproject due to the relative + // nature of the input screen landmark Z coordinate. + // + // (3) Use the canonical-to-runtime scale from (2) to unproject the screen + // landmarks. The result is referenced as "intermediate landmarks" because + // they are the first estimation of the resuling metric landmarks, but are + // not quite there yet. + // + // (4) Estimate a canonical-to-runtime landmark set scale by running the + // Procrustes solver using the intermediate runtime landmarks. 
+ // + // (5) Use the product of the scale factors from (2) and (4) to unproject + // the screen landmarks the second time. This is the second and the final + // estimation of the metric landmarks. + // + // (6) Multiply each of the metric landmarks by the inverse pose + // transformation matrix to align the runtime metric face landmarks with + // the canonical metric face landmarks. + // + // Note: the input screen landmarks are in the left-handed coordinate system, + // however any metric landmarks - including the canonical metric + // landmarks, the final runtime metric landmarks and any intermediate + // runtime metric landmarks - are in the right-handed coordinate system. + // + // To keep the logic correct, the landmark set handedness is changed any + // time the screen-to-metric semantic barrier is passed. + absl::Status Convert(const NormalizedLandmarkList& screen_landmark_list, // + const PerspectiveCameraFrustum& pcf, // + LandmarkList& metric_landmark_list, // + Eigen::Matrix4f& pose_transform_mat) const { + RET_CHECK_EQ(screen_landmark_list.landmark_size(), + canonical_metric_landmarks_.cols()) + << "The number of landmarks doesn't match the number passed upon " + "initialization!"; + + Eigen::Matrix3Xf screen_landmarks; + ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks); + + ProjectXY(pcf, screen_landmarks); + const float depth_offset = screen_landmarks.row(2).mean(); + + // 1st iteration: don't unproject XY because it's unsafe to do so due to + // the relative nature of the Z coordinate. Instead, run the + // first estimation on the projected XY and use that scale to + // unproject for the 2nd iteration. + Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks); + ChangeHandedness(intermediate_landmarks); + + ASSIGN_OR_RETURN(const float first_iteration_scale, + EstimateScale(intermediate_landmarks), + _ << "Failed to estimate first iteration scale!"); + + // 2nd iteration: unproject XY using the scale from the 1st iteration. + intermediate_landmarks = screen_landmarks; + MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale, + intermediate_landmarks); + UnprojectXY(pcf, intermediate_landmarks); + ChangeHandedness(intermediate_landmarks); + + // For face detection input landmarks, re-write Z-coord from the canonical + // landmarks. + if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) { + Eigen::Matrix4f intermediate_pose_transform_mat; + MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem( + canonical_metric_landmarks_, intermediate_landmarks, + landmark_weights_, intermediate_pose_transform_mat)) + << "Failed to estimate pose transform matrix!"; + + intermediate_landmarks.row(2) = + (intermediate_pose_transform_mat * + canonical_metric_landmarks_.colwise().homogeneous()) + .row(2); + } + ASSIGN_OR_RETURN(const float second_iteration_scale, + EstimateScale(intermediate_landmarks), + _ << "Failed to estimate second iteration scale!"); + + // Use the total scale to unproject the screen landmarks. + const float total_scale = first_iteration_scale * second_iteration_scale; + MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks); + UnprojectXY(pcf, screen_landmarks); + ChangeHandedness(screen_landmarks); + + // At this point, screen landmarks are converted into metric landmarks. 
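+    // (The reference below only renames the buffer that already holds the
+    // unprojected, right-handed values; no copy is made.)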
+ Eigen::Matrix3Xf& metric_landmarks = screen_landmarks; + + MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem( + canonical_metric_landmarks_, metric_landmarks, landmark_weights_, + pose_transform_mat)) + << "Failed to estimate pose transform matrix!"; + + // For face detection input landmarks, re-write Z-coord from the canonical + // landmarks and run the pose transform estimation again. + if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) { + metric_landmarks.row(2) = + (pose_transform_mat * + canonical_metric_landmarks_.colwise().homogeneous()) + .row(2); + + MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem( + canonical_metric_landmarks_, metric_landmarks, landmark_weights_, + pose_transform_mat)) + << "Failed to estimate pose transform matrix!"; + } + + // Multiply each of the metric landmarks by the inverse pose + // transformation matrix to align the runtime metric face landmarks with + // the canonical metric face landmarks. + metric_landmarks = (pose_transform_mat.inverse() * + metric_landmarks.colwise().homogeneous()) + .topRows(3); + + ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list); + + return absl::OkStatus(); + } + + private: + void ProjectXY(const PerspectiveCameraFrustum& pcf, + Eigen::Matrix3Xf& landmarks) const { + float x_scale = pcf.right - pcf.left; + float y_scale = pcf.top - pcf.bottom; + float x_translation = pcf.left; + float y_translation = pcf.bottom; + + if (origin_point_location_ == OriginPointLocation::TOP_LEFT_CORNER) { + landmarks.row(1) = 1.f - landmarks.row(1).array(); + } + + landmarks = + landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale); + landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f); + } + + absl::StatusOr EstimateScale(Eigen::Matrix3Xf& landmarks) const { + Eigen::Matrix4f transform_mat; + MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem( + canonical_metric_landmarks_, landmarks, landmark_weights_, + transform_mat)) + << "Failed to estimate canonical-to-runtime landmark set transform!"; + + return transform_mat.col(0).norm(); + } + + static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf, + float depth_offset, float scale, + Eigen::Matrix3Xf& landmarks) { + landmarks.row(2) = + (landmarks.array().row(2) - depth_offset + pcf.near) / scale; + } + + static void UnprojectXY(const PerspectiveCameraFrustum& pcf, + Eigen::Matrix3Xf& landmarks) { + landmarks.row(0) = + landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near; + landmarks.row(1) = + landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near; + } + + static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) { + landmarks.row(2) *= -1.f; + } + + static void ConvertLandmarkListToEigenMatrix( + const NormalizedLandmarkList& landmark_list, + Eigen::Matrix3Xf& eigen_matrix) { + eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size()); + for (int i = 0; i < landmark_list.landmark_size(); ++i) { + const auto& landmark = landmark_list.landmark(i); + eigen_matrix(0, i) = landmark.x(); + eigen_matrix(1, i) = landmark.y(); + eigen_matrix(2, i) = landmark.z(); + } + } + + static void ConvertEigenMatrixToLandmarkList( + const Eigen::Matrix3Xf& eigen_matrix, LandmarkList& landmark_list) { + landmark_list.Clear(); + + for (int i = 0; i < eigen_matrix.cols(); ++i) { + auto& landmark = *landmark_list.add_landmark(); + landmark.set_x(eigen_matrix(0, i)); + landmark.set_y(eigen_matrix(1, i)); + landmark.set_z(eigen_matrix(2, i)); + } + } + + const 
OriginPointLocation origin_point_location_; + const InputSource input_source_; + Eigen::Matrix3Xf canonical_metric_landmarks_; + Eigen::VectorXf landmark_weights_; + + std::unique_ptr procrustes_solver_; +}; + +class GeometryPipelineImpl : public GeometryPipeline { + public: + GeometryPipelineImpl( + const PerspectiveCamera& perspective_camera, // + const Mesh3d& canonical_mesh, // + uint32_t canonical_mesh_vertex_size, // + uint32_t canonical_mesh_num_vertices, + uint32_t canonical_mesh_vertex_position_offset, + std::unique_ptr space_converter) + : perspective_camera_(perspective_camera), + canonical_mesh_(canonical_mesh), + canonical_mesh_vertex_size_(canonical_mesh_vertex_size), + canonical_mesh_num_vertices_(canonical_mesh_num_vertices), + canonical_mesh_vertex_position_offset_( + canonical_mesh_vertex_position_offset), + space_converter_(std::move(space_converter)) {} + + absl::StatusOr> EstimateFaceGeometry( + const std::vector& multi_face_landmarks, + int frame_width, int frame_height) const override { + MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height)) + << "Invalid frame dimensions!"; + + // Create a perspective camera frustum to be shared for geometry estimation + // per each face. + PerspectiveCameraFrustum pcf(perspective_camera_, frame_width, + frame_height); + + std::vector multi_face_geometry; + + // From this point, the meaning of "face landmarks" is clarified further as + // "screen face landmarks". This is done do distinguish from "metric face + // landmarks" that are derived during the face geometry estimation process. + for (const NormalizedLandmarkList& screen_face_landmarks : + multi_face_landmarks) { + // Having a too compact screen landmark list will result in numerical + // instabilities, therefore such faces are filtered. + if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) { + continue; + } + + // Convert the screen landmarks into the metric landmarks and get the pose + // transformation matrix. + LandmarkList metric_face_landmarks; + Eigen::Matrix4f pose_transform_mat; + MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf, + metric_face_landmarks, + pose_transform_mat)) + << "Failed to convert landmarks from the screen to the metric space!"; + + // Pack geometry data for this face. + FaceGeometry face_geometry; + Mesh3d* mutable_mesh = face_geometry.mutable_mesh(); + // Copy the canonical face mesh as the face geometry mesh. + mutable_mesh->CopyFrom(canonical_mesh_); + // Replace XYZ vertex mesh coodinates with the metric landmark positions. + for (int i = 0; i < canonical_mesh_num_vertices_; ++i) { + uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i + + canonical_mesh_vertex_position_offset_; + + mutable_mesh->set_vertex_buffer(vertex_buffer_offset, + metric_face_landmarks.landmark(i).x()); + mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1, + metric_face_landmarks.landmark(i).y()); + mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2, + metric_face_landmarks.landmark(i).z()); + } + // Populate the face pose transformation matrix. 
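+      // (pose_transform_mat maps the canonical face frame into the runtime
+      // metric face frame; the effect renderer in this change later consumes
+      // it as the per-face model matrix.)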
+ mediapipe::MatrixDataProtoFromMatrix( + pose_transform_mat, face_geometry.mutable_pose_transform_matrix()); + + multi_face_geometry.push_back(face_geometry); + } + + return multi_face_geometry; + } + + private: + static bool IsScreenLandmarkListTooCompact( + const NormalizedLandmarkList& screen_landmarks) { + float mean_x = 0.f; + float mean_y = 0.f; + for (int i = 0; i < screen_landmarks.landmark_size(); ++i) { + const auto& landmark = screen_landmarks.landmark(i); + mean_x += (landmark.x() - mean_x) / static_cast(i + 1); + mean_y += (landmark.y() - mean_y) / static_cast(i + 1); + } + + float max_sq_dist = 0.f; + for (const auto& landmark : screen_landmarks.landmark()) { + const float d_x = landmark.x() - mean_x; + const float d_y = landmark.y() - mean_y; + max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y); + } + + static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f; + return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold; + } + + const PerspectiveCamera perspective_camera_; + const Mesh3d canonical_mesh_; + const uint32_t canonical_mesh_vertex_size_; + const uint32_t canonical_mesh_num_vertices_; + const uint32_t canonical_mesh_vertex_position_offset_; + + std::unique_ptr space_converter_; +}; + +} // namespace + +absl::StatusOr> CreateGeometryPipeline( + const Environment& environment, const GeometryPipelineMetadata& metadata) { + MP_RETURN_IF_ERROR(ValidateEnvironment(environment)) + << "Invalid environment!"; + MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata)) + << "Invalid geometry pipeline metadata!"; + + const auto& canonical_mesh = metadata.canonical_mesh(); + RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(), + VertexComponent::POSITION)) + << "Canonical face mesh must have the `POSITION` vertex component!"; + RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(), + VertexComponent::TEX_COORD)) + << "Canonical face mesh must have the `TEX_COORD` vertex component!"; + + uint32_t canonical_mesh_vertex_size = + GetVertexSize(canonical_mesh.vertex_type()); + uint32_t canonical_mesh_num_vertices = + canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size; + uint32_t canonical_mesh_vertex_position_offset = + GetVertexComponentOffset(canonical_mesh.vertex_type(), + VertexComponent::POSITION) + .value(); + + // Put the Procrustes landmark basis into Eigen matrices for an easier access. 
+ Eigen::Matrix3Xf canonical_metric_landmarks = + Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices); + Eigen::VectorXf landmark_weights = + Eigen::VectorXf::Zero(canonical_mesh_num_vertices); + + for (int i = 0; i < canonical_mesh_num_vertices; ++i) { + uint32_t vertex_buffer_offset = + canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset; + + canonical_metric_landmarks(0, i) = + canonical_mesh.vertex_buffer(vertex_buffer_offset); + canonical_metric_landmarks(1, i) = + canonical_mesh.vertex_buffer(vertex_buffer_offset + 1); + canonical_metric_landmarks(2, i) = + canonical_mesh.vertex_buffer(vertex_buffer_offset + 2); + } + + for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) { + uint32_t landmark_id = wlr.landmark_id(); + landmark_weights(landmark_id) = wlr.weight(); + } + + std::unique_ptr result = + absl::make_unique( + environment.perspective_camera(), canonical_mesh, + canonical_mesh_vertex_size, canonical_mesh_num_vertices, + canonical_mesh_vertex_position_offset, + absl::make_unique( + environment.origin_point_location(), + metadata.input_source() == InputSource::DEFAULT + ? InputSource::FACE_LANDMARK_PIPELINE + : metadata.input_source(), + std::move(canonical_metric_landmarks), + std::move(landmark_weights), + CreateFloatPrecisionProcrustesSolver())); + + return result; +} + +} // namespace mediapipe::face_geometry diff --git a/mediapipe/modules/face_geometry/libs/geometry_pipeline.h b/mediapipe/modules/face_geometry/libs/geometry_pipeline.h new file mode 100644 index 0000000..ffa779c --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/geometry_pipeline.h @@ -0,0 +1,67 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_ +#define MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_ + +#include +#include + +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h" + +namespace mediapipe::face_geometry { + +// Encapsulates a stateless estimator of facial geometry in a Metric space based +// on the normalized face landmarks in the Screen space. +class GeometryPipeline { + public: + virtual ~GeometryPipeline() = default; + + // Estimates geometry data for multiple faces. + // + // Returns an error status if any of the passed arguments is invalid. + // + // The result includes face geometry data for a subset of the input faces, + // however geometry data for some faces might be missing. This may happen if + // it'd be unstable to estimate the facial geometry based on a corresponding + // face landmark list for any reason (for example, if the landmark list is too + // compact). 
+  //
+  // Each face landmark list must have the same number of landmarks as was
+  // passed upon initialization via the canonical face mesh (as a part of the
+  // geometry pipeline metadata).
+  //
+  // Both `frame_width` and `frame_height` must be positive.
+  virtual absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(
+      const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
+      int frame_width, int frame_height) const = 0;
+};
+
+// Creates an instance of `GeometryPipeline`.
+//
+// Both `environment` and `metadata` must be valid (for details, please refer
+// to the proto message definition comments and/or `validation_utils.h/cc`).
+//
+// Canonical face mesh (defined as a part of `metadata`) must have the
+// `POSITION` and the `TEX_COORD` vertex components.
+absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
+    const Environment& environment, const GeometryPipelineMetadata& metadata);
+
+}  // namespace mediapipe::face_geometry
+
+#endif  // MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
diff --git a/mediapipe/modules/face_geometry/libs/mesh_3d_utils.cc b/mediapipe/modules/face_geometry/libs/mesh_3d_utils.cc
new file mode 100644
index 0000000..2078ec6
--- /dev/null
+++ b/mediapipe/modules/face_geometry/libs/mesh_3d_utils.cc
@@ -0,0 +1,103 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
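For orientation, a minimal sketch of how a client might drive the `GeometryPipeline` interface and the `CreateGeometryPipeline` factory declared above. This is illustrative only and not part of the patch; the `Environment`, `GeometryPipelineMetadata` and per-face landmark lists are assumed to be loaded or produced elsewhere, and the helper name `EstimateForFrame` is made up for the example.

#include <memory>
#include <vector>

#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"

namespace mediapipe::face_geometry {

// Illustrative helper: runs the geometry pipeline once for a single frame.
absl::StatusOr<std::vector<FaceGeometry>> EstimateForFrame(
    const Environment& environment, const GeometryPipelineMetadata& metadata,
    const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
    int frame_width, int frame_height) {
  // The pipeline is stateless; a real client would create it once and reuse
  // it for every frame instead of rebuilding it here.
  ASSIGN_OR_RETURN(std::unique_ptr<GeometryPipeline> pipeline,
                   CreateGeometryPipeline(environment, metadata));

  // Returns geometry (metric mesh + 4x4 pose transform matrix) for the subset
  // of input faces whose screen landmark lists are not too compact.
  return pipeline->EstimateFaceGeometry(multi_face_landmarks, frame_width,
                                        frame_height);
}

}  // namespace mediapipe::face_geometry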
+ +#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h" + +#include +#include + +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { +namespace { + +bool HasVertexComponentVertexPT(VertexComponent vertex_component) { + switch (vertex_component) { + case VertexComponent::POSITION: + case VertexComponent::TEX_COORD: + return true; + + default: + return false; + } +} + +uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) { + switch (vertex_component) { + case VertexComponent::POSITION: + return 3; + case VertexComponent::TEX_COORD: + return 2; + } +} + +uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) { + switch (vertex_component) { + case VertexComponent::POSITION: + return 0; + case VertexComponent::TEX_COORD: + return GetVertexComponentSizeVertexPT(VertexComponent::POSITION); + } +} + +} // namespace + +std::size_t GetVertexSize(Mesh3d::VertexType vertex_type) { + switch (vertex_type) { + case Mesh3d::VERTEX_PT: + return GetVertexComponentSizeVertexPT(VertexComponent::POSITION) + + GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD); + } +} + +std::size_t GetPrimitiveSize(Mesh3d::PrimitiveType primitive_type) { + switch (primitive_type) { + case Mesh3d::TRIANGLE: + return 3; + } +} + +bool HasVertexComponent(Mesh3d::VertexType vertex_type, + VertexComponent vertex_component) { + switch (vertex_type) { + case Mesh3d::VERTEX_PT: + return HasVertexComponentVertexPT(vertex_component); + } +} + +absl::StatusOr GetVertexComponentOffset( + Mesh3d::VertexType vertex_type, VertexComponent vertex_component) { + RET_CHECK(HasVertexComponentVertexPT(vertex_component)) + << "A given vertex type doesn't have the requested component!"; + + switch (vertex_type) { + case Mesh3d::VERTEX_PT: + return GetVertexComponentOffsetVertexPT(vertex_component); + } +} + +absl::StatusOr GetVertexComponentSize( + Mesh3d::VertexType vertex_type, VertexComponent vertex_component) { + RET_CHECK(HasVertexComponentVertexPT(vertex_component)) + << "A given vertex type doesn't have the requested component!"; + + switch (vertex_type) { + case Mesh3d::VERTEX_PT: + return GetVertexComponentSizeVertexPT(vertex_component); + } +} + +} // namespace mediapipe::face_geometry diff --git a/mediapipe/modules/face_geometry/libs/mesh_3d_utils.h b/mediapipe/modules/face_geometry/libs/mesh_3d_utils.h new file mode 100644 index 0000000..a320aae --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/mesh_3d_utils.h @@ -0,0 +1,51 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
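Given the `VERTEX_PT` layout handled above (5 floats per vertex: XYZ position followed by UV), here is a hedged sketch of how a caller might read one vertex position with these helpers. The function name `VertexPosition` is illustrative and error handling is omitted.

#include <array>
#include <cstdint>

#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"

namespace mediapipe::face_geometry {

// Reads the XYZ position of vertex `i` from a VERTEX_PT mesh.
std::array<float, 3> VertexPosition(const Mesh3d& mesh, int i) {
  // For VERTEX_PT: vertex size is 3 (XYZ) + 2 (UV) = 5 floats, and the
  // position component starts at offset 0 within each vertex.
  const int vertex_size = static_cast<int>(GetVertexSize(mesh.vertex_type()));
  const int position_offset = static_cast<int>(
      GetVertexComponentOffset(mesh.vertex_type(), VertexComponent::POSITION)
          .value());

  const int base = vertex_size * i + position_offset;
  return {mesh.vertex_buffer(base), mesh.vertex_buffer(base + 1),
          mesh.vertex_buffer(base + 2)};
}

}  // namespace mediapipe::face_geometry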
+ +#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_ +#define MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_ + +#include +#include + +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { + +enum class VertexComponent { POSITION, TEX_COORD }; + +std::size_t GetVertexSize(Mesh3d::VertexType vertex_type); + +std::size_t GetPrimitiveSize(Mesh3d::PrimitiveType primitive_type); + +bool HasVertexComponent(Mesh3d::VertexType vertex_type, + VertexComponent vertex_component); + +// Computes the vertex component offset. +// +// Returns an error status if a given vertex type doesn't have the requested +// component. +absl::StatusOr GetVertexComponentOffset( + Mesh3d::VertexType vertex_type, VertexComponent vertex_component); + +// Computes the vertex component size. +// +// Returns an error status if a given vertex type doesn't have the requested +// component. +absl::StatusOr GetVertexComponentSize( + Mesh3d::VertexType vertex_type, VertexComponent vertex_component); + +} // namespace mediapipe::face_geometry + +#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_ diff --git a/mediapipe/modules/face_geometry/libs/procrustes_solver.cc b/mediapipe/modules/face_geometry/libs/procrustes_solver.cc new file mode 100644 index 0000000..2ffae0e --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/procrustes_solver.cc @@ -0,0 +1,266 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h" + +#include +#include + +#include "Eigen/Dense" +#include "absl/memory/memory.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/framework/port/statusor.h" + +namespace mediapipe { +namespace face_geometry { +namespace { + +class FloatPrecisionProcrustesSolver : public ProcrustesSolver { + public: + FloatPrecisionProcrustesSolver() = default; + + absl::Status SolveWeightedOrthogonalProblem( + const Eigen::Matrix3Xf& source_points, // + const Eigen::Matrix3Xf& target_points, // + const Eigen::VectorXf& point_weights, + Eigen::Matrix4f& transform_mat) const override { + // Validate inputs. + MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points)) + << "Failed to validate weighted orthogonal problem input points!"; + MP_RETURN_IF_ERROR( + ValidatePointWeights(source_points.cols(), point_weights)) + << "Failed to validate weighted orthogonal problem point weights!"; + + // Extract square root from the point weights. + Eigen::VectorXf sqrt_weights = ExtractSquareRoot(point_weights); + + // Try to solve the WEOP problem. 
+ MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem( + source_points, target_points, sqrt_weights, transform_mat)) + << "Failed to solve the WEOP problem!"; + + return absl::OkStatus(); + } + + private: + static constexpr float kAbsoluteErrorEps = 1e-9f; + + static absl::Status ValidateInputPoints( + const Eigen::Matrix3Xf& source_points, + const Eigen::Matrix3Xf& target_points) { + RET_CHECK_GT(source_points.cols(), 0) + << "The number of source points must be positive!"; + + RET_CHECK_EQ(source_points.cols(), target_points.cols()) + << "The number of source and target points must be equal!"; + + return absl::OkStatus(); + } + + static absl::Status ValidatePointWeights( + int num_points, const Eigen::VectorXf& point_weights) { + RET_CHECK_GT(point_weights.size(), 0) + << "The number of point weights must be positive!"; + + RET_CHECK_EQ(point_weights.size(), num_points) + << "The number of points and point weights must be equal!"; + + float total_weight = 0.f; + for (int i = 0; i < num_points; ++i) { + RET_CHECK_GE(point_weights(i), 0.f) + << "Each point weight must be non-negative!"; + + total_weight += point_weights(i); + } + + RET_CHECK_GT(total_weight, kAbsoluteErrorEps) + << "The total point weight is too small!"; + + return absl::OkStatus(); + } + + static Eigen::VectorXf ExtractSquareRoot( + const Eigen::VectorXf& point_weights) { + Eigen::VectorXf sqrt_weights(point_weights); + for (int i = 0; i < sqrt_weights.size(); ++i) { + sqrt_weights(i) = std::sqrt(sqrt_weights(i)); + } + + return sqrt_weights; + } + + // Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into + // a single 4x4 transformation matrix. + static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s, + const Eigen::Vector3f& t) { + Eigen::Matrix4f result = Eigen::Matrix4f::Identity(); + result.leftCols(3).topRows(3) = r_and_s; + result.col(3).topRows(3) = t; + + return result; + } + + // The weighted problem is thoroughly addressed in Section 2.4 of: + // D. Akca, Generalized Procrustes analysis and its applications + // in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648 + // + // Notable differences in the code presented here are: + // + // * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q. + // Our W_p is diagonal (equal to diag(sqrt_weights^2)), + // so we can just set Q = diag(sqrt_weights) instead. + // + // * In the paper, the problem is presented as + // (for W_k = I and W_p = tranposed(Q) Q): + // || Q (c A T + j tranposed(t) - B) || -> min. + // + // We reformulate it as an equivalent minimization of the transpose's + // norm: + // || (c tranposed(T) tranposed(A) - tranposed(B)) tranposed(Q) || -> min, + // where tranposed(A) and tranposed(B) are the source and the target point + // clouds, respectively, c tranposed(T) is the rotation+scaling R sought + // for, and Q is diag(sqrt_weights). + // + // Most of the derivations are therefore transposed. + // + // Note: the output `transform_mat` argument is used instead of `StatusOr<>` + // return type in order to avoid Eigen memory alignment issues. Details: + // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html + static absl::Status InternalSolveWeightedOrthogonalProblem( + const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets, + const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) { + // tranposed(A_w). + Eigen::Matrix3Xf weighted_sources = + sources.array().rowwise() * sqrt_weights.array().transpose(); + // tranposed(B_w). 
+ Eigen::Matrix3Xf weighted_targets = + targets.array().rowwise() * sqrt_weights.array().transpose(); + + // w = tranposed(j_w) j_w. + float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum(); + + // Let C = (j_w tranposed(j_w)) / (tranposed(j_w) j_w). + // Note that C = tranposed(C), hence (I - C) = tranposed(I - C). + // + // tranposed(A_w) C = tranposed(A_w) j_w tranposed(j_w) / w = + // (tranposed(A_w) j_w) tranposed(j_w) / w = c_w tranposed(j_w), + // + // where c_w = tranposed(A_w) j_w / w is a k x 1 vector calculated here: + Eigen::Matrix3Xf twice_weighted_sources = + weighted_sources.array().rowwise() * sqrt_weights.array().transpose(); + Eigen::Vector3f source_center_of_mass = + twice_weighted_sources.rowwise().sum() / total_weight; + // tranposed((I - C) A_w) = tranposed(A_w) (I - C) = + // tranposed(A_w) - tranposed(A_w) C = tranposed(A_w) - c_w tranposed(j_w). + Eigen::Matrix3Xf centered_weighted_sources = + weighted_sources - source_center_of_mass * sqrt_weights.transpose(); + + Eigen::Matrix3f rotation; + MP_RETURN_IF_ERROR(ComputeOptimalRotation( + weighted_targets * centered_weighted_sources.transpose(), rotation)) + << "Failed to compute the optimal rotation!"; + ASSIGN_OR_RETURN( + float scale, + ComputeOptimalScale(centered_weighted_sources, weighted_sources, + weighted_targets, rotation), + _ << "Failed to compute the optimal scale!"); + + // R = c tranposed(T). + Eigen::Matrix3f rotation_and_scale = scale * rotation; + + // Compute optimal translation for the weighted problem. + + // tranposed(B_w - c A_w T) = tranposed(B_w) - R tranposed(A_w) in (54). + const auto pointwise_diffs = + weighted_targets - rotation_and_scale * weighted_sources; + // Multiplication by j_w is a respectively weighted column sum. + // (54) from the paper. + const auto weighted_pointwise_diffs = + pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose(); + Eigen::Vector3f translation = + weighted_pointwise_diffs.rowwise().sum() / total_weight; + + transform_mat = CombineTransformMatrix(rotation_and_scale, translation); + + return absl::OkStatus(); + } + + // `design_matrix` is a transposed LHS of (51) in the paper. + // + // Note: the output `rotation` argument is used instead of `StatusOr<>` + // return type in order to avoid Eigen memory alignment issues. Details: + // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html + static absl::Status ComputeOptimalRotation( + const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) { + RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps) + << "Design matrix norm is too small!"; + + Eigen::JacobiSVD svd( + design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV); + + Eigen::Matrix3f postrotation = svd.matrixU(); + Eigen::Matrix3f prerotation = svd.matrixV().transpose(); + + // Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1), + // see "4.6 Constrained orthogonal Procrustes problems" + // in the Gower & Dijksterhuis's book "Procrustes Analysis". + // We flip the sign of the least singular value along with a column in W. + // + // Note that now the sum of singular values doesn't work for scale + // estimation due to this sign flip. + if (postrotation.determinant() * prerotation.determinant() < + static_cast(0)) { + postrotation.col(2) *= static_cast(-1); + } + + // Transposed (52) from the paper. 
+ rotation = postrotation * prerotation; + return absl::OkStatus(); + } + + static absl::StatusOr ComputeOptimalScale( + const Eigen::Matrix3Xf& centered_weighted_sources, + const Eigen::Matrix3Xf& weighted_sources, + const Eigen::Matrix3Xf& weighted_targets, + const Eigen::Matrix3f& rotation) { + // tranposed(T) tranposed(A_w) (I - C). + const auto rotated_centered_weighted_sources = + rotation * centered_weighted_sources; + // Use the identity trace(A B) = sum(A * B^T) + // to avoid building large intermediate matrices (* is Hadamard product). + // (53) from the paper. + float numerator = + rotated_centered_weighted_sources.cwiseProduct(weighted_targets).sum(); + float denominator = + centered_weighted_sources.cwiseProduct(weighted_sources).sum(); + + RET_CHECK_GT(denominator, kAbsoluteErrorEps) + << "Scale expression denominator is too small!"; + RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps) + << "Scale is too small!"; + + return numerator / denominator; + } +}; + +} // namespace + +std::unique_ptr CreateFloatPrecisionProcrustesSolver() { + return absl::make_unique(); +} + +} // namespace face_geometry +} // namespace mediapipe diff --git a/mediapipe/modules/face_geometry/libs/procrustes_solver.h b/mediapipe/modules/face_geometry/libs/procrustes_solver.h new file mode 100644 index 0000000..c34b8f6 --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/procrustes_solver.h @@ -0,0 +1,70 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_ +#define MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_ + +#include + +#include "Eigen/Dense" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe::face_geometry { + +// Encapsulates a stateless solver for the Weighted Extended Orthogonal +// Procrustes (WEOP) Problem, as defined in Section 2.4 of +// https://doi.org/10.3929/ethz-a-004656648. +// +// Given the source and the target point clouds, the algorithm estimates +// a 4x4 transformation matrix featuring the following semantic components: +// +// * Uniform scale +// * Rotation +// * Translation +// +// The matrix maps the source point cloud into the target point cloud minimizing +// the Mean Squared Error. +class ProcrustesSolver { + public: + virtual ~ProcrustesSolver() = default; + + // Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem. + // + // All `source_points`, `target_points` and `point_weights` must define the + // same number of points. Elements of `point_weights` must be non-negative. + // + // A too small diameter of either of the point clouds will likely lead to + // numerical instabilities and failure to estimate the transformation. + // + // A too small point cloud total weight will likely lead to numerical + // instabilities and failure to estimate the transformation too. 
+ // + // Small point coordinate deviation for either of the point cloud will likely + // result in a failure as it will make the solution very unstable if possible. + // + // Note: the output `transform_mat` argument is used instead of `StatusOr<>` + // return type in order to avoid Eigen memory alignment issues. Details: + // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html + virtual absl::Status SolveWeightedOrthogonalProblem( + const Eigen::Matrix3Xf& source_points, // + const Eigen::Matrix3Xf& target_points, // + const Eigen::VectorXf& point_weights, // + Eigen::Matrix4f& transform_mat) const = 0; +}; + +std::unique_ptr CreateFloatPrecisionProcrustesSolver(); + +} // namespace mediapipe::face_geometry + +#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_ diff --git a/mediapipe/modules/face_geometry/libs/validation_utils.cc b/mediapipe/modules/face_geometry/libs/validation_utils.cc new file mode 100644 index 0000000..eb4fd08 --- /dev/null +++ b/mediapipe/modules/face_geometry/libs/validation_utils.cc @@ -0,0 +1,126 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/face_geometry/libs/validation_utils.h" + +#include +#include + +#include "mediapipe/framework/formats/matrix_data.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_macros.h" +#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { + +absl::Status ValidatePerspectiveCamera( + const PerspectiveCamera& perspective_camera) { + static constexpr float kAbsoluteErrorEps = 1e-9f; + + RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps) + << "Near Z must be greater than 0 with a margin of 10^{-9}!"; + + RET_CHECK_GT(perspective_camera.far(), + perspective_camera.near() + kAbsoluteErrorEps) + << "Far Z must be greater than Near Z with a margin of 10^{-9}!"; + + RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps) + << "Vertical FOV must be positive with a margin of 10^{-9}!"; + + RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps, + 180.f) + << "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}"; + + return absl::OkStatus(); +} + +absl::Status ValidateEnvironment(const Environment& environment) { + MP_RETURN_IF_ERROR( + ValidatePerspectiveCamera(environment.perspective_camera())) + << "Invalid perspective camera!"; + + return absl::OkStatus(); +} + +absl::Status ValidateMesh3d(const Mesh3d& mesh_3d) { + const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type()); + const std::size_t primitive_type = GetPrimitiveSize(mesh_3d.primitive_type()); + + RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % 
vertex_size, 0)
+      << "Vertex buffer size must be a multiple of the vertex size!";
+
+  RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_type, 0)
+      << "Index buffer size must be a multiple of the primitive size!";
+
+  const int num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
+  for (uint32_t idx : mesh_3d.index_buffer()) {
+    RET_CHECK_LT(idx, num_vertices)
+        << "All mesh indices must refer to an existing vertex!";
+  }
+
+  return absl::OkStatus();
+}
+
+absl::Status ValidateFaceGeometry(const FaceGeometry& face_geometry) {
+  MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";
+
+  static constexpr char kInvalid4x4MatrixMessage[] =
+      "Pose transformation matrix must be a 4x4 matrix!";
+
+  const MatrixData& pose_transform_matrix =
+      face_geometry.pose_transform_matrix();
+  RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
+  RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
+  RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
+      << kInvalid4x4MatrixMessage;
+
+  return absl::OkStatus();
+}
+
+absl::Status ValidateGeometryPipelineMetadata(
+    const GeometryPipelineMetadata& metadata) {
+  MP_RETURN_IF_ERROR(ValidateMesh3d(metadata.canonical_mesh()))
+      << "Invalid canonical mesh!";
+
+  RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
+      << "Procrustes landmark basis must be non-empty!";
+
+  const int num_vertices =
+      metadata.canonical_mesh().vertex_buffer_size() /
+      GetVertexSize(metadata.canonical_mesh().vertex_type());
+  for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {
+    RET_CHECK_LT(wlr.landmark_id(), num_vertices)
+        << "All Procrustes basis indices must refer to an existing canonical "
+           "mesh vertex!";
+
+    RET_CHECK_GE(wlr.weight(), 0.f)
+        << "All Procrustes basis landmarks must have a non-negative weight!";
+  }
+
+  return absl::OkStatus();
+}
+
+absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
+  RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
+  RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";
+
+  return absl::OkStatus();
+}
+
+}  // namespace mediapipe::face_geometry
diff --git a/mediapipe/modules/face_geometry/libs/validation_utils.h b/mediapipe/modules/face_geometry/libs/validation_utils.h
new file mode 100644
index 0000000..c0a7e08
--- /dev/null
+++ b/mediapipe/modules/face_geometry/libs/validation_utils.h
@@ -0,0 +1,70 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
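To make the camera constraints above concrete, a small sketch of building an `Environment` that satisfies `ValidatePerspectiveCamera` and `ValidateEnvironment`. The FOV and clipping-plane values are illustrative defaults only, and the helper name `MakeAndValidateEnvironment` is invented for the example.

#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"

namespace mediapipe::face_geometry {

absl::Status MakeAndValidateEnvironment() {
  Environment environment;
  environment.set_origin_point_location(OriginPointLocation::TOP_LEFT_CORNER);

  PerspectiveCamera* camera = environment.mutable_perspective_camera();
  camera->set_vertical_fov_degrees(63.f);  // Must be in (0, 180).
  camera->set_near(1.f);                   // Must be > 0.
  camera->set_far(10000.f);                // Must be > near.

  // Fails with a descriptive error status if any constraint is violated.
  return ValidateEnvironment(environment);
}

}  // namespace mediapipe::face_geometry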
+ +#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_ +#define MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_ + +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/face_geometry/protos/environment.pb.h" +#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h" +#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h" +#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h" + +namespace mediapipe::face_geometry { + +// Validates `perspective_camera`. +// +// Near Z must be greater than 0 with a margin of `1e-9`. +// Far Z must be greater than Near Z with a margin of `1e-9`. +// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range +// edges. +absl::Status ValidatePerspectiveCamera( + const PerspectiveCamera& perspective_camera); + +// Validates `environment`. +// +// Environment's perspective camera must be valid. +absl::Status ValidateEnvironment(const Environment& environment); + +// Validates `mesh_3d`. +// +// Mesh vertex buffer size must a multiple of the vertex size. +// Mesh index buffer size must a multiple of the primitive size. +// All mesh indices must reference an existing mesh vertex. +absl::Status ValidateMesh3d(const Mesh3d& mesh_3d); + +// Validates `face_geometry`. +// +// Face mesh must be valid. +// Face pose transformation matrix must be a 4x4 matrix. +absl::Status ValidateFaceGeometry(const FaceGeometry& face_geometry); + +// Validates `metadata`. +// +// Canonical face mesh must be valid. +// Procrustes landmark basis must be non-empty. +// All Procrustes basis indices must reference an existing canonical mesh +// vertex. +// All Procrustes basis landmarks must have a non-negative weight. +absl::Status ValidateGeometryPipelineMetadata( + const GeometryPipelineMetadata& metadata); + +// Validates frame dimensions. +// +// Both frame width and frame height must be positive. +absl::Status ValidateFrameDimensions(int frame_width, int frame_height); + +} // namespace mediapipe::face_geometry + +#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_ diff --git a/mediapipe/modules/face_geometry/protos/BUILD b/mediapipe/modules/face_geometry/protos/BUILD new file mode 100644 index 0000000..48b7b66 --- /dev/null +++ b/mediapipe/modules/face_geometry/protos/BUILD @@ -0,0 +1,46 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
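As a companion to the `ValidateFaceGeometry` contract above (a 4x4 pose transform matrix with 16 packed floats), a consumer might unpack the matrix into Eigen roughly as follows. This assumes the default column-major `MatrixData` layout, which matches Eigen's default storage order; the helper name is illustrative.

#include "Eigen/Dense"
#include "mediapipe/framework/formats/matrix_data.pb.h"

namespace mediapipe::face_geometry {

// Illustrative only: relies on ValidateFaceGeometry() having checked that the
// matrix is 4x4 with exactly 16 packed values.
inline Eigen::Matrix4f PoseTransformToEigen(const mediapipe::MatrixData& data) {
  return Eigen::Map<const Eigen::Matrix4f>(data.packed_data().data());
}

}  // namespace mediapipe::face_geometry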
+ +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_proto_library( + name = "environment_proto", + srcs = ["environment.proto"], +) + +mediapipe_proto_library( + name = "face_geometry_proto", + srcs = ["face_geometry.proto"], + deps = [ + ":mesh_3d_proto", + "//mediapipe/framework/formats:matrix_data_proto", + ], +) + +mediapipe_proto_library( + name = "geometry_pipeline_metadata_proto", + srcs = ["geometry_pipeline_metadata.proto"], + deps = [ + ":mesh_3d_proto", + ], +) + +mediapipe_proto_library( + name = "mesh_3d_proto", + srcs = ["mesh_3d.proto"], +) diff --git a/mediapipe/modules/face_geometry/protos/environment.proto b/mediapipe/modules/face_geometry/protos/environment.proto new file mode 100644 index 0000000..cca3f29 --- /dev/null +++ b/mediapipe/modules/face_geometry/protos/environment.proto @@ -0,0 +1,84 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe.face_geometry; + +option java_package = "com.google.mediapipe.modules.facegeometry"; +option java_outer_classname = "EnvironmentProto"; + +// Defines the (0, 0) origin point location of the environment. +// +// The variation in the origin point location can be traced back to the memory +// layout of the camera video frame buffers. +// +// Usually, the memory layout for most CPU (and also some GPU) camera video +// frame buffers results in having the (0, 0) origin point located in the +// Top Left corner. +// +// On the contrary, the memory layout for most GPU camera video frame buffers +// results in having the (0, 0) origin point located in the Bottom Left corner. +// +// Let's consider the following example: +// +// (A) ---------------+ +// ___ | +// | (1) | | | +// | / \ | | | +// | |---|===|-| | +// | |---| | | | +// | / \ | | | +// | | | | | | +// | | (2) |=| | | +// | | | | | | +// | |_______| |_| | +// | |@| |@| | | | +// | ___________|_|_ | +// | +// (B) ---------------+ +// +// On this example, (1) and (2) have the same X coordinate regardless of the +// origin point location. However, having the origin point located at (A) +// (Top Left corner) results in (1) having a smaller Y coordinate if compared to +// (2). Similarly, having the origin point located at (B) (Bottom Left corner) +// results in (1) having a greater Y coordinate if compared to (2). +// +// Providing the correct origin point location for your environment and making +// sure all the input landmarks are in-sync with this location is crucial +// for receiving the correct output face geometry and visual renders. +enum OriginPointLocation { + BOTTOM_LEFT_CORNER = 1; + TOP_LEFT_CORNER = 2; +} + +// The perspective camera is defined through its vertical FOV angle and the +// Z-clipping planes. 
The aspect ratio is a runtime variable for the face +// geometry module and should be provided alongside the face landmarks in order +// to estimate the face geometry on a given frame. +// +// More info on Perspective Cameras: +// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective +message PerspectiveCamera { + // `0 < vertical_fov_degrees < 180`. + optional float vertical_fov_degrees = 1; + // `0 < near < far`. + optional float near = 2; + optional float far = 3; +} + +message Environment { + optional OriginPointLocation origin_point_location = 1; + optional PerspectiveCamera perspective_camera = 2; +} diff --git a/mediapipe/modules/face_geometry/protos/face_geometry.proto b/mediapipe/modules/face_geometry/protos/face_geometry.proto new file mode 100644 index 0000000..b91a7d7 --- /dev/null +++ b/mediapipe/modules/face_geometry/protos/face_geometry.proto @@ -0,0 +1,60 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe.face_geometry; + +import "mediapipe/framework/formats/matrix_data.proto"; +import "mediapipe/modules/face_geometry/protos/mesh_3d.proto"; + +option java_package = "com.google.mediapipe.modules.facegeometry"; +option java_outer_classname = "FaceGeometryProto"; + +// Defines the face geometry pipeline estimation result format. +message FaceGeometry { + // Defines a mesh surface for a face. The face mesh vertex IDs are the same as + // the face landmark IDs. + // + // XYZ coordinates exist in the right-handed Metric 3D space configured by an + // environment. UV coodinates are taken from the canonical face mesh model. + // + // XY coordinates are guaranteed to match the screen positions of + // the input face landmarks after (1) being multiplied by the face pose + // transformation matrix and then (2) being projected with a perspective + // camera matrix of the same environment. + // + // NOTE: the triangular topology of the face mesh is only useful when derived + // from the 468 face landmarks, not from the 6 face detection landmarks + // (keypoints). The former don't cover the entire face and this mesh is + // defined here only to comply with the API. It should be considered as + // a placeholder and/or for debugging purposes. + // + // Use the face geometry derived from the face detection landmarks + // (keypoints) for the face pose transformation matrix, not the mesh. + optional Mesh3d mesh = 1; + + // Defines a face pose transformation matrix, which provides mapping from + // the static canonical face model to the runtime face. Tries to distinguish + // a head pose change from a facial expression change and to only reflect the + // former. + // + // Is a 4x4 matrix and contains only the following components: + // * Uniform scale + // * Rotation + // * Translation + // + // The last row is guaranteed to be `[0 0 0 1]`. 
+  optional MatrixData pose_transform_matrix = 2;
+}
diff --git a/mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.proto b/mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.proto
new file mode 100644
index 0000000..dac0e25
--- /dev/null
+++ b/mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.proto
@@ -0,0 +1,63 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package mediapipe.face_geometry;
+
+import "mediapipe/modules/face_geometry/protos/mesh_3d.proto";
+
+option java_package = "com.google.mediapipe.modules.facegeometry";
+option java_outer_classname = "GeometryPipelineMetadataProto";
+
+enum InputSource {
+  DEFAULT = 0;  // FACE_LANDMARK_PIPELINE
+  FACE_LANDMARK_PIPELINE = 1;
+  FACE_DETECTION_PIPELINE = 2;
+}
+
+message WeightedLandmarkRef {
+  // Defines the landmark ID. References an existing face landmark ID.
+  optional uint32 landmark_id = 1;
+  // Defines the landmark weight. The larger the weight, the more influence
+  // this landmark has in the basis.
+  //
+  // Is positive.
+  optional float weight = 2;
+}
+
+// Next field ID: 4
+message GeometryPipelineMetadata {
+  // Defines the source of the input landmarks to let the underlying geometry
+  // pipeline adjust in order to produce the best results.
+  //
+  // Face landmark pipeline is expected to produce 3D landmarks with relative Z
+  // coordinate, which is scaled as the X coordinate assuming the weak
+  // perspective projection camera model.
+  //
+  // Face detection pipeline is expected to produce 2D landmarks with Z
+  // coordinate being equal to 0.
+  optional InputSource input_source = 3;
+  // Defines a mesh surface for a canonical face. The canonical face mesh
+  // vertex IDs are the same as the face landmark IDs.
+  //
+  // XYZ coordinates are defined in centimeter units.
+  optional Mesh3d canonical_mesh = 1;
+  // Defines a weighted landmark basis for running the Procrustes solver
+  // algorithm inside the geometry pipeline.
+  //
+  // A good basis sets face landmark weights in a way that distinguishes a head
+  // pose change from a facial expression change and responds only to the
+  // former.
+  repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
+}
diff --git a/mediapipe/modules/face_geometry/protos/mesh_3d.proto b/mediapipe/modules/face_geometry/protos/mesh_3d.proto
new file mode 100644
index 0000000..4db45c1
--- /dev/null
+++ b/mediapipe/modules/face_geometry/protos/mesh_3d.proto
@@ -0,0 +1,41 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe.face_geometry; + +option java_package = "com.google.mediapipe.modules.facegeometry"; +option java_outer_classname = "Mesh3dProto"; + +message Mesh3d { + enum VertexType { + // Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV). + VERTEX_PT = 0; + } + + enum PrimitiveType { + // Is defined by 3 indices: triangle vertex IDs. + TRIANGLE = 0; + } + + optional VertexType vertex_type = 1; + optional PrimitiveType primitive_type = 2; + // Vertex buffer size is a multiple of the vertex size (e.g., 5 for + // VERTEX_PT). + repeated float vertex_buffer = 3; + // Index buffer size is a multiple of the primitive size (e.g., 3 for + // TRIANGLE). + repeated uint32 index_buffer = 4; +} diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD new file mode 100644 index 0000000..f155e46 --- /dev/null +++ b/mediapipe/modules/face_landmark/BUILD @@ -0,0 +1,190 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "face_landmark_cpu", + graph = "face_landmark_cpu.pbtxt", + register_as = "FaceLandmarkCpu", + deps = [ + ":face_landmarks_model_loader", + ":tensors_to_face_landmarks", + ":tensors_to_face_landmarks_with_attention", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_gpu", + graph = "face_landmark_gpu.pbtxt", + register_as = "FaceLandmarkGpu", + deps = [ + ":face_landmarks_model_loader", + ":tensors_to_face_landmarks", + ":tensors_to_face_landmarks_with_attention", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + 
"//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_cpu", + graph = "face_landmark_front_cpu.pbtxt", + register_as = "FaceLandmarkFrontCpu", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_cpu", + ":face_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_gpu", + graph = "face_landmark_front_gpu.pbtxt", + register_as = "FaceLandmarkFrontGpu", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_gpu", + ":face_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_cpu_image", + graph = "face_landmark_front_cpu_image.pbtxt", + register_as = "FaceLandmarkFrontCpuImage", + deps = [ + ":face_landmark_front_cpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/util:from_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_gpu_image", + graph = "face_landmark_front_gpu_image.pbtxt", + register_as = "FaceLandmarkFrontGpuImage", + deps = [ + ":face_landmark_front_gpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/util:from_image_calculator", + ], +) + +exports_files( + srcs = [ + "face_landmark.tflite", + "face_landmark_with_attention.tflite", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_front_detection_to_roi", + graph = "face_detection_front_detection_to_roi.pbtxt", + register_as = "FaceDetectionFrontDetectionToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_landmarks_to_roi", + graph = "face_landmark_landmarks_to_roi.pbtxt", + register_as = "FaceLandmarkLandmarksToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmarks_model_loader", + graph = 
"face_landmarks_model_loader.pbtxt", + register_as = "FaceLandmarksModelLoader", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "tensors_to_face_landmarks", + graph = "tensors_to_face_landmarks.pbtxt", + register_as = "TensorsToFaceLandmarks", + deps = [ + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "tensors_to_face_landmarks_with_attention", + graph = "tensors_to_face_landmarks_with_attention.pbtxt", + register_as = "TensorsToFaceLandmarksWithAttention", + deps = [ + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmarks_refinement_calculator", + ], +) diff --git a/mediapipe/modules/face_landmark/README.md b/mediapipe/modules/face_landmark/README.md new file mode 100644 index 0000000..eed21a2 --- /dev/null +++ b/mediapipe/modules/face_landmark/README.md @@ -0,0 +1,9 @@ +# face_landmark + +Subgraphs|Details +:--- | :--- +[`FaceLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt)| Detects landmarks on a single face. (CPU input, and inference is executed on CPU.) +[`FaceLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt)| Detects landmarks on a single face. (GPU input, and inference is executed on GPU) +[`FaceLandmarkFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt)| Detects and tracks landmarks on multiple faces. (CPU input, and inference is executed on CPU) +[`FaceLandmarkFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)| Detects and tracks landmarks on multiple faces. (GPU input, and inference is executed on GPU.) + diff --git a/mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt b/mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt new file mode 100644 index 0000000..acc9476 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt @@ -0,0 +1,47 @@ +# MediaPipe graph to calculate face region of interest (ROI) from the very +# first face detection in the vector of detections provided by +# "FaceDetectionShortRangeCpu" or "FaceDetectionShortRangeGpu" +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "FaceDetectionFrontDetectionToRoi" + +# Face detection. (Detection) +input_stream: "DETECTION:detection" +# Frame size (width and height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# ROI according to the first detection of input detections. (NormalizedRect) +output_stream: "ROI:roi" + +# Converts results of face detection into a rectangle (normalized by image size) +# that encloses the face and is rotated such that the line connecting left eye +# and right eye is aligned with the X-axis of the rectangle. 
+node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:initial_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 # Left eye. + rotation_vector_end_keypoint_index: 1 # Right eye. + rotation_vector_target_angle_degrees: 0 + } + } +} + +# Expands and shifts the rectangle that contains the face so that it's likely +# to cover the entire face. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:initial_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} diff --git a/mediapipe/modules/face_landmark/face_landmark.tflite b/mediapipe/modules/face_landmark/face_landmark.tflite new file mode 100755 index 0000000..573285d Binary files /dev/null and b/mediapipe/modules/face_landmark/face_landmark.tflite differ diff --git a/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt new file mode 100644 index 0000000..4604fc7 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt @@ -0,0 +1,184 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution if `with_attention` is not set or set to `false`. +# +# It is required that "face_landmark_with_attention.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite" +# path during execution if `with_attention` is set to `true`. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# input_side_packet: "WITH_ATTENTION:with_attention" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList) +# +# Number of landmarks depends on the WITH_ATTENTION flag. If it's `true` - then +# there will be 478 landmarks with refined lips, eyes and irises (10 extra +# landmarks are for irises), otherwise 468 non-refined landmarks are returned. +# +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:face_landmarks" + +# Transforms the input image into a 192x192 tensor. 
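+# The face ROI is cropped (and rotated) out of the full image first, and pixel
+# values are scaled into the [0.0, 1.0] float range expected by the model.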
+node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + } + } +} + +# Loads the face landmarks TF Lite model. +node { + calculator: "FaceLandmarksModelLoader" + input_side_packet: "WITH_ATTENTION:with_attention" + output_side_packet: "MODEL:model" +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + input_side_packet: "MODEL:model" + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { xnnpack {} } + } + } +} + +# Splits a vector of tensors into landmark tensors and face flag tensor. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "face_flag_tensor" + options: { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } + } + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 6 } + ranges: { begin: 6 end: 7 } + } + } + } + } + } +} + +# Converts the face-flag tensor into a float that represents the confidence +# score of face presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:face_flag_tensor" + output_stream: "FLOAT:face_presence_score" + options { + [mediapipe.TensorsToFloatsCalculatorOptions.ext] { + activation: SIGMOID + } + } +} + +# Applies a threshold to the confidence score to determine whether a face is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:face_presence_score" + output_stream: "FLAG:face_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drop landmarks tensors if face is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:face_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "LANDMARKS:landmarks" + options: { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "TensorsToFaceLandmarks" + } + contained_node: { + calculator: "TensorsToFaceLandmarksWithAttention" + } + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). 
+node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt new file mode 100644 index 0000000..70a57b0 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt @@ -0,0 +1,247 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution if `with_attention` is not set or set to `false`. +# +# It is required that "face_landmark_with_attention.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite" +# path during execution if `with_attention` is set to `true`. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# input_side_packet: "WITH_ATTENTION:with_attention" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. 
+node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "gated_prev_face_rects_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if enough faces have already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of face detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. +node { + calculator: "FaceDetectionShortRangeCpu" + input_stream: "IMAGE:gated_image" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "gated_prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. 
+node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkCpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_cpu_image.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_cpu_image.pbtxt new file mode 100644 index 0000000..7d0c46a --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_cpu_image.pbtxt @@ -0,0 +1,87 @@ +# MediaPipe graph to detect/predict face landmarks on CPU. + +type: "FaceLandmarkFrontCpuImage" + +# Input image. (Image) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. 
(bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + input_stream: "FINISHED:multi_face_landmarks" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_image" + options: { + [mediapipe.FlowLimiterCalculatorOptions.ext] { + max_in_flight: 1 + max_in_queue: 1 + } + } +} + +# Converts Image to ImageFrame for FaceLandmarkFrontCpu to consume. +node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:throttled_image" + output_stream: "IMAGE_CPU:raw_image_frame" + output_stream: "SOURCE_ON_GPU:is_gpu_image" +} + +# TODO: Remove the extra flipping once adopting MlImage. +# If the source images are on gpu, flip the data vertically before sending them +# into FaceLandmarkFrontCpu. This maybe needed because OpenGL represents images +# assuming the image origin is at the bottom-left corner, whereas MediaPipe in +# general assumes the image origin is at the top-left corner. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:raw_image_frame" + input_stream: "FLIP_VERTICALLY:is_gpu_image" + output_stream: "IMAGE:image_frame" +} + +node { + calculator: "FaceLandmarkFrontCpu" + input_stream: "IMAGE:image_frame" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt new file mode 100644 index 0000000..fd89565 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt @@ -0,0 +1,247 @@ +# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is +# executed on GPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. 
+# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution if `with_attention` is not set or set to `false`. +# +# It is required that "face_landmark_with_attention.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite" +# path during execution if `with_attention` is set to `true`. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontGpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# input_side_packet: "WITH_ATTENTION:with_attention" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "gated_prev_face_rects_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if enough faces have already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of face detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. 
+node { + calculator: "FaceDetectionShortRangeGpu" + input_stream: "IMAGE:gated_image" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "gated_prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. 
+node { + calculator: "FaceLandmarkGpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_gpu_image.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_gpu_image.pbtxt new file mode 100644 index 0000000..31da4b8 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_gpu_image.pbtxt @@ -0,0 +1,87 @@ +# MediaPipe graph to detect/predict face landmarks on GPU. + +type: "FaceLandmarkFrontGpuImage" + +# Input image. (Image) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. 
(std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + input_stream: "FINISHED:multi_face_landmarks" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_image" + options: { + [mediapipe.FlowLimiterCalculatorOptions.ext] { + max_in_flight: 1 + max_in_queue: 1 + } + } +} + +# Converts Image to GpuBuffer for FaceLandmarkFrontGpu to consume. +node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:throttled_image" + output_stream: "IMAGE_GPU:raw_gpu_buffer" + output_stream: "SOURCE_ON_GPU:is_gpu_image" +} + +# TODO: Remove the extra flipping once adopting MlImage. +# If the source images are on gpu, flip the data vertically before sending them +# into FaceLandmarkFrontGpu. This maybe needed because OpenGL represents images +# assuming the image origin is at the bottom-left corner, whereas MediaPipe in +# general assumes the image origin is at the top-left corner. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:raw_gpu_buffer" + input_stream: "FLIP_VERTICALLY:is_gpu_image" + output_stream: "IMAGE_GPU:gpu_buffer" +} + +node { + calculator: "FaceLandmarkFrontGpu" + input_stream: "IMAGE:gpu_buffer" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + input_side_packet: "WITH_ATTENTION:with_attention" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu.pbtxt new file mode 100644 index 0000000..d3d26c0 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu.pbtxt @@ -0,0 +1,224 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontSideModelCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# input_side_packet: "MODEL:0:face_detection_model" +# input_side_packet: "MODEL:1:face_landmark_model" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontSideModelCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" +# TfLite model to detect faces. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_detection/face_detection_short_range.tflite +# model only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:0:face_detection_model" +# TfLite model to detect face landmarks. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model +# only, can be passed here, otherwise - results are undefined. 
+input_side_packet: "MODEL:1:face_landmark_model" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if FaceLandmarkCpu was able to identify face presence +# in the previous image. Otherwise, passes the incoming image through to trigger +# a new round of face detection in FaceDetectionShortRangeCpu. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. +node { + calculator: "FaceDetectionShortRangeSideModelCpu" + input_stream: "IMAGE:gated_image" + input_side_packet: "MODEL:face_detection_model" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. 
+node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkSideModelCpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + input_side_packet: "MODEL:face_landmark_model" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. 
+node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_side_model_gpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_side_model_gpu.pbtxt new file mode 100644 index 0000000..9832c2f --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_side_model_gpu.pbtxt @@ -0,0 +1,224 @@ +# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is +# executed on GPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontSideModelGpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# input_side_packet: "MODEL:0:face_detection_model" +# input_side_packet: "MODEL:1:face_landmark_model" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontSideModelGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" +# TfLite model to detect faces. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_detection/face_detection_short_range.tflite +# model only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:0:face_detection_model" +# TfLite model to detect face landmarks. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model +# only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:1:face_landmark_model" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if FaceLandmarkGpu was able to identify face presence +# in the previous image. Otherwise, passes the incoming image through to trigger +# a new round of face detection in FaceDetectionShortRangeGpu. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. 
+node { + calculator: "FaceDetectionShortRangeSideModelGpu" + input_stream: "IMAGE:gated_image" + input_side_packet: "MODEL:face_detection_model" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. 
+node { + calculator: "FaceLandmarkSideModelGpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + input_side_packet: "MODEL:face_landmark_model" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt new file mode 100644 index 0000000..854ceaf --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt @@ -0,0 +1,185 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution if `with_attention` is not set or set to `false`. +# +# It is required that "face_landmark_with_attention.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite" +# path during execution if `with_attention` is set to `true`. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkGpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# input_side_packet: "WITH_ATTENTION:with_attention" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList) +# +# Number of landmarks depends on the WITH_ATTENTION flag. 
If it's `true` - then +# there will be 478 landmarks with refined lips, eyes and irises (10 extra +# landmarks are for irises), otherwise 468 non-refined landmarks are returned. +# +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:face_landmarks" + +# Transforms the input image into a 192x192 tensor. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + gpu_origin: TOP_LEFT + } + } +} + +# Loads the face landmarks TF Lite model. +node { + calculator: "FaceLandmarksModelLoader" + input_side_packet: "WITH_ATTENTION:with_attention" + output_side_packet: "MODEL:model" +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of GPU tensors representing, for instance, detection boxes/keypoints +# and scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + input_side_packet: "MODEL:model" + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + # Do not remove. Used for generation of XNNPACK/NNAPI graphs. + } + } +} + +# Splits a vector of tensors into landmark tensors and face flag tensor. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "face_flag_tensor" + options { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } + } + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 6 } + ranges: { begin: 6 end: 7 } + } + } + } + } + } +} + +# Converts the face-flag tensor into a float that represents the confidence +# score of face presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:face_flag_tensor" + output_stream: "FLOAT:face_presence_score" + options: { + [mediapipe.TensorsToFloatsCalculatorOptions.ext] { + activation: SIGMOID + } + } +} + +# Applies a threshold to the confidence score to determine whether a face is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:face_presence_score" + output_stream: "FLAG:face_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drop landmarks tensors if face is not present. 
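+# For example, a raw face-flag value of 2.0 becomes sigmoid(2.0) ~= 0.88, which
+# clears the 0.5 threshold above and lets the landmark tensors through, while a
+# value of -1.0 becomes ~= 0.27, so the tensors (and hence the LANDMARKS output
+# for that timestamp) are dropped.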
+node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:face_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "LANDMARKS:landmarks" + options: { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "TensorsToFaceLandmarks" + } + contained_node: { + calculator: "TensorsToFaceLandmarksWithAttention" + } + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt b/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt new file mode 100644 index 0000000..9f634b0 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt @@ -0,0 +1,54 @@ +# MediaPipe graph to calculate face region of interest (ROI) from landmarks +# detected by "FaceLandmarkCpu" or "FaceLandmarkGpu". +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "FaceLandmarkLandmarksToRoi" + +# Normalized landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:landmarks" +# Frame size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# ROI according to landmarks. (NormalizedRect) +output_stream: "ROI:roi" + +# Converts face landmarks to a detection that tightly encloses all landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + output_stream: "DETECTION:face_detection" +} + +# Converts the face detection into a rectangle (normalized by image size) +# that encloses the face and is rotated such that the line connecting left side +# of the left eye and right side of the right eye is aligned with the X-axis of +# the rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:face_rect_from_landmarks" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 33 # Left side of left eye. + rotation_vector_end_keypoint_index: 263 # Right side of right eye. + rotation_vector_target_angle_degrees: 0 + } + } +} + +# Expands the face rectangle so that in the next video image it's likely to +# still contain the face even with some motion. 
+node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:face_rect_from_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} diff --git a/mediapipe/modules/face_landmark/face_landmark_with_attention.tflite b/mediapipe/modules/face_landmark/face_landmark_with_attention.tflite new file mode 100755 index 0000000..fe0a93a Binary files /dev/null and b/mediapipe/modules/face_landmark/face_landmark_with_attention.tflite differ diff --git a/mediapipe/modules/face_landmark/face_landmarks_model_loader.pbtxt b/mediapipe/modules/face_landmark/face_landmarks_model_loader.pbtxt new file mode 100644 index 0000000..ecac1a6 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmarks_model_loader.pbtxt @@ -0,0 +1,58 @@ +# MediaPipe graph to load a selected face landmarks TF Lite model. + +type: "FaceLandmarksModelLoader" + +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# TF Lite model represented as a FlatBuffer. +# (std::unique_ptr>) +output_side_packet: "MODEL:model" + +# Determines path to the desired face landmark model file based on specification +# in the input side packet. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "ConstantSidePacketCalculator" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/modules/face_landmark/face_landmark.tflite" + } + } + } + } + contained_node: { + calculator: "ConstantSidePacketCalculator" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite" + } + } + } + } + } + } +} + +# Loads the file in the specified path into a blob. +node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" +} + +# Converts the input blob into a TF Lite model. +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" +} diff --git a/mediapipe/modules/face_landmark/tensors_to_face_landmarks.pbtxt b/mediapipe/modules/face_landmark/tensors_to_face_landmarks.pbtxt new file mode 100644 index 0000000..0adbdf3 --- /dev/null +++ b/mediapipe/modules/face_landmark/tensors_to_face_landmarks.pbtxt @@ -0,0 +1,24 @@ +# MediaPipe graph to transform single tensor into 468 facial landmarks. + +type: "TensorsToFaceLandmarks" + +# Vector with a single tensor that contains 468 landmarks. (std::vector) +input_stream: "TENSORS:tensors" + +# 468 facial landmarks (NormalizedLandmarkList) +output_stream: "LANDMARKS:landmarks" + +# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. 
+node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 468 + input_image_width: 192 + input_image_height: 192 + } + } +} diff --git a/mediapipe/modules/face_landmark/tensors_to_face_landmarks_with_attention.pbtxt b/mediapipe/modules/face_landmark/tensors_to_face_landmarks_with_attention.pbtxt new file mode 100644 index 0000000..4f9b994 --- /dev/null +++ b/mediapipe/modules/face_landmark/tensors_to_face_landmarks_with_attention.pbtxt @@ -0,0 +1,299 @@ +# MediaPipe graph to transform model output tensors into 478 facial landmarks +# with refined lips, eyes and irises. + +type: "TensorsToFaceLandmarksWithAttention" + +# Vector with a six tensors to parse landmarks from. (std::vector) +# Landmark tensors order: +# - mesh_tensor +# - lips_tensor +# - left_eye_tensor +# - right_eye_tensor +# - left_iris_tensor +# - right_iris_tensor +input_stream: "TENSORS:tensors" + +# 478 facial landmarks (NormalizedLandmarkList) +output_stream: "LANDMARKS:landmarks" + +# Splits a vector of tensors into multiple vectors. +node { + calculator: "SplitTensorVectorCalculator" + input_stream: "tensors" + output_stream: "mesh_tensor" + output_stream: "lips_tensor" + output_stream: "left_eye_tensor" + output_stream: "right_eye_tensor" + output_stream: "left_iris_tensor" + output_stream: "right_iris_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 3 end: 4 } + ranges: { begin: 4 end: 5 } + ranges: { begin: 5 end: 6 } + } + } +} + +# Decodes mesh landmarks tensor into a vector of normalized lanmarks. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:mesh_tensor" + output_stream: "NORM_LANDMARKS:mesh_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 468 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Decodes lips landmarks tensor into a vector of normalized lanmarks. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:lips_tensor" + output_stream: "NORM_LANDMARKS:lips_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 80 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Decodes left eye landmarks tensor into a vector of normalized lanmarks. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:left_eye_tensor" + output_stream: "NORM_LANDMARKS:left_eye_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 71 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Decodes right eye landmarks tensor into a vector of normalized lanmarks. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:right_eye_tensor" + output_stream: "NORM_LANDMARKS:right_eye_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 71 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Decodes left iris landmarks tensor into a vector of normalized lanmarks. 
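+# Each iris tensor carries 5 landmarks (center plus right/top/left/bottom edge
+# points); together with the 468 mesh landmarks they yield the 478 refined
+# landmarks this graph outputs.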
+node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:left_iris_tensor" + output_stream: "NORM_LANDMARKS:left_iris_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 5 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Decodes right iris landmarks tensor into a vector of normalized lanmarks. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:right_iris_tensor" + output_stream: "NORM_LANDMARKS:right_iris_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 5 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Refine mesh landmarks with lips, eyes and irises. +node { + calculator: "LandmarksRefinementCalculator" + input_stream: "LANDMARKS:0:mesh_landmarks" + input_stream: "LANDMARKS:1:lips_landmarks" + input_stream: "LANDMARKS:2:left_eye_landmarks" + input_stream: "LANDMARKS:3:right_eye_landmarks" + input_stream: "LANDMARKS:4:left_iris_landmarks" + input_stream: "LANDMARKS:5:right_iris_landmarks" + output_stream: "REFINED_LANDMARKS:landmarks" + options: { + [mediapipe.LandmarksRefinementCalculatorOptions.ext] { + # 0 - mesh + refinement: { + indexes_mapping: [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, + 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, + 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, + 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, + 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, + 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, + 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, + 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, + 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, + 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, + 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, + 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, + 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, + 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, + 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, + 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, + 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, + 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, + 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467 + ] + 
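+        # The base mesh keeps its predicted z values (copy), while the 2D lips
+        # and eye refinements drop z (none) and the iris refinements take the
+        # average z of the surrounding eye contour (assign_average), as
+        # configured below.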
z_refinement: { copy {} } + } + # 1 - lips + refinement: { + indexes_mapping: [ + # Lower outer. + 61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, + # Upper outer (excluding corners). + 185, 40, 39, 37, 0, 267, 269, 270, 409, + # Lower inner. + 78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, + # Upper inner (excluding corners). + 191, 80, 81, 82, 13, 312, 311, 310, 415, + # Lower semi-outer. + 76, 77, 90, 180, 85, 16, 315, 404, 320, 307, 306, + # Upper semi-outer (excluding corners). + 184, 74, 73, 72, 11, 302, 303, 304, 408, + # Lower semi-inner. + 62, 96, 89, 179, 86, 15, 316, 403, 319, 325, 292, + # Upper semi-inner (excluding corners). + 183, 42, 41, 38, 12, 268, 271, 272, 407 + ] + z_refinement: { none {} } + } + # 2 - left eye + refinement: { + indexes_mapping: [ + # Lower contour. + 33, 7, 163, 144, 145, 153, 154, 155, 133, + # upper contour (excluding corners). + 246, 161, 160, 159, 158, 157, 173, + # Halo x2 lower contour. + 130, 25, 110, 24, 23, 22, 26, 112, 243, + # Halo x2 upper contour (excluding corners). + 247, 30, 29, 27, 28, 56, 190, + # Halo x3 lower contour. + 226, 31, 228, 229, 230, 231, 232, 233, 244, + # Halo x3 upper contour (excluding corners). + 113, 225, 224, 223, 222, 221, 189, + # Halo x4 upper contour (no lower because of mesh structure) or + # eyebrow inner contour. + 35, 124, 46, 53, 52, 65, + # Halo x5 lower contour. + 143, 111, 117, 118, 119, 120, 121, 128, 245, + # Halo x5 upper contour (excluding corners) or eyebrow outer contour. + 156, 70, 63, 105, 66, 107, 55, 193 + ] + z_refinement: { none {} } + } + # 3 - right eye + refinement: { + indexes_mapping: [ + # Lower contour. + 263, 249, 390, 373, 374, 380, 381, 382, 362, + # Upper contour (excluding corners). + 466, 388, 387, 386, 385, 384, 398, + # Halo x2 lower contour. + 359, 255, 339, 254, 253, 252, 256, 341, 463, + # Halo x2 upper contour (excluding corners). + 467, 260, 259, 257, 258, 286, 414, + # Halo x3 lower contour. + 446, 261, 448, 449, 450, 451, 452, 453, 464, + # Halo x3 upper contour (excluding corners). + 342, 445, 444, 443, 442, 441, 413, + # Halo x4 upper contour (no lower because of mesh structure) or + # eyebrow inner contour. + 265, 353, 276, 283, 282, 295, + # Halo x5 lower contour. + 372, 340, 346, 347, 348, 349, 350, 357, 465, + # Halo x5 upper contour (excluding corners) or eyebrow outer contour. + 383, 300, 293, 334, 296, 336, 285, 417 + ] + z_refinement: { none {} } + } + # 4 - left iris + refinement: { + indexes_mapping: [ + # Center. + 468, + # Iris right edge. + 469, + # Iris top edge. + 470, + # Iris left edge. + 471, + # Iris bottom edge. + 472 + ] + z_refinement: { + assign_average: { + indexes_for_average: [ + # Lower contour. + 33, 7, 163, 144, 145, 153, 154, 155, 133, + # Upper contour (excluding corners). + 246, 161, 160, 159, 158, 157, 173 + ] + } + } + } + # 5 - right iris + refinement: { + indexes_mapping: [ + # Center. + 473, + # Iris right edge. + 474, + # Iris top edge. + 475, + # Iris left edge. + 476, + # Iris bottom edge. + 477 + ] + z_refinement: { + assign_average: { + indexes_for_average: [ + # Lower contour. + 263, 249, 390, 373, 374, 380, 381, 382, 362, + # Upper contour (excluding corners). + 466, 388, 387, 386, 385, 384, 398 + ] + } + } + } + } + } +} diff --git a/mediapipe/modules/hand_landmark/BUILD b/mediapipe/modules/hand_landmark/BUILD new file mode 100644 index 0000000..b28dc78 --- /dev/null +++ b/mediapipe/modules/hand_landmark/BUILD @@ -0,0 +1,171 @@ +# Copyright 2020 The MediaPipe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "hand_landmark_full.tflite", + "hand_landmark_lite.tflite", + "handedness.txt", +]) + +mediapipe_simple_subgraph( + name = "hand_landmark_model_loader", + graph = "hand_landmark_model_loader.pbtxt", + register_as = "HandLandmarkModelLoader", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_cpu", + graph = "hand_landmark_cpu.pbtxt", + register_as = "HandLandmarkCpu", + deps = [ + ":hand_landmark_model_loader", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_classification_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/calculators/util:world_landmark_projection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_gpu", + graph = "hand_landmark_gpu.pbtxt", + register_as = "HandLandmarkGpu", + deps = [ + ":hand_landmark_model_loader", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_classification_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/calculators/util:world_landmark_projection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_tracking_gpu", + graph = "hand_landmark_tracking_gpu.pbtxt", + register_as = "HandLandmarkTrackingGpu", + deps = [ + ":hand_landmark_gpu", + ":hand_landmark_landmarks_to_roi", + ":palm_detection_detection_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + 
"//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:filter_collection_calculator", + "//mediapipe/modules/palm_detection:palm_detection_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_tracking_cpu_image", + graph = "hand_landmark_tracking_cpu_image.pbtxt", + register_as = "HandLandmarkTrackingCpuImage", + deps = [ + ":hand_landmark_tracking_cpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/util:from_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_tracking_gpu_image", + graph = "hand_landmark_tracking_gpu_image.pbtxt", + register_as = "HandLandmarkTrackingGpuImage", + deps = [ + ":hand_landmark_tracking_gpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/util:from_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_tracking_cpu", + graph = "hand_landmark_tracking_cpu.pbtxt", + register_as = "HandLandmarkTrackingCpu", + deps = [ + ":hand_landmark_cpu", + ":hand_landmark_landmarks_to_roi", + ":palm_detection_detection_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:filter_collection_calculator", + "//mediapipe/modules/palm_detection:palm_detection_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "palm_detection_detection_to_roi", + graph = "palm_detection_detection_to_roi.pbtxt", + register_as = "PalmDetectionDetectionToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmark_landmarks_to_roi", + graph = "hand_landmark_landmarks_to_roi.pbtxt", + register_as = "HandLandmarkLandmarksToRoi", + deps = [ + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + "//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator", + ], +) diff --git a/mediapipe/modules/hand_landmark/README.md b/mediapipe/modules/hand_landmark/README.md new file mode 100644 index 0000000..31fe6f7 --- /dev/null +++ b/mediapipe/modules/hand_landmark/README.md @@ -0,0 +1,8 @@ +# hand_landmark + +Subgraphs|Details +:--- | :--- +[`HandLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt)| Detects landmarks of a single hand. (CPU input.) 
+[`HandLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt)| Detects landmarks of a single hand. (GPU input.) +[`HandLandmarkTrackingCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt)| Detects and tracks landmarks of multiple hands. (CPU input.) +[`HandLandmarkTrackingGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)| Detects and tracks landmarks of multiple hands. (GPU input.) diff --git a/mediapipe/modules/hand_landmark/calculators/BUILD b/mediapipe/modules/hand_landmark/calculators/BUILD new file mode 100644 index 0000000..b2a8efe --- /dev/null +++ b/mediapipe/modules/hand_landmark/calculators/BUILD @@ -0,0 +1,33 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "hand_landmarks_to_rect_calculator", + srcs = ["hand_landmarks_to_rect_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) diff --git a/mediapipe/modules/hand_landmark/calculators/hand_landmarks_to_rect_calculator.cc b/mediapipe/modules/hand_landmark/calculators/hand_landmarks_to_rect_calculator.cc new file mode 100644 index 0000000..3e3f5c8 --- /dev/null +++ b/mediapipe/modules/hand_landmark/calculators/hand_landmarks_to_rect_calculator.cc @@ -0,0 +1,167 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include <cmath>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/calculator_options.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+
+namespace mediapipe {
+
+namespace {
+
+constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS";
+constexpr char kNormRectTag[] = "NORM_RECT";
+constexpr char kImageSizeTag[] = "IMAGE_SIZE";
+constexpr int kWristJoint = 0;
+constexpr int kMiddleFingerPIPJoint = 6;
+constexpr int kIndexFingerPIPJoint = 4;
+constexpr int kRingFingerPIPJoint = 8;
+constexpr float kTargetAngle = M_PI * 0.5f;
+
+inline float NormalizeRadians(float angle) {
+  return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
+}
+
+float ComputeRotation(const NormalizedLandmarkList& landmarks,
+                      const std::pair<int, int>& image_size) {
+  const float x0 = landmarks.landmark(kWristJoint).x() * image_size.first;
+  const float y0 = landmarks.landmark(kWristJoint).y() * image_size.second;
+
+  float x1 = (landmarks.landmark(kIndexFingerPIPJoint).x() +
+              landmarks.landmark(kRingFingerPIPJoint).x()) /
+             2.f;
+  float y1 = (landmarks.landmark(kIndexFingerPIPJoint).y() +
+              landmarks.landmark(kRingFingerPIPJoint).y()) /
+             2.f;
+  x1 = (x1 + landmarks.landmark(kMiddleFingerPIPJoint).x()) / 2.f *
+       image_size.first;
+  y1 = (y1 + landmarks.landmark(kMiddleFingerPIPJoint).y()) / 2.f *
+       image_size.second;
+
+  const float rotation =
+      NormalizeRadians(kTargetAngle - std::atan2(-(y1 - y0), x1 - x0));
+  return rotation;
+}
+
+absl::Status NormalizedLandmarkListToRect(
+    const NormalizedLandmarkList& landmarks,
+    const std::pair<int, int>& image_size, NormalizedRect* rect) {
+  const float rotation = ComputeRotation(landmarks, image_size);
+  const float reverse_angle = NormalizeRadians(-rotation);
+
+  // Find boundaries of landmarks.
+  float max_x = std::numeric_limits<float>::min();
+  float max_y = std::numeric_limits<float>::min();
+  float min_x = std::numeric_limits<float>::max();
+  float min_y = std::numeric_limits<float>::max();
+  for (int i = 0; i < landmarks.landmark_size(); ++i) {
+    max_x = std::max(max_x, landmarks.landmark(i).x());
+    max_y = std::max(max_y, landmarks.landmark(i).y());
+    min_x = std::min(min_x, landmarks.landmark(i).x());
+    min_y = std::min(min_y, landmarks.landmark(i).y());
+  }
+  const float axis_aligned_center_x = (max_x + min_x) / 2.f;
+  const float axis_aligned_center_y = (max_y + min_y) / 2.f;
+
+  // Find boundaries of rotated landmarks.
+  max_x = std::numeric_limits<float>::min();
+  max_y = std::numeric_limits<float>::min();
+  min_x = std::numeric_limits<float>::max();
+  min_y = std::numeric_limits<float>::max();
+  for (int i = 0; i < landmarks.landmark_size(); ++i) {
+    const float original_x =
+        (landmarks.landmark(i).x() - axis_aligned_center_x) * image_size.first;
+    const float original_y =
+        (landmarks.landmark(i).y() - axis_aligned_center_y) * image_size.second;
+
+    const float projected_x = original_x * std::cos(reverse_angle) -
+                              original_y * std::sin(reverse_angle);
+    const float projected_y = original_x * std::sin(reverse_angle) +
+                              original_y * std::cos(reverse_angle);
+
+    max_x = std::max(max_x, projected_x);
+    max_y = std::max(max_y, projected_y);
+    min_x = std::min(min_x, projected_x);
+    min_y = std::min(min_y, projected_y);
+  }
+  const float projected_center_x = (max_x + min_x) / 2.f;
+  const float projected_center_y = (max_y + min_y) / 2.f;
+
+  const float center_x = projected_center_x * std::cos(rotation) -
+                         projected_center_y * std::sin(rotation) +
+                         image_size.first * axis_aligned_center_x;
+  const float center_y = projected_center_x * std::sin(rotation) +
+                         projected_center_y * std::cos(rotation) +
+                         image_size.second * axis_aligned_center_y;
+  const float width = (max_x - min_x) / image_size.first;
+  const float height = (max_y - min_y) / image_size.second;
+
+  rect->set_x_center(center_x / image_size.first);
+  rect->set_y_center(center_y / image_size.second);
+  rect->set_width(width);
+  rect->set_height(height);
+  rect->set_rotation(rotation);
+
+  return absl::OkStatus();
+}
+
+}  // namespace
+
+// A calculator that converts a subset of hand landmarks to a bounding box
+// NormalizedRect. The rotation angle of the bounding box is computed based on
+// 1) the wrist joint and 2) the average of PIP joints of index finger, middle
+// finger and ring finger. After rotation, the vector from the wrist to the
+// mean of PIP joints is expected to be vertical with the wrist at the bottom
+// and the mean of PIP joints at the top.
+class HandLandmarksToRectCalculator : public CalculatorBase {
+ public:
+  static absl::Status GetContract(CalculatorContract* cc) {
+    cc->Inputs().Tag(kNormalizedLandmarksTag).Set<NormalizedLandmarkList>();
+    cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
+    cc->Outputs().Tag(kNormRectTag).Set<NormalizedRect>();
+    return absl::OkStatus();
+  }
+
+  absl::Status Open(CalculatorContext* cc) override {
+    cc->SetOffset(TimestampDiff(0));
+    return absl::OkStatus();
+  }
+
+  absl::Status Process(CalculatorContext* cc) override {
+    if (cc->Inputs().Tag(kNormalizedLandmarksTag).IsEmpty()) {
+      return absl::OkStatus();
+    }
+    RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
+
+    std::pair<int, int> image_size =
+        cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
+    const auto& landmarks =
+        cc->Inputs().Tag(kNormalizedLandmarksTag).Get<NormalizedLandmarkList>();
+    auto output_rect = absl::make_unique<NormalizedRect>();
+    MP_RETURN_IF_ERROR(
+        NormalizedLandmarkListToRect(landmarks, image_size, output_rect.get()));
+    cc->Outputs()
+        .Tag(kNormRectTag)
+        .Add(output_rect.release(), cc->InputTimestamp());
+
+    return absl::OkStatus();
+  }
+};
+REGISTER_CALCULATOR(HandLandmarksToRectCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt
new file mode 100644
index 0000000..6ecbfad
--- /dev/null
+++ b/mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt
@@ -0,0 +1,219 @@
+# MediaPipe graph to detect/predict hand landmarks on CPU.
+
+type: "HandLandmarkCpu"
+
+# CPU image.
(ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a palm/hand is located. +# (NormalizedRect) +input_stream: "ROI:hand_rect" + +# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# 21 hand landmarks within the given ROI. (NormalizedLandmarkList) +# NOTE: if a hand is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:hand_landmarks" + +# Hand world landmarks within the given ROI. (LandmarkList) +# World landmarks are real-world 3D coordinates in meters with the origin in the +# center of the given ROI. +# +# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However, +# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the +# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of +# the 3D object itself. +output_stream: "WORLD_LANDMARKS:hand_world_landmarks" + +# Handedness of the detected hand (i.e. is hand left or right). +# (ClassificationList) +output_stream: "HANDEDNESS:handedness" + +# Transforms a region of image into a 224x224 tensor while keeping the aspect +# ratio, and therefore may result in potential letterboxing. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:hand_rect" + output_stream: "TENSORS:input_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 224 + output_tensor_height: 224 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + } + } +} + +# Loads the hand landmark TF Lite model. +node { + calculator: "HandLandmarkModelLoader" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + output_side_packet: "MODEL:model" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_side_packet: "MODEL:model" + input_stream: "TENSORS:input_tensor" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { + xnnpack {} + } + } + } +} + +# Splits a vector of tensors to multiple vectors according to the ranges +# specified in option. +node { + calculator: "SplitTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "hand_flag_tensor" + output_stream: "handedness_tensor" + output_stream: "world_landmark_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 3 end: 4 } + } + } +} + +# Converts the hand-flag tensor into a float that represents the confidence +# score of hand presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:hand_flag_tensor" + output_stream: "FLOAT:hand_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a hand is +# present. 
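+# The 0.5 threshold below is the stock value; all landmark, handedness and
+# world-landmark outputs are gated on the resulting flag, so raising it trades
+# missed hands for fewer spurious detections.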
+node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:hand_presence_score" + output_stream: "FLAG:hand_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drops handedness tensor if hand is not present. +node { + calculator: "GateCalculator" + input_stream: "handedness_tensor" + input_stream: "ALLOW:hand_presence" + output_stream: "ensured_handedness_tensor" +} + +# Converts the handedness tensor into a float that represents the classification +# score of handedness. +node { + calculator: "TensorsToClassificationCalculator" + input_stream: "TENSORS:ensured_handedness_tensor" + output_stream: "CLASSIFICATIONS:handedness" + options: { + [mediapipe.TensorsToClassificationCalculatorOptions.ext] { + top_k: 1 + label_map_path: "mediapipe/modules/hand_landmark/handedness.txt" + binary_classification: true + } + } +} + +# Drops landmarks tensors if hand is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:hand_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a list of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 21 + input_image_width: 224 + input_image_height: 224 + # The additional scaling factor is used to account for the Z coordinate + # distribution in the training data. + normalize_z: 0.4 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (hand +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:scaled_landmarks" +} + +# Projects the landmarks from the cropped hand image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:scaled_landmarks" + input_stream: "NORM_RECT:hand_rect" + output_stream: "NORM_LANDMARKS:hand_landmarks" +} + +# Drops world landmarks tensors if hand is not present. +node { + calculator: "GateCalculator" + input_stream: "world_landmark_tensor" + input_stream: "ALLOW:hand_presence" + output_stream: "ensured_world_landmark_tensor" +} + +# Decodes the landmark tensors into a list of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_world_landmark_tensor" + output_stream: "LANDMARKS:unprojected_world_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 21 + } + } +} + +# Projects the world landmarks from the cropped hand image to the corresponding +# locations on the full image before cropping (input to the graph). 
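+# Unlike the LandmarkProjectionCalculator above, this step does not rescale
+# into normalized image coordinates; world landmarks stay metric (meters), and
+# only the rotation introduced by the ROI crop needs to be undone.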
+node { + calculator: "WorldLandmarkProjectionCalculator" + input_stream: "LANDMARKS:unprojected_world_landmarks" + input_stream: "NORM_RECT:hand_rect" + output_stream: "LANDMARKS:hand_world_landmarks" +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_full.tflite b/mediapipe/modules/hand_landmark/hand_landmark_full.tflite new file mode 100755 index 0000000..a2b0114 Binary files /dev/null and b/mediapipe/modules/hand_landmark/hand_landmark_full.tflite differ diff --git a/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt new file mode 100644 index 0000000..033ad44 --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt @@ -0,0 +1,213 @@ +# MediaPipe graph to detect/predict hand landmarks on CPU. + +type: "HandLandmarkGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a palm/hand is located. +# (NormalizedRect) +input_stream: "ROI:hand_rect" + +# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# 21 hand landmarks within the given ROI. (NormalizedLandmarkList) +# NOTE: if a hand is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:hand_landmarks" + +# Hand world landmarks within the given ROI. (LandmarkList) +# World landmarks are real-world 3D coordinates in meters with the origin in the +# center of the given ROI. +# +# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However, +# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the +# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of +# the 3D object itself. +output_stream: "WORLD_LANDMARKS:hand_world_landmarks" + +# Handedness of the detected hand (i.e. is hand left or right). +# (ClassificationList) +output_stream: "HANDEDNESS:handedness" + +# Transforms a region of image into a 224x224 tensor while keeping the aspect +# ratio, and therefore may result in potential letterboxing. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:hand_rect" + output_stream: "TENSORS:input_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 224 + output_tensor_height: 224 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + gpu_origin: TOP_LEFT + } + } +} + +# Loads the hand landmark TF Lite model. +node { + calculator: "HandLandmarkModelLoader" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + output_side_packet: "MODEL:model" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_side_packet: "MODEL:model" + input_stream: "TENSORS:input_tensor" + output_stream: "TENSORS:output_tensors" +} + +# Splits a vector of tensors to multiple vectors according to the ranges +# specified in option. 
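+# The model emits four tensors in a fixed order (screen landmarks, hand
+# presence flag, handedness, world landmarks); the ranges below pick them
+# apart accordingly.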
+node { + calculator: "SplitTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "hand_flag_tensor" + output_stream: "handedness_tensor" + output_stream: "world_landmark_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 3 end: 4 } + } + } +} + +# Converts the hand-flag tensor into a float that represents the confidence +# score of hand presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:hand_flag_tensor" + output_stream: "FLOAT:hand_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a hand is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:hand_presence_score" + output_stream: "FLAG:hand_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drops handedness tensor if hand is not present. +node { + calculator: "GateCalculator" + input_stream: "handedness_tensor" + input_stream: "ALLOW:hand_presence" + output_stream: "ensured_handedness_tensor" +} + +# Converts the handedness tensor into a float that represents the classification +# score of handedness. +node { + calculator: "TensorsToClassificationCalculator" + input_stream: "TENSORS:ensured_handedness_tensor" + output_stream: "CLASSIFICATIONS:handedness" + options: { + [mediapipe.TensorsToClassificationCalculatorOptions.ext] { + top_k: 1 + label_map_path: "mediapipe/modules/hand_landmark/handedness.txt" + binary_classification: true + } + } +} + +# Drops landmarks tensors if hand is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:hand_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a list of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 21 + input_image_width: 224 + input_image_height: 224 + # The additional scaling factor is used to account for the Z coordinate + # distribution in the training data. + normalize_z: 0.4 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (hand +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:scaled_landmarks" +} + +# Projects the landmarks from the cropped hand image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:scaled_landmarks" + input_stream: "NORM_RECT:hand_rect" + output_stream: "NORM_LANDMARKS:hand_landmarks" +} + +# Drops world landmarks tensors if hand is not present. 
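+# As with the screen-space landmarks above, no world-landmark packet is
+# emitted for timestamps where the hand presence check fails.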
+node { + calculator: "GateCalculator" + input_stream: "world_landmark_tensor" + input_stream: "ALLOW:hand_presence" + output_stream: "ensured_world_landmark_tensor" +} + +# Decodes the landmark tensors into a list of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_world_landmark_tensor" + output_stream: "LANDMARKS:unprojected_world_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 21 + } + } +} + +# Projects the world landmarks from the cropped hand image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "WorldLandmarkProjectionCalculator" + input_stream: "LANDMARKS:unprojected_world_landmarks" + input_stream: "NORM_RECT:hand_rect" + output_stream: "LANDMARKS:hand_world_landmarks" +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_landmarks_to_roi.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_landmarks_to_roi.pbtxt new file mode 100644 index 0000000..1d82d76 --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_landmarks_to_roi.pbtxt @@ -0,0 +1,63 @@ +# MediaPipe graph to calculate hand region of interest (ROI) from landmarks +# detected by "HandLandmarkCpu" or "HandLandmarkGpu". + +type: "HandLandmarkLandmarksToRoi" + +# Normalized landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:landmarks" +# Image size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI according to landmarks. (NormalizedRect) +output_stream: "ROI:roi" + +# Extracts a subset of the hand landmarks that are relatively more stable across +# frames (e.g. comparing to finger tips) for computing the bounding box. The box +# will later be expanded to contain the entire hand. In this approach, it is +# more robust to drastically changing hand size. +# The landmarks extracted are: wrist, MCP/PIP of five fingers. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks" + output_stream: "partial_landmarks" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 4 } + ranges: { begin: 5 end: 7 } + ranges: { begin: 9 end: 11 } + ranges: { begin: 13 end: 15 } + ranges: { begin: 17 end: 19 } + combine_outputs: true + } + } +} + +# Converts the hand landmarks into a rectangle (normalized by image size) +# that encloses the hand. The calculator uses a subset of all hand landmarks +# extracted from SplitNormalizedLandmarkListCalculator above to +# calculate the bounding box and the rotation of the output rectangle. Please +# see the comments in the calculator for more detail. +node { + calculator: "HandLandmarksToRectCalculator" + input_stream: "NORM_LANDMARKS:partial_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:hand_rect_from_landmarks" +} + +# Expands the hand rectangle so that the box contains the entire hand and it's +# big enough so that it's likely to still contain the hand even with some motion +# in the next video frame . 
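+# The 2x expansion plus a slight upward shift (shift_y: -0.1) grows the
+# landmark-tight box into a full-hand ROI, and square_long keeps the crop
+# square along its longer side so the landmark model sees a stable aspect
+# ratio.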
+node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:hand_rect_from_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 2.0 + scale_y: 2.0 + shift_y: -0.1 + square_long: true + } + } +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite b/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite new file mode 100755 index 0000000..0a0a2ba Binary files /dev/null and b/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite differ diff --git a/mediapipe/modules/hand_landmark/hand_landmark_model_loader.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_model_loader.pbtxt new file mode 100644 index 0000000..c9ecf8a --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_model_loader.pbtxt @@ -0,0 +1,63 @@ +# MediaPipe graph to load a selected hand landmark TF Lite model. + +type: "HandLandmarkModelLoader" + +# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# TF Lite model represented as a FlatBuffer. +# (std::unique_ptr>) +output_side_packet: "MODEL:model" + +# Determines path to the desired pose landmark model file. +node { + calculator: "SwitchContainer" + input_side_packet: "SELECT:model_complexity" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.SwitchContainerOptions.ext] { + select: 1 + contained_node: { + calculator: "ConstantSidePacketCalculator" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/modules/hand_landmark/hand_landmark_lite.tflite" + } + } + } + } + contained_node: { + calculator: "ConstantSidePacketCalculator" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/modules/hand_landmark/hand_landmark_full.tflite" + } + } + } + } + } + } +} + +# Loads the file in the specified path into a blob. +node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + options: { + [mediapipe.LocalFileContentsCalculatorOptions.ext]: { + text_mode: false + } + } +} + +# Converts the input blob into a TF Lite model. +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt new file mode 100644 index 0000000..2ee8316 --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt @@ -0,0 +1,271 @@ +# MediaPipe graph to detect/predict hand landmarks on CPU. +# +# The procedure is done in two steps: +# - locate palms/hands +# - detect landmarks for each palm/hand. +# This graph tries to skip palm detection as much as possible by reusing +# previously detected/predicted landmarks for new images. + +type: "HandLandmarkTrackingCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of hands to detect/track. (int) +input_side_packet: "NUM_HANDS:num_hands" + +# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as +# well as inference latency generally go up with the model complexity. If +# unspecified, functions as set to 1. 
(int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Collection of detected/predicted hands, each represented as a list of +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of hands detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_hand_landmarks" + +# Collection of detected/predicted hand world landmarks. +# (std::vector) +# +# World landmarks are real-world 3D coordinates in meters with the origin in the +# center of the hand bounding box calculated from the landmarks. +# +# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However, +# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the +# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of +# the 3D object itself. +output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks" + +# Collection of handedness of the detected hands (i.e. is hand left or right), +# each represented as a ClassificationList proto with a single Classification +# entry. (std::vector) +# Note that handedness is determined assuming the input image is mirrored, +# i.e., taken with a front-facing/selfie camera with images flipped +# horizontally. +output_stream: "HANDEDNESS:multi_handedness" + +# Extra outputs (for debugging, for instance). +# Detected palms. (std::vector) +output_stream: "PALM_DETECTIONS:palm_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" +# Regions of interest calculated based on palm detections. +# (std::vector) +output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_hand_rects_from_landmarks" + output_stream: "gated_prev_hand_rects_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_hands. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_hand_rects_from_landmarks" + input_side_packet: "num_hands" + output_stream: "prev_has_enough_hands" +} + +# Drops the incoming image if enough hands have already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of palm detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_hands" + output_stream: "palm_detection_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects palms. 
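+# Palm detection only runs on frames that passed the gate above, i.e. when the
+# previous frame did not already yield enough hand ROIs; on other frames this
+# node simply receives no input packet.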
+node { + calculator: "PalmDetectionCpu" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_stream: "IMAGE:palm_detection_image" + output_stream: "DETECTIONS:all_palm_detections" +} + +# Makes sure there are no more detections than the provided num_hands. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_palm_detections" + output_stream: "palm_detections" + input_side_packet: "num_hands" +} + +# Extracts image size. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:palm_detection_image" + output_stream: "SIZE:palm_detection_image_size" +} + +# Outputs each element of palm_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each palm_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:palm_detections" + input_stream: "CLONE:palm_detection_image_size" + output_stream: "ITEM:palm_detection" + output_stream: "CLONE:image_size_for_palms" + output_stream: "BATCH_END:palm_detections_timestamp" +} + +# Calculates region of interest (ROI) based on the specified palm. +node { + calculator: "PalmDetectionDetectionToRoi" + input_stream: "DETECTION:palm_detection" + input_stream: "IMAGE_SIZE:image_size_for_palms" + output_stream: "ROI:hand_rect_from_palm_detection" +} + +# Collects a NormalizedRect for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:hand_rect_from_palm_detection" + input_stream: "BATCH_END:palm_detections_timestamp" + output_stream: "ITERABLE:hand_rects_from_palm_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on palm detections from the current image. This +# calculator ensures that the output hand_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "hand_rects_from_palm_detections" + input_stream: "gated_prev_hand_rects_from_landmarks" + output_stream: "hand_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Extracts image size. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of hand_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_hand_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:hand_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:single_hand_rect" + output_stream: "CLONE:0:image_for_landmarks" + output_stream: "CLONE:1:image_size_for_landmarks" + output_stream: "BATCH_END:hand_rects_timestamp" +} + +# Detect hand landmarks for the specific hand rect. 
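+# This runs once per ROI: the BeginLoop/EndLoop pair feeds each
+# single_hand_rect through at its own internal timestamp and the per-hand
+# results are gathered back into vectors below.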
+node { + calculator: "HandLandmarkCpu" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_stream: "IMAGE:image_for_landmarks" + input_stream: "ROI:single_hand_rect" + output_stream: "LANDMARKS:single_hand_landmarks" + output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks" + output_stream: "HANDEDNESS:single_handedness" +} + +# Collects the handedness for each single hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END +# timestamp. +node { + calculator: "EndLoopClassificationListCalculator" + input_stream: "ITEM:single_handedness" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:multi_handedness" +} + +# Calculate region of interest (ROI) based on detected hand landmarks to reuse +# on the subsequent runs of the graph. +node { + calculator: "HandLandmarkLandmarksToRoi" + input_stream: "IMAGE_SIZE:image_size_for_landmarks" + input_stream: "LANDMARKS:single_hand_landmarks" + output_stream: "ROI:single_hand_rect_from_landmarks" +} + +# Collects a set of landmarks for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:single_hand_landmarks" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:multi_hand_landmarks" +} + +# Collects a set of world landmarks for each hand into a vector. Upon receiving +# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopLandmarkListVectorCalculator" + input_stream: "ITEM:single_hand_world_landmarks" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:multi_hand_world_landmarks" +} + +# Collects a NormalizedRect for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:single_hand_rect_from_landmarks" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:hand_rects_from_landmarks" +} + +# Caches hand rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# hand rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:hand_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_hand_rects_from_landmarks" +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu_image.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu_image.pbtxt new file mode 100644 index 0000000..0bdabb9 --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu_image.pbtxt @@ -0,0 +1,116 @@ +# MediaPipe graph to detect/predict hand landmarks on CPU. +# +# The procedure is done in two steps: +# - locate palms/hands +# - detect landmarks for each palm/hand. +# This graph tries to skip palm detection as much as possible by reusing +# previously detected/predicted landmarks for new images. + +type: "HandLandmarkTrackingCpuImage" + +# Input image. 
(Image) +input_stream: "IMAGE:image" + +# Max number of hands to detect/track. (int) +input_side_packet: "NUM_HANDS:num_hands" + +# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as +# well as inference latency generally go up with the model complexity. If +# unspecified, functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" + +# Collection of detected/predicted hands, each represented as a list of +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of hands detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_hand_landmarks" + +# Collection of detected/predicted hand world landmarks. +# (std::vector) +# +# World landmarks are real-world 3D coordinates in meters with the origin in the +# center of the hand bounding box calculated from the landmarks. +# +# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However, +# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the +# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of +# the 3D object itself. +output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks" + +# Collection of handedness of the detected hands (i.e. is hand left or right), +# each represented as a ClassificationList proto with a single Classification +# entry. (std::vector) +# Note that handedness is determined assuming the input image is mirrored, +# i.e., taken with a front-facing/selfie camera with images flipped +# horizontally. +output_stream: "HANDEDNESS:multi_handedness" + +# Extra outputs (for debugging, for instance). +# Detected palms. (std::vector) +output_stream: "PALM_DETECTIONS:palm_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" +# Regions of interest calculated based on palm detections. +# (std::vector) +output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + input_stream: "FINISHED:multi_hand_landmarks" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_image" + options: { + [mediapipe.FlowLimiterCalculatorOptions.ext] { + max_in_flight: 1 + max_in_queue: 1 + } + } +} + +# Converts Image to ImageFrame for HandLandmarkTrackingCpu to consume. +node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:throttled_image" + output_stream: "IMAGE_CPU:raw_image_frame" + output_stream: "SOURCE_ON_GPU:is_gpu_image" +} + +# TODO: Remove the extra flipping once adopting MlImage. +# If the source images are on gpu, flip the data vertically before sending them +# into HandLandmarkTrackingCpu. This maybe needed because OpenGL represents +# images assuming the image origin is at the bottom-left corner, whereas +# MediaPipe in general assumes the image origin is at the top-left corner. 
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:raw_image_frame" + input_stream: "FLIP_VERTICALLY:is_gpu_image" + output_stream: "IMAGE:image_frame" +} + +node { + calculator: "HandLandmarkTrackingCpu" + input_stream: "IMAGE:image_frame" + input_side_packet: "NUM_HANDS:num_hands" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + output_stream: "LANDMARKS:multi_hand_landmarks" + output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks" + output_stream: "HANDEDNESS:multi_handedness" + output_stream: "PALM_DETECTIONS:palm_detections" + output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" + output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections" +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt new file mode 100644 index 0000000..da76f4a --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt @@ -0,0 +1,272 @@ +# MediaPipe graph to detect/predict hand landmarks on GPU. +# +# The procedure is done in two steps: +# - locate palms/hands +# - detect landmarks for each palm/hand. +# This graph tries to skip palm detection as much as possible by reusing +# previously detected/predicted landmarks for new images. + +type: "HandLandmarkTrackingGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Max number of hands to detect/track. (int) +input_side_packet: "NUM_HANDS:num_hands" + +# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as +# well as inference latency generally go up with the model complexity. If +# unspecified, functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Collection of detected/predicted hands, each represented as a list of +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of hands detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_hand_landmarks" + +# Collection of detected/predicted hand world landmarks. +# (std::vector) +# +# World landmarks are real-world 3D coordinates in meters with the origin in the +# center of the hand bounding box calculated from the landmarks. +# +# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However, +# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the +# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of +# the 3D object itself. +output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks" + +# Collection of handedness of the detected hands (i.e. is hand left or right), +# each represented as a ClassificationList proto with a single Classification +# entry. (std::vector) +# Note that handedness is determined assuming the input image is mirrored, +# i.e., taken with a front-facing/selfie camera with images flipped +# horizontally. +output_stream: "HANDEDNESS:multi_handedness" + +# Extra outputs (for debugging, for instance). +# Detected palms. 
(std::vector) +output_stream: "PALM_DETECTIONS:palm_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" +# Regions of interest calculated based on palm detections. +# (std::vector) +output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_hand_rects_from_landmarks" + output_stream: "gated_prev_hand_rects_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_hands. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_hand_rects_from_landmarks" + input_side_packet: "num_hands" + output_stream: "prev_has_enough_hands" +} + +# Drops the incoming image if enough hands have already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of palm detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_hands" + output_stream: "palm_detection_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects palms. +node { + calculator: "PalmDetectionGpu" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_stream: "IMAGE:palm_detection_image" + output_stream: "DETECTIONS:all_palm_detections" +} + +# Makes sure there are no more detections than provided num_hands. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_palm_detections" + output_stream: "palm_detections" + input_side_packet: "num_hands" +} + +# Extracts image size. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:palm_detection_image" + output_stream: "SIZE:palm_detection_image_size" +} + +# Outputs each element of palm_detections at a fake timestamp for the rest of +# the graph to process. Clones the image_size packet for each palm_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:palm_detections" + input_stream: "CLONE:palm_detection_image_size" + output_stream: "ITEM:palm_detection" + output_stream: "CLONE:image_size_for_palms" + output_stream: "BATCH_END:palm_detections_timestamp" +} + +# Calculates region of interest (ROI) base on the specified palm. +node { + calculator: "PalmDetectionDetectionToRoi" + input_stream: "DETECTION:palm_detection" + input_stream: "IMAGE_SIZE:image_size_for_palms" + output_stream: "ROI:hand_rect_from_palm_detection" +} + +# Collects a NormalizedRect for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. 
+node { + name: "EndLoopForPalmDetections" + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:hand_rect_from_palm_detection" + input_stream: "BATCH_END:palm_detections_timestamp" + output_stream: "ITERABLE:hand_rects_from_palm_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on palm detections from the current image. This +# calculator ensures that the output hand_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "hand_rects_from_palm_detections" + input_stream: "gated_prev_hand_rects_from_landmarks" + output_stream: "hand_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Extracts image size. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of hand_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_hand_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:hand_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:single_hand_rect" + output_stream: "CLONE:0:image_for_landmarks" + output_stream: "CLONE:1:image_size_for_landmarks" + output_stream: "BATCH_END:hand_rects_timestamp" +} + +# Detect hand landmarks for the specific hand rect. +node { + calculator: "HandLandmarkGpu" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_stream: "IMAGE:image_for_landmarks" + input_stream: "ROI:single_hand_rect" + output_stream: "LANDMARKS:single_hand_landmarks" + output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks" + output_stream: "HANDEDNESS:single_handedness" +} + +# Collects the handedness for each single hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END +# timestamp. +node { + calculator: "EndLoopClassificationListCalculator" + input_stream: "ITEM:single_handedness" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:multi_handedness" +} + +# Calculate region of interest (ROI) based on detected hand landmarks to reuse +# on the subsequent runs of the graph. +node { + calculator: "HandLandmarkLandmarksToRoi" + input_stream: "IMAGE_SIZE:image_size_for_landmarks" + input_stream: "LANDMARKS:single_hand_landmarks" + output_stream: "ROI:single_hand_rect_from_landmarks" +} + +# Collects a set of landmarks for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:single_hand_landmarks" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:multi_hand_landmarks" +} + +# Collects a set of world landmarks for each hand into a vector. Upon receiving +# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. 
+node { + calculator: "EndLoopLandmarkListVectorCalculator" + input_stream: "ITEM:single_hand_world_landmarks" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:multi_hand_world_landmarks" +} + +# Collects a NormalizedRect for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:single_hand_rect_from_landmarks" + input_stream: "BATCH_END:hand_rects_timestamp" + output_stream: "ITERABLE:hand_rects_from_landmarks" +} + +# Caches hand rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# hand rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:hand_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_hand_rects_from_landmarks" +} diff --git a/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu_image.pbtxt b/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu_image.pbtxt new file mode 100644 index 0000000..8b8e466 --- /dev/null +++ b/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu_image.pbtxt @@ -0,0 +1,115 @@ +# MediaPipe graph to detect/predict hand landmarks on GPU. +# +# The procedure is done in two steps: +# - locate palms/hands +# - detect landmarks for each palm/hand. +# This graph tries to skip palm detection as much as possible by reusing +# previously detected/predicted landmarks for new images. + +type: "HandLandmarkTrackingGpuImage" + +# Input image. (Image) +input_stream: "IMAGE:image" + +# Max number of hands to detect/track. (int) +input_side_packet: "NUM_HANDS:num_hands" + +# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as +# well as inference latency generally go up with the model complexity. If +# unspecified, functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Collection of detected/predicted hands, each represented as a list of +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of hands detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_hand_landmarks" + +# Collection of detected/predicted hand world landmarks. +# (std::vector) +# +# World landmarks are real-world 3D coordinates in meters with the origin in the +# center of the hand bounding box calculated from the landmarks. +# +# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However, +# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the +# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of +# the 3D object itself. +output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks" + +# Collection of handedness of the detected hands (i.e. 
is hand left or right), +# each represented as a ClassificationList proto with a single Classification +# entry. (std::vector) +# Note that handedness is determined assuming the input image is mirrored, +# i.e., taken with a front-facing/selfie camera with images flipped +# horizontally. +output_stream: "HANDEDNESS:multi_handedness" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" +# Extra outputs (for debugging, for instance). +# Detected palms. (std::vector) +output_stream: "PALM_DETECTIONS:palm_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" +# Regions of interest calculated based on palm detections. +# (std::vector) +output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "image" + input_stream: "FINISHED:multi_hand_landmarks" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_image" + options: { + [mediapipe.FlowLimiterCalculatorOptions.ext] { + max_in_flight: 1 + max_in_queue: 1 + } + } +} + +# Converts Image to GpuBuffer for HandLandmarkTrackingGpu to consume. +node { + calculator: "FromImageCalculator" + input_stream: "IMAGE:throttled_image" + output_stream: "IMAGE_GPU:raw_gpu_buffer" + output_stream: "SOURCE_ON_GPU:is_gpu_image" +} + +# TODO: Remove the extra flipping once adopting MlImage. +# If the source images are on gpu, flip the data vertically before sending them +# into HandLandmarkTrackingGpu. This maybe needed because OpenGL represents +# images assuming the image origin is at the bottom-left corner, whereas +# MediaPipe in general assumes the image origin is at the top-left corner. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:raw_gpu_buffer" + input_stream: "FLIP_VERTICALLY:is_gpu_image" + output_stream: "IMAGE_GPU:gpu_buffer" +} + +node { + calculator: "HandLandmarkTrackingGpu" + input_stream: "IMAGE:gpu_buffer" + input_side_packet: "NUM_HANDS:num_hands" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + output_stream: "LANDMARKS:multi_hand_landmarks" + output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks" + output_stream: "HANDEDNESS:multi_handedness" + output_stream: "PALM_DETECTIONS:palm_detections" + output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects" + output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections" +} diff --git a/mediapipe/modules/hand_landmark/handedness.txt b/mediapipe/modules/hand_landmark/handedness.txt new file mode 100644 index 0000000..9f636db --- /dev/null +++ b/mediapipe/modules/hand_landmark/handedness.txt @@ -0,0 +1,2 @@ +Left +Right diff --git a/mediapipe/modules/hand_landmark/palm_detection_detection_to_roi.pbtxt b/mediapipe/modules/hand_landmark/palm_detection_detection_to_roi.pbtxt new file mode 100644 index 0000000..838633b --- /dev/null +++ b/mediapipe/modules/hand_landmark/palm_detection_detection_to_roi.pbtxt @@ -0,0 +1,47 @@ +# MediaPipe subgraph that calculates hand ROI from palm detection. + +type: "PalmDetectionDetectionToRoi" + +# Palm detection. (Detection) +input_stream: "DETECTION:detection" +# Frame size. (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI (region of interest) according to landmarks, represented as normalized +# rect. 
(NormalizedRect) +output_stream: "ROI:roi" + +# Converts results of palm detection into a rectangle (normalized by image size) +# that encloses the palm and is rotated such that the line connecting center of +# the wrist and MCP of the middle finger is aligned with the Y-axis of the +# rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:raw_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 # Center of wrist. + rotation_vector_end_keypoint_index: 2 # MCP of middle finger. + rotation_vector_target_angle_degrees: 90 + } + } +} + +# Expands and shifts the rectangle that contains the palm so that it's likely +# to cover the entire hand. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:raw_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 2.6 + scale_y: 2.6 + shift_y: -0.5 + square_long: true + } + } +} diff --git a/mediapipe/modules/holistic_landmark/BUILD b/mediapipe/modules/holistic_landmark/BUILD new file mode 100644 index 0000000..44854c0 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/BUILD @@ -0,0 +1,267 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph") + +# TODO: revert to private. 
+package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +exports_files([ + "hand_recrop.tflite", +]) + +mediapipe_simple_subgraph( + name = "face_landmarks_from_pose_gpu", + graph = "face_landmarks_from_pose_gpu.pbtxt", + register_as = "FaceLandmarksFromPoseGpu", + deps = [ + ":face_detection_front_detections_to_roi", + ":face_landmarks_from_pose_to_recrop_roi", + ":face_tracking", + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_by_roi_gpu", + "//mediapipe/modules/face_landmark:face_landmark_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmarks_from_pose_cpu", + graph = "face_landmarks_from_pose_cpu.pbtxt", + register_as = "FaceLandmarksFromPoseCpu", + deps = [ + ":face_detection_front_detections_to_roi", + ":face_landmarks_from_pose_to_recrop_roi", + ":face_tracking", + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_by_roi_cpu", + "//mediapipe/modules/face_landmark:face_landmark_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmarks_to_roi", + graph = "face_landmarks_to_roi.pbtxt", + register_as = "FaceLandmarksToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_front_detections_to_roi", + graph = "face_detection_front_detections_to_roi.pbtxt", + register_as = "FaceDetectionFrontDetectionsToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_tracking", + graph = "face_tracking.pbtxt", + register_as = "FaceTracking", + deps = [ + ":face_landmarks_to_roi", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmarks_from_pose_to_recrop_roi", + graph = "face_landmarks_from_pose_to_recrop_roi.pbtxt", + register_as = "FaceLandmarksFromPoseToRecropRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmarks_from_pose_gpu", + graph = "hand_landmarks_from_pose_gpu.pbtxt", + register_as = "HandLandmarksFromPoseGpu", + deps = [ + ":hand_landmarks_from_pose_to_recrop_roi", + ":hand_recrop_by_roi_gpu", + ":hand_tracking", + ":hand_visibility_from_hand_landmarks_from_pose", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/hand_landmark:hand_landmark_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmarks_from_pose_cpu", + graph = "hand_landmarks_from_pose_cpu.pbtxt", + register_as = "HandLandmarksFromPoseCpu", + deps = [ + ":hand_landmarks_from_pose_to_recrop_roi", + ":hand_recrop_by_roi_cpu", + ":hand_tracking", + ":hand_visibility_from_hand_landmarks_from_pose", + "//mediapipe/calculators/core:gate_calculator", + 
"//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/modules/hand_landmark:hand_landmark_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmarks_to_roi", + graph = "hand_landmarks_to_roi.pbtxt", + register_as = "HandLandmarksToRoi", + deps = [ + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + "//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_recrop_by_roi_gpu", + graph = "hand_recrop_by_roi_gpu.pbtxt", + register_as = "HandRecropByRoiGpu", + deps = [ + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_recrop_by_roi_cpu", + graph = "hand_recrop_by_roi_cpu.pbtxt", + register_as = "HandRecropByRoiCpu", + deps = [ + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_tracking", + graph = "hand_tracking.pbtxt", + register_as = "HandTracking", + deps = [ + ":hand_landmarks_to_roi", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator", + ], +) + +# TODO: parametrize holistic_landmark graph with visibility and make private. 
+mediapipe_simple_subgraph( + name = "hand_wrist_for_pose", + graph = "hand_wrist_for_pose.pbtxt", + register_as = "HandWristForPose", + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:side_packet_to_stream_calculator", + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/util:set_landmark_visibility_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmarks_left_and_right_gpu", + graph = "hand_landmarks_left_and_right_gpu.pbtxt", + register_as = "HandLandmarksLeftAndRightGpu", + deps = [ + ":hand_landmarks_from_pose_gpu", + "//mediapipe/calculators/core:split_landmarks_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmarks_left_and_right_cpu", + graph = "hand_landmarks_left_and_right_cpu.pbtxt", + register_as = "HandLandmarksLeftAndRightCpu", + deps = [ + ":hand_landmarks_from_pose_cpu", + "//mediapipe/calculators/core:split_landmarks_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_landmarks_from_pose_to_recrop_roi", + graph = "hand_landmarks_from_pose_to_recrop_roi.pbtxt", + register_as = "HandLandmarksFromPoseToRecropRoi", + deps = [ + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + "//mediapipe/modules/holistic_landmark/calculators:hand_detections_from_pose_to_rects_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "hand_visibility_from_hand_landmarks_from_pose", + graph = "hand_visibility_from_hand_landmarks_from_pose.pbtxt", + register_as = "HandVisibilityFromHandLandmarksFromPose", + deps = [ + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/util:landmark_visibility_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "holistic_landmark_gpu", + graph = "holistic_landmark_gpu.pbtxt", + register_as = "HolisticLandmarkGpu", + visibility = ["//visibility:public"], + deps = [ + ":face_landmarks_from_pose_gpu", + ":hand_landmarks_left_and_right_gpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/modules/pose_landmark:pose_landmark_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "holistic_landmark_cpu", + graph = "holistic_landmark_cpu.pbtxt", + register_as = "HolisticLandmarkCpu", + visibility = ["//visibility:public"], + deps = [ + ":face_landmarks_from_pose_cpu", + ":hand_landmarks_left_and_right_cpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/modules/pose_landmark:pose_landmark_cpu", + ], +) diff --git a/mediapipe/modules/holistic_landmark/README.md b/mediapipe/modules/holistic_landmark/README.md new file mode 100644 index 0000000..d285f15 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/README.md @@ -0,0 +1,6 @@ +# holistic_landmark + +Subgraphs|Details +:--- | :--- +[`HolisticLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_cpu.pbtxt)| Predicts pose + left/right hand + face landmarks. (CPU input) +[`HolisticLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt)| Predicts pose + left/right hand + face landmarks. (GPU input.) 
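+
+A minimal usage sketch of embedding the CPU variant in a client graph. This is
+illustrative only; the stream tags shown are assumptions based on the subgraph
+naming and should be checked against `holistic_landmark_cpu.pbtxt`:
+
+```
+node {
+  calculator: "HolisticLandmarkCpu"
+  input_stream: "IMAGE:input_video"
+  output_stream: "POSE_LANDMARKS:pose_landmarks"
+  output_stream: "FACE_LANDMARKS:face_landmarks"
+  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
+  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
+}
+```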
diff --git a/mediapipe/modules/holistic_landmark/calculators/BUILD b/mediapipe/modules/holistic_landmark/calculators/BUILD new file mode 100644 index 0000000..c3c0919 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/calculators/BUILD @@ -0,0 +1,63 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "hand_detections_from_pose_to_rects_calculator", + srcs = ["hand_detections_from_pose_to_rects_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +mediapipe_proto_library( + name = "roi_tracking_calculator_proto", + srcs = ["roi_tracking_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], +) + +cc_library( + name = "roi_tracking_calculator", + srcs = ["roi_tracking_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":roi_tracking_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:rectangle", + "@com_google_absl//absl/strings:str_format", + ], + alwayslink = 1, +) diff --git a/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc b/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc new file mode 100644 index 0000000..5afdb8a --- /dev/null +++ b/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc @@ -0,0 +1,156 @@ +#include + +#include "mediapipe/calculators/util/detections_to_rects_calculator.h" +#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +namespace {} // namespace + +// Generates a hand ROI based on a hand detection derived from hand-related pose +// landmarks. 
+// +// Inputs: +// DETECTION - Detection. +// Detection to convert to ROI. Must contain 3 key points indicating: wrist, +// pinky and index fingers. +// +// IMAGE_SIZE - std::pair +// Image width and height. +// +// Outputs: +// NORM_RECT - NormalizedRect. +// ROI based on passed input. +// +// Examples +// node { +// calculator: "HandDetectionsFromPoseToRectsCalculator" +// input_stream: "DETECTION:hand_detection_from_pose" +// input_stream: "IMAGE_SIZE:image_size" +// output_stream: "NORM_RECT:hand_roi_from_pose" +// } +class HandDetectionsFromPoseToRectsCalculator + : public DetectionsToRectsCalculator { + public: + absl::Status Open(CalculatorContext* cc) override; + + private: + ::absl::Status DetectionToNormalizedRect(const Detection& detection, + const DetectionSpec& detection_spec, + NormalizedRect* rect) override; + absl::Status ComputeRotation(const Detection& detection, + const DetectionSpec& detection_spec, + float* rotation) override; +}; +REGISTER_CALCULATOR(HandDetectionsFromPoseToRectsCalculator); + +namespace { + +constexpr int kWrist = 0; +constexpr int kPinky = 1; +constexpr int kIndex = 2; + +constexpr char kImageSizeTag[] = "IMAGE_SIZE"; + +} // namespace + +::absl::Status HandDetectionsFromPoseToRectsCalculator::Open( + CalculatorContext* cc) { + RET_CHECK(cc->Inputs().HasTag(kImageSizeTag)) + << "Image size is required to calculate rotated rect."; + cc->SetOffset(TimestampDiff(0)); + target_angle_ = M_PI * 0.5f; + rotate_ = true; + options_ = cc->Options(); + output_zero_rect_for_empty_detections_ = + options_.output_zero_rect_for_empty_detections(); + + return ::absl::OkStatus(); +} + +::absl::Status +HandDetectionsFromPoseToRectsCalculator ::DetectionToNormalizedRect( + const Detection& detection, const DetectionSpec& detection_spec, + NormalizedRect* rect) { + const auto& location_data = detection.location_data(); + const auto& image_size = detection_spec.image_size; + RET_CHECK(image_size) << "Image size is required to calculate rotation"; + + const float x_wrist = + location_data.relative_keypoints(kWrist).x() * image_size->first; + const float y_wrist = + location_data.relative_keypoints(kWrist).y() * image_size->second; + + const float x_index = + location_data.relative_keypoints(kIndex).x() * image_size->first; + const float y_index = + location_data.relative_keypoints(kIndex).y() * image_size->second; + + const float x_pinky = + location_data.relative_keypoints(kPinky).x() * image_size->first; + const float y_pinky = + location_data.relative_keypoints(kPinky).y() * image_size->second; + + // Estimate middle finger. + const float x_middle = (2.f * x_index + x_pinky) / 3.f; + const float y_middle = (2.f * y_index + y_pinky) / 3.f; + + // Crop center as middle finger. + const float center_x = x_middle; + const float center_y = y_middle; + + // Bounding box size as double distance from middle finger to wrist. + const float box_size = + std::sqrt((x_middle - x_wrist) * (x_middle - x_wrist) + + (y_middle - y_wrist) * (y_middle - y_wrist)) * + 2.0; + + // Set resulting bounding box. 
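+  // Illustrative example with hypothetical values (not part of the original
+  // code): on a 640x480 image with the wrist at pixel (100, 200), the index
+  // keypoint at (160, 120) and the pinky keypoint at (130, 130), the estimated
+  // middle finger point is ((2*160 + 130) / 3, (2*120 + 130) / 3) = (150, ~123),
+  // box_size is 2 * dist((150, 123), (100, 200)) ~= 183 pixels, and the
+  // normalized rect set below is roughly x_center = 0.23, y_center = 0.26,
+  // width = 0.29, height = 0.38.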
+ rect->set_x_center(center_x / image_size->first); + rect->set_y_center(center_y / image_size->second); + rect->set_width(box_size / image_size->first); + rect->set_height(box_size / image_size->second); + + return ::absl::OkStatus(); +} + +absl::Status HandDetectionsFromPoseToRectsCalculator::ComputeRotation( + const Detection& detection, const DetectionSpec& detection_spec, + float* rotation) { + const auto& location_data = detection.location_data(); + const auto& image_size = detection_spec.image_size; + RET_CHECK(image_size) << "Image size is required to calculate rotation"; + + const float x_wrist = + location_data.relative_keypoints(kWrist).x() * image_size->first; + const float y_wrist = + location_data.relative_keypoints(kWrist).y() * image_size->second; + + const float x_index = + location_data.relative_keypoints(kIndex).x() * image_size->first; + const float y_index = + location_data.relative_keypoints(kIndex).y() * image_size->second; + + const float x_pinky = + location_data.relative_keypoints(kPinky).x() * image_size->first; + const float y_pinky = + location_data.relative_keypoints(kPinky).y() * image_size->second; + + // Estimate middle finger. + const float x_middle = (2.f * x_index + x_pinky) / 3.f; + const float y_middle = (2.f * y_index + y_pinky) / 3.f; + + *rotation = NormalizeRadians( + target_angle_ - std::atan2(-(y_middle - y_wrist), x_middle - x_wrist)); + + return ::absl::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.cc b/mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.cc new file mode 100644 index 0000000..0da6cd7 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.cc @@ -0,0 +1,358 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include + +#include "absl/strings/str_format.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/rectangle.h" +#include "mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.pb.h" + +namespace mediapipe { + +namespace { + +constexpr char kPrevLandmarksTag[] = "PREV_LANDMARKS"; +constexpr char kPrevLandmarksRectTag[] = "PREV_LANDMARKS_RECT"; +constexpr char kRecropRectTag[] = "RECROP_RECT"; +constexpr char kImageSizeTag[] = "IMAGE_SIZE"; +constexpr char kTrackingRectTag[] = "TRACKING_RECT"; + +// TODO: Use rect rotation. +// Verifies that Intersection over Union of previous frame rect and current +// frame re-crop rect is less than threshold. 
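+// In other words, tracking is kept only while
+// intersection_area >= union_area * min_iou; once the overlap drops below that
+// fraction the function returns false and tracking is considered lost.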
+bool IouRequirementsSatisfied(const NormalizedRect& prev_rect, + const NormalizedRect& recrop_rect, + const std::pair& image_size, + const float min_iou) { + auto r1 = Rectangle_f(prev_rect.x_center() * image_size.first, + prev_rect.y_center() * image_size.second, + prev_rect.width() * image_size.first, + prev_rect.height() * image_size.second); + auto r2 = Rectangle_f(recrop_rect.x_center() * image_size.first, + recrop_rect.y_center() * image_size.second, + recrop_rect.width() * image_size.first, + recrop_rect.height() * image_size.second); + + const float intersection_area = r1.Intersect(r2).Area(); + const float union_area = r1.Area() + r2.Area() - intersection_area; + + const float intersection_threshold = union_area * min_iou; + if (intersection_area < intersection_threshold) { + VLOG(1) << absl::StrFormat("Lost tracking: IoU intersection %f < %f", + intersection_area, intersection_threshold); + return false; + } + + return true; +} + +// Verifies that current frame re-crop rect rotation/translation/scale didn't +// change much comparing to the previous frame rect. Translation and scale are +// normalized by current frame re-crop rect. +bool RectRequirementsSatisfied(const NormalizedRect& prev_rect, + const NormalizedRect& recrop_rect, + const std::pair image_size, + const float rotation_degrees, + const float translation, const float scale) { + // Rotate both rects so that re-crop rect edges are parallel to XY axes. That + // will allow to compute x/y translation of the previous frame rect along axes + // of the current frame re-crop rect. + const float rotation = -recrop_rect.rotation(); + + const float cosa = cos(rotation); + const float sina = sin(rotation); + + // Rotate previous frame rect and get its parameters. + const float prev_rect_x = prev_rect.x_center() * image_size.first * cosa - + prev_rect.y_center() * image_size.second * sina; + const float prev_rect_y = prev_rect.x_center() * image_size.first * sina + + prev_rect.y_center() * image_size.second * cosa; + const float prev_rect_width = prev_rect.width() * image_size.first; + const float prev_rect_height = prev_rect.height() * image_size.second; + const float prev_rect_rotation = prev_rect.rotation() / M_PI * 180.f; + + // Rotate current frame re-crop rect and get its parameters. + const float recrop_rect_x = recrop_rect.x_center() * image_size.first * cosa - + recrop_rect.y_center() * image_size.second * sina; + const float recrop_rect_y = recrop_rect.x_center() * image_size.first * sina + + recrop_rect.y_center() * image_size.second * cosa; + const float recrop_rect_width = recrop_rect.width() * image_size.first; + const float recrop_rect_height = recrop_rect.height() * image_size.second; + const float recrop_rect_rotation = recrop_rect.rotation() / M_PI * 180.f; + + // Rect requirements are satisfied unless one of the checks below fails. + bool satisfied = true; + + // Ensure that rotation diff is in [0, 180] range. 
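+  // For illustration (hypothetical values): prev_rect_rotation = 170 and
+  // recrop_rect_rotation = -170 give a raw difference of 340 degrees, which
+  // wraps to -20 and an absolute difference of 20 degrees.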
+ float rotation_diff = prev_rect_rotation - recrop_rect_rotation; + if (rotation_diff > 180.f) { + rotation_diff -= 360.f; + } + if (rotation_diff < -180.f) { + rotation_diff += 360.f; + } + rotation_diff = abs(rotation_diff); + if (rotation_diff > rotation_degrees) { + satisfied = false; + VLOG(1) << absl::StrFormat("Lost tracking: rect rotation %f > %f", + rotation_diff, rotation_degrees); + } + + const float x_diff = abs(prev_rect_x - recrop_rect_x); + const float x_threshold = recrop_rect_width * translation; + if (x_diff > x_threshold) { + satisfied = false; + VLOG(1) << absl::StrFormat("Lost tracking: rect x translation %f > %f", + x_diff, x_threshold); + } + + const float y_diff = abs(prev_rect_y - recrop_rect_y); + const float y_threshold = recrop_rect_height * translation; + if (y_diff > y_threshold) { + satisfied = false; + VLOG(1) << absl::StrFormat("Lost tracking: rect y translation %f > %f", + y_diff, y_threshold); + } + + const float width_diff = abs(prev_rect_width - recrop_rect_width); + const float width_threshold = recrop_rect_width * scale; + if (width_diff > width_threshold) { + satisfied = false; + VLOG(1) << absl::StrFormat("Lost tracking: rect width %f > %f", width_diff, + width_threshold); + } + + const float height_diff = abs(prev_rect_height - recrop_rect_height); + const float height_threshold = recrop_rect_height * scale; + if (height_diff > height_threshold) { + satisfied = false; + VLOG(1) << absl::StrFormat("Lost tracking: rect height %f > %f", + height_diff, height_threshold); + } + + return satisfied; +} + +// Verifies that landmarks from the previous frame are within re-crop rectangle +// bounds on the current frame. +bool LandmarksRequirementsSatisfied(const NormalizedLandmarkList& landmarks, + const NormalizedRect& recrop_rect, + const std::pair image_size, + const float recrop_rect_margin) { + // Rotate both re-crop rectangle and landmarks so that re-crop rectangle edges + // are parallel to XY axes. It will allow to easily check if landmarks are + // within re-crop rect bounds along re-crop rect axes. + // + // Rect rotation is specified clockwise. To apply cos/sin functions we + // transform it into counterclockwise. + const float rotation = -recrop_rect.rotation(); + + const float cosa = cos(rotation); + const float sina = sin(rotation); + + // Rotate rect. + const float rect_x = recrop_rect.x_center() * image_size.first * cosa - + recrop_rect.y_center() * image_size.second * sina; + const float rect_y = recrop_rect.x_center() * image_size.first * sina + + recrop_rect.y_center() * image_size.second * cosa; + const float rect_width = + recrop_rect.width() * image_size.first * (1.f + recrop_rect_margin); + const float rect_height = + recrop_rect.height() * image_size.second * (1.f + recrop_rect_margin); + + // Get rect bounds. 
+ const float rect_left = rect_x - rect_width * 0.5f; + const float rect_right = rect_x + rect_width * 0.5f; + const float rect_top = rect_y - rect_height * 0.5f; + const float rect_bottom = rect_y + rect_height * 0.5f; + + for (int i = 0; i < landmarks.landmark_size(); ++i) { + const auto& landmark = landmarks.landmark(i); + const float x = landmark.x() * image_size.first * cosa - + landmark.y() * image_size.second * sina; + const float y = landmark.x() * image_size.first * sina + + landmark.y() * image_size.second * cosa; + + if (!(rect_left < x && x < rect_right && rect_top < y && y < rect_bottom)) { + VLOG(1) << "Lost tracking: landmarks out of re-crop rect"; + return false; + } + } + + return true; +} + +} // namespace + +// A calculator to track object rectangle between frames. +// +// Calculator checks that all requirements for tracking are satisfied and uses +// rectangle from the previous frame in this case, otherwise - uses current +// frame re-crop rectangle. +// +// There are several types of tracking requirements that can be configured via +// options: +// IoU: Verifies that IoU of the previous frame rectangle and current frame +// re-crop rectangle is less than a given threshold. +// Rect parameters: Verifies that rotation/translation/scale of the re-crop +// rectangle on the current frame is close to the rectangle from the +// previous frame within given thresholds. +// Landmarks: Verifies that landmarks from the previous frame are within +// the re-crop rectangle on the current frame. +// +// Inputs: +// PREV_LANDMARKS: Object landmarks from the previous frame. +// PREV_LANDMARKS_RECT: Object rectangle based on the landmarks from the +// previous frame. +// RECROP_RECT: Object re-crop rectangle from the current frame. +// IMAGE_SIZE: Image size to transform normalized coordinates to absolute. +// +// Outputs: +// TRACKING_RECT: Rectangle to use for object prediction on the current frame. +// It will be either object rectangle from the previous frame (if all +// tracking requirements are satisfied) or re-crop rectangle from the +// current frame (if tracking lost the object). 
+// +// Example config: +// node { +// calculator: "RoiTrackingCalculator" +// input_stream: "PREV_LANDMARKS:prev_hand_landmarks" +// input_stream: "PREV_LANDMARKS_RECT:prev_hand_landmarks_rect" +// input_stream: "RECROP_RECT:hand_recrop_rect" +// input_stream: "IMAGE_SIZE:image_size" +// output_stream: "TRACKING_RECT:hand_tracking_rect" +// options: { +// [mediapipe.RoiTrackingCalculatorOptions.ext] { +// rect_requirements: { +// rotation_degrees: 40.0 +// translation: 0.2 +// scale: 0.4 +// } +// landmarks_requirements: { +// recrop_rect_margin: -0.1 +// } +// } +// } +// } +class RoiTrackingCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc); + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + + private: + RoiTrackingCalculatorOptions options_; +}; +REGISTER_CALCULATOR(RoiTrackingCalculator); + +absl::Status RoiTrackingCalculator::GetContract(CalculatorContract* cc) { + cc->Inputs().Tag(kPrevLandmarksTag).Set(); + cc->Inputs().Tag(kPrevLandmarksRectTag).Set(); + cc->Inputs().Tag(kRecropRectTag).Set(); + cc->Inputs().Tag(kImageSizeTag).Set>(); + cc->Outputs().Tag(kTrackingRectTag).Set(); + + return absl::OkStatus(); +} + +absl::Status RoiTrackingCalculator::Open(CalculatorContext* cc) { + cc->SetOffset(TimestampDiff(0)); + options_ = cc->Options(); + return absl::OkStatus(); +} + +absl::Status RoiTrackingCalculator::Process(CalculatorContext* cc) { + // If there is no current frame re-crop rect (i.e. object is not present on + // the current frame) - return empty packet. + if (cc->Inputs().Tag(kRecropRectTag).IsEmpty()) { + return absl::OkStatus(); + } + + // If there is no previous rect, but there is current re-crop rect - return + // current re-crop rect as is. + if (cc->Inputs().Tag(kPrevLandmarksRectTag).IsEmpty()) { + cc->Outputs() + .Tag(kTrackingRectTag) + .AddPacket(cc->Inputs().Tag(kRecropRectTag).Value()); + return absl::OkStatus(); + } + + // At this point we have both previous rect (which also means we have previous + // landmarks) and currrent re-crop rect. + const auto& prev_landmarks = + cc->Inputs().Tag(kPrevLandmarksTag).Get(); + const auto& prev_rect = + cc->Inputs().Tag(kPrevLandmarksRectTag).Get(); + const auto& recrop_rect = + cc->Inputs().Tag(kRecropRectTag).Get(); + const auto& image_size = + cc->Inputs().Tag(kImageSizeTag).Get>(); + + // Keep tracking unless one of the requirements below is not satisfied. + bool keep_tracking = true; + + // If IoU of the previous rect and current re-crop rect is lower than allowed + // threshold - use current re-crop rect. + if (options_.has_iou_requirements() && + !IouRequirementsSatisfied(prev_rect, recrop_rect, image_size, + options_.iou_requirements().min_iou())) { + keep_tracking = false; + } + + // If previous rect and current re-crop rect differ more than it is allowed by + // the augmentations (used during the model training) - use current re-crop + // rect. + if (options_.has_rect_requirements() && + !RectRequirementsSatisfied( + prev_rect, recrop_rect, image_size, + options_.rect_requirements().rotation_degrees(), + options_.rect_requirements().translation(), + options_.rect_requirements().scale())) { + keep_tracking = false; + } + + // If landmarks from the previous frame are not in the current re-crop rect + // (i.e. object moved too fast and using previous frame rect won't cover + // landmarks on the current frame) - use current re-crop rect. 
+ if (options_.has_landmarks_requirements() && + !LandmarksRequirementsSatisfied( + prev_landmarks, recrop_rect, image_size, + options_.landmarks_requirements().recrop_rect_margin())) { + keep_tracking = false; + } + + // If the object didn't move much compared to the previous frame, keep + // tracking it and return the rect from the previous frame; otherwise return + // the re-crop rect from the current frame. + if (keep_tracking) { + cc->Outputs() + .Tag(kTrackingRectTag) + .AddPacket(cc->Inputs().Tag(kPrevLandmarksRectTag).Value()); + } else { + cc->Outputs() + .Tag(kTrackingRectTag) + .AddPacket(cc->Inputs().Tag(kRecropRectTag).Value()); + VLOG(1) << "Lost tracking: check messages above for details"; + } + + return absl::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.proto b/mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.proto new file mode 100644 index 0000000..ec3cf22 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.proto @@ -0,0 +1,59 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message RoiTrackingCalculatorOptions { + extend CalculatorOptions { + optional RoiTrackingCalculatorOptions ext = 329994630; + } + + // Verifies that the Intersection over Union of the previous frame rect and + // the current frame re-crop rect is not below the given threshold. + message IouRequirements { + optional float min_iou = 1 [default = 0.5]; + } + + // Verifies that the current frame re-crop rect rotation/translation/scale + // didn't change much compared to the previous frame rect. + message RectRequirements { + // Allowed rotation change defined in degrees. + optional float rotation_degrees = 1 [default = 10.0]; + + // Allowed translation change defined as absolute translation normalized by + // re-crop rectangle size. + optional float translation = 2 [default = 0.1]; + + // Allowed scale change defined as absolute size difference normalized by + // re-crop rectangle size. + optional float scale = 3 [default = 0.1]; + } + + // Verifies that landmarks from the previous frame are within re-crop + // rectangle bounds on the current frame. + message LandmarksRequirements { + // Margin to apply to the re-crop rectangle before verifying that landmarks + // are within it.
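+    // The rectangle's width and height are scaled by (1 + recrop_rect_margin),
+    // so a positive margin expands the rectangle while a negative one (for
+    // example the -0.1 used in the calculator's example config) shrinks it.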
+ optional float recrop_rect_margin = 1 [default = 0.0]; + } + + optional IouRequirements iou_requirements = 1; + + optional RectRequirements rect_requirements = 2; + + optional LandmarksRequirements landmarks_requirements = 3; +} diff --git a/mediapipe/modules/holistic_landmark/face_detection_front_detections_to_roi.pbtxt b/mediapipe/modules/holistic_landmark/face_detection_front_detections_to_roi.pbtxt new file mode 100644 index 0000000..7d9fa9e --- /dev/null +++ b/mediapipe/modules/holistic_landmark/face_detection_front_detections_to_roi.pbtxt @@ -0,0 +1,48 @@ +# Calculates ROI from detections provided by `face_detection_short_range.tflite` +# model. +type: "FaceDetectionFrontDetectionsToRoi" + +# Detected faces. (std::vector) +input_stream: "DETECTIONS:detections" +# Image size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# Refined (more accurate) ROI to use for face landmarks prediction. +# (NormalizedRect) +output_stream: "ROI:roi" + +# Converts the face detection into a rectangle (normalized by image size) +# that encloses the face and is rotated such that the line connecting right side +# of the right eye and left side of the left eye is aligned with the X-axis of +# the rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTIONS:detections" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:raw_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 # Right eye. + rotation_vector_end_keypoint_index: 1 # Left eye. + rotation_vector_target_angle_degrees: 0 + conversion_mode: USE_KEYPOINTS + } + } +} + +# Expands and shifts the rectangle that contains the face so that it's likely +# to cover the entire face. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:raw_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 2.0 + scale_y: 2.0 + shift_y: -0.1 + square_long: true + } + } +} diff --git a/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_cpu.pbtxt b/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_cpu.pbtxt new file mode 100644 index 0000000..1d99672 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_cpu.pbtxt @@ -0,0 +1,82 @@ +# Predicts face landmarks within an ROI derived from face-related pose +# landmarks. + +type: "FaceLandmarksFromPoseCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# Face-related pose landmarks. (NormalizedLandmarkList) +input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" + +# Whether to run the face landmark model with attention on lips and eyes to +# provide more accuracy, and additionally output iris landmarks. If unspecified, +# functions as set to false. (bool) +input_side_packet: "REFINE_LANDMARKS:refine_landmarks" + +# Face landmarks. (NormalizedLandmarkList) +output_stream: "FACE_LANDMARKS:face_landmarks" + +# Debug outputs. +# Face ROI derived from face-related pose landmarks, which defines the search +# region for the face detection model. (NormalizedRect) +output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose" +# Refined face crop rectangle predicted by face detection model. +# (NormalizedRect) +output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection" +# Rectangle used to predict face landmarks. 
(NormalizedRect) +output_stream: "FACE_TRACKING_ROI:face_tracking_roi" + +# TODO: do not predict face when most of the face landmarks from +# pose are invisible. + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:input_video" + output_stream: "SIZE:image_size" +} + +# Gets ROI for re-crop model from face-related pose landmarks. +node { + calculator: "FaceLandmarksFromPoseToRecropRoi" + input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:face_roi_from_pose" +} + +# Detects faces within the face ROI calculated from pose landmarks. This is done +# to refine face ROI for further landmark detection as ROI calculated from +# pose landmarks may be inaccurate. +node { + calculator: "FaceDetectionShortRangeByRoiCpu" + input_stream: "IMAGE:input_video" + input_stream: "ROI:face_roi_from_pose" + output_stream: "DETECTIONS:face_detections" +} + +# Calculates refined face ROI. +node { + calculator: "FaceDetectionFrontDetectionsToRoi" + input_stream: "DETECTIONS:face_detections" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:face_roi_from_detection" +} + +# Gets face tracking rectangle (either face rectangle from the previous +# frame or face re-crop rectangle from the current frame) for face prediction. +node { + calculator: "FaceTracking" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "FACE_RECROP_ROI:face_roi_from_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "FACE_TRACKING_ROI:face_tracking_roi" +} + +# Predicts face landmarks from the tracking rectangle. +node { + calculator: "FaceLandmarkCpu" + input_stream: "IMAGE:input_video" + input_stream: "ROI:face_tracking_roi" + input_side_packet: "WITH_ATTENTION:refine_landmarks" + output_stream: "LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_gpu.pbtxt b/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_gpu.pbtxt new file mode 100644 index 0000000..24a9854 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_gpu.pbtxt @@ -0,0 +1,82 @@ +# Predicts face landmarks within an ROI derived from face-related pose +# landmarks. + +type: "FaceLandmarksFromPoseGpu" + +# GPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# Face-related pose landmarks. (NormalizedLandmarkList) +input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" + +# Whether to run the face landmark model with attention on lips and eyes to +# provide more accuracy, and additionally output iris landmarks. If unspecified, +# functions as set to false. (bool) +input_side_packet: "REFINE_LANDMARKS:refine_landmarks" + +# Face landmarks. (NormalizedLandmarkList) +output_stream: "FACE_LANDMARKS:face_landmarks" + +# Debug outputs. +# Face ROI derived from face-related pose landmarks, which defines the search +# region for the face detection model. (NormalizedRect) +output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose" +# Refined face crop rectangle predicted by face detection model. +# (NormalizedRect) +output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection" +# Rectangle used to predict face landmarks. (NormalizedRect) +output_stream: "FACE_TRACKING_ROI:face_tracking_roi" + +# TODO: do not predict face when most of the face landmarks from +# pose are invisible. + +# Extracts image size from the input images. 
+node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "SIZE:image_size" +} + +# Gets ROI for re-crop model from face-related pose landmarks. +node { + calculator: "FaceLandmarksFromPoseToRecropRoi" + input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:face_roi_from_pose" +} + +# Detects faces within the face ROI calculated from pose landmarks. This is done +# to refine face ROI for further landmark detection as ROI calculated from +# pose landmarks may be inaccurate. +node { + calculator: "FaceDetectionShortRangeByRoiGpu" + input_stream: "IMAGE:input_video" + input_stream: "ROI:face_roi_from_pose" + output_stream: "DETECTIONS:face_detections" +} + +# Calculates refined face ROI. +node { + calculator: "FaceDetectionFrontDetectionsToRoi" + input_stream: "DETECTIONS:face_detections" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:face_roi_from_detection" +} + +# Gets face tracking rectangle (either face rectangle from the previous +# frame or face re-crop rectangle from the current frame) for face prediction. +node { + calculator: "FaceTracking" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "FACE_RECROP_ROI:face_roi_from_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "FACE_TRACKING_ROI:face_tracking_roi" +} + +# Predicts face landmarks from the tracking rectangle. +node { + calculator: "FaceLandmarkGpu" + input_stream: "IMAGE:input_video" + input_stream: "ROI:face_tracking_roi" + input_side_packet: "WITH_ATTENTION:refine_landmarks" + output_stream: "LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_to_recrop_roi.pbtxt b/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_to_recrop_roi.pbtxt new file mode 100644 index 0000000..65bd340 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/face_landmarks_from_pose_to_recrop_roi.pbtxt @@ -0,0 +1,51 @@ +# Converts face-related pose landmarks to re-crop ROI. + +type: "FaceLandmarksFromPoseToRecropRoi" + +# Face-related pose landmarks (There should be 11 of them). +# (NormalizedLandmarkList) +input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" +# Image size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI to be used for face detection. (NormalizedRect) +output_stream: "ROI:roi" + +# Converts face-related pose landmarks to a detection that tightly encloses all +# landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks_from_pose" + output_stream: "DETECTION:pose_face_detection" +} + +# Converts face detection to a normalized face rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:pose_face_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:pose_face_rect" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 5 # Right eye. + rotation_vector_end_keypoint_index: 2 # Left eye. + rotation_vector_target_angle_degrees: 0 + } + } +} + +# Expands face rectangle so that it becomes big enough for face detector to +# localize it accurately. 
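+# The 3x expansion below is deliberately generous: the ROI is derived from only
+# a few pose landmarks and may be off-center, and the short-range face detector
+# then refines it within this larger search region.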
+node {
+  calculator: "RectTransformationCalculator"
+  input_stream: "NORM_RECT:pose_face_rect"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "roi"
+  options: {
+    [mediapipe.RectTransformationCalculatorOptions.ext] {
+      scale_x: 3.0
+      scale_y: 3.0
+      square_long: true
+    }
+  }
+}
diff --git a/mediapipe/modules/holistic_landmark/face_landmarks_to_roi.pbtxt b/mediapipe/modules/holistic_landmark/face_landmarks_to_roi.pbtxt
new file mode 100644
index 0000000..8913cc1
--- /dev/null
+++ b/mediapipe/modules/holistic_landmark/face_landmarks_to_roi.pbtxt
@@ -0,0 +1,53 @@
+# Converts face landmarks to ROI.
+
+type: "FaceLandmarksToRoi"
+
+# Face landmarks. (NormalizedLandmarkList)
+input_stream: "LANDMARKS:face_landmarks"
+# Image size (width & height). (std::pair<int, int>)
+input_stream: "IMAGE_SIZE:image_size"
+
+# ROI according to landmarks. (NormalizedRect)
+output_stream: "ROI:roi"
+
+# Converts face landmarks to a detection that tightly encloses all landmarks.
+node {
+  calculator: "LandmarksToDetectionCalculator"
+  input_stream: "NORM_LANDMARKS:face_landmarks"
+  output_stream: "DETECTION:face_detection"
+}
+
+# Converts the face detection into a rectangle (normalized by image size)
+# that encloses the face and is rotated such that the line connecting the right
+# side of the left eye and the left side of the right eye is aligned with the
+# X-axis of the rectangle.
+node {
+  calculator: "DetectionsToRectsCalculator"
+  input_stream: "DETECTION:face_detection"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "NORM_RECT:face_landmarks_rect_tight"
+  options: {
+    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
+      rotation_vector_start_keypoint_index: 33  # Right side of left eye.
+      rotation_vector_end_keypoint_index: 263  # Left side of right eye.
+      rotation_vector_target_angle_degrees: 0
+    }
+  }
+}
+
+# Expands the face rectangle so that it's likely to contain the face even with
+# some motion.
+node {
+  calculator: "RectTransformationCalculator"
+  input_stream: "NORM_RECT:face_landmarks_rect_tight"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "roi"
+  options: {
+    [mediapipe.RectTransformationCalculatorOptions.ext] {
+      scale_x: 1.5
+      scale_y: 1.5
+      # TODO: remove `square_long` where appropriate.
+      square_long: true
+    }
+  }
+}
diff --git a/mediapipe/modules/holistic_landmark/face_tracking.pbtxt b/mediapipe/modules/holistic_landmark/face_tracking.pbtxt
new file mode 100644
index 0000000..53022d3
--- /dev/null
+++ b/mediapipe/modules/holistic_landmark/face_tracking.pbtxt
@@ -0,0 +1,61 @@
+# Decides what ROI to use for face landmarks prediction: either previous frame
+# landmarks ROI or the current frame face re-crop ROI.
+
+type: "FaceTracking"
+
+# Face landmarks from the current frame. They will be memorized for tracking on
+# the next frame. (NormalizedLandmarkList)
+input_stream: "LANDMARKS:face_landmarks"
+# Face re-crop ROI from the current frame. (NormalizedRect)
+input_stream: "FACE_RECROP_ROI:face_recrop_roi"
+# Image size (width & height). (std::pair<int, int>)
+input_stream: "IMAGE_SIZE:image_size"
+
+# Face tracking ROI. It is either the face landmarks ROI from the previous
+# frame, if the face is still tracked, or the face re-crop ROI from the current
+# frame otherwise. (NormalizedRect)
+output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
+
+# Keeps track of face landmarks from the previous frame.
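+# PreviousLoopbackCalculator emits, for every "MAIN" packet (image size), the
+# latest "LOOP" packet received over the back edge, i.e. the face landmarks
+# predicted on the previous frame (empty on the very first frame).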
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:image_size"
+  input_stream: "LOOP:face_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_face_landmarks"
+}
+
+# Gets the face landmarks ROI from the previous frame.
+node {
+  calculator: "FaceLandmarksToRoi"
+  input_stream: "LANDMARKS:prev_face_landmarks"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:prev_face_landmarks_rect"
+}
+
+# Checks that all requirements for tracking are satisfied and uses the face
+# rectangle from the previous frame in that case. Otherwise, it uses the face
+# re-crop rectangle from the current frame.
+node {
+  calculator: "RoiTrackingCalculator"
+  input_stream: "PREV_LANDMARKS:prev_face_landmarks"
+  input_stream: "PREV_LANDMARKS_RECT:prev_face_landmarks_rect"
+  input_stream: "RECROP_RECT:face_recrop_roi"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "TRACKING_RECT:face_tracking_roi"
+  options: {
+    [mediapipe.RoiTrackingCalculatorOptions.ext] {
+      rect_requirements: {
+        rotation_degrees: 15.0
+        translation: 0.1
+        scale: 0.3
+      }
+      landmarks_requirements: {
+        recrop_rect_margin: -0.2
+      }
+    }
+  }
+}
diff --git a/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_cpu.pbtxt b/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_cpu.pbtxt
new file mode 100644
index 0000000..0a44bcb
--- /dev/null
+++ b/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_cpu.pbtxt
@@ -0,0 +1,78 @@
+# Predicts hand landmarks within an ROI derived from hand-related pose
+# landmarks.
+
+type: "HandLandmarksFromPoseCpu"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:input_video"
+# Hand-related pose landmarks in [wrist, pinky, index] order.
+# (NormalizedLandmarkList)
+input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
+
+# Hand landmarks. (NormalizedLandmarkList)
+output_stream: "HAND_LANDMARKS:hand_landmarks"
+
+# Debug outputs.
+# Hand ROI derived from hand-related landmarks, which defines the search region
+# for the hand re-crop model. (NormalizedRect)
+output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
+# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
+output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
+# Rectangle used to predict hand landmarks. (NormalizedRect)
+output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
+
+# Gets hand visibility.
+node {
+  calculator: "HandVisibilityFromHandLandmarksFromPose"
+  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
+  output_stream: "VISIBILITY:hand_visibility"
+}
+
+# Drops hand-related pose landmarks if the pose wrist is not visible. This
+# prevents hand landmarks from being predicted on the current frame.
+node {
+  calculator: "GateCalculator"
+  input_stream: "hand_landmarks_from_pose"
+  input_stream: "ALLOW:hand_visibility"
+  output_stream: "ensured_hand_landmarks_from_pose"
+}
+
+# Extracts image size from the input images.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Gets ROI for re-crop model from hand-related pose landmarks.
+node {
+  calculator: "HandLandmarksFromPoseToRecropRoi"
+  input_stream: "HAND_LANDMARKS_FROM_POSE:ensured_hand_landmarks_from_pose"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:hand_roi_from_pose"
+}
+
+# Predicts hand re-crop rectangle on the current frame.
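+# The re-crop model refines the coarse pose-derived ROI: it predicts two
+# virtual keypoints (crop center and scale) from which a tighter, properly
+# rotated hand rectangle is computed (see hand_recrop_by_roi_cpu.pbtxt).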
+node { + calculator: "HandRecropByRoiCpu", + input_stream: "IMAGE:input_video" + input_stream: "ROI:hand_roi_from_pose" + output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" +} + +# Gets hand tracking rectangle (either hand rectangle from the previous +# frame or hand re-crop rectangle from the current frame) for hand prediction. +node { + calculator: "HandTracking" + input_stream: "LANDMARKS:hand_landmarks" + input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "HAND_TRACKING_ROI:hand_tracking_roi" +} + +# Predicts hand landmarks from the tracking rectangle. +node { + calculator: "HandLandmarkCpu" + input_stream: "IMAGE:input_video" + input_stream: "ROI:hand_tracking_roi" + output_stream: "LANDMARKS:hand_landmarks" +} diff --git a/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_gpu.pbtxt b/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_gpu.pbtxt new file mode 100644 index 0000000..0296e7d --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_gpu.pbtxt @@ -0,0 +1,78 @@ +# Predicts hand landmarks within a ROI derived from hand-related pose landmarks. + +type: "HandLandmarksFromPoseGpu" + +# GPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# Hand-related pose landmarks in [wrist, pinky, index] order. +# (NormalizedLandmarkList) +input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" + +# Hand landmarks. (NormalizedLandmarkList) +output_stream: "HAND_LANDMARKS:hand_landmarks" + +# Debug outputs. +# Hand ROI derived from hand-related landmarks, which defines the search region +# for the hand re-crop model. (NormalizedRect) +output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose" +# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect) +output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" +# Rectangle used to predict hand landmarks. (NormalizedRect) +output_stream: "HAND_TRACKING_ROI:hand_tracking_roi" + +# Gets hand visibility. +node { + calculator: "HandVisibilityFromHandLandmarksFromPose" + input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" + output_stream: "VISIBILITY:hand_visibility" +} + +# Drops hand-related pose landmarks if pose wrist is not visible. It will +# prevent from predicting hand landmarks on the current frame. +node { + calculator: "GateCalculator" + input_stream: "hand_landmarks_from_pose" + input_stream: "ALLOW:hand_visibility" + output_stream: "ensured_hand_landmarks_from_pose" +} + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "SIZE:image_size" +} + +# Gets ROI for re-crop model from hand-related pose landmarks. +node { + calculator: "HandLandmarksFromPoseToRecropRoi" + input_stream: "HAND_LANDMARKS_FROM_POSE:ensured_hand_landmarks_from_pose" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:hand_roi_from_pose" +} + +# Predicts hand re-crop rectangle on the current frame. +node { + calculator: "HandRecropByRoiGpu", + input_stream: "IMAGE:input_video" + input_stream: "ROI:hand_roi_from_pose" + output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" +} + +# Gets hand tracking rectangle (either hand rectangle from the previous +# frame or hand re-crop rectangle from the current frame) for hand prediction. 
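+# Tracking is kept only while the previous-frame hand rectangle stays
+# consistent with the current re-crop rectangle (see the
+# RoiTrackingCalculatorOptions in hand_tracking.pbtxt); otherwise the re-crop
+# rectangle is used.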
+node { + calculator: "HandTracking" + input_stream: "LANDMARKS:hand_landmarks" + input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "HAND_TRACKING_ROI:hand_tracking_roi" +} + +# Predicts hand landmarks from the tracking rectangle. +node { + calculator: "HandLandmarkGpu" + input_stream: "IMAGE:input_video" + input_stream: "ROI:hand_tracking_roi" + output_stream: "LANDMARKS:hand_landmarks" +} diff --git a/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_to_recrop_roi.pbtxt b/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_to_recrop_roi.pbtxt new file mode 100644 index 0000000..1c2cfe5 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_to_recrop_roi.pbtxt @@ -0,0 +1,45 @@ +# Converts hand-related pose landmarks to hand re-crop ROI. + +type: "HandLandmarksFromPoseToRecropRoi" + +# Hand-related pose landmarks in [wrist, pinky, index] order. +# (NormalizedLandmarkList) +input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" +# Image size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI to be used for re-crop prediction. (NormalizedRect) +output_stream: "ROI:roi" + +# Converts hand-related pose landmarks to a detection that tightly encloses all +# of them. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:hand_landmarks_from_pose" + output_stream: "DETECTION:hand_detection_from_pose" +} + +# Converts hand detection to a normalized hand rectangle. +node { + calculator: "HandDetectionsFromPoseToRectsCalculator" + input_stream: "DETECTION:hand_detection_from_pose" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:hand_roi_from_pose" +} + +# Expands the palm rectangle so that it becomes big enough for hand re-crop +# model to localize it accurately. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:hand_roi_from_pose" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 2.7 + scale_y: 2.7 + shift_y: -0.1 + square_long: true + } + } +} diff --git a/mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_cpu.pbtxt b/mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_cpu.pbtxt new file mode 100644 index 0000000..75e0133 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_cpu.pbtxt @@ -0,0 +1,76 @@ +# Predicts left and right hand landmarks within corresponding ROIs derived from +# hand-related pose landmarks. + +type: "HandLandmarksLeftAndRightCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList) +input_stream: "POSE_LANDMARKS:pose_landmarks" + +# Left hand landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +# RIght hand landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" + +# Debug outputs. 
+output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose" +output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" +output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi" +output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose" +output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" +output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi" + +# Extracts left-hand-related landmarks from the pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "left_hand_landmarks_from_pose" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 15 end: 16 } + ranges: { begin: 17 end: 18 } + ranges: { begin: 19 end: 20 } + combine_outputs: true + } + } +} + +# Predicts left hand landmarks. +node { + calculator: "HandLandmarksFromPoseCpu" + input_stream: "IMAGE:input_video" + input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose" + output_stream: "HAND_LANDMARKS:left_hand_landmarks" + # Debug outputs. + output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose" + output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" + output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi" +} + +# Extracts right-hand-related landmarks from the pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "right_hand_landmarks_from_pose" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 16 end: 17 } + ranges: { begin: 18 end: 19 } + ranges: { begin: 20 end: 21 } + combine_outputs: true + } + } +} + +# Extracts right-hand-related landmarks from the pose landmarks. +node { + calculator: "HandLandmarksFromPoseCpu" + input_stream: "IMAGE:input_video" + input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose" + output_stream: "HAND_LANDMARKS:right_hand_landmarks" + # Debug outputs. + output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose" + output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" + output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi" +} diff --git a/mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_gpu.pbtxt b/mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_gpu.pbtxt new file mode 100644 index 0000000..adeec2b --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_gpu.pbtxt @@ -0,0 +1,76 @@ +# Predicts left and right hand landmarks within corresponding ROIs derived from +# hand-related pose landmarks. + +type: "HandLandmarksLeftAndRightGpu" + +# GPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList) +input_stream: "POSE_LANDMARKS:pose_landmarks" + +# Left hand landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +# RIght hand landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" + +# Debug outputs. +output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose" +output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" +output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi" +output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose" +output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" +output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi" + +# Extracts left-hand-related landmarks from the pose landmarks. 
+node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "left_hand_landmarks_from_pose" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 15 end: 16 } + ranges: { begin: 17 end: 18 } + ranges: { begin: 19 end: 20 } + combine_outputs: true + } + } +} + +# Predicts left hand landmarks. +node { + calculator: "HandLandmarksFromPoseGpu" + input_stream: "IMAGE:input_video" + input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose" + output_stream: "HAND_LANDMARKS:left_hand_landmarks" + # Debug outputs. + output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose" + output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" + output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi" +} + +# Extracts right-hand-related landmarks from the pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "right_hand_landmarks_from_pose" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 16 end: 17 } + ranges: { begin: 18 end: 19 } + ranges: { begin: 20 end: 21 } + combine_outputs: true + } + } +} + +# Extracts right-hand-related landmarks from the pose landmarks. +node { + calculator: "HandLandmarksFromPoseGpu" + input_stream: "IMAGE:input_video" + input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose" + output_stream: "HAND_LANDMARKS:right_hand_landmarks" + # Debug outputs. + output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose" + output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" + output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi" +} diff --git a/mediapipe/modules/holistic_landmark/hand_landmarks_to_roi.pbtxt b/mediapipe/modules/holistic_landmark/hand_landmarks_to_roi.pbtxt new file mode 100644 index 0000000..b874c1d --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_landmarks_to_roi.pbtxt @@ -0,0 +1,57 @@ +# Converts hand landmarks to ROI. + +type: "HandLandmarksToRoi" + +# Hand landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:hand_landmarks" +# Image size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI according to the hand landmarks. (NormalizedRect) +output_stream: "ROI:roi" + +# Gets hand palm landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "hand_landmarks" + output_stream: "palm_landmarks" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 4 } + ranges: { begin: 5 end: 7 } + ranges: { begin: 9 end: 11 } + ranges: { begin: 13 end: 15 } + ranges: { begin: 17 end: 19 } + combine_outputs: true + } + } +} + +# Converts the hand landmarks into a rectangle (normalized by image size) +# that encloses the hand. The calculator uses a subset of all hand landmarks +# extracted from SplitNormalizedLandmarkListCalculator above to +# calculate the bounding box and the rotation of the output rectangle. Please +# see the comments in the calculator for more detail. +node { + calculator: "HandLandmarksToRectCalculator" + input_stream: "NORM_LANDMARKS:palm_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:palm_landmarks_rect" +} + +# Expands the hand rectangle so that it's likely to contain the hand even with +# some motion. 
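+# `square_long` below makes the rectangle square using its longer side, so the
+# crop keeps the whole hand regardless of rotation, and the slight shift
+# (shift_y: -0.1) moves the crop away from the wrist towards the fingers.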
+node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:palm_landmarks_rect" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 2.0 + scale_y: 2.0 + shift_y: -0.1 + square_long: true + } + } +} diff --git a/mediapipe/modules/holistic_landmark/hand_recrop.tflite b/mediapipe/modules/holistic_landmark/hand_recrop.tflite new file mode 100755 index 0000000..dcfd276 Binary files /dev/null and b/mediapipe/modules/holistic_landmark/hand_recrop.tflite differ diff --git a/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_cpu.pbtxt b/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_cpu.pbtxt new file mode 100644 index 0000000..75141d2 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_cpu.pbtxt @@ -0,0 +1,137 @@ +# Predicts more accurate hand location (re-crop ROI) within a given ROI. + +type: "HandRecropByRoiCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# ROI (region of interest) within the given image where a palm/hand is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# Refined (more accurate) ROI to use for hand landmark prediction. +# (NormalizedRect) +output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined" + +# Transforms hand ROI from the input image to a 256x256 tensor. Preserves aspect +# ratio, which results in a letterbox padding. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:input_video" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:initial_crop_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 256 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + # For OpenGL origin should be at the top left corner. + gpu_origin: TOP_LEFT, + } + } +} + +# Predicts hand re-crop rectangle. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:initial_crop_tensor" + output_stream: "TENSORS:landmark_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/holistic_landmark/hand_recrop.tflite" + delegate { xnnpack {} } + } + } +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. Two +# landmarks represent two virtual points: crop and scale of the new crop. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 2 + input_image_width: 256 + input_image_height: 256 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (hand +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:scaled_landmarks" +} + +# Projects the landmarks from the cropped hand image to the corresponding +# locations on the full image before cropping (input to the graph). 
+node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:scaled_landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:alignment_landmarks" +} + +# Converts hand landmarks to a detection that tightly encloses all landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:alignment_landmarks" + output_stream: "DETECTION:hand_detection" +} + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:input_video" + output_stream: "SIZE:image_size" +} + +# Converts hand detection into a rectangle based on center and scale alignment +# points. +node { + calculator: "AlignmentPointsRectsCalculator" + input_stream: "DETECTION:hand_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:hand_roi_from_recrop" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 + rotation_vector_end_keypoint_index: 1 + rotation_vector_target_angle_degrees: -90 + } + } +} + +# TODO: revise hand recrop roi calculation. +# Slighly moves hand re-crop rectangle from wrist towards fingertips. Due to the +# new hand cropping logic, crop border is to close to finger tips while a lot of +# space is below the wrist. And when moving hand up fast (with fingers pointing +# up) and using hand rect from the previous frame for tracking - fingertips can +# be cropped. This adjustment partially solves it, but hand cropping logic +# should be reviewed. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:hand_roi_from_recrop" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "hand_roi_from_recrop_refined" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.0 + scale_y: 1.0 + shift_y: -0.1 + square_long: true + } + } +} diff --git a/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_gpu.pbtxt b/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_gpu.pbtxt new file mode 100644 index 0000000..4fa8f29 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_recrop_by_roi_gpu.pbtxt @@ -0,0 +1,136 @@ +# Predicts more accurate hand location (re-crop ROI) within a given ROI. + +type: "HandRecropByRoiGpu" + +# GPU image. (ImageFrame) +input_stream: "IMAGE:input_video" +# ROI (region of interest) within the given image where a palm/hand is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# Refined (more accurate) ROI to use for hand landmark prediction. +# (NormalizedRect) +output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined" + +# Transforms hand ROI from the input image to a 256x256 tensor. Preserves aspect +# ratio, which results in a letterbox padding. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:input_video" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:initial_crop_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 256 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + # For OpenGL origin should be at the top left corner. + gpu_origin: TOP_LEFT, + } + } +} + +# Predicts hand re-crop rectangle. 
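+# Unlike the CPU variant, no xnnpack delegate is specified here, so the
+# InferenceCalculator uses its default delegate (typically GPU when the graph
+# is built with GPU support).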
+node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:initial_crop_tensor" + output_stream: "TENSORS:landmark_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/holistic_landmark/hand_recrop.tflite" + } + } +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. Two +# landmarks represent two virtual points: crop and scale of the new crop. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 2 + input_image_width: 256 + input_image_height: 256 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (hand +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:scaled_landmarks" +} + +# Projects the landmarks from the cropped hand image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:scaled_landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:alignment_landmarks" +} + +# Converts hand landmarks to a detection that tightly encloses all landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:alignment_landmarks" + output_stream: "DETECTION:hand_detection" +} + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "SIZE:image_size" +} + +# Converts hand detection into a rectangle based on center and scale alignment +# points. +node { + calculator: "AlignmentPointsRectsCalculator" + input_stream: "DETECTION:hand_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:hand_roi_from_recrop" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 + rotation_vector_end_keypoint_index: 1 + rotation_vector_target_angle_degrees: -90 + } + } +} + +# TODO: revise hand recrop roi calculation. +# Slighly moves hand re-crop rectangle from wrist towards fingertips. Due to the +# new hand cropping logic, crop border is to close to finger tips while a lot of +# space is below the wrist. And when moving hand up fast (with fingers pointing +# up) and using hand rect from the previous frame for tracking - fingertips can +# be cropped. This adjustment partially solves it, but hand cropping logic +# should be reviewed. 
+node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:hand_roi_from_recrop" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "hand_roi_from_recrop_refined" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.0 + scale_y: 1.0 + shift_y: -0.1 + square_long: true + } + } +} diff --git a/mediapipe/modules/holistic_landmark/hand_tracking.pbtxt b/mediapipe/modules/holistic_landmark/hand_tracking.pbtxt new file mode 100644 index 0000000..07f734e --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_tracking.pbtxt @@ -0,0 +1,63 @@ +# Decides what ROI to use for hand landmark prediction: either previous frame +# landmarks ROI or current frame re-crop ROI. + +type: "HandTracking" + +# Hand landmarks from the current frame. They will be memorized for tracking on +# the next frame. (NormalizedLandmarkList) +input_stream: "LANDMARKS:hand_landmarks" +# Hand re-crop ROI from the current frame. (NormalizedRect) +input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" +# Image size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# Hand tracking ROI. Which is either hand landmarks ROI from the previous frame +# if hand is still tracked, or hand re-crop ROI from the current frame +# othervise. (NormalizedRect) +output_stream: "HAND_TRACKING_ROI:hand_tracking_roi" + +# Keeps track of hand landmarks from the previous frame. +node { + calculator: "PreviousLoopbackCalculator" + # TODO: check that loop works with image size instead of video. + input_stream: "MAIN:image_size" + input_stream: "LOOP:hand_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_hand_landmarks" +} + +# Gets hand landarmsk rect. +node { + calculator: "HandLandmarksToRoi" + input_stream: "LANDMARKS:prev_hand_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:prev_hand_landmarks_roi" +} + +# Checks that all requirements for tracking are satisfied and use hand rectangle +# from the previous frame in that case. Otherwise - use hand re-crop rectangle +# from the current frame. +node { + calculator: "RoiTrackingCalculator" + input_stream: "PREV_LANDMARKS:prev_hand_landmarks" + input_stream: "PREV_LANDMARKS_RECT:prev_hand_landmarks_roi" + input_stream: "RECROP_RECT:hand_roi_from_recrop" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "TRACKING_RECT:hand_tracking_roi" + options: { + [mediapipe.RoiTrackingCalculatorOptions.ext] { + rect_requirements: { + rotation_degrees: 40.0 + translation: 0.2 + # TODO: adjust scale for hand tracking. + scale: 0.4 + } + landmarks_requirements: { + recrop_rect_margin: -0.1 + } + } + } +} diff --git a/mediapipe/modules/holistic_landmark/hand_visibility_from_hand_landmarks_from_pose.pbtxt b/mediapipe/modules/holistic_landmark/hand_visibility_from_hand_landmarks_from_pose.pbtxt new file mode 100644 index 0000000..02db672 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_visibility_from_hand_landmarks_from_pose.pbtxt @@ -0,0 +1,44 @@ +# Determines hand visibility from the visibility prediction values in the +# hand-related pose landmarks. + +type: "HandVisibilityFromHandLandmarksFromPose" + +# Hand-related pose landmarks in [wrist, pinky, index] order. +# (NormalizedLandmarkList) +input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" + +# Hand visibility to be used as a trigger for hand landmark prediction. (bool) +output_stream: "VISIBILITY:wrist_visibility" + +# Gets pose wrist landmark. 
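+# Only the wrist (the first of the [wrist, pinky, index] landmarks) is used;
+# its visibility score is thresholded below to decide whether hand landmark
+# prediction should run at all.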
+node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "hand_landmarks_from_pose" + output_stream: "pose_wrist_landmark" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + } + } +} + +# TODO: Use other than pose wrist palm landmarks. +# Gets pose wrist visiblity. +node { + calculator: "LandmarkVisibilityCalculator" + input_stream: "NORM_LANDMARKS:pose_wrist_landmark" + output_stream: "VISIBILITY:wrist_visibility_score" +} + +# TODO: ensure the same threshold in rendering. +# Converts pose wrist visibility score into boolean flag. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:wrist_visibility_score" + output_stream: "FLAG:wrist_visibility" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.1 + } + } +} diff --git a/mediapipe/modules/holistic_landmark/hand_wrist_for_pose.pbtxt b/mediapipe/modules/holistic_landmark/hand_wrist_for_pose.pbtxt new file mode 100644 index 0000000..f6551bb --- /dev/null +++ b/mediapipe/modules/holistic_landmark/hand_wrist_for_pose.pbtxt @@ -0,0 +1,52 @@ +# Extracts hand wrist landmark to be used instead of pose wrist landmark. + +type: "HandWristForPose" + +# Hand landmarks to take wrist landmark from. (NormalizedLandmarkList) +input_stream: "HAND_LANDMARKS:hand_landmarks" + +# Hand wrist landmark to replace original pose wrist landmark with updated +# visibility. (NormalizedLandmarkList) +output_stream: "WRIST_LANDMARK:hand_wrist_landmark_with_visibility" + +# Side packet with constant for visibility score. As score is `x` from +# `sigmoid(x)` we pick some big value that doesn't affect pose landmarks +# visibility rendering threshold. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:0:visible_score_side_packet" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { float_value: 100.0 } + } + } +} + +# Converts side packet with visibility score to a stream. +node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:hand_landmarks" + input_side_packet: "visible_score_side_packet" + output_stream: "AT_TICK:visible_score" +} + +# Extracts wrist landmark from the hand landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "hand_landmarks" + output_stream: "hand_wrist_landmark" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + } + } +} + +# Sets wrist landmark visibility score. If HAND_LANDMARKS is non-empty - wrist +# will always be visible. +node { + calculator: "SetLandmarkVisibilityCalculator" + input_stream: "NORM_LANDMARKS:hand_wrist_landmark" + input_stream: "VISIBILITY:visible_score" + output_stream: "NORM_LANDMARKS:hand_wrist_landmark_with_visibility" +} diff --git a/mediapipe/modules/holistic_landmark/holistic_landmark_cpu.pbtxt b/mediapipe/modules/holistic_landmark/holistic_landmark_cpu.pbtxt new file mode 100644 index 0000000..ce86d1d --- /dev/null +++ b/mediapipe/modules/holistic_landmark/holistic_landmark_cpu.pbtxt @@ -0,0 +1,146 @@ +# Predicts pose + left/right hand + face landmarks. 
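+#
+# The pose model runs first; per-region ROIs are then derived from the pose
+# landmarks to drive the dedicated hand and face landmark models, rather than
+# running an independent detector for each region on every frame.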
+# +# It is required that: +# - "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# +# - "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# +# - "hand_landmark_full.tflite" is available at +# "mediapipe/modules/hand_landmark/hand_landmark_full.tflite" +# +# - "hand_recrop.tflite" is available at +# "mediapipe/modules/holistic_landmark/hand_recrop.tflite" +# +# - "handedness.txt" is available at +# "mediapipe/modules/hand_landmark/handedness.txt" +# +# - "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# +# - "pose_landmark_lite.tflite" or "pose_landmark_full.tflite" or +# "pose_landmark_heavy.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite" +# path respectively during execution, depending on the specification in the +# MODEL_COMPLEXITY input side packet. +# +# EXAMPLE: +# node { +# calculator: "HolisticLandmarkCpu" +# input_stream: "IMAGE:input_video" +# input_side_packet: "MODEL_COMPLEXITY:model_complexity" +# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" +# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" +# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" +# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" +# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# output_stream: "POSE_LANDMARKS:pose_landmarks" +# output_stream: "FACE_LANDMARKS:face_landmarks" +# output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +# output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +# } +# +# NOTE: if a pose/hand/face output is not present in the image, for this +# particular timestamp there will not be an output packet in the corresponding +# output stream below. However, the MediaPipe framework will internally inform +# the downstream calculators of the absence of this packet so that they don't +# wait for it unnecessarily. + +type: "HolisticLandmarkCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether to filter landmarks across different input images to reduce jitter. +# If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" + +# Whether to predict the segmentation mask. If unspecified, functions as set to +# false. (bool) +input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + +# Whether to filter segmentation mask across different input images to reduce +# jitter. If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" + +# Whether to run the face landmark model with attention on lips and eyes to +# provide more accuracy, and additionally output iris landmarks. If unspecified, +# functions as set to false. (bool) +input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Pose landmarks. 
(NormalizedLandmarkList) +# 33 pose landmarks. +output_stream: "POSE_LANDMARKS:pose_landmarks" +# 33 pose world landmarks. (LandmarkList) +output_stream: "WORLD_LANDMARKS:pose_world_landmarks" +# 21 left hand landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +# 21 right hand landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +# 468 face landmarks. (NormalizedLandmarkList) +output_stream: "FACE_LANDMARKS:face_landmarks" + +# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Debug outputs +output_stream: "POSE_ROI:pose_landmarks_roi" +output_stream: "POSE_DETECTION:pose_detection" + +# Predicts pose landmarks. +node { + calculator: "PoseLandmarkCpu" + input_stream: "IMAGE:image" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" + input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" + input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + output_stream: "LANDMARKS:pose_landmarks" + output_stream: "WORLD_LANDMARKS:pose_world_landmarks" + output_stream: "SEGMENTATION_MASK:segmentation_mask" + output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi" + output_stream: "DETECTION:pose_detection" +} + +# Predicts left and right hand landmarks based on the initial pose landmarks. +node { + calculator: "HandLandmarksLeftAndRightCpu" + input_stream: "IMAGE:image" + input_stream: "POSE_LANDMARKS:pose_landmarks" + output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" + output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +} + +# Extracts face-related pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "face_landmarks_from_pose" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 11 } + } + } +} + +# Predicts face landmarks based on the initial pose landmarks. +node { + calculator: "FaceLandmarksFromPoseCpu" + input_stream: "IMAGE:image" + input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" + input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks" + output_stream: "FACE_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt b/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt new file mode 100644 index 0000000..33ed880 --- /dev/null +++ b/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt @@ -0,0 +1,146 @@ +# Predicts pose + left/right hand + face landmarks. 
+# +# It is required that: +# - "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# +# - "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# +# - "hand_landmark_full.tflite" is available at +# "mediapipe/modules/hand_landmark/hand_landmark_full.tflite" +# +# - "hand_recrop.tflite" is available at +# "mediapipe/modules/holistic_landmark/hand_recrop.tflite" +# +# - "handedness.txt" is available at +# "mediapipe/modules/hand_landmark/handedness.txt" +# +# - "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# +# - "pose_landmark_lite.tflite" or "pose_landmark_full.tflite" or +# "pose_landmark_heavy.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite" +# path respectively during execution, depending on the specification in the +# MODEL_COMPLEXITY input side packet. +# +# EXAMPLE: +# node { +# calculator: "HolisticLandmarkGpu" +# input_stream: "IMAGE:input_video" +# input_side_packet: "MODEL_COMPLEXITY:model_complexity" +# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" +# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" +# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" +# input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" +# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# output_stream: "POSE_LANDMARKS:pose_landmarks" +# output_stream: "FACE_LANDMARKS:face_landmarks" +# output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +# output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +# } +# +# NOTE: if a pose/hand/face output is not present in the image, for this +# particular timestamp there will not be an output packet in the corresponding +# output stream below. However, the MediaPipe framework will internally inform +# the downstream calculators of the absence of this packet so that they don't +# wait for it unnecessarily. + +type: "HolisticLandmarkGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether to filter landmarks across different input images to reduce jitter. +# If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" + +# Whether to predict the segmentation mask. If unspecified, functions as set to +# false. (bool) +input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + +# Whether to filter segmentation mask across different input images to reduce +# jitter. If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" + +# Whether to run the face landmark model with attention on lips and eyes to +# provide more accuracy, and additionally output iris landmarks. If unspecified, +# functions as set to false. (bool) +input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Pose landmarks. 
(NormalizedLandmarkList) +# 33 pose landmarks. +output_stream: "POSE_LANDMARKS:pose_landmarks" +# 33 pose world landmarks. (LandmarkList) +output_stream: "WORLD_LANDMARKS:pose_world_landmarks" +# 21 left hand landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" +# 21 right hand landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +# 468 face landmarks. (NormalizedLandmarkList) +output_stream: "FACE_LANDMARKS:face_landmarks" + +# Segmentation mask. (GpuBuffer in RGBA, with the same mask values in R and A) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Debug outputs +output_stream: "POSE_ROI:pose_landmarks_roi" +output_stream: "POSE_DETECTION:pose_detection" + +# Predicts pose landmarks. +node { + calculator: "PoseLandmarkGpu" + input_stream: "IMAGE:image" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" + input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" + input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + output_stream: "LANDMARKS:pose_landmarks" + output_stream: "WORLD_LANDMARKS:pose_world_landmarks" + output_stream: "SEGMENTATION_MASK:segmentation_mask" + output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi" + output_stream: "DETECTION:pose_detection" +} + +# Predicts left and right hand landmarks based on the initial pose landmarks. +node { + calculator: "HandLandmarksLeftAndRightGpu" + input_stream: "IMAGE:image" + input_stream: "POSE_LANDMARKS:pose_landmarks" + output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" + output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" +} + +# Extracts face-related pose landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "pose_landmarks" + output_stream: "face_landmarks_from_pose" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 11 } + } + } +} + +# Predicts face landmarks based on the initial pose landmarks. +node { + calculator: "FaceLandmarksFromPoseGpu" + input_stream: "IMAGE:image" + input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" + input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks" + output_stream: "FACE_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/iris_landmark/BUILD b/mediapipe/modules/iris_landmark/BUILD new file mode 100644 index 0000000..e16a79b --- /dev/null +++ b/mediapipe/modules/iris_landmark/BUILD @@ -0,0 +1,103 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "iris_landmark_cpu", + graph = "iris_landmark_cpu.pbtxt", + register_as = "IrisLandmarkCpu", + deps = [ + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_cropping_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "iris_landmark_gpu", + graph = "iris_landmark_gpu.pbtxt", + register_as = "IrisLandmarkGpu", + deps = [ + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_cropping_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "iris_landmark_left_and_right_gpu", + graph = "iris_landmark_left_and_right_gpu.pbtxt", + register_as = "IrisLandmarkLeftAndRightGpu", + deps = [ + ":iris_landmark_gpu", + ":iris_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:side_packet_to_stream_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "iris_landmark_left_and_right_cpu", + graph = "iris_landmark_left_and_right_cpu.pbtxt", + register_as = "IrisLandmarkLeftAndRightCpu", + deps = [ + ":iris_landmark_cpu", + ":iris_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:side_packet_to_stream_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + ], +) + +exports_files( + srcs = [ + "iris_landmark.tflite", + ], +) + +mediapipe_simple_subgraph( + name = "iris_landmark_landmarks_to_roi", + graph = "iris_landmark_landmarks_to_roi.pbtxt", + register_as = "IrisLandmarkLandmarksToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) diff --git a/mediapipe/modules/iris_landmark/README.md b/mediapipe/modules/iris_landmark/README.md new file mode 100644 index 0000000..f99fcee --- /dev/null +++ b/mediapipe/modules/iris_landmark/README.md @@ -0,0 +1,8 @@ +# iris_landmark + +Subgraphs|Details +:--- | :--- 
+[`IrisLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark_cpu.pbtxt)| Detects iris landmarks for left or right eye. (CPU input, and inference is executed on CPU.) +[`IrisLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark_gpu.pbtxt)| Detects iris landmarks for left or right eye. (GPU input, and inference is executed on GPU) +[`IrisLandmarkLeftAndRightCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_cpu.pbtxt)| Detects iris landmarks for both left and right eyes. (CPU input, and inference is executed on CPU) +[`IrisLandmarkLeftAndRightGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_gpu.pbtxt)| Detects iris landmarks for both left and right eyes. (GPU input, and inference is executed on GPU.) diff --git a/mediapipe/modules/iris_landmark/iris_landmark.tflite b/mediapipe/modules/iris_landmark/iris_landmark.tflite new file mode 100755 index 0000000..974b910 Binary files /dev/null and b/mediapipe/modules/iris_landmark/iris_landmark.tflite differ diff --git a/mediapipe/modules/iris_landmark/iris_landmark_cpu.pbtxt b/mediapipe/modules/iris_landmark/iris_landmark_cpu.pbtxt new file mode 100644 index 0000000..f2c4b04 --- /dev/null +++ b/mediapipe/modules/iris_landmark/iris_landmark_cpu.pbtxt @@ -0,0 +1,156 @@ +# MediaPipe subgraph to calculate iris landmarks and eye contour landmarks for +# a single eye. (CPU input, and inference is executed on CPU.) +# +# It is required that "iris_landmark.tflite" is available at +# "mediapipe/modules/iris_landmark/iris_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "IrisLandmarkCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:eye_roi" +# input_stream: "IS_RIGHT_EYE:is_right_eye" +# output_stream: "EYE_CONTOUR_LANDMARKS:eye_contour_landmarks" +# output_stream: "IRIS_LANDMARKS:iris_landmarks" +# } + +type: "IrisLandmarkCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where an eye is located. +# (NormalizedRect) +input_stream: "ROI:roi" +# Is right eye. (bool) +# (Model is trained to detect left eye landmarks only, hence for right eye, +# flipping is required to immitate left eye.) +input_stream: "IS_RIGHT_EYE:is_right_eye" + +# 71 refined normalized eye contour landmarks. (NormalizedLandmarkList) +output_stream: "EYE_CONTOUR_LANDMARKS:projected_eye_landmarks" +# 5 normalized iris landmarks. (NormalizedLandmarkList) +output_stream: "IRIS_LANDMARKS:projected_iris_landmarks" + +node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "IMAGE:eye_image" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + border_mode: BORDER_REPLICATE + } + } +} + +node { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:eye_image" + input_stream: "FLIP_HORIZONTALLY:is_right_eye" + output_stream: "IMAGE:transformed_eye_image" + output_stream: "LETTERBOX_PADDING:eye_letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 64 + output_height: 64 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on CPU into an image tensor stored as a +# TfLiteTensor. 
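A note on the conversion step that follows: as far as I can tell, `zero_center: false` in TfLiteConverterCalculatorOptions scales 8-bit pixels into [0, 1] rather than the zero-centered [-1, 1]; the exact ranges are my assumption, not stated in this diff. A rough Rust sketch of the two modes:

```rust
/// Maps an 8-bit pixel either to [0, 1] (zero_center = false)
/// or to [-1, 1] (zero_center = true), per channel.
/// Assumed behaviour, not taken from the MediaPipe sources in this diff.
fn normalize_pixel(value: u8, zero_center: bool) -> f32 {
    let v = value as f32 / 255.0;
    if zero_center { v * 2.0 - 1.0 } else { v }
}

fn main() {
    // A 64x64 RGB eye crop flattened into a tensor-like buffer.
    let rgb_crop = vec![128u8; 64 * 64 * 3];
    let tensor: Vec<f32> = rgb_crop
        .iter()
        .map(|&p| normalize_pixel(p, /* zero_center = */ false))
        .collect();
    println!("first value: {:.3}, len: {}", tensor[0], tensor.len());
}
```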
+node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_eye_image" + output_stream: "TENSORS:image_tensor" + options: { + [mediapipe.TfLiteConverterCalculatorOptions.ext] { + zero_center: false + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/iris_landmark/iris_landmark.tflite" + delegate { xnnpack {} } + } + } +} + +# Splits a vector of TFLite tensors to multiple vectors according to the ranges +# specified in option. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "eye_landmarks_tensor" + output_stream: "iris_landmarks_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:iris_landmarks_tensor" + input_stream: "FLIP_HORIZONTALLY:is_right_eye" + output_stream: "NORM_LANDMARKS:iris_landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 5 + input_image_width: 64 + input_image_height: 64 + } + } +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:eye_landmarks_tensor" + input_stream: "FLIP_HORIZONTALLY:is_right_eye" + output_stream: "NORM_LANDMARKS:eye_landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 71 + input_image_width: 64 + input_image_height: 64 + } + } +} + +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:0:iris_landmarks" + input_stream: "LANDMARKS:1:eye_landmarks" + input_stream: "LETTERBOX_PADDING:eye_letterbox_padding" + output_stream: "LANDMARKS:0:padded_iris_landmarks" + output_stream: "LANDMARKS:1:padded_eye_landmarks" +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:0:padded_iris_landmarks" + input_stream: "NORM_LANDMARKS:1:padded_eye_landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:0:projected_iris_landmarks" + output_stream: "NORM_LANDMARKS:1:projected_eye_landmarks" +} + diff --git a/mediapipe/modules/iris_landmark/iris_landmark_gpu.pbtxt b/mediapipe/modules/iris_landmark/iris_landmark_gpu.pbtxt new file mode 100644 index 0000000..9fb7898 --- /dev/null +++ b/mediapipe/modules/iris_landmark/iris_landmark_gpu.pbtxt @@ -0,0 +1,162 @@ +# MediaPipe subgraph to calculate iris landmarks and eye contour landmarks for +# a single eye. (GPU input, and inference is executed on GPU.) +# +# It is required that "iris_landmark.tflite" is available at +# "mediapipe/modules/iris_landmark/iris_landmark.tflite" +# path during execution. 
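Before the GPU variant, a note on how IrisLandmarkCpu finishes above: LandmarkLetterboxRemovalCalculator undoes the FIT-mode padding reported in LETTERBOX_PADDING, shifting and rescaling each normalized coordinate back onto the unpadded eye crop, before LandmarkProjectionCalculator maps it into the full image. A small Rust sketch of the letterbox-removal remapping; the struct and the (left, top, right, bottom) ordering are assumptions of mine:

```rust
#[derive(Clone, Copy, Debug)]
struct NormLandmark { x: f32, y: f32 }

/// Undoes FIT-mode letterboxing: `padding` holds the assumed
/// (left, top, right, bottom) fractions added around the crop.
fn remove_letterbox(lm: NormLandmark, padding: [f32; 4]) -> NormLandmark {
    let [left, top, right, bottom] = padding;
    NormLandmark {
        x: (lm.x - left) / (1.0 - left - right),
        y: (lm.y - top) / (1.0 - top - bottom),
    }
}

fn main() {
    // A wide eye crop letterboxed vertically: 20% padding on top and bottom.
    let padded = NormLandmark { x: 0.5, y: 0.5 };
    let unpadded = remove_letterbox(padded, [0.0, 0.2, 0.0, 0.2]);
    println!("{:?}", unpadded); // the centre stays at (0.5, 0.5)
}
```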
+# +# EXAMPLE: +# node { +# calculator: "IrisLandmarkGpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:eye_roi" +# input_stream: "IS_RIGHT_EYE:is_right_eye" +# output_stream: "EYE_CONTOUR_LANDMARKS:eye_contour_landmarks" +# output_stream: "IRIS_LANDMARKS:iris_landmarks" +# } + +type: "IrisLandmarkGpu" + +# GPU buffer. (GpuBuffer) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where an eye is located. +# (NormalizedRect) +input_stream: "ROI:roi" +# Is right eye. (bool) +# (Model is trained to detect left eye landmarks only, hence for right eye, +# flipping is required to immitate left eye.) +input_stream: "IS_RIGHT_EYE:is_right_eye" + +# TfLite model to detect iris landmarks. +# (std::unique_ptr>) +# NOTE: currently, mediapipe/modules/iris_landmark/iris_landmark.tflite model +# only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:model" + +# 71 refined normalized eye contour landmarks. (NormalizedLandmarkList) +output_stream: "EYE_CONTOUR_LANDMARKS:projected_eye_landmarks" +# 5 normalized iris landmarks. (NormalizedLandmarkList) +output_stream: "IRIS_LANDMARKS:projected_iris_landmarks" + +node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:roi" + output_stream: "IMAGE_GPU:eye_image" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + border_mode: BORDER_REPLICATE + } + } +} + +node { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:eye_image" + input_stream: "FLIP_HORIZONTALLY:is_right_eye" + output_stream: "IMAGE_GPU:transformed_eye_image" + output_stream: "LETTERBOX_PADDING:eye_letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 64 + output_height: 64 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on CPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_eye_image" + output_stream: "TENSORS_GPU:image_tensor" + options: { + [mediapipe.TfLiteConverterCalculatorOptions.ext] { + zero_center: false + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:image_tensor" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/iris_landmark/iris_landmark.tflite" + } + } +} + +# Splits a vector of TFLite tensors to multiple vectors according to the ranges +# specified in option. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "eye_landmarks_tensor" + output_stream: "iris_landmarks_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. 
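The decode step described above divides the raw tensor values by the model input size (64x64 here) and, when IS_RIGHT_EYE is true, mirrors x so the left-eye-only model still serves the right eye. A sketch of that decoding under those assumptions (x/y/z triplets; how z is scaled is a guess on my part):

```rust
/// Decodes a flat [x, y, z, x, y, z, ...] tensor into normalized landmarks,
/// dividing by the model input size and optionally mirroring x for the right eye.
fn tensor_to_landmarks(
    raw: &[f32],
    input_w: f32,
    input_h: f32,
    flip_horizontally: bool,
) -> Vec<(f32, f32, f32)> {
    raw.chunks_exact(3)
        .map(|c| {
            let mut x = c[0] / input_w;
            let y = c[1] / input_h;
            let z = c[2] / input_w; // assumed: depth scaled like x
            if flip_horizontally {
                x = 1.0 - x;
            }
            (x, y, z)
        })
        .collect()
}

fn main() {
    // 5 iris landmarks from a 64x64 model; values fabricated for illustration.
    let raw: Vec<f32> = (0..15).map(|i| i as f32).collect();
    let landmarks = tensor_to_landmarks(&raw, 64.0, 64.0, true);
    assert_eq!(landmarks.len(), 5);
    println!("{:?}", landmarks[0]);
}
```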
+node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:iris_landmarks_tensor" + input_stream: "FLIP_HORIZONTALLY:is_right_eye" + output_stream: "NORM_LANDMARKS:iris_landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 5 + input_image_width: 64 + input_image_height: 64 + } + } +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:eye_landmarks_tensor" + input_stream: "FLIP_HORIZONTALLY:is_right_eye" + output_stream: "NORM_LANDMARKS:eye_landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 71 + input_image_width: 64 + input_image_height: 64 + } + } +} + +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:0:iris_landmarks" + input_stream: "LANDMARKS:1:eye_landmarks" + input_stream: "LETTERBOX_PADDING:eye_letterbox_padding" + output_stream: "LANDMARKS:0:padded_iris_landmarks" + output_stream: "LANDMARKS:1:padded_eye_landmarks" +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:0:padded_iris_landmarks" + input_stream: "NORM_LANDMARKS:1:padded_eye_landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:0:projected_iris_landmarks" + output_stream: "NORM_LANDMARKS:1:projected_eye_landmarks" +} + diff --git a/mediapipe/modules/iris_landmark/iris_landmark_landmarks_to_roi.pbtxt b/mediapipe/modules/iris_landmark/iris_landmark_landmarks_to_roi.pbtxt new file mode 100644 index 0000000..fc53a16 --- /dev/null +++ b/mediapipe/modules/iris_landmark/iris_landmark_landmarks_to_roi.pbtxt @@ -0,0 +1,50 @@ +# MediaPipe subgraph to calculate region of interest (ROI) which is then can +# be used to calculate iris landmarks and eye contour landmarks. +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "IrisLandmarkLandmarksToRoi" + +# List of two normalized landmarks: left and right corners of an eye. +# (NormalizedLandmarkList) +input_stream: "LANDMARKS:landmarks" +# Image size. (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI (region of interest) within the given image where an eye is located. 
+# (NormalizedRect) +output_stream: "ROI:roi" + +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + output_stream: "DETECTION:detection" +} + +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:raw_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 + rotation_vector_end_keypoint_index: 1 + rotation_vector_target_angle_degrees: 0 + } + } +} + +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:raw_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 2.3 + scale_y: 2.3 + square_long: true + } + } +} diff --git a/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_cpu.pbtxt b/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_cpu.pbtxt new file mode 100644 index 0000000..7fb72de --- /dev/null +++ b/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_cpu.pbtxt @@ -0,0 +1,120 @@ +# MediaPipe subgraph to calculate iris landmarks and eye contour landmarks for +# two eyes: left and right. (CPU input, and inference is executed on CPU.) +# +# It is required that "iris_landmark.tflite" is available at +# "mediapipe/modules/iris_landmark/iris_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "IrisLandmarkLeftAndRightCpu" +# input_stream: "IMAGE:image" +# input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks" +# input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks" +# output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks" +# output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks" +# output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks" +# output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks" +# } + +type: "IrisLandmarkLeftAndRightCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# List of two landmarks defining LEFT eye boundaries - left and right corners. +# (NormalizedLandmarkList) +input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks" +# List of two landmarks defining RIGHT eye boundaries - left and right corners. +# (NormalizedLandmarkList) +input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks" + +# 71 normalized eye contour landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks" +# 5 normalized iris landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks" +# Region of interest used to do calculations for the left eye. (NormalizedRect) +output_stream: "LEFT_EYE_ROI:left_eye_roi" + +# 71 normalized eye contour landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks" +# 5 normalized iris landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks" +# Region of interest used to do calculations for the right eye. 
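A note on IrisLandmarkLandmarksToRoi above: the two eye corners become a detection, DetectionsToRectsCalculator derives a rect whose rotation aligns the corner-to-corner direction with the horizontal, and RectTransformationCalculator scales it by 2.3 and squares it on the long side. A simplified, rotation-free Rust sketch of the scale-and-square step in normalized coordinates (the real calculator also accounts for the image aspect ratio):

```rust
#[derive(Clone, Copy, Debug)]
struct NormRect { x_center: f32, y_center: f32, width: f32, height: f32 }

/// Builds an eye ROI from the two eye corners, then scales it and squares it
/// on the long side, roughly what RectTransformationCalculator does with
/// scale_x/scale_y = 2.3 and square_long = true (rotation omitted).
fn eye_roi(left: (f32, f32), right: (f32, f32), scale: f32) -> NormRect {
    let width = (right.0 - left.0).abs();
    let height = (right.1 - left.1).abs();
    let long_side = width.max(height) * scale;
    NormRect {
        x_center: (left.0 + right.0) / 2.0,
        y_center: (left.1 + right.1) / 2.0,
        width: long_side,
        height: long_side,
    }
}

fn main() {
    let roi = eye_roi((0.40, 0.50), (0.48, 0.51), 2.3);
    println!("{:?}", roi);
}
```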
(NormalizedRect) +output_stream: "RIGHT_EYE_ROI:right_eye_roi" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +### Processing left eye ### + +node { + calculator: "IrisLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:left_eye_boundary_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:left_eye_roi" +} + +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:left_eye_flag_side_packet" + options { + [mediapipe.ConstantSidePacketCalculatorOptions.ext] { + packet { bool_value: false } + } + } +} + +node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:image" + input_side_packet: "left_eye_flag_side_packet" + output_stream: "AT_TICK:left_eye_flag" +} + +node { + calculator: "IrisLandmarkCpu" + input_stream: "IMAGE:image" + input_stream: "ROI:left_eye_roi" + input_stream: "IS_RIGHT_EYE:left_eye_flag" + output_stream: "EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks" + output_stream: "IRIS_LANDMARKS:left_iris_landmarks" +} + +### Processing right eye ### + +node { + calculator: "IrisLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:right_eye_boundary_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:right_eye_roi" +} + +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:right_eye_flag_side_packet" + options { + [mediapipe.ConstantSidePacketCalculatorOptions.ext] { + packet { bool_value: true } + } + } +} + +node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:image" + input_side_packet: "right_eye_flag_side_packet" + output_stream: "AT_TICK:right_eye_flag" +} + +node { + calculator: "IrisLandmarkCpu" + input_stream: "IMAGE:image" + input_stream: "ROI:right_eye_roi" + input_stream: "IS_RIGHT_EYE:right_eye_flag" + output_stream: "EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks" + output_stream: "IRIS_LANDMARKS:right_iris_landmarks" +} + diff --git a/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_gpu.pbtxt b/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_gpu.pbtxt new file mode 100644 index 0000000..eeff026 --- /dev/null +++ b/mediapipe/modules/iris_landmark/iris_landmark_left_and_right_gpu.pbtxt @@ -0,0 +1,120 @@ +# MediaPipe subgraph to calculate iris landmarks and eye contour landmarks for +# two eyes: left and right. (GPU input, and inference is executed on GPU.) +# +# It is required that "iris_landmark.tflite" is available at +# "mediapipe/modules/iris_landmark/iris_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "IrisLandmarkLeftAndRightGpu" +# input_stream: "IMAGE:image" +# input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks" +# input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks" +# output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks" +# output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks" +# output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks" +# output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks" +# } + +type: "IrisLandmarkLeftAndRightGpu" + +# GPU buffer. (GpuBuffer) +input_stream: "IMAGE:image" +# List of two landmarks defining LEFT eye boundaries - left and right corners. +# (NormalizedLandmarkList) +input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks" +# List of two landmarks defining RIGHT eye boundaries - left and right corners. 
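A note on the left/right plumbing in IrisLandmarkLeftAndRightCpu above: the same single-eye subgraph is reused by feeding it a constant IS_RIGHT_EYE flag. ConstantSidePacketCalculator holds the bool (false for the left eye, true for the right) and SidePacketToStreamCalculator re-emits it at every IMAGE timestamp so it can be consumed as a stream. A tiny Rust sketch of that constant-replayed-at-each-tick pattern:

```rust
/// Pairs a constant side-packet value with every tick of a stream,
/// mirroring SidePacketToStreamCalculator's AT_TICK behaviour.
fn at_tick<T: Copy>(ticks: &[u64], constant: T) -> Vec<(u64, T)> {
    ticks.iter().map(|&t| (t, constant)).collect()
}

fn main() {
    let frame_timestamps = [0u64, 33_333, 66_666];
    let is_right_eye = at_tick(&frame_timestamps, true);
    for (ts, flag) in is_right_eye {
        println!("t={} is_right_eye={}", ts, flag);
    }
}
```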
+# (NormalizedLandmarkList) +input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks" + +# 71 normalized eye contour landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks" +# 5 normalized iris landmarks. (NormalizedLandmarkList) +output_stream: "LEFT_EYE_IRIS_LANDMARKS:left_iris_landmarks" +# Region of interest used to do calculations for the left eye. (NormalizedRect) +output_stream: "LEFT_EYE_ROI:left_eye_roi" + +# 71 normalized eye contour landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks" +# 5 normalized iris landmarks. (NormalizedLandmarkList) +output_stream: "RIGHT_EYE_IRIS_LANDMARKS:right_iris_landmarks" +# Region of interest used to do calculations for the right eye. (NormalizedRect) +output_stream: "RIGHT_EYE_ROI:right_eye_roi" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +### Processing left eye ### + +node { + calculator: "IrisLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:left_eye_boundary_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:left_eye_roi" +} + +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:left_eye_flag_side_packet" + options { + [mediapipe.ConstantSidePacketCalculatorOptions.ext] { + packet { bool_value: false } + } + } +} + +node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:image" + input_side_packet: "left_eye_flag_side_packet" + output_stream: "AT_TICK:left_eye_flag" +} + +node { + calculator: "IrisLandmarkGpu" + input_stream: "IMAGE:image" + input_stream: "ROI:left_eye_roi" + input_stream: "IS_RIGHT_EYE:left_eye_flag" + output_stream: "EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks" + output_stream: "IRIS_LANDMARKS:left_iris_landmarks" +} + +### Processing right eye ### + +node { + calculator: "IrisLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:right_eye_boundary_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:right_eye_roi" +} + +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:right_eye_flag_side_packet" + options { + [mediapipe.ConstantSidePacketCalculatorOptions.ext] { + packet { bool_value: true } + } + } +} + +node { + calculator: "SidePacketToStreamCalculator" + input_stream: "TICK:image" + input_side_packet: "right_eye_flag_side_packet" + output_stream: "AT_TICK:right_eye_flag" +} + +node { + calculator: "IrisLandmarkGpu" + input_stream: "IMAGE:image" + input_stream: "ROI:right_eye_roi" + input_stream: "IS_RIGHT_EYE:right_eye_flag" + output_stream: "EYE_CONTOUR_LANDMARKS:right_eye_contour_landmarks" + output_stream: "IRIS_LANDMARKS:right_iris_landmarks" +} + diff --git a/mediapipe/modules/objectron/BUILD b/mediapipe/modules/objectron/BUILD new file mode 100644 index 0000000..cee5768 --- /dev/null +++ b/mediapipe/modules/objectron/BUILD @@ -0,0 +1,183 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "object_detection_3d_camera.tflite", + "object_detection_3d_chair.tflite", + "object_detection_3d_chair_1stage.tflite", + "object_detection_3d_cup.tflite", + "object_detection_3d_sneakers.tflite", + "object_detection_3d_sneakers_1stage.tflite", + "object_detection_oidv4_labelmap.txt", + "object_detection_ssd_mobilenetv2_oidv4_fp16.tflite", +]) + +mediapipe_simple_subgraph( + name = "objectron_detection_1stage_gpu", + graph = "objectron_detection_1stage_gpu.pbtxt", + register_as = "ObjectronDetection1StageSubgraphGpu", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/modules/objectron/calculators:tflite_tensors_to_objects_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "objectron_tracking_1stage_gpu", + graph = "objectron_tracking_1stage_gpu.pbtxt", + register_as = "ObjectronTracking1StageSubgraphGpu", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/video:box_tracker_calculator", + "//mediapipe/calculators/video:flow_packager_calculator", + "//mediapipe/calculators/video:motion_analysis_calculator", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", + "//mediapipe/modules/objectron/calculators:frame_annotation_to_timed_box_list_calculator", + "//mediapipe/modules/objectron/calculators:frame_annotation_tracker_calculator", + "//mediapipe/modules/objectron/calculators:lift_2d_frame_annotation_to_3d_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "box_landmark_gpu", + graph = "box_landmark_gpu.pbtxt", + register_as = "BoxLandmarkSubgraph", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "box_landmark_cpu", + graph = "box_landmark_cpu.pbtxt", + register_as = "BoxLandmarkSubgraph", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + 
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "object_detection_oid_v4_gpu", + graph = "object_detection_oid_v4_gpu.pbtxt", + register_as = "ObjectDetectionOidV4Subgraph", + deps = [ + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_label_id_to_text_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + "//mediapipe/modules/objectron/calculators:filter_detection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "object_detection_oid_v4_cpu", + graph = "object_detection_oid_v4_cpu.pbtxt", + register_as = "ObjectDetectionOidV4Subgraph", + deps = [ + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_label_id_to_text_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + "//mediapipe/modules/objectron/calculators:filter_detection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "objectron_cpu", + graph = "objectron_cpu.pbtxt", + register_as = "ObjectronCpuSubgraph", + deps = [ + ":box_landmark_cpu", + ":object_detection_oid_v4_cpu", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/modules/objectron/calculators:frame_annotation_to_rect_calculator", + "//mediapipe/modules/objectron/calculators:landmarks_to_frame_annotation_calculator", + "//mediapipe/modules/objectron/calculators:lift_2d_frame_annotation_to_3d_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "objectron_gpu", + graph = "objectron_gpu.pbtxt", + register_as = "ObjectronGpuSubgraph", + deps = [ + ":box_landmark_gpu", + ":object_detection_oid_v4_gpu", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + 
"//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/modules/objectron/calculators:frame_annotation_to_rect_calculator", + "//mediapipe/modules/objectron/calculators:landmarks_to_frame_annotation_calculator", + "//mediapipe/modules/objectron/calculators:lift_2d_frame_annotation_to_3d_calculator", + ], +) diff --git a/mediapipe/modules/objectron/README.md b/mediapipe/modules/objectron/README.md new file mode 100644 index 0000000..00883fe --- /dev/null +++ b/mediapipe/modules/objectron/README.md @@ -0,0 +1,6 @@ +# objectron + +Subgraphs|Details +:--- | :--- +[`ObjectronCpuSubgraph`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/objectron_cpu.pbtxt)| Detects and tracks 3D bounding boxes for objects. (CPU input, and inference is executed on CPU.) +[`ObjectronGpuSubgraph`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/objectron/objectron_gpu.pbtxt)| Detects and tracks 3D bounding boxes for objects. (GPU input, and inference is executed on GPU.) diff --git a/mediapipe/modules/objectron/box_landmark_cpu.pbtxt b/mediapipe/modules/objectron/box_landmark_cpu.pbtxt new file mode 100644 index 0000000..bb638d1 --- /dev/null +++ b/mediapipe/modules/objectron/box_landmark_cpu.pbtxt @@ -0,0 +1,147 @@ +# MediaPipe Box landmark localization CPU subgraph. + +type: "BoxLandmarkSubgraph" + +input_stream: "IMAGE:image" +input_stream: "NORM_RECT:box_rect" +input_side_packet: "MODEL:model" +output_stream: "NORM_LANDMARKS:box_landmarks" + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Expands the rectangle that contain the box so that it's likely to cover the +# entire box. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:box_rect" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "box_rect_scaled" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} + +# Crops, resizes, and converts the input video into tensor. +# Preserves aspect ratio of the images. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:box_rect_scaled" + output_stream: "TENSORS:image_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 224 + output_tensor_height: 224 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + gpu_origin: TOP_LEFT + border_mode: BORDER_REPLICATE + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:image_tensor" + input_side_packet: "MODEL:model" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { xnnpack {} } + } + } +} + +# Splits a vector of tensors into multiple vectors. 
+node { + calculator: "SplitTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "box_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the box-flag tensor into a float that represents the confidence +# score of box presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:box_flag_tensor" + output_stream: "FLOAT:box_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a box is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:box_presence_score" + output_stream: "FLAG:box_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.99 + } + } +} + +# Drops landmarks tensors if box is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:box_presence" + output_stream: "gated_landmark_tensors" +} + +# Decodes the landmark tensors into a list of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:gated_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 9 + input_image_width: 224 + input_image_height: 224 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed box +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (box +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:scaled_landmarks" +} + +# Projects the landmarks from the cropped box image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:scaled_landmarks" + input_stream: "NORM_RECT:box_rect_scaled" + output_stream: "NORM_LANDMARKS:box_landmarks" +} diff --git a/mediapipe/modules/objectron/box_landmark_gpu.pbtxt b/mediapipe/modules/objectron/box_landmark_gpu.pbtxt new file mode 100644 index 0000000..ac95880 --- /dev/null +++ b/mediapipe/modules/objectron/box_landmark_gpu.pbtxt @@ -0,0 +1,147 @@ +# MediaPipe Box landmark localization GPU subgraph. + +type: "BoxLandmarkSubgraph" + +input_stream: "IMAGE:image" +input_stream: "NORM_RECT:box_rect" +output_stream: "NORM_LANDMARKS:box_landmarks" + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Expands the rectangle that contain the box so that it's likely to cover the +# entire box. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:box_rect" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "box_rect_scaled" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} + +# Crops, resizes, and converts the input video into tensor. +# Preserves aspect ratio of the images. 
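A note on the gating in the CPU box-landmark graph above: the model's second output head is a box-presence score; ThresholdingCalculator (threshold 0.99) and GateCalculator drop the landmark tensors whenever the score falls below it, so downstream nodes simply see no packet for that frame. A small Rust sketch of that gate, with made-up types:

```rust
/// Passes the landmarks through only when the presence score clears the
/// threshold, mirroring the Thresholding + Gate pair in the box-landmark graph.
fn gate_landmarks(
    landmarks: Vec<[f32; 3]>,
    presence_score: f32,
    threshold: f32,
) -> Option<Vec<[f32; 3]>> {
    if presence_score >= threshold {
        Some(landmarks)
    } else {
        None // no packet is emitted for this frame
    }
}

fn main() {
    let box_landmarks = vec![[0.1, 0.2, 0.0]; 9]; // 9 box keypoints
    assert!(gate_landmarks(box_landmarks.clone(), 0.995, 0.99).is_some());
    assert!(gate_landmarks(box_landmarks, 0.42, 0.99).is_none());
}
```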
+node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:box_rect_scaled" + output_stream: "TENSORS:image_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 224 + output_tensor_height: 224 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + gpu_origin: TOP_LEFT + border_mode: BORDER_REPLICATE + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "object_detection_3d.tflite" + delegate { gpu {} } + } + } +} + +# Splits a vector of tensors to multiple vectors according to the ranges +# specified in option. +node { + calculator: "SplitTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "box_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the box-flag tensor into a float that represents the confidence +# score of box presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:box_flag_tensor" + output_stream: "FLOAT:box_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a box is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:box_presence_score" + output_stream: "FLAG:box_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.99 + } + } +} + +# Drops landmarks tensors if box is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:box_presence" + output_stream: "gated_landmark_tensors" +} + +# Decodes the landmark tensors into a list of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:gated_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 9 + input_image_width: 224 + input_image_height: 224 + } + } +} + +# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed box +# image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (box +# image before image transformation). +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:scaled_landmarks" +} + +# Projects the landmarks from the cropped box image to the corresponding +# locations on the full image before cropping (input to the graph). 
+node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:scaled_landmarks" + input_stream: "NORM_RECT:box_rect_scaled" + output_stream: "NORM_LANDMARKS:box_landmarks" +} diff --git a/mediapipe/modules/objectron/calculators/BUILD b/mediapipe/modules/objectron/calculators/BUILD new file mode 100644 index 0000000..fb75eb3 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/BUILD @@ -0,0 +1,407 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") +load("//mediapipe/framework:mediapipe_register_type.bzl", "mediapipe_register_type") + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_proto_library( + name = "object_proto", + srcs = ["object.proto"], + visibility = ["//visibility:public"], +) + +mediapipe_proto_library( + name = "a_r_capture_metadata_proto", + srcs = ["a_r_capture_metadata.proto"], + visibility = ["//visibility:public"], +) + +mediapipe_proto_library( + name = "annotation_proto", + srcs = ["annotation_data.proto"], + def_options_lib = False, + visibility = ["//visibility:public"], + deps = [ + ":a_r_capture_metadata_proto", + ":object_proto", + ], +) + +mediapipe_register_type( + base_name = "annotation", + include_headers = ["mediapipe/modules/objectron/calculators/annotation_data.pb.h"], + types = [ + "::mediapipe::FrameAnnotation", + ], + deps = [":annotation_cc_proto"], +) + +mediapipe_proto_library( + name = "camera_parameters_proto", + srcs = ["camera_parameters.proto"], + visibility = ["//visibility:public"], +) + +mediapipe_proto_library( + name = "frame_annotation_tracker_calculator_proto", + srcs = ["frame_annotation_tracker_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], +) + +mediapipe_proto_library( + name = "belief_decoder_config_proto", + srcs = ["belief_decoder_config.proto"], + visibility = ["//visibility:public"], +) + +mediapipe_proto_library( + name = "tflite_tensors_to_objects_calculator_proto", + srcs = ["tflite_tensors_to_objects_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + ":belief_decoder_config_proto", + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], +) + +mediapipe_proto_library( + name = "tensors_to_objects_calculator_proto", + srcs = ["tensors_to_objects_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + ":belief_decoder_config_proto", + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], +) + +mediapipe_proto_library( + name = "lift_2d_frame_annotation_to_3d_calculator_proto", + srcs = ["lift_2d_frame_annotation_to_3d_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + ":belief_decoder_config_proto", + "//mediapipe/framework:calculator_options_proto", + 
"//mediapipe/framework:calculator_proto", + ], +) + +mediapipe_proto_library( + name = "frame_annotation_to_rect_calculator_proto", + srcs = ["frame_annotation_to_rect_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], +) + +mediapipe_proto_library( + name = "filter_detection_calculator_proto", + srcs = ["filter_detection_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + ], +) + +cc_library( + name = "box_util", + srcs = ["box_util.cc"], + hdrs = ["box_util.h"], + deps = [ + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/util/tracking:box_tracker_cc_proto", + ], +) + +cc_library( + name = "frame_annotation_tracker", + srcs = ["frame_annotation_tracker.cc"], + hdrs = ["frame_annotation_tracker.h"], + deps = [ + ":annotation_cc_proto", + ":box_util", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/util/tracking:box_tracker_cc_proto", + "@com_google_absl//absl/container:btree", + "@com_google_absl//absl/container:flat_hash_set", + ], +) + +cc_library( + name = "epnp", + srcs = [ + "epnp.cc", + ], + hdrs = [ + "epnp.h", + ], + deps = [ + "//mediapipe/framework/port:logging", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", + "@eigen_archive//:eigen3", + ], +) + +cc_library( + name = "decoder", + srcs = [ + "decoder.cc", + ], + hdrs = [ + "decoder.h", + ], + deps = [ + ":annotation_cc_proto", + ":belief_decoder_config_cc_proto", + ":box", + ":epnp", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/status", + "@eigen_archive//:eigen3", + ], +) + +cc_library( + name = "tensor_util", + srcs = [ + "tensor_util.cc", + ], + hdrs = [ + "tensor_util.h", + ], + deps = [ + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "@org_tensorflow//tensorflow/lite:framework", + ], +) + +cc_library( + name = "box", + srcs = [ + "box.cc", + "model.cc", + ], + hdrs = [ + "box.h", + "model.h", + "types.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":annotation_cc_proto", + ":object_cc_proto", + "//mediapipe/framework/port:logging", + "@eigen_archive//:eigen3", + ], +) + +cc_library( + name = "frame_annotation_to_timed_box_list_calculator", + srcs = ["frame_annotation_to_timed_box_list_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":annotation_cc_proto", + ":box_util", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/util/tracking:box_tracker_cc_proto", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) + +cc_library( + name = "frame_annotation_tracker_calculator", + srcs = ["frame_annotation_tracker_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":annotation_cc_proto", + ":frame_annotation_tracker", + ":frame_annotation_tracker_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + 
"//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/util/tracking:box_tracker_cc_proto", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) + +cc_library( + name = "tflite_tensors_to_objects_calculator", + srcs = ["tflite_tensors_to_objects_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":annotation_cc_proto", + ":belief_decoder_config_cc_proto", + ":decoder", + ":tensor_util", + ":tflite_tensors_to_objects_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:ret_check", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:span", + "@eigen_archive//:eigen3", + "@org_tensorflow//tensorflow/lite:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "tensors_to_objects_calculator", + srcs = ["tensors_to_objects_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":annotation_cc_proto", + ":belief_decoder_config_cc_proto", + ":decoder", + ":tensor_util", + ":tensors_to_objects_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:ret_check", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:span", + "@eigen_archive//:eigen3", + ], + alwayslink = 1, +) + +cc_library( + name = "lift_2d_frame_annotation_to_3d_calculator", + srcs = ["lift_2d_frame_annotation_to_3d_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":annotation_cc_proto", + ":belief_decoder_config_cc_proto", + ":decoder", + ":lift_2d_frame_annotation_to_3d_calculator_cc_proto", + ":tensor_util", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:ret_check", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:span", + "@eigen_archive//:eigen3", + "@org_tensorflow//tensorflow/lite:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "frame_annotation_to_rect_calculator", + srcs = ["frame_annotation_to_rect_calculator.cc"], + deps = [ + ":annotation_cc_proto", + ":frame_annotation_to_rect_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/memory", + "@eigen_archive//:eigen3", + ], + alwayslink = 1, +) + +cc_library( + name = "landmarks_to_frame_annotation_calculator", + srcs = ["landmarks_to_frame_annotation_calculator.cc"], + deps = [ + ":annotation_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) + +cc_library( + name = "filter_detection_calculator", + srcs = ["filter_detection_calculator.cc"], + deps = [ + ":filter_detection_calculator_cc_proto", + 
"//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:map_util", + "//mediapipe/framework/port:re2", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_test( + name = "box_util_test", + srcs = ["box_util_test.cc"], + deps = [ + ":box_util", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/util/tracking:box_tracker_cc_proto", + ], +) + +cc_test( + name = "frame_annotation_tracker_test", + srcs = ["frame_annotation_tracker_test.cc"], + deps = [ + ":annotation_cc_proto", + ":frame_annotation_tracker", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:logging", + "//mediapipe/util/tracking:box_tracker_cc_proto", + "@com_google_absl//absl/container:flat_hash_set", + ], +) diff --git a/mediapipe/modules/objectron/calculators/a_r_capture_metadata.proto b/mediapipe/modules/objectron/calculators/a_r_capture_metadata.proto new file mode 100644 index 0000000..edc8c4b --- /dev/null +++ b/mediapipe/modules/objectron/calculators/a_r_capture_metadata.proto @@ -0,0 +1,551 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +// Info about the camera characteristics used to capture images and depth data. +// See developer.apple.com/documentation/avfoundation/avcameracalibrationdata +// for more information. +message AVCameraCalibrationData { + // 3x3 row-major matrix relating a camera's internal properties to an ideal + // pinhole-camera model. + // See + // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881135-intrinsicmatrix + // for detailed usage information. + repeated float intrinsic_matrix = 1 [packed = true]; + + // The image dimensions to which the intrinsic_matrix values are relative. + optional float intrinsic_matrix_reference_dimension_width = 2; + optional float intrinsic_matrix_reference_dimension_height = 3; + + // 3x4 row-major matrix relating a camera's position and orientation to a + // world or scene coordinate system. Consists of a unitless 3x3 rotation + // matrix (R) on the left and a translation (t) 3x1 vector on the right. The + // translation vector's units are millimeters. For example: + // + // |r1,1 r2,1 r3,1 | t1| + // [R | t] = |r1,2 r2,2 r3,2 | t2| + // |r1,3 r2,3 r3,3 | t3| + // + // is stored as [r11, r21, r31, t1, r12, r22, r32, t2, ...] + // + // See + // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881130-extrinsicmatrix?language=objc + // for more information. + repeated float extrinsic_matrix = 4 [packed = true]; + + // The size, in millimeters, of one image pixel. 
+ optional float pixel_size = 5; + + // A list of floating-point values describing radial distortions imparted by + // the camera lens, for use in rectifying camera images. + // See + // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881129-lensdistortionlookuptable?language=objc + // for more information. + repeated float lens_distortion_lookup_values = 6 [packed = true]; + + // A list of floating-point values describing radial distortions for use in + // reapplying camera geometry to a rectified image. + // See + // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881132-inverselensdistortionlookuptable?language=objc + // for more information. + repeated float inverse_lens_distortion_lookup_values = 7 [packed = true]; + + // The offset of the distortion center of the camera lens from the top-left + // corner of the image. + // See + // developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881131-lensdistortioncenter?language=objc + // for more information. + optional float lens_distortion_center_x = 8; + optional float lens_distortion_center_y = 9; +} + +// Container for depth data information. +// See developer.apple.com/documentation/avfoundation/avdepthdata for more info. +message AVDepthData { + // PNG representation of the grayscale depth data map. See discussion about + // depth_data_map_original_minimum_value, below, for information about how + // to interpret the pixel values. + optional bytes depth_data_map = 1; + + // Pixel format type of the original captured depth data. + // See + // developer.apple.com/documentation/corevideo/1563591-pixel_format_identifiers?language=objc + // for the complete list of possible pixel format types. This value represents + // a string for the associated OSType/FourCharCode. + optional string depth_data_type = 2; + + // Indicates the general accuracy of the depth_data_map. + // See developer.apple.com/documentation/avfoundation/avdepthdataaccuracy for + // more information. + enum Accuracy { + UNDEFINED_ACCURACY = 0; + // Values in the depth map are usable for foreground/background separation + // but are not absolutely accurate in the physical world. + RELATIVE = 1; + // Values in the depth map are absolutely accurate in the physical world. + ABSOLUTE = 2; + } + optional Accuracy depth_data_accuracy = 3 [default = RELATIVE]; + + // Indicates whether the depth_data_map contains temporally smoothed data. + optional bool depth_data_filtered = 4; + + // Quality of the depth_data_map. + enum Quality { + UNDEFINED_QUALITY = 0; + HIGH = 1; + LOW = 2; + } + optional Quality depth_data_quality = 5; + + // Associated calibration data for the depth_data_map. + optional AVCameraCalibrationData camera_calibration_data = 6; + + // The original range of values expressed by the depth_data_map, before + // grayscale normalization. For example, if the minimum and maximum values + // indicate a range of [0.5, 2.2], and the depth_data_type value indicates + // it was a depth map, then white pixels (255, 255, 255) will map to 0.5 and + // black pixels (0, 0, 0) will map to 2.2 with the grayscale range linearly + // interpolated inbetween. Conversely, if the depth_data_type value indicates + // it was a disparity map, then white pixels will map to 2.2 and black pixels + // will map to 0.5. + optional float depth_data_map_original_minimum_value = 7; + optional float depth_data_map_original_maximum_value = 8; + + // The width of the depth buffer map. 
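Following the depth_data_map description above: the grayscale pixels are a linear rescaling of the original [min, max] range, with white mapping to the minimum for depth maps and to the maximum for disparity maps. A Rust sketch of recovering the original value from an 8-bit gray pixel under that description:

```rust
/// Recovers the original depth/disparity value from an 8-bit grayscale pixel,
/// following the proto comment: for depth maps white maps to the minimum and
/// black to the maximum; for disparity maps the mapping is reversed.
fn denormalize(gray: u8, min: f32, max: f32, is_disparity: bool) -> f32 {
    let t = gray as f32 / 255.0;
    if is_disparity {
        min + t * (max - min) // white -> max
    } else {
        max - t * (max - min) // white -> min
    }
}

fn main() {
    // Example range from the comment above: [0.5, 2.2].
    assert!((denormalize(255, 0.5, 2.2, false) - 0.5).abs() < 1e-6);
    assert!((denormalize(0, 0.5, 2.2, false) - 2.2).abs() < 1e-6);
    println!("mid-gray depth: {:.3}", denormalize(128, 0.5, 2.2, false));
}
```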
+ optional int32 depth_data_map_width = 9; + + // The height of the depth buffer map. + optional int32 depth_data_map_height = 10; + + // The row-major flattened array of the depth buffer map pixels. This will be + // either a float32 or float16 byte array, depending on 'depth_data_type'. + optional bytes depth_data_map_raw_values = 11; +} + +// Estimated scene lighting information associated with a captured video frame. +// See developer.apple.com/documentation/arkit/arlightestimate for more info. +message ARLightEstimate { + // The estimated intensity, in lumens, of ambient light throughout the scene. + optional double ambient_intensity = 1; + + // The estimated color temperature, in degrees Kelvin, of ambient light + // throughout the scene. + optional double ambient_color_temperature = 2; + + // Data describing the estimated lighting environment in all directions. + // Second-level spherical harmonics in separate red, green, and blue data + // planes. Thus, this buffer contains 3 sets of 9 coefficients, or a total of + // 27 values. + // See + // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928222-sphericalharmonicscoefficients?language=objc + // for more information. + repeated float spherical_harmonics_coefficients = 3 [packed = true]; + + message DirectionVector { + optional float x = 1; + optional float y = 2; + optional float z = 3; + } + // A vector indicating the orientation of the strongest directional light + // source, normalized in the world-coordinate space. + // See + // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928221-primarylightdirection?language=objc + // for more information; + optional DirectionVector primary_light_direction = 4; + + // The estimated intensity, in lumens, of the strongest directional light + // source in the scene. + // See + // https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928219-primarylightintensity?language=objc + // for more information. + optional float primary_light_intensity = 5; +} + +// Information about the camera position and imaging characteristics for a +// captured video frame. +// See developer.apple.com/documentation/arkit/arcamera for more information. +message ARCamera { + // The general quality of position tracking available when the camera captured + // a frame. + enum TrackingState { + UNDEFINED_TRACKING_STATE = 0; + // Camera position tracking is not available. + UNAVAILABLE = 1; + // Tracking is available, but the quality of results is questionable. + LIMITED = 2; + // Camera position tracking is providing optimal results. + NORMAL = 3; + } + optional TrackingState tracking_state = 1 [default = UNAVAILABLE]; + + // A possible diagnosis for limited position tracking quality as of when the + // frame was captured. + enum TrackingStateReason { + UNDEFINED_TRACKING_STATE_REASON = 0; + // The current tracking state is not limited. + NONE = 1; + // Not yet enough camera or motion data to provide tracking information. + INITIALIZING = 2; + // The device is moving too fast for accurate image-based position tracking. + EXCESSIVE_MOTION = 3; + // Not enough distinguishable features for image-based position tracking. + INSUFFICIENT_FEATURES = 4; + // Tracking is limited due to a relocalization in progress. + RELOCALIZING = 5; + } + optional TrackingStateReason tracking_state_reason = 2 [default = NONE]; + + // 4x4 row-major matrix expressing position and orientation of the camera in + // world coordinate space. 
+ // See developer.apple.com/documentation/arkit/arcamera/2866108-transform for + // more information. + repeated float transform = 3 [packed = true]; + + // The orientation of the camera, expressed as roll, pitch, and yaw values. + message EulerAngles { + optional float roll = 1; + optional float pitch = 2; + optional float yaw = 3; + } + optional EulerAngles euler_angles = 4; + + // The width and height, in pixels, of the captured camera image. + optional int32 image_resolution_width = 5; + optional int32 image_resolution_height = 6; + + // 3x3 row-major matrix that converts between the 2D camera plane and 3D world + // coordinate space. + // See developer.apple.com/documentation/arkit/arcamera/2875730-intrinsics for + // usage information. + repeated float intrinsics = 7 [packed = true]; + + // 4x4 row-major transform matrix appropriate for rendering 3D content to + // match the image captured by the camera. + // See + // developer.apple.com/documentation/arkit/arcamera/2887458-projectionmatrix + // for usage information. + repeated float projection_matrix = 8 [packed = true]; + + // 4x4 row-major transform matrix appropriate for converting from world-space + // to camera space. Relativized for the captured_image orientation (i.e. + // UILandscapeOrientationRight). + // See + // https://developer.apple.com/documentation/arkit/arcamera/2921672-viewmatrixfororientation?language=objc + // for more information. + repeated float view_matrix = 9 [packed = true]; +} + +// Container for a 3D mesh describing face topology. +message ARFaceGeometry { + // Each vertex represents a 3D point in the face mesh, in the face coordinate + // space. + // See developer.apple.com/documentation/arkit/arfacegeometry/2928201-vertices + // for more information. + message Vertex { + optional float x = 1; + optional float y = 2; + optional float z = 3; + } + repeated Vertex vertices = 1; + + // The number of elements in the vertices list. + optional int32 vertex_count = 2; + + // Each texture coordinate represents UV texture coordinates for the vertex at + // the corresponding index in the vertices buffer. + // See + // developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates + // for more information. + message TextureCoordinate { + optional float u = 1; + optional float v = 2; + } + repeated TextureCoordinate texture_coordinates = 3; + + // The number of elements in the texture_coordinates list. + optional int32 texture_coordinate_count = 4; + + // Each integer value in this ordered list represents an index into the + // vertices and texture_coordinates lists. Each set of three indices + // identifies the vertices comprising a single triangle in the mesh. Each set + // of three indices forms a triangle, so the number of indices in the + // triangle_indices buffer is three times the triangle_count value. + // See + // developer.apple.com/documentation/arkit/arfacegeometry/2928199-triangleindices + // for more information. + repeated int32 triangle_indices = 5 [packed = true]; + + // The number of triangles described by the triangle_indices buffer. + // See + // developer.apple.com/documentation/arkit/arfacegeometry/2928207-trianglecount + // for more information. + optional int32 triangle_count = 6; +} + +// Contains a list of blend shape entries wherein each item maps a specific +// blend shape location to its associated coefficient. +message ARBlendShapeMap { + message MapEntry { + // Identifier for the specific facial feature. 
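+      // (Illustrative examples only, e.g. "eyeBlinkLeft" or "jawOpen".)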
+ // See developer.apple.com/documentation/arkit/arblendshapelocation for a + // complete list of identifiers. + optional string blend_shape_location = 1; + + // Indicates the current position of the feature relative to its neutral + // configuration, ranging from 0.0 (neutral) to 1.0 (maximum movement). + optional float blend_shape_coefficient = 2; + } + repeated MapEntry entries = 1; +} + +// Information about the pose, topology, and expression of a detected face. +// See developer.apple.com/documentation/arkit/arfaceanchor for more info. +message ARFaceAnchor { + // A coarse triangle mesh representing the topology of the detected face. + optional ARFaceGeometry geometry = 1; + + // A map of named coefficients representing the detected facial expression in + // terms of the movement of specific facial features. + optional ARBlendShapeMap blend_shapes = 2; + + // 4x4 row-major matrix encoding the position, orientation, and scale of the + // anchor relative to the world coordinate space. + // See + // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform?language=objc + // for more information. + repeated float transform = 3; + + // Indicates whether the anchor's transform is valid. Frames that have a face + // anchor with this value set to NO should probably be ignored. + optional bool is_tracked = 4; +} + +// Container for a 3D mesh. +message ARPlaneGeometry { + message Vertex { + optional float x = 1; + optional float y = 2; + optional float z = 3; + } + + // Each texture coordinate represents UV texture coordinates for the vertex at + // the corresponding index in the vertices buffer. + // See + // https://developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates + // for more information. + message TextureCoordinate { + optional float u = 1; + optional float v = 2; + } + + // A buffer of vertex positions for each point in the plane mesh. + repeated Vertex vertices = 1; + + // The number of elements in the vertices buffer. + optional int32 vertex_count = 2; + + // A buffer of texture coordinate values for each point in the plane mesh. + repeated TextureCoordinate texture_coordinates = 3; + + // The number of elements in the texture_coordinates buffer. + optional int32 texture_coordinate_count = 4; + + // Each integer value in this ordered list represents an index into the + // vertices and texture_coordinates lists. Each set of three indices + // identifies the vertices comprising a single triangle in the mesh. Each set + // of three indices forms a triangle, so the number of indices in the + // triangle_indices buffer is three times the triangle_count value. + // See + // https://developer.apple.com/documentation/arkit/arplanegeometry/2941051-triangleindices + // for more information. + repeated int32 triangle_indices = 5 [packed = true]; + + // Each set of three indices forms a triangle, so the number of indices in the + // triangle_indices buffer is three times the triangle_count value. + // See + // https://developer.apple.com/documentation/arkit/arplanegeometry/2941058-trianglecount + // for more information. + optional int32 triangle_count = 6; + + // Each value in this buffer represents the position of a vertex along the + // boundary polygon of the estimated plane. The owning plane anchor's + // transform matrix defines the coordinate system for these points. + // See + // https://developer.apple.com/documentation/arkit/arplanegeometry/2941052-boundaryvertices + // for more information. 
+ repeated Vertex boundary_vertices = 7; + + // The number of elements in the boundary_vertices buffer. + optional int32 boundary_vertex_count = 8; +} + +// Information about the position and orientation of a real-world flat surface. +// See https://developer.apple.com/documentation/arkit/arplaneanchor for more +// information. +message ARPlaneAnchor { + enum Alignment { + UNDEFINED = 0; + // The plane is perpendicular to gravity. + HORIZONTAL = 1; + // The plane is parallel to gravity. + VERTICAL = 2; + } + + // Wrapper for a 3D point / vector within the plane. See extent and center + // values for more information. + message PlaneVector { + optional float x = 1; + optional float y = 2; + optional float z = 3; + } + + enum PlaneClassification { + NONE = 0; + WALL = 1; + FLOOR = 2; + CEILING = 3; + TABLE = 4; + SEAT = 5; + } + + // The classification status for the plane. + enum PlaneClassificationStatus { + // The classfication process for the plane anchor has completed but the + // result is inconclusive. + UNKNOWN = 0; + // No classication information can be provided (set on error or if the + // device does not support plane classification). + UNAVAILABLE = 1; + // The classification process has not completed. + UNDETERMINED = 2; + // The classfication process for the plane anchor has completed. + KNOWN = 3; + } + + // The ID of the plane. + optional string identifier = 1; + + // 4x4 row-major matrix encoding the position, orientation, and scale of the + // anchor relative to the world coordinate space. + // See + // https://developer.apple.com/documentation/arkit/aranchor/2867981-transform + // for more information. + repeated float transform = 2; + + // The general orientation of the detected plane with respect to gravity. + optional Alignment alignment = 3; + + // A coarse triangle mesh representing the general shape of the detected + // plane. + optional ARPlaneGeometry geometry = 4; + + // The center point of the plane relative to its anchor position. + // Although the type of this property is a 3D vector, a plane anchor is always + // two-dimensional, and is always positioned in only the x and z directions + // relative to its transform position. (That is, the y-component of this + // vector is always zero.) + // See + // https://developer.apple.com/documentation/arkit/arplaneanchor/2882056-center + // for more information. + optional PlaneVector center = 5; + + // The estimated width and length of the detected plane. + // See + // https://developer.apple.com/documentation/arkit/arplaneanchor/2882055-extent + // for more information. + optional PlaneVector extent = 6; + + // A Boolean value that indicates whether plane classification is available on + // the current device. On devices without plane classification support, all + // plane anchors report a classification value of NONE + // and a classification_status value of UNAVAILABLE. + optional bool classification_supported = 7; + + // A general characterization of what kind of real-world surface the plane + // anchor represents. + // See + // https://developer.apple.com/documentation/arkit/arplaneanchor/2990936-classification + // for more information. + optional PlaneClassification classification = 8; + + // The current state of ARKit's process for classifying the plane anchor. + // When this property's value is KNOWN, the classification property represents + // ARKit's characterization of the real-world surface corresponding to the + // plane anchor. 
+ // See + // https://developer.apple.com/documentation/arkit/arplaneanchor/2990937-classificationstatus + // for more information. + optional PlaneClassificationStatus classification_status = 9; +} + +// A collection of points in the world coordinate space. +// See https://developer.apple.com/documentation/arkit/arpointcloud for more +// information. +message ARPointCloud { + message Point { + optional float x = 1; + optional float y = 2; + optional float z = 3; + } + + // The number of points in the cloud. + optional int32 count = 1; + + // The list of detected points. + repeated Point point = 2; + + // A list of unique identifiers corresponding to detected feature points. + // Each identifier in this list corresponds to the point at the same index + // in the points array. + repeated int64 identifier = 3 [packed = true]; +} + +// Video image and face position tracking information. +// See developer.apple.com/documentation/arkit/arframe for more information. +message ARFrame { + // The timestamp for the frame. + optional double timestamp = 1; + + // The depth data associated with the frame. Not all frames have depth data. + optional AVDepthData depth_data = 2; + + // The depth data object timestamp associated with the frame. May differ from + // the frame timestamp value. Is only set when the frame has depth_data. + optional double depth_data_timestamp = 3; + + // Camera information associated with the frame. + optional ARCamera camera = 4; + + // Light information associated with the frame. + optional ARLightEstimate light_estimate = 5; + + // Face anchor information associated with the frame. Not all frames have an + // active face anchor. + optional ARFaceAnchor face_anchor = 6; + + // Plane anchors associated with the frame. Not all frames have a plane + // anchor. Plane anchors and face anchors are mutually exclusive. + repeated ARPlaneAnchor plane_anchor = 7; + + // The current intermediate results of the scene analysis used to perform + // world tracking. + // See + // https://developer.apple.com/documentation/arkit/arframe/2887449-rawfeaturepoints + // for more information. + optional ARPointCloud raw_feature_points = 8; +} diff --git a/mediapipe/modules/objectron/calculators/annotation_data.proto b/mediapipe/modules/objectron/calculators/annotation_data.proto new file mode 100644 index 0000000..6c26d29 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/annotation_data.proto @@ -0,0 +1,108 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package mediapipe; + +import "mediapipe/modules/objectron/calculators/a_r_capture_metadata.proto"; +import "mediapipe/modules/objectron/calculators/object.proto"; + +// Projection of a 3D point on an image, and its metric depth. +message NormalizedPoint2D { + // x-y position of the 2d keypoint in the image coordinate system. + // u,v \in [0, 1], where top left corner is (0, 0) and the bottom-right corner + // is (1, 1). 
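+  // For example (illustrative only), a keypoint at u = 0.25, v = 0.5 in a
+  // 640x480 image corresponds to pixel (0.25 * 640, 0.5 * 480) = (160, 240).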
+ float x = 1; + float y = 2; + + // The depth of the point in the camera coordinate system (in meters). + float depth = 3; +} + +// The 3D point in the camera coordinate system, the scales are in meters. +message Point3D { + float x = 1; + float y = 2; + float z = 3; +} + +message AnnotatedKeyPoint { + int32 id = 1; + Point3D point_3d = 2; + NormalizedPoint2D point_2d = 3; + // Indicates whether this keypoint is hidden or not. The hidden attribute is + // determined from the object's skeleton. For box model, none of the keypoints + // are hidden. + bool hidden = 4; +} + +message ObjectAnnotation { + // Reference to the object identifier in ObjectInstance. + int32 object_id = 1; + + // For each objects, list all the annotated keypoints here. + // E.g. for bounding-boxes, we have 8 keypoints, hands = 21 keypoints, etc. + // These normalized points are the projection of the Object's 3D keypoint + // on the current frame's camera poses. + repeated AnnotatedKeyPoint keypoints = 2; + + // Visibiity of this annotation in a frame. + float visibility = 3; + + // 3x3 row-major rotation matrix describing the orientation of the rigid + // object's frame of reference in the camera-coordinate system. + repeated float rotation = 4; + + // 3x1 vector describing the translation of the rigid object's frame of + // reference in the camera-coordinate system in meters. + repeated float translation = 5; + + // 3x1 vector describing the scale of the rigid object's frame of reference in + // the camera-coordinate system. + repeated float scale = 6; +} + +message FrameAnnotation { + // Unique frame id, corresponds to images. + int32 frame_id = 1; + + // List of the annotated objects in this frame. Depending on how many object + // are observable in this frame, we might have non or as much as + // sequence.objects_size() annotations. + repeated ObjectAnnotation annotations = 2; + + // Information about the camera transformation (in the world coordinate) and + // imaging characteristics for a captured video frame. + ARCamera camera = 3; + + // The timestamp for the frame. + double timestamp = 4; + + // Plane center and normal in camera frame. + repeated float plane_center = 5; + repeated float plane_normal = 6; +} + +// The sequence protocol contains the annotation data for the entire video clip. +message Sequence { + // List of all the annotated 3D objects in this sequence in the world + // Coordinate system. Given the camera poses of each frame (also in the + // world-coordinate) these objects bounding boxes can be projected to each + // frame to get the per-frame annotation (i.e. image_annotation below). + repeated Object objects = 1; + + // List of annotated data per each frame in sequence + frame information. + repeated FrameAnnotation frame_annotations = 2; +} diff --git a/mediapipe/modules/objectron/calculators/belief_decoder_config.proto b/mediapipe/modules/objectron/calculators/belief_decoder_config.proto new file mode 100644 index 0000000..f0f10ae --- /dev/null +++ b/mediapipe/modules/objectron/calculators/belief_decoder_config.proto @@ -0,0 +1,38 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +message BeliefDecoderConfig { + optional float heatmap_threshold = 1 [default = 0.9]; + // Maximum distance in pixels between two local max heatmap values. + optional float local_max_distance = 2 [default = 10.0]; + // Coefficient of offset_scale. + // offset_scale = offset_scale_coef * min(rows, cols). + // offset_scale is used to multiply the offset predictions from the network. + optional float offset_scale_coef = 3 [default = 0.5, deprecated = true]; + + // The radius for vertex voting. Use no voting if the radius is less than or + // euqal to 1. Example: 10. + optional int32 voting_radius = 4; + + // The number of pixels to determine whether two points are the same. + // Example: 5 (voting_radius / 2). + optional int32 voting_allowance = 5; + + // The threshold of beliefs, with which the points can vote. Example: 0.2. + optional float voting_threshold = 6; +} diff --git a/mediapipe/modules/objectron/calculators/box.cc b/mediapipe/modules/objectron/calculators/box.cc new file mode 100644 index 0000000..bd2ce57 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/box.cc @@ -0,0 +1,255 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/objectron/calculators/box.h" + +#include "Eigen/Core" +#include "mediapipe/framework/port/logging.h" + +namespace mediapipe { + +namespace { +constexpr int kFrontFaceId = 4; +constexpr int kTopFaceId = 2; +constexpr int kNumKeypoints = 8 + 1; +constexpr int kNumberOfAxis = 3; +constexpr int kEdgesPerAxis = 4; + +} // namespace + +Box::Box(const std::string& category) + : Model(kBoundingBox, kNumKeypoints, category), + bounding_box_(kNumKeypoints) { + transformation_.setIdentity(); + + scale_ << 0.1, 0.1, 0.1; + + // The vertices are ordered according to the left-hand rule, so the normal + // vector of each face will point inward the box. + faces_.push_back({5, 6, 8, 7}); // +x on yz plane + faces_.push_back({1, 3, 4, 2}); // -x on yz plane + + faces_.push_back({3, 7, 8, 4}); // +y on xz plane = top + faces_.push_back({1, 2, 6, 5}); // -y on xz plane + + faces_.push_back({2, 4, 8, 6}); // +z on xy plane = front + faces_.push_back({1, 5, 7, 3}); // -z on xy plane + + // Add the edges in the cube, they are sorted according to axis (x-y-z). 
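+  // The four edges in each group below are the parallel edges along one
+  // axis (first x, then y, then z); Fit() relies on this grouping when it
+  // averages edge lengths per axis to estimate the scale. For example,
+  // edge {1, 5} connects vertices (-w, -h, -d) and (+w, -h, -d), so it
+  // spans the x axis.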
+ edges_.push_back({1, 5}); + edges_.push_back({2, 6}); + edges_.push_back({3, 7}); + edges_.push_back({4, 8}); + + edges_.push_back({1, 3}); + edges_.push_back({5, 7}); + edges_.push_back({2, 4}); + edges_.push_back({6, 8}); + + edges_.push_back({1, 2}); + edges_.push_back({3, 4}); + edges_.push_back({5, 6}); + edges_.push_back({7, 8}); + Update(); +} + +void Box::Update() { + // Compute the eight vertices of the bounding box from Box's parameters + auto w = scale_[0] / 2.f; + auto h = scale_[1] / 2.f; + auto d = scale_[2] / 2.f; + + // Define the local coordinate system, w.r.t. the center of the boxs + bounding_box_[0] << 0., 0., 0.; + bounding_box_[1] << -w, -h, -d; + bounding_box_[2] << -w, -h, +d; + bounding_box_[3] << -w, +h, -d; + bounding_box_[4] << -w, +h, +d; + bounding_box_[5] << +w, -h, -d; + bounding_box_[6] << +w, -h, +d; + bounding_box_[7] << +w, +h, -d; + bounding_box_[8] << +w, +h, +d; + + // Convert to world coordinate system + for (int i = 0; i < kNumKeypoints; ++i) { + bounding_box_[i] = + transformation_.topLeftCorner<3, 3>() * bounding_box_[i] + + transformation_.col(3).head<3>(); + } +} + +void Box::Adjust(const std::vector& variables) { + Eigen::Vector3f translation; + translation << variables[0], variables[1], variables[2]; + SetTranslation(translation); + + const float roll = variables[3]; + const float pitch = variables[4]; + const float yaw = variables[5]; + SetRotation(roll, pitch, yaw); + + Eigen::Vector3f scale; + scale << variables[6], variables[7], variables[8]; + + SetScale(scale); + Update(); +} + +float* Box::GetVertex(size_t vertex_id) { + CHECK_LT(vertex_id, kNumKeypoints); + return bounding_box_[vertex_id].data(); +} + +const float* Box::GetVertex(size_t vertex_id) const { + CHECK_LT(vertex_id, kNumKeypoints); + return bounding_box_[vertex_id].data(); +} + +bool Box::InsideTest(const Eigen::Vector3f& point, int check_axis) const { + const float* v0 = GetVertex(1); + const float* v1 = GetVertex(2); + const float* v2 = GetVertex(3); + const float* v4 = GetVertex(5); + + switch (check_axis) { + case 1: + return (v0[0] <= point[0] && point[0] <= v1[0]); // X-axis + case 2: + return (v0[1] <= point[1] && point[1] <= v2[1]); // Y-axis + case 3: + return (v0[2] <= point[2] && point[2] <= v4[2]); // Z-axis + default: + return false; + } +} + +void Box::Deserialize(const Object& obj) { + CHECK_EQ(obj.keypoints_size(), kNumKeypoints); + Model::Deserialize(obj); +} + +void Box::Serialize(Object* obj) { + Model::Serialize(obj); + obj->set_type(Object::BOUNDING_BOX); + std::vector local_bounding_box(9); + // Define the local coordinate system, w.r.t. 
the center of the boxs + local_bounding_box[0] << 0., 0., 0.; + local_bounding_box[1] << -0.5, -0.5, -0.5; + local_bounding_box[2] << -0.5, -0.5, +0.5; + local_bounding_box[3] << -0.5, +0.5, -0.5; + local_bounding_box[4] << -0.5, +0.5, +0.5; + local_bounding_box[5] << +0.5, -0.5, -0.5; + local_bounding_box[6] << +0.5, -0.5, +0.5; + local_bounding_box[7] << +0.5, +0.5, -0.5; + local_bounding_box[8] << +0.5, +0.5, +0.5; + for (int i = 0; i < kNumKeypoints; ++i) { + KeyPoint* keypoint = obj->add_keypoints(); + keypoint->set_x(local_bounding_box[i][0]); + keypoint->set_y(local_bounding_box[i][1]); + keypoint->set_z(local_bounding_box[i][2]); + keypoint->set_confidence_radius(0.); + } +} + +const Face& Box::GetFrontFace() const { return faces_[kFrontFaceId]; } + +const Face& Box::GetTopFace() const { return faces_[kTopFaceId]; } + +std::pair Box::GetGroundPlane() const { + const Vector3f gravity = Vector3f(0., 1., 0.); + int ground_plane_id = 0; + float ground_plane_error = 10.0; + + auto get_face_center = [&](const Face& face) { + Vector3f center = Vector3f::Zero(); + for (const int vertex_id : face) { + center += Map(GetVertex(vertex_id)); + } + center /= face.size(); + return center; + }; + + auto get_face_normal = [&](const Face& face, const Vector3f& center) { + Vector3f v1 = Map(GetVertex(face[0])) - center; + Vector3f v2 = Map(GetVertex(face[1])) - center; + Vector3f normal = v1.cross(v2); + return normal; + }; + + // The ground plane is defined as a plane aligned with gravity. + // gravity is the (0, 1, 0) vector in the world coordinate system. + const auto& faces = GetFaces(); + for (int face_id = 0; face_id < faces.size(); face_id += 2) { + const auto& face = faces[face_id]; + Vector3f center = get_face_center(face); + Vector3f normal = get_face_normal(face, center); + Vector3f w = gravity.cross(normal); + const float w_sq_norm = w.squaredNorm(); + if (w_sq_norm < ground_plane_error) { + ground_plane_error = w_sq_norm; + ground_plane_id = face_id; + } + } + + Vector3f center = get_face_center(faces[ground_plane_id]); + Vector3f normal = get_face_normal(faces[ground_plane_id], center); + + // For each face, we also have a parallel face that it's normal is also + // aligned with gravity vector. We pick the face with lower height (y-value). + // The parallel to face 0 is 1, face 2 is 3, and face 4 is 5. + int parallel_face_id = ground_plane_id + 1; + const auto& parallel_face = faces[parallel_face_id]; + Vector3f parallel_face_center = get_face_center(parallel_face); + Vector3f parallel_face_normal = + get_face_normal(parallel_face, parallel_face_center); + if (parallel_face_center[1] < center[1]) { + center = parallel_face_center; + normal = parallel_face_normal; + } + return {center, normal}; +} + +template +void Box::Fit(const std::vector& vertices) { + CHECK_EQ(vertices.size(), kNumKeypoints); + scale_.setZero(); + // The scale would remain invariant under rotation and translation. + // We can safely estimate the scale from the oriented box. 
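+  // In other words (illustrative summary of the code below):
+  //   scale_[axis] = (1 / 4) * sum, over the 4 edges parallel to `axis`, of
+  //                  ||vertices[edge[0]] - vertices[edge[1]]||
+  // An axis-aligned box with this scale is then synthesized, and the rigid
+  // transform is recovered by solving, in the least-squares sense,
+  //   vertices ~= [R | t] * homogeneous(axis_aligned_box_vertices)
+  // via a column-pivoting Householder QR decomposition.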
+ for (int axis = 0; axis < kNumberOfAxis; ++axis) { + for (int edge_id = 0; edge_id < kEdgesPerAxis; ++edge_id) { + // The edges are stored in quadruples according to each axis + const std::array& edge = edges_[axis * kEdgesPerAxis + edge_id]; + scale_[axis] += (vertices[edge[0]] - vertices[edge[1]]).norm(); + } + scale_[axis] /= kEdgesPerAxis; + } + // Create a scaled axis-aligned box + transformation_.setIdentity(); + Update(); + + using MatrixN3_RM = Eigen::Matrix; + Eigen::Map v(vertices[0].data()); + Eigen::Map system(bounding_box_[0].data()); + auto system_h = system.rowwise().homogeneous().eval(); + auto system_g = system_h.colPivHouseholderQr(); + auto solution = system_g.solve(v).eval(); + transformation_.topLeftCorner<3, 4>() = solution.transpose(); + Update(); +} + +template void Box::Fit(const std::vector&); +template void Box::Fit>(const std::vector>&); +template void Box::Fit>( + const std::vector>&); +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/box.h b/mediapipe/modules/objectron/calculators/box.h new file mode 100644 index 0000000..17218f7 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/box.h @@ -0,0 +1,132 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_BOX_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_BOX_H_ + +#include + +#include "mediapipe/modules/objectron/calculators/model.h" + +namespace mediapipe { + +// Model for the bounding box in 3D +// The box has 9 degrees of freedom, which uniquely defines 8 keypoints in the +// fixed world-coordinate system. +// +// The 8 keypoints are defined as follows +// +// kp-id axis +// 0 000 --- +// 1 001 --+ +// 2 010 -+- +// 3 011 -++ +// 4 100 +-- +// 5 101 +-+ +// 6 110 ++- +// 7 111 +++ +// +// where xyz means positive or negative vector along the axis where the center +// of the box is the origin. The resulting bounding box is +// +// x x +// 0 + + + + + + + + 4 .------- +// +\ +\ |\ +// + \ y + \ z | \ y +// + \ + \ | \ +// + 2 + + + + + + + + 6 +// z + + + + +// + + + + +// + + C + + +// + + + + +// 1 + + + + + + + + 5 + +// \ + \ + +// \ + \ + +// \+ \+ +// 3 + + + + + + + + 7 +// +// World coordinate system: +y is up (aligned with gravity), +// +z is toward the user, +x follows right hand rule. +// The front face is defined as +z axis on xy plane. +// The top face is defined as +y axis on xz plane. +// + +class Box : public Model { + public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + explicit Box(const std::string& category); + ~Box() override = default; + + bool InsideTest(const Vector3f& point, int check_axis) const; + + const std::vector& GetFaces() const { return faces_; } + const Face& GetFace(size_t face_id) const { return faces_[face_id]; } + + const std::vector>& GetEdges() const { return edges_; } + const std::array& GetEdge(size_t edge_id) const { + return edges_[edge_id]; + } + + // Returns the keypoints for the front face of the box. 
+ // The front face is defind as a face with +z normal vector on xy plane + // In Box's c'tor, the top face is set to {1, 3, 7, 5} + const Face& GetFrontFace() const; + + // Returns the keypoints for the top face of the box. + // The top face is defind as a face with +z normal vector on xy plane + // In Box's c'tor, the top face is set to {1, 3, 7, 5} + const Face& GetTopFace() const; + + void Update() override; + void Adjust(const std::vector& variables) override; + float* GetVertex(size_t vertex_id) override; + const float* GetVertex(size_t vertex_id) const override; + void Deserialize(const Object& obj) override; + void Serialize(Object* obj) override; + + // Computes the plane center and the normal vector for the plane the object + // is sitting on in the world cooordinate system. The normal vector is roughly + // aligned with gravity. + std::pair GetGroundPlane() const; + + // Estimates a box 9-dof parameters from the given vertices. Directly computes + // the scale of the box, then solves for orientation and translation. + // Expects a std::vector of size 9 of a Eigen::Vector3f or mapped Vector3f. + // If mapping proto messages, we recommend to use the Map. + // For example: + // + // using T = Map; + // std::vector vertices; + // for (const auto& point : message) { // point is a repeated float message. + // T p(point.data()); + // vertices.emplace_back(p); + // } + // box.Fit(vertices); + // + // The Points must be arranged as 1 + 8 (center keypoint followed by 8 box + // vertices) vector. This function will overwrite the scale and transformation + // properties of the class. + template > + void Fit(const std::vector& vertices); + + private: + std::vector faces_; + std::vector> edges_; + std::vector bounding_box_; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_BOX_H_ diff --git a/mediapipe/modules/objectron/calculators/box_util.cc b/mediapipe/modules/objectron/calculators/box_util.cc new file mode 100644 index 0000000..0663b5b --- /dev/null +++ b/mediapipe/modules/objectron/calculators/box_util.cc @@ -0,0 +1,153 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/modules/objectron/calculators/box_util.h" + +#include + +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace mediapipe { +void ComputeBoundingRect(const std::vector& points, + mediapipe::TimedBoxProto* box) { + CHECK(box != nullptr); + float top = 1.0f; + float bottom = 0.0f; + float left = 1.0f; + float right = 0.0f; + for (const auto& point : points) { + top = std::min(top, point.y); + bottom = std::max(bottom, point.y); + left = std::min(left, point.x); + right = std::max(right, point.x); + } + box->set_top(top); + box->set_bottom(bottom); + box->set_left(left); + box->set_right(right); + // We are currently only doing axis aligned bounding box. If we need to + // compute rotated bounding box, then we need the original image aspect ratio, + // map back to original image space, compute cv::convexHull, then for each + // edge of the hull, rotate according to edge orientation, find the box. + box->set_rotation(0.0f); +} + +float ComputeBoxIoU(const TimedBoxProto& box1, const TimedBoxProto& box2) { + cv::Point2f box1_center((box1.left() + box1.right()) * 0.5f, + (box1.top() + box1.bottom()) * 0.5f); + cv::Size2f box1_size(box1.right() - box1.left(), box1.bottom() - box1.top()); + cv::RotatedRect rect1(box1_center, box1_size, + -box1.rotation() * 180.0f / M_PI); + cv::Point2f box2_center((box2.left() + box2.right()) * 0.5f, + (box2.top() + box2.bottom()) * 0.5f); + cv::Size2f box2_size(box2.right() - box2.left(), box2.bottom() - box2.top()); + cv::RotatedRect rect2(box2_center, box2_size, + -box2.rotation() * 180.0f / M_PI); + std::vector intersections_unsorted; + std::vector intersections; + cv::rotatedRectangleIntersection(rect1, rect2, intersections_unsorted); + if (intersections_unsorted.size() < 3) { + return 0.0f; + } + cv::convexHull(intersections_unsorted, intersections); + + // We use Shoelace formula to compute area of polygons. + float intersection_area = 0.0f; + for (int i = 0; i < intersections.size(); ++i) { + const auto& curr_pt = intersections[i]; + const int i_next = (i + 1) == intersections.size() ? 0 : (i + 1); + const auto& next_pt = intersections[i_next]; + intersection_area += (curr_pt.x * next_pt.y - next_pt.x * curr_pt.y); + } + intersection_area = std::abs(intersection_area) * 0.5f; + + // Compute union area + const float union_area = + rect1.size.area() + rect2.size.area() - intersection_area + 1e-5f; + + const float iou = intersection_area / union_area; + return iou; +} + +std::vector ComputeBoxCorners(const TimedBoxProto& box, + float width, float height) { + // Rotate 4 corner w.r.t. center. + const cv::Point2f center(0.5f * (box.left() + box.right()) * width, + 0.5f * (box.top() + box.bottom()) * height); + const std::vector corners{ + cv::Point2f(box.left() * width, box.top() * height), + cv::Point2f(box.left() * width, box.bottom() * height), + cv::Point2f(box.right() * width, box.bottom() * height), + cv::Point2f(box.right() * width, box.top() * height)}; + + const float cos_a = std::cos(box.rotation()); + const float sin_a = std::sin(box.rotation()); + std::vector transformed_corners(4); + for (int k = 0; k < 4; ++k) { + // Scale and rotate w.r.t. center. 
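+    // Each corner p is mapped to center + R(rotation) * (p - center), with
+    //   R(a) = | cos a  -sin a |
+    //          | sin a   cos a |
+    // and the result is converted back to normalized coordinates by
+    // dividing by the image width and height.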
+ const cv::Point2f rad = corners[k] - center; + const cv::Point2f rot_rad(cos_a * rad.x - sin_a * rad.y, + sin_a * rad.x + cos_a * rad.y); + transformed_corners[k] = center + rot_rad; + transformed_corners[k].x /= width; + transformed_corners[k].y /= height; + } + return transformed_corners; +} + +cv::Mat PerspectiveTransformBetweenBoxes(const TimedBoxProto& src_box, + const TimedBoxProto& dst_box, + const float aspect_ratio) { + std::vector box1_corners = + ComputeBoxCorners(src_box, /*width*/ aspect_ratio, /*height*/ 1.0f); + std::vector box2_corners = + ComputeBoxCorners(dst_box, /*width*/ aspect_ratio, /*height*/ 1.0f); + cv::Mat affine_transform = cv::getPerspectiveTransform( + /*src*/ box1_corners, /*dst*/ box2_corners); + cv::Mat output_affine; + affine_transform.convertTo(output_affine, CV_32FC1); + return output_affine; +} + +cv::Point2f MapPoint(const TimedBoxProto& src_box, const TimedBoxProto& dst_box, + const cv::Point2f& src_point, float width, float height) { + const cv::Point2f src_center( + 0.5f * (src_box.left() + src_box.right()) * width, + 0.5f * (src_box.top() + src_box.bottom()) * height); + const cv::Point2f dst_center( + 0.5f * (dst_box.left() + dst_box.right()) * width, + 0.5f * (dst_box.top() + dst_box.bottom()) * height); + const float scale_x = + (dst_box.right() - dst_box.left()) / (src_box.right() - src_box.left()); + const float scale_y = + (dst_box.bottom() - dst_box.top()) / (src_box.bottom() - src_box.top()); + const float rotation = dst_box.rotation() - src_box.rotation(); + const cv::Point2f rad = + cv::Point2f(src_point.x * width, src_point.y * height) - src_center; + const float rad_x = rad.x * scale_x; + const float rad_y = rad.y * scale_y; + const float cos_a = std::cos(rotation); + const float sin_a = std::sin(rotation); + const cv::Point2f rot_rad(cos_a * rad_x - sin_a * rad_y, + sin_a * rad_x + cos_a * rad_y); + const cv::Point2f dst_point_image = dst_center + rot_rad; + const cv::Point2f dst_point(dst_point_image.x / width, + dst_point_image.y / height); + return dst_point; +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/box_util.h b/mediapipe/modules/objectron/calculators/box_util.h new file mode 100644 index 0000000..fed21c0 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/box_util.h @@ -0,0 +1,50 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_BOX_UTIL_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_BOX_UTIL_H_ + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace mediapipe { + +// This function fills the geometry of the TimedBoxProto. Id, timestamp etc. +// need to be set outside this function. +void ComputeBoundingRect(const std::vector& points, + mediapipe::TimedBoxProto* box); + +// This function computes the intersection over union between two boxes. 
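+// Both boxes may be rotated; the intersection polygon is obtained with
+// cv::rotatedRectangleIntersection and its area with the Shoelace formula,
+//   area = 0.5 * | sum_i (x_i * y_{i+1} - x_{i+1} * y_i) |,
+// so the result is
+//   IoU = intersection_area / (area1 + area2 - intersection_area)
+// (plus a small epsilon in the implementation to avoid division by zero).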
+float ComputeBoxIoU(const TimedBoxProto& box1, const TimedBoxProto& box2); + +// Computes corners of the box. +// width and height are image width and height, which is typically +// needed since the box is in normalized coordinates. +std::vector ComputeBoxCorners(const TimedBoxProto& box, + float width, float height); + +// Computes the perspective transform from box1 to box2. +// The input argument aspect_ratio is width / height of the image. +// The returned matrix should be a 3x3 matrix. +cv::Mat PerspectiveTransformBetweenBoxes(const TimedBoxProto& src_box, + const TimedBoxProto& dst_box, + const float aspect_ratio); + +// Map point according to source and destination box location. +cv::Point2f MapPoint(const TimedBoxProto& src_box, const TimedBoxProto& dst_box, + const cv::Point2f& src_point, float width, float height); + +} // namespace mediapipe + +#endif // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_BOX_UTIL_H_ diff --git a/mediapipe/modules/objectron/calculators/box_util_test.cc b/mediapipe/modules/objectron/calculators/box_util_test.cc new file mode 100644 index 0000000..2a3895f --- /dev/null +++ b/mediapipe/modules/objectron/calculators/box_util_test.cc @@ -0,0 +1,123 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/modules/objectron/calculators/box_util.h" + +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace mediapipe { +namespace { + +TEST(BoxUtilTest, TestComputeBoundingRect) { + std::vector points{ + cv::Point2f(0.35f, 0.25f), cv::Point2f(0.3f, 0.3f), + cv::Point2f(0.2f, 0.4f), cv::Point2f(0.3f, 0.1f), + cv::Point2f(0.2f, 0.2f), cv::Point2f(0.5f, 0.3f), + cv::Point2f(0.4f, 0.4f), cv::Point2f(0.5f, 0.1f), + cv::Point2f(0.4f, 0.2f)}; + TimedBoxProto box; + ComputeBoundingRect(points, &box); + EXPECT_FLOAT_EQ(0.1f, box.top()); + EXPECT_FLOAT_EQ(0.4f, box.bottom()); + EXPECT_FLOAT_EQ(0.2f, box.left()); + EXPECT_FLOAT_EQ(0.5f, box.right()); +} + +TEST(BoxUtilTest, TestComputeBoxIoU) { + TimedBoxProto box1; + box1.set_top(0.2f); + box1.set_bottom(0.6f); + box1.set_left(0.1f); + box1.set_right(0.3f); + box1.set_rotation(0.0f); + TimedBoxProto box2 = box1; + box2.set_rotation(/*pi/2*/ 1.570796f); + const float box_area = + (box1.bottom() - box1.top()) * (box1.right() - box1.left()); + const float box_intersection = + (box1.right() - box1.left()) * (box1.right() - box1.left()); + const float expected_iou = + box_intersection / (box_area * 2 - box_intersection); + EXPECT_NEAR(expected_iou, ComputeBoxIoU(box1, box2), 3e-5f); + + TimedBoxProto box3; + box3.set_top(0.2f); + box3.set_bottom(0.6f); + box3.set_left(0.5f); + box3.set_right(0.7f); + EXPECT_NEAR(0.0f, ComputeBoxIoU(box1, box3), 3e-5f); +} + +TEST(BoxUtilTest, TestPerspectiveTransformBetweenBoxes) { + TimedBoxProto box1; + const float height = 4.0f; + const float width = 3.0f; + box1.set_top(1.0f / height); + box1.set_bottom(2.0f / height); + box1.set_left(1.0f / width); + box1.set_right(2.0f / width); + TimedBoxProto box2; + box2.set_top(1.0f / height); + box2.set_bottom(2.0f / height); + box2.set_left(1.0f / width); + box2.set_right(2.0f / width); + box2.set_rotation(/*pi/4*/ -0.785398f); + cv::Mat transform = + PerspectiveTransformBetweenBoxes(box1, box2, width / height); + const float kTolerence = 1e-5f; + const cv::Vec3f original_position(1.5f / width, 1.0f / height, 1.0f); + const cv::Mat transformed_position = transform * cv::Mat(original_position); + EXPECT_NEAR( + (1.5f - 0.5f * std::sqrt(2) / 2.0f) / width, + transformed_position.at(0) / transformed_position.at(2), + kTolerence); + EXPECT_NEAR( + (1.5f - 0.5f * std::sqrt(2) / 2.0f) / height, + transformed_position.at(1) / transformed_position.at(2), + kTolerence); +} + +TEST(BoxUtilTest, TestMapPoint) { + const float height = 4.0f; + const float width = 3.0f; + TimedBoxProto box1; + box1.set_top(1.0f / height); + box1.set_bottom(2.0f / height); + box1.set_left(1.0f / width); + box1.set_right(2.0f / width); + TimedBoxProto box2; + box2.set_top(1.0f / height); + box2.set_bottom(2.0f / height); + box2.set_left(1.0f / width); + box2.set_right(2.0f / width); + box2.set_rotation(/*pi/4*/ -0.785398f); + + cv::Point2f src_point1(1.2f / width, 1.4f / height); + cv::Point2f src_point2(1.3f / width, 1.8f / height); + const float distance1 = std::sqrt(0.1 * 0.1 + 0.4 * 0.4); + cv::Point2f dst_point1 = MapPoint(box1, box2, src_point1, width, height); + cv::Point2f dst_point2 = MapPoint(box1, box2, src_point2, width, height); + const float distance2 = + std::sqrt((dst_point1.x * width - dst_point2.x * width) * + (dst_point1.x * width - dst_point2.x * width) + + (dst_point1.y * height - dst_point2.y * height) * + 
(dst_point1.y * height - dst_point2.y * height)); + EXPECT_NEAR(distance1, distance2, 1e-5f); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/camera_parameters.proto b/mediapipe/modules/objectron/calculators/camera_parameters.proto new file mode 100644 index 0000000..f5c843b --- /dev/null +++ b/mediapipe/modules/objectron/calculators/camera_parameters.proto @@ -0,0 +1,47 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +message CameraParametersProto { + // This number is non-negative, it represents camera height above ground + // normalized by focal length. + optional float height_above_ground = 1 [default = 100.0]; + // Width of image in portrait orientation normalized by focal length + optional float portrait_width = 2 [default = 1.0103]; + // Height of image in portrait orientation normalized by focal length + optional float portrait_height = 3 [default = 1.3435]; + enum ImageOrientation { + PORTRAIT_ORIENTATION = 0; + LANDSCAPE_ORIENTATION = 1; + } + // The input image orientation + optional ImageOrientation image_orientation = 4 + [default = PORTRAIT_ORIENTATION]; + + // This defines the projection method from 2D screen to 3D. + enum ProjectionMode { + UNSPECIFIED = 0; + // Projects 2D point to ground plane (horizontal plane). + GROUND_PLANE = 1; + // Projects 2D point to sphere. + SPHERE = 2; + } + optional ProjectionMode projection_mode = 5 [default = GROUND_PLANE]; + // Radius of sphere when using the SPHERE projection mode above. + // The value is normalized by focal length. + optional float projection_sphere_radius = 6 [default = 100.0]; +} diff --git a/mediapipe/modules/objectron/calculators/decoder.cc b/mediapipe/modules/objectron/calculators/decoder.cc new file mode 100644 index 0000000..0af3458 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/decoder.cc @@ -0,0 +1,252 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
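+// Illustrative usage sketch (assumed caller code, not part of this file):
+//
+//   Decoder decoder(belief_decoder_config);
+//   FrameAnnotation boxes =
+//       decoder.DecodeBoundingBoxKeypoints(heatmap, offsetmap);
+//   absl::Status status =
+//       decoder.Lift2DTo3D(projection_matrix, /*portrait=*/true, &boxes);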
+ +#include "mediapipe/modules/objectron/calculators/decoder.h" + +#include +#include + +#include "Eigen/Core" +#include "Eigen/Dense" +#include "absl/status/status.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/box.h" +#include "mediapipe/modules/objectron/calculators/epnp.h" +#include "mediapipe/modules/objectron/calculators/types.h" + +namespace mediapipe { + +constexpr int Decoder::kNumOffsetmaps = 16; +constexpr int kNumKeypoints = 9; + +namespace { + +inline void SetPoint3d(const Eigen::Vector3f& point_vec, Point3D* point_3d) { + point_3d->set_x(point_vec.x()); + point_3d->set_y(point_vec.y()); + point_3d->set_z(point_vec.z()); +} + +} // namespace + +FrameAnnotation Decoder::DecodeBoundingBoxKeypoints( + const cv::Mat& heatmap, const cv::Mat& offsetmap) const { + CHECK_EQ(1, heatmap.channels()); + CHECK_EQ(kNumOffsetmaps, offsetmap.channels()); + CHECK_EQ(heatmap.cols, offsetmap.cols); + CHECK_EQ(heatmap.rows, offsetmap.rows); + + const float offset_scale = std::min(offsetmap.cols, offsetmap.rows); + const std::vector center_points = ExtractCenterKeypoints(heatmap); + std::vector boxes; + for (const auto& center_point : center_points) { + BeliefBox box; + box.box_2d.emplace_back(center_point.x, center_point.y); + const int center_x = static_cast(std::round(center_point.x)); + const int center_y = static_cast(std::round(center_point.y)); + box.belief = heatmap.at(center_y, center_x); + if (config_.voting_radius() > 1) { + DecodeByVoting(heatmap, offsetmap, center_x, center_y, offset_scale, + offset_scale, &box); + } else { + DecodeByPeak(offsetmap, center_x, center_y, offset_scale, offset_scale, + &box); + } + if (IsNewBox(&boxes, &box)) { + boxes.push_back(std::move(box)); + } + } + + const float x_scale = 1.0f / offsetmap.cols; + const float y_scale = 1.0f / offsetmap.rows; + FrameAnnotation frame_annotations; + for (const auto& box : boxes) { + auto* object = frame_annotations.add_annotations(); + for (const auto& point : box.box_2d) { + auto* point2d = object->add_keypoints()->mutable_point_2d(); + point2d->set_x(point.first * x_scale); + point2d->set_y(point.second * y_scale); + } + } + return frame_annotations; +} + +void Decoder::DecodeByPeak(const cv::Mat& offsetmap, int center_x, int center_y, + float offset_scale_x, float offset_scale_y, + BeliefBox* box) const { + const auto& offset = offsetmap.at>( + /*row*/ center_y, /*col*/ center_x); + for (int i = 0; i < kNumOffsetmaps / 2; ++i) { + const float x_offset = offset[2 * i] * offset_scale_x; + const float y_offset = offset[2 * i + 1] * offset_scale_y; + box->box_2d.emplace_back(center_x + x_offset, center_y + y_offset); + } +} + +void Decoder::DecodeByVoting(const cv::Mat& heatmap, const cv::Mat& offsetmap, + int center_x, int center_y, float offset_scale_x, + float offset_scale_y, BeliefBox* box) const { + // Votes at the center. + const auto& center_offset = offsetmap.at>( + /*row*/ center_y, /*col*/ center_x); + std::vector center_votes(kNumOffsetmaps, 0.f); + for (int i = 0; i < kNumOffsetmaps / 2; ++i) { + center_votes[2 * i] = center_x + center_offset[2 * i] * offset_scale_x; + center_votes[2 * i + 1] = + center_y + center_offset[2 * i + 1] * offset_scale_y; + } + + // Find voting window. 
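+  // The window is a square of side (2 * voting_radius + 1) centered at the
+  // peak, clipped to the heatmap bounds. Every pixel in the window whose
+  // belief exceeds voting_threshold casts a belief-weighted vote for each
+  // vertex; votes farther than voting_allowance pixels from the center
+  // pixel's own vote are discarded, and the final vertex position is the
+  // weighted mean of the surviving votes.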
+ int x_min = std::max(0, center_x - config_.voting_radius()); + int y_min = std::max(0, center_y - config_.voting_radius()); + int width = std::min(heatmap.cols - x_min, config_.voting_radius() * 2 + 1); + int height = std::min(heatmap.rows - y_min, config_.voting_radius() * 2 + 1); + cv::Rect rect(x_min, y_min, width, height); + cv::Mat heat = heatmap(rect); + cv::Mat offset = offsetmap(rect); + + for (int i = 0; i < kNumOffsetmaps / 2; ++i) { + float x_sum = 0.f; + float y_sum = 0.f; + float votes = 0.f; + for (int r = 0; r < heat.rows; ++r) { + for (int c = 0; c < heat.cols; ++c) { + const float belief = heat.at(r, c); + if (belief < config_.voting_threshold()) { + continue; + } + float offset_x = + offset.at>(r, c)[2 * i] * + offset_scale_x; + float offset_y = + offset.at>(r, c)[2 * i + 1] * + offset_scale_y; + float vote_x = c + rect.x + offset_x; + float vote_y = r + rect.y + offset_y; + float x_diff = std::abs(vote_x - center_votes[2 * i]); + float y_diff = std::abs(vote_y - center_votes[2 * i + 1]); + if (x_diff > config_.voting_allowance() || + y_diff > config_.voting_allowance()) { + continue; + } + x_sum += vote_x * belief; + y_sum += vote_y * belief; + votes += belief; + } + } + box->box_2d.emplace_back(x_sum / votes, y_sum / votes); + } +} + +bool Decoder::IsNewBox(std::vector* boxes, BeliefBox* box) const { + for (auto& b : *boxes) { + if (IsIdentical(b, *box)) { + if (b.belief < box->belief) { + std::swap(b, *box); + } + return false; + } + } + return true; +} + +bool Decoder::IsIdentical(const BeliefBox& box_1, + const BeliefBox& box_2) const { + // Skip the center point. + for (int i = 1; i < box_1.box_2d.size(); ++i) { + const float x_diff = + std::abs(box_1.box_2d[i].first - box_2.box_2d[i].first); + const float y_diff = + std::abs(box_1.box_2d[i].second - box_2.box_2d[i].second); + if (x_diff > config_.voting_allowance() || + y_diff > config_.voting_allowance()) { + return false; + } + } + return true; +} + +std::vector Decoder::ExtractCenterKeypoints( + const cv::Mat& center_heatmap) const { + cv::Mat max_filtered_heatmap(center_heatmap.rows, center_heatmap.cols, + center_heatmap.type()); + const int kernel_size = + static_cast(config_.local_max_distance() * 2 + 1 + 0.5f); + const cv::Size morph_size(kernel_size, kernel_size); + cv::dilate(center_heatmap, max_filtered_heatmap, + cv::getStructuringElement(cv::MORPH_RECT, morph_size)); + cv::Mat peak_map; + cv::bitwise_and((center_heatmap >= max_filtered_heatmap), + (center_heatmap >= config_.heatmap_threshold()), peak_map); + std::vector locations; // output, locations of non-zero pixels + cv::findNonZero(peak_map, locations); + return locations; +} + +absl::Status Decoder::Lift2DTo3D( + const Eigen::Matrix& projection_matrix, + bool portrait, FrameAnnotation* estimated_box) const { + CHECK(estimated_box != nullptr); + + for (auto& annotation : *estimated_box->mutable_annotations()) { + CHECK_EQ(kNumKeypoints, annotation.keypoints_size()); + + // Fill input 2D Points; + std::vector input_points_2d; + input_points_2d.reserve(kNumKeypoints); + for (const auto& keypoint : annotation.keypoints()) { + input_points_2d.emplace_back(keypoint.point_2d().x(), + keypoint.point_2d().y()); + } + + // Run EPnP. 
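+    // Brief summary of the step below (see the EPnP reference cited in
+    // decoder.h): each of the 9 box keypoints is written as a fixed linear
+    // combination of 4 control points (the epnp_alpha_ coefficients), the
+    // control points are solved for from the observed 2D projections and
+    // the projection matrix, and the resulting 3D keypoints are returned
+    // in the camera coordinate system.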
+ std::vector output_points_3d; + output_points_3d.reserve(kNumKeypoints); + auto status = SolveEpnp(projection_matrix, portrait, input_points_2d, + &output_points_3d); + if (!status.ok()) { + LOG(ERROR) << status; + return status; + } + + // Fill 3D keypoints; + for (int i = 0; i < kNumKeypoints; ++i) { + SetPoint3d(output_points_3d[i], + annotation.mutable_keypoints(i)->mutable_point_3d()); + } + + // Fit a box to the 3D points to get box scale, rotation, translation. + Box box("category"); + box.Fit(output_points_3d); + const Eigen::Matrix rotation = + box.GetRotation(); + const Eigen::Vector3f translation = box.GetTranslation(); + const Eigen::Vector3f scale = box.GetScale(); + // Fill box rotation. + *annotation.mutable_rotation() = {rotation.data(), + rotation.data() + rotation.size()}; + // Fill box translation. + *annotation.mutable_translation() = { + translation.data(), translation.data() + translation.size()}; + // Fill box scale. + *annotation.mutable_scale() = {scale.data(), scale.data() + scale.size()}; + } + return absl::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/decoder.h b/mediapipe/modules/objectron/calculators/decoder.h new file mode 100644 index 0000000..be69939 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/decoder.h @@ -0,0 +1,109 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_DECODER_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_DECODER_H_ + +#include + +#include "Eigen/Dense" +#include "absl/status/status.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/belief_decoder_config.pb.h" + +namespace mediapipe { + +// Decodes 3D bounding box from heatmaps and offset maps. In the future, +// if we want to develop decoder for generic skeleton, then we need to +// generalize this class, and make a few child classes. +class Decoder { + public: + static const int kNumOffsetmaps; + + explicit Decoder(const BeliefDecoderConfig& config) : config_(config) { + epnp_alpha_ << 4.0f, -1.0f, -1.0f, -1.0f, 2.0f, -1.0f, -1.0f, 1.0f, 2.0f, + -1.0f, 1.0f, -1.0f, 0.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f, -1.0f, -1.0f, + 0.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f, -1.0f, -2.0f, 1.0f, 1.0f, + 1.0f; + } + + // Decodes bounding boxes from predicted heatmap and offset maps. + // Input: + // heatmap: a single channel cv::Mat representing center point heatmap + // offsetmap: a 16 channel cv::Mat representing the 16 offset maps + // (2 for each of the 8 vertices) + // Output: + // Outputs 3D bounding boxes 2D vertices, represented by 'point_2d' field + // in each 'keypoints' field of object annotations. + FrameAnnotation DecodeBoundingBoxKeypoints(const cv::Mat& heatmap, + const cv::Mat& offsetmap) const; + + // Lifts the estimated 2D projections of bounding box vertices to 3D. 
+  // This function uses the EPnP approach described in this paper:
+  // https://icwww.epfl.ch/~lepetit/papers/lepetit_ijcv08.pdf .
+  // Input:
+  //     projection_matrix: the projection matrix from 3D coordinate
+  //         to screen coordinate.
+  //       The 2D screen coordinate is defined as: u is along the long
+  //       edge of the device, pointing down; v is along the short edge
+  //       of the device, pointing right.
+  //     portrait: a boolean variable indicating whether our images are
+  //         obtained in portrait orientation or not.
+  //     estimated_box: annotation with point_2d field populated with
+  //         2d vertices.
+  // Output:
+  //     estimated_box: annotation with point_3d field populated with
+  //         3d vertices.
+  absl::Status Lift2DTo3D(
+      const Eigen::Matrix<float, 4, 4, Eigen::RowMajor>& projection_matrix,
+      bool portrait, FrameAnnotation* estimated_box) const;
+
+ private:
+  struct BeliefBox {
+    float belief;
+    std::vector<std::pair<float, float>> box_2d;
+  };
+
+  std::vector<cv::Point> ExtractCenterKeypoints(
+      const cv::Mat& center_heatmap) const;
+
+  // Decodes 2D keypoints at the peak point.
+  void DecodeByPeak(const cv::Mat& offsetmap, int center_x, int center_y,
+                    float offset_scale_x, float offset_scale_y,
+                    BeliefBox* box) const;
+
+  // Decodes 2D keypoints by voting around the peak.
+  void DecodeByVoting(const cv::Mat& heatmap, const cv::Mat& offsetmap,
+                      int center_x, int center_y, float offset_scale_x,
+                      float offset_scale_y, BeliefBox* box) const;
+
+  // Returns true if it is a new box. Otherwise, it may replace an existing box
+  // if the new box's belief is higher.
+  bool IsNewBox(std::vector<BeliefBox>* boxes, BeliefBox* box) const;
+
+  // Returns true if the two boxes are identical.
+  bool IsIdentical(const BeliefBox& box_1, const BeliefBox& box_2) const;
+
+  BeliefDecoderConfig config_;
+  // Following equation (1) in this paper
+  // https://icwww.epfl.ch/~lepetit/papers/lepetit_ijcv08.pdf,
+  // this variable denotes the coefficients for the 4 control points
+  // for each of the 8 3D box vertices.
+  Eigen::Matrix<float, 8, 4, Eigen::RowMajor> epnp_alpha_;
+};
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_DECODER_H_
diff --git a/mediapipe/modules/objectron/calculators/epnp.cc b/mediapipe/modules/objectron/calculators/epnp.cc
new file mode 100644
index 0000000..8bd7151
--- /dev/null
+++ b/mediapipe/modules/objectron/calculators/epnp.cc
@@ -0,0 +1,167 @@
+// Copyright 2021 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/modules/objectron/calculators/epnp.h"
+
+namespace mediapipe {
+
+namespace {
+
+// Number of keypoints.
+constexpr int kNumKeypoints = 9; + +using Eigen::Map; +using Eigen::Matrix; +using Eigen::Matrix4f; +using Eigen::Vector2f; +using Eigen::Vector3f; + +} // namespace + +absl::Status SolveEpnp(const float focal_x, const float focal_y, + const float center_x, const float center_y, + const bool portrait, + const std::vector& input_points_2d, + std::vector* output_points_3d) { + if (input_points_2d.size() != kNumKeypoints) { + return absl::InvalidArgumentError( + absl::StrFormat("Input must has %d 2D points.", kNumKeypoints)); + } + + if (output_points_3d == nullptr) { + return absl::InvalidArgumentError( + "Output pointer output_points_3d is Null."); + } + + Matrix m = + Matrix::Zero(); + + Matrix epnp_alpha; + // The epnp_alpha is the Nx4 weight matrix from the EPnP paper, which is used + // to express the N box vertices as the weighted sum of 4 control points. The + // value of epnp_alpha is depedent on the set of control points been used. + // In our case we used the 4 control points as below (coordinates are in world + // coordinate system): + // c0 = (0.0, 0.0, 0.0) // Box center + // c1 = (1.0, 0.0, 0.0) // Right face center + // c2 = (0.0, 1.0, 0.0) // Top face center + // c3 = (0.0, 0.0, 1.0) // Front face center + // + // 3 + + + + + + + + 7 + // +\ +\ UP + // + \ + \ + // + \ + \ | + // + 4 + + + + + + + + 8 | y + // + + + + | + // + + + + | + // + + (0) + + .------- x + // + + + + \ + // 1 + + + + + + + + 5 + \ + // \ + \ + \ z + // \ + \ + \ + // \+ \+ + // 2 + + + + + + + + 6 + // + // For each box vertex shown above, we have the below weighted sum expression: + // v1 = c0 - (c1 - c0) - (c2 - c0) - (c3 - c0) = 4*c0 - c1 - c2 - c3; + // v2 = c0 - (c1 - c0) - (c2 - c0) + (c3 - c0) = 2*c0 - c1 - c2 + c3; + // v3 = c0 - (c1 - c0) + (c2 - c0) - (c3 - c0) = 2*c0 - c1 + c2 - c3; + // ... + // Thus we can determine the value of epnp_alpha as been used below. + // + // clang-format off + epnp_alpha << 4.0f, -1.0f, -1.0f, -1.0f, + 2.0f, -1.0f, -1.0f, 1.0f, + 2.0f, -1.0f, 1.0f, -1.0f, + 0.0f, -1.0f, 1.0f, 1.0f, + 2.0f, 1.0f, -1.0f, -1.0f, + 0.0f, 1.0f, -1.0f, 1.0f, + 0.0f, 1.0f, 1.0f, -1.0f, + -2.0f, 1.0f, 1.0f, 1.0f; + // clang-format on + + for (int i = 0; i < input_points_2d.size() - 1; ++i) { + // Skip 0th landmark which is object center. + const auto& point_2d = input_points_2d[i + 1]; + + // Convert 2d point from `pixel coordinates` to `NDC coordinates`([-1, 1]) + // following to the definitions in: + // https://google.github.io/mediapipe/solutions/objectron#ndc-space + // If portrait mode is been used, it's the caller's responsibility to + // convert the input 2d points' coordinates. + float x_ndc, y_ndc; + if (portrait) { + x_ndc = point_2d.y() * 2 - 1; + y_ndc = point_2d.x() * 2 - 1; + } else { + x_ndc = point_2d.x() * 2 - 1; + y_ndc = 1 - point_2d.y() * 2; + } + + for (int j = 0; j < 4; ++j) { + // For each of the 4 control points, formulate two rows of the + // m matrix (two equations). + const float control_alpha = epnp_alpha(i, j); + m(i * 2, j * 3) = focal_x * control_alpha; + m(i * 2, j * 3 + 2) = (center_x + x_ndc) * control_alpha; + m(i * 2 + 1, j * 3 + 1) = focal_y * control_alpha; + m(i * 2 + 1, j * 3 + 2) = (center_y + y_ndc) * control_alpha; + } + } + // This is a self adjoint matrix. Use SelfAdjointEigenSolver for a fast + // and stable solution. 
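+  // The constraints assembled above form a homogeneous system m * x = 0,
+  // where x stacks the 3D coordinates of the 4 control points (12 unknowns).
+  // The least-squares solution is the eigenvector of m^T * m associated with
+  // its smallest eigenvalue, recovered up to an unknown scale.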
+ Matrix mt_m = m.transpose() * m; + Eigen::SelfAdjointEigenSolver> eigen_solver(mt_m); + if (eigen_solver.info() != Eigen::Success) { + return absl::AbortedError("Eigen decomposition failed."); + } + CHECK_EQ(12, eigen_solver.eigenvalues().size()); + + // Eigenvalues are sorted in increasing order for SelfAdjointEigenSolver + // only! If you use other Eigen Solvers, it's not guaranteed to be in + // increasing order. Here, we just take the eigen vector corresponding + // to first/smallest eigen value, since we used SelfAdjointEigenSolver. + Eigen::VectorXf eigen_vec = eigen_solver.eigenvectors().col(0); + Map> control_matrix(eigen_vec.data()); + + // All 3D points should be in front of camera (z < 0). + if (control_matrix(0, 2) > 0) { + control_matrix = -control_matrix; + } + Matrix vertices = epnp_alpha * control_matrix; + + // Fill 0th 3D points. + output_points_3d->emplace_back(control_matrix(0, 0), control_matrix(0, 1), + control_matrix(0, 2)); + // Fill the rest 3D points. + for (int i = 0; i < kNumKeypoints - 1; ++i) { + output_points_3d->emplace_back(vertices(i, 0), vertices(i, 1), + vertices(i, 2)); + } + return absl::OkStatus(); +} + +absl::Status SolveEpnp(const Eigen::Matrix4f& projection_matrix, + const bool portrait, + const std::vector& input_points_2d, + std::vector* output_points_3d) { + const float focal_x = projection_matrix(0, 0); + const float focal_y = projection_matrix(1, 1); + const float center_x = projection_matrix(0, 2); + const float center_y = projection_matrix(1, 2); + return SolveEpnp(focal_x, focal_y, center_x, center_y, portrait, + input_points_2d, output_points_3d); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/epnp.h b/mediapipe/modules/objectron/calculators/epnp.h new file mode 100644 index 0000000..85be6f9 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/epnp.h @@ -0,0 +1,62 @@ +// Copyright 2021 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_EPNP_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_EPNP_H_ + +#include + +#include "Eigen/Dense" +#include "absl/status/status.h" +#include "absl/strings/str_format.h" +#include "mediapipe/framework/port/logging.h" + +namespace mediapipe { + +// This function performs EPnP algorithm, lifting normalized 2D points in pixel +// space to 3D points in camera coordinate. +// +// Inputs: +// focal_x: camera focal length along x. +// focal_y: camera focal length along y. +// center_x: camera center along x. +// center_y: camera center along y. +// portrait: a boolen variable indicating whether our images are obtained in +// portrait orientation or not. +// input_points_2d: input 2D points to be lifted to 3D. +// output_points_3d: ouput 3D points in camera coordinate. 
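A minimal usage sketch for the overload declared below (editorial illustration only: the focal-length and principal-point values are placeholders, the Eigen::Vector2f/Eigen::Vector3f element types are inferred from epnp.cc and epnp_test.cc in this change, and real callers pass the detector's 9 normalized box keypoints rather than dummy values):

std::vector<Eigen::Vector2f> points_2d(9, Eigen::Vector2f(0.5f, 0.5f));  // center + 8 vertices
std::vector<Eigen::Vector3f> points_3d;  // filled by SolveEpnp on success
absl::Status status = SolveEpnp(/*focal_x=*/1.0f, /*focal_y=*/1.0f,
                                /*center_x=*/0.0f, /*center_y=*/0.0f,
                                /*portrait=*/false, points_2d, &points_3d);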
+absl::Status SolveEpnp(const float focal_x, const float focal_y, + const float center_x, const float center_y, + const bool portrait, + const std::vector& input_points_2d, + std::vector* output_points_3d); + +// This function performs EPnP algorithm, lifting normalized 2D points in pixel +// space to 3D points in camera coordinate. +// +// Inputs: +// projection_matrix: the projection matrix from 3D coordinate +// to screen coordinate. +// portrait: a boolen variable indicating whether our images are obtained in +// portrait orientation or not. +// input_points_2d: input 2D points to be lifted to 3D. +// output_points_3d: ouput 3D points in camera coordinate. +absl::Status SolveEpnp(const Eigen::Matrix4f& projection_matrix, + const bool portrait, + const std::vector& input_points_2d, + std::vector* output_points_3d); + +} // namespace mediapipe + +#endif // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_EPNP_H_ diff --git a/mediapipe/modules/objectron/calculators/epnp_test.cc b/mediapipe/modules/objectron/calculators/epnp_test.cc new file mode 100644 index 0000000..8cf218a --- /dev/null +++ b/mediapipe/modules/objectron/calculators/epnp_test.cc @@ -0,0 +1,169 @@ +#include "mediapipe/modules/objectron/calculators/epnp.h" + +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/tool/test_util.h" + +namespace mediapipe { +namespace { + +using Eigen::AngleAxisf; +using Eigen::Map; +using Eigen::Matrix; +using Eigen::Matrix4f; +using Eigen::RowMajor; +using Eigen::Vector2f; +using Eigen::Vector3f; +using ::testing::HasSubstr; +using ::testing::Test; +using ::testing::status::StatusIs; +using Matrix3f = Eigen::Matrix; + +constexpr uint8_t kNumKeypoints = 9; + +// clang-format off +constexpr float kUnitBox[] = { 0.0f, 0.0f, 0.0f, + -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, 0.5f, + -0.5f, 0.5f, -0.5f, + -0.5f, 0.5f, 0.5f, + 0.5f, -0.5f, -0.5f, + 0.5f, -0.5f, 0.5f, + 0.5f, 0.5f, -0.5f, + 0.5f, 0.5f, 0.5f, }; +// clang-format on + +constexpr float kFocalX = 1.0f; +constexpr float kFocalY = 1.0f; +constexpr float kCenterX = 0.0f; +constexpr float kCenterY = 0.0f; + +constexpr float kAzimuth = 90.0f * M_PI / 180.0f; +constexpr float kElevation = 45.0f * M_PI / 180.0f; +constexpr float kTilt = 15.0f * M_PI / 180.0f; + +constexpr float kTranslationArray[] = {0.0f, 0.0f, -100.0f}; + +constexpr float kScaleArray[] = {50.0f, 50.0f, 50.0f}; + +class SolveEpnpTest : public Test { + protected: + SolveEpnpTest() {} + + void SetUp() override { + // Create vertices in world frame. + Map> vertices_w(kUnitBox); + + // Create Pose. + Matrix3f rotation; + rotation = AngleAxisf(kTilt, Vector3f::UnitZ()) * + AngleAxisf(kElevation, Vector3f::UnitX()) * + AngleAxisf(kAzimuth, Vector3f::UnitY()); + Map translation(kTranslationArray); + Map scale(kScaleArray); + + // Generate 3d vertices in camera frame. + const auto vertices_c = + ((rotation * scale.asDiagonal() * vertices_w.transpose()).colwise() + + translation) + .transpose(); + + // Generate input 2d points. 
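+    // Each camera-frame vertex (x, y, z) is projected with a pinhole model,
+    // x_ndc = -kFocalX * x / z + kCenterX and y_ndc = -kFocalY * y / z +
+    // kCenterY, then mapped into [0, 1] pixel coordinates. The pixel points
+    // become the test input and the camera-frame vertices are the expected
+    // output, compared up to a global scale in VerifyOutput3dPoints().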
+ std::vector input_2d_points; + std::vector expected_3d_points; + for (int i = 0; i < kNumKeypoints; ++i) { + const auto x = vertices_c(i, 0); + const auto y = vertices_c(i, 1); + const auto z = vertices_c(i, 2); + + const float x_ndc = -kFocalX * x / z + kCenterX; + const float y_ndc = -kFocalY * y / z + kCenterY; + + const float x_pixel = (1.0f + x_ndc) / 2.0f; + const float y_pixel = (1.0f - y_ndc) / 2.0f; + + expected_3d_points_.emplace_back(x, y, z); + input_2d_points_.emplace_back(x_pixel, y_pixel); + } + } + + void VerifyOutput3dPoints(const std::vector& output_3d_points) { + EXPECT_EQ(kNumKeypoints, output_3d_points.size()); + const float scale = output_3d_points[0].z() / expected_3d_points_[0].z(); + for (int i = 0; i < kNumKeypoints; ++i) { + EXPECT_NEAR(output_3d_points[i].x(), expected_3d_points_[i].x() * scale, + 2.e-6f); + EXPECT_NEAR(output_3d_points[i].y(), expected_3d_points_[i].y() * scale, + 2.e-6f); + EXPECT_NEAR(output_3d_points[i].z(), expected_3d_points_[i].z() * scale, + 2.e-6f); + } + } + + std::vector input_2d_points_; + std::vector expected_3d_points_; +}; + +TEST_F(SolveEpnpTest, SolveEpnp) { + std::vector output_3d_points; + MP_ASSERT_OK(SolveEpnp(kFocalX, kFocalY, kCenterX, kCenterY, + /*portrait*/ false, input_2d_points_, + &output_3d_points)); + // Test output 3D points. + VerifyOutput3dPoints(output_3d_points); +} + +TEST_F(SolveEpnpTest, SolveEpnppPortrait) { + std::vector output_3d_points; + MP_ASSERT_OK(SolveEpnp(kFocalX, kFocalY, kCenterX, kCenterY, + /*portrait*/ true, input_2d_points_, + &output_3d_points)); + // Test output 3D points. + for (auto& point_3d : output_3d_points) { + const auto x = point_3d.x(); + const auto y = point_3d.y(); + // Convert from portrait mode to normal mode, y => x, x => -y. + point_3d.x() = y; + point_3d.y() = -x; + } + VerifyOutput3dPoints(output_3d_points); +} + +TEST_F(SolveEpnpTest, SolveEpnpProjectionMatrix) { + Matrix4f projection_matrix; + // clang-format off + projection_matrix << kFocalX, 0.0f, kCenterX, 0.0f, + 0.0f, kFocalY, kCenterY, 0.0f, + 0.0f, 0.0f, -1.0f, 0.0f, + 0.0f, 0.0f, -1.0f, 0.0f; + // clang-format on + + std::vector output_3d_points; + MP_ASSERT_OK(SolveEpnp(projection_matrix, /*portrait*/ false, + input_2d_points_, &output_3d_points)); + + // Test output 3D points. + VerifyOutput3dPoints(output_3d_points); +} + +TEST_F(SolveEpnpTest, BadInput2dPoints) { + // Generate empty input 2D points. + std::vector input_2d_points; + std::vector output_3d_points; + EXPECT_THAT(SolveEpnp(kFocalX, kFocalY, kCenterX, kCenterY, + /*portrait*/ false, input_2d_points, &output_3d_points), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("Input must has"))); +} + +TEST_F(SolveEpnpTest, BadOutput3dPoints) { + // Generate null output 3D points. + std::vector* output_3d_points = nullptr; + EXPECT_THAT(SolveEpnp(kFocalX, kFocalY, kCenterX, kCenterY, + /*portrait*/ false, input_2d_points_, output_3d_points), + StatusIs(absl::StatusCode::kInvalidArgument, + "Output pointer output_points_3d is Null.")); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/filter_detection_calculator.cc b/mediapipe/modules/objectron/calculators/filter_detection_calculator.cc new file mode 100644 index 0000000..0f29f9c --- /dev/null +++ b/mediapipe/modules/objectron/calculators/filter_detection_calculator.cc @@ -0,0 +1,262 @@ +// Copyright 2020 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "absl/container/node_hash_set.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/map_util.h" +#include "mediapipe/framework/port/re2.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/objectron/calculators/filter_detection_calculator.pb.h" + +namespace mediapipe { + +namespace { + +constexpr char kDetectionTag[] = "DETECTION"; +constexpr char kDetectionsTag[] = "DETECTIONS"; +constexpr char kLabelsTag[] = "LABELS"; +constexpr char kLabelsCsvTag[] = "LABELS_CSV"; + +using mediapipe::RE2; +using Detections = std::vector; +using Strings = std::vector; + +} // namespace + +// Filters the entries in a Detection to only those with valid scores +// for the specified allowed labels. Allowed labels are provided as a +// vector in an optional input side packet. Allowed labels can +// contain simple strings or regular expressions. The valid score range +// can be set in the options.The allowed labels can be provided as +// vector (LABELS) or CSV std::string (LABELS_CSV) containing class +// names of allowed labels. Note: Providing an empty vector in the input side +// packet Packet causes this calculator to act as a sink if +// empty_allowed_labels_means_allow_everything is set to false (default value). +// To allow all labels, use the calculator with no input side packet stream, or +// set empty_allowed_labels_means_allow_everything to true. +// +// Example config: +// node { +// calculator: "FilterDetectionCalculator" +// input_stream: "DETECTIONS:detections" +// output_stream: "DETECTIONS:filtered_detections" +// input_side_packet: "LABELS:allowed_labels" +// options: { +// [mediapipe.FilterDetectionCalculatorOptions.ext]: { +// min_score: 0.5 +// } +// } +// } + +struct FirstGreaterComparator { + bool operator()(const std::pair& a, + const std::pair& b) const { + return a.first > b.first; + } +}; + +absl::Status SortLabelsByDecreasingScore(const Detection& detection, + Detection* sorted_detection) { + RET_CHECK(sorted_detection); + RET_CHECK_EQ(detection.score_size(), detection.label_size()); + if (!detection.label_id().empty()) { + RET_CHECK_EQ(detection.score_size(), detection.label_id_size()); + } + // Copies input to keep all fields unchanged, and to reserve space for + // repeated fields. Repeated fields (score, label, and label_id) will be + // overwritten. 
+ *sorted_detection = detection; + + std::vector> scores_and_indices(detection.score_size()); + for (int i = 0; i < detection.score_size(); ++i) { + scores_and_indices[i].first = detection.score(i); + scores_and_indices[i].second = i; + } + + std::sort(scores_and_indices.begin(), scores_and_indices.end(), + FirstGreaterComparator()); + + for (int i = 0; i < detection.score_size(); ++i) { + const int index = scores_and_indices[i].second; + sorted_detection->set_score(i, detection.score(index)); + sorted_detection->set_label(i, detection.label(index)); + } + + if (!detection.label_id().empty()) { + for (int i = 0; i < detection.score_size(); ++i) { + const int index = scores_and_indices[i].second; + sorted_detection->set_label_id(i, detection.label_id(index)); + } + } + return absl::OkStatus(); +} + +class FilterDetectionCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc); + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + + private: + bool IsValidLabel(const std::string& label); + bool IsValidScore(float score); + // Stores numeric limits for filtering on the score. + FilterDetectionCalculatorOptions options_; + // We use the next two fields to possibly filter to a limited set of + // classes. The hash_set will be empty in two cases: 1) if no input + // side packet stream is provided (not filtering on labels), or 2) + // if the input side packet contains an empty vector (no labels are + // allowed). We use limit_labels_ to distinguish between the two cases. + bool limit_labels_ = true; + absl::node_hash_set allowed_labels_; +}; +REGISTER_CALCULATOR(FilterDetectionCalculator); + +absl::Status FilterDetectionCalculator::GetContract(CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + + if (cc->Inputs().HasTag(kDetectionTag)) { + cc->Inputs().Tag(kDetectionTag).Set(); + cc->Outputs().Tag(kDetectionTag).Set(); + } + if (cc->Inputs().HasTag(kDetectionsTag)) { + cc->Inputs().Tag(kDetectionsTag).Set(); + cc->Outputs().Tag(kDetectionsTag).Set(); + } + if (cc->InputSidePackets().HasTag(kLabelsTag)) { + cc->InputSidePackets().Tag(kLabelsTag).Set(); + } + if (cc->InputSidePackets().HasTag(kLabelsCsvTag)) { + cc->InputSidePackets().Tag(kLabelsCsvTag).Set(); + } + return absl::OkStatus(); +} + +absl::Status FilterDetectionCalculator::Open(CalculatorContext* cc) { + cc->SetOffset(TimestampDiff(0)); + options_ = cc->Options(); + limit_labels_ = cc->InputSidePackets().HasTag(kLabelsTag) || + cc->InputSidePackets().HasTag(kLabelsCsvTag); + if (limit_labels_) { + Strings allowlist_labels; + if (cc->InputSidePackets().HasTag(kLabelsCsvTag)) { + allowlist_labels = absl::StrSplit( + cc->InputSidePackets().Tag(kLabelsCsvTag).Get(), ',', + absl::SkipWhitespace()); + for (auto& e : allowlist_labels) { + absl::StripAsciiWhitespace(&e); + } + } else { + allowlist_labels = cc->InputSidePackets().Tag(kLabelsTag).Get(); + } + allowed_labels_.insert(allowlist_labels.begin(), allowlist_labels.end()); + } + if (limit_labels_ && allowed_labels_.empty()) { + if (options_.fail_on_empty_labels()) { + cc->GetCounter("VideosWithEmptyLabelsAllowlist")->Increment(); + return tool::StatusFail( + "FilterDetectionCalculator received empty allowlist with " + "fail_on_empty_labels = true."); + } + if (options_.empty_allowed_labels_means_allow_everything()) { + // Continue as if side_input was not provided, i.e. pass all labels. 
+ limit_labels_ = false; + } + } + return absl::OkStatus(); +} + +absl::Status FilterDetectionCalculator::Process(CalculatorContext* cc) { + if (limit_labels_ && allowed_labels_.empty()) { + return absl::OkStatus(); + } + Detections detections; + if (cc->Inputs().HasTag(kDetectionsTag)) { + detections = cc->Inputs().Tag(kDetectionsTag).Get(); + } else if (cc->Inputs().HasTag(kDetectionTag)) { + detections.emplace_back(cc->Inputs().Tag(kDetectionsTag).Get()); + } + std::unique_ptr outputs(new Detections); + for (const auto& input : detections) { + Detection output; + for (int i = 0; i < input.label_size(); ++i) { + const std::string& label = input.label(i); + const float score = input.score(i); + if (IsValidLabel(label) && IsValidScore(score)) { + output.add_label(label); + output.add_score(score); + } + } + if (output.label_size() > 0) { + if (input.has_location_data()) { + *output.mutable_location_data() = input.location_data(); + } + Detection output_sorted; + if (!SortLabelsByDecreasingScore(output, &output_sorted).ok()) { + // Uses the orginal output if fails to sort. + cc->GetCounter("FailedToSortLabelsInDetection")->Increment(); + output_sorted = output; + } + outputs->emplace_back(output_sorted); + } + } + + if (cc->Outputs().HasTag(kDetectionsTag)) { + cc->Outputs() + .Tag(kDetectionsTag) + .Add(outputs.release(), cc->InputTimestamp()); + } else if (!outputs->empty()) { + cc->Outputs() + .Tag(kDetectionsTag) + .Add(new Detection((*outputs)[0]), cc->InputTimestamp()); + } + return absl::OkStatus(); +} + +bool FilterDetectionCalculator::IsValidLabel(const std::string& label) { + bool match = !limit_labels_ || allowed_labels_.contains(label); + if (!match) { + // If no exact match is found, check for regular expression + // comparions in the allowed_labels. + for (const auto& label_regexp : allowed_labels_) { + match = match || RE2::FullMatch(label, RE2(label_regexp)); + } + } + return match; +} + +bool FilterDetectionCalculator::IsValidScore(float score) { + if (options_.has_min_score() && score < options_.min_score()) { + LOG(ERROR) << "Filter out detection with low score " << score; + return false; + } + if (options_.has_max_score() && score > options_.max_score()) { + LOG(ERROR) << "Filter out detection with high score " << score; + return false; + } + return true; +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/filter_detection_calculator.proto b/mediapipe/modules/objectron/calculators/filter_detection_calculator.proto new file mode 100644 index 0000000..ea79b8d --- /dev/null +++ b/mediapipe/modules/objectron/calculators/filter_detection_calculator.proto @@ -0,0 +1,45 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message FilterDetectionCalculatorOptions { + extend CalculatorOptions { + optional FilterDetectionCalculatorOptions ext = 339582987; + } + optional float min_score = 1; + optional float max_score = 2; + // Setting fail_on_empty_labels to true will cause the calculator to return a + // failure status on Open() if an empty list is provided on the external + // input, immediately terminating the graph run. + optional bool fail_on_empty_labels = 3 [default = false]; + // If fail_on_empty_labels is set to false setting + // empty_allowed_labels_means_allow_everything to + // false will cause the calculator to close output stream and ignore remaining + // inputs if an empty list is provided. If + // empty_allowed_labels_means_allow_everything is set to true this will force + // calculator to pass all labels. + optional bool empty_allowed_labels_means_allow_everything = 6 + [default = false]; + // Determines whether the input format is a vector (use-case object + // detectors) or Detection (use-case classifiers). + optional bool use_detection_vector = 4 [deprecated = true]; + // Determines whether the input side packet format is a vector of labels, or + // a string with comma separated labels. + optional bool use_allowed_labels_csv = 5 [deprecated = true]; +} diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.cc b/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.cc new file mode 100644 index 0000000..476f8cb --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.cc @@ -0,0 +1,177 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and + +#include +#include + +#include "Eigen/Dense" +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.pb.h" + +namespace mediapipe { + +using Matrix3fRM = Eigen::Matrix; +using Eigen::Vector2f; +using Eigen::Vector3f; + +namespace { + +constexpr char kInputFrameAnnotationTag[] = "FRAME_ANNOTATION"; +constexpr char kOutputNormRectsTag[] = "NORM_RECTS"; + +} // namespace + +// A calculator that converts FrameAnnotation proto to NormalizedRect. +// The rotation angle of the NormalizedRect is derived from object's 3d pose. +// The angle is calculated such that after rotation the 2d projection of y-axis. +// on the image plane is always vertical. 
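An example node config in the style the other calculators in this change use (editorial illustration; the stream names are placeholders rather than names taken from a graph in this PR, and the thresholds shown are simply the proto defaults):

// node {
//   calculator: "FrameAnnotationToRectCalculator"
//   input_stream: "FRAME_ANNOTATION:frame_annotation"
//   output_stream: "NORM_RECTS:norm_rects"
//   options: {
//     [mediapipe.FrameAnnotationToRectCalculatorOptions.ext]: {
//       off_threshold: 40.0
//       on_threshold: 41.0
//     }
//   }
// }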
+class FrameAnnotationToRectCalculator : public CalculatorBase { + public: + enum ViewStatus { + TOP_VIEW_ON, + TOP_VIEW_OFF, + }; + + static absl::Status GetContract(CalculatorContract* cc); + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + + private: + void AddAnnotationToRect(const ObjectAnnotation& annotation, + std::vector* rect); + float RotationAngleFromAnnotation(const ObjectAnnotation& annotation); + + float RotationAngleFromPose(const Matrix3fRM& rotation, + const Vector3f& translation, const Vector3f& vec); + ViewStatus status_; + float off_threshold_; + float on_threshold_; +}; +REGISTER_CALCULATOR(FrameAnnotationToRectCalculator); + +absl::Status FrameAnnotationToRectCalculator::GetContract( + CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + + if (cc->Inputs().HasTag(kInputFrameAnnotationTag)) { + cc->Inputs().Tag(kInputFrameAnnotationTag).Set(); + } + + if (cc->Outputs().HasTag(kOutputNormRectsTag)) { + cc->Outputs().Tag(kOutputNormRectsTag).Set>(); + } + return absl::OkStatus(); +} + +absl::Status FrameAnnotationToRectCalculator::Open(CalculatorContext* cc) { + cc->SetOffset(TimestampDiff(0)); + status_ = TOP_VIEW_OFF; + const auto& options = cc->Options(); + off_threshold_ = options.off_threshold(); + on_threshold_ = options.on_threshold(); + RET_CHECK(off_threshold_ <= on_threshold_); + return absl::OkStatus(); +} + +absl::Status FrameAnnotationToRectCalculator::Process(CalculatorContext* cc) { + if (cc->Inputs().Tag(kInputFrameAnnotationTag).IsEmpty()) { + return absl::OkStatus(); + } + auto output_rects = absl::make_unique>(); + const auto& frame_annotation = + cc->Inputs().Tag(kInputFrameAnnotationTag).Get(); + for (const auto& object_annotation : frame_annotation.annotations()) { + AddAnnotationToRect(object_annotation, output_rects.get()); + } + + // Output. + cc->Outputs() + .Tag(kOutputNormRectsTag) + .Add(output_rects.release(), cc->InputTimestamp()); + return absl::OkStatus(); +} + +void FrameAnnotationToRectCalculator::AddAnnotationToRect( + const ObjectAnnotation& annotation, std::vector* rects) { + float x_min = std::numeric_limits::max(); + float x_max = std::numeric_limits::min(); + float y_min = std::numeric_limits::max(); + float y_max = std::numeric_limits::min(); + for (const auto& keypoint : annotation.keypoints()) { + const auto& point_2d = keypoint.point_2d(); + x_min = std::min(x_min, point_2d.x()); + x_max = std::max(x_max, point_2d.x()); + y_min = std::min(y_min, point_2d.y()); + y_max = std::max(y_max, point_2d.y()); + } + NormalizedRect new_rect; + new_rect.set_x_center((x_min + x_max) / 2); + new_rect.set_y_center((y_min + y_max) / 2); + new_rect.set_width(x_max - x_min); + new_rect.set_height(y_max - y_min); + new_rect.set_rotation(RotationAngleFromAnnotation(annotation)); + rects->push_back(new_rect); +} + +float FrameAnnotationToRectCalculator::RotationAngleFromAnnotation( + const ObjectAnnotation& annotation) { + // Get box rotation and translation from annotation. + const auto box_rotation = + Eigen::Map(annotation.rotation().data()); + const auto box_translation = + Eigen::Map(annotation.translation().data()); + + // Rotation angle to use when top-view is on(top-view on), + // Which will make z-axis upright after the rotation. 
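+  // Both candidate angles below come from RotationAngleFromPose, which
+  // projects the chosen object axis (vec and -vec) through the box pose onto
+  // the image plane and returns the in-plane rotation that would make the
+  // projected axis point straight up.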
+ const float angle_on = + RotationAngleFromPose(box_rotation, box_translation, Vector3f::UnitZ()); + // Rotation angle to use when side-view is on(top-view off), + // Which will make y-axis upright after the rotation. + const float angle_off = + RotationAngleFromPose(box_rotation, box_translation, Vector3f::UnitY()); + + // Calculate angle between z-axis and viewing ray in degrees. + const float view_to_z_angle = std::acos(box_rotation(2, 1)) * 180 / M_PI; + + // Determine threshold based on current status, + // on_threshold_ is used for TOP_VIEW_ON -> TOP_VIEW_OFF transition, + // off_threshold_ is used for TOP_VIEW_OFF -> TOP_VIEW_ON transition. + const float thresh = + (status_ == TOP_VIEW_ON) ? on_threshold_ : off_threshold_; + + // If view_to_z_angle is smaller than threshold, then top-view is on; + // Otherwise top-view is off. + status_ = (view_to_z_angle < thresh) ? TOP_VIEW_ON : TOP_VIEW_OFF; + + // Determine which angle to used based on current status_. + float angle_to_rotate = (status_ == TOP_VIEW_ON) ? angle_on : angle_off; + return angle_to_rotate; +} + +float FrameAnnotationToRectCalculator::RotationAngleFromPose( + const Matrix3fRM& rotation, const Vector3f& translation, + const Vector3f& vec) { + auto p1 = rotation * vec + translation; + auto p2 = -rotation * vec + translation; + const float dy = p2[2] * p2[1] - p1[2] * p1[1]; + const float dx = p2[2] * p2[0] - p1[2] * p1[0]; + return M_PI / 2 - std::atan2(dy, dx); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.proto b/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.proto new file mode 100644 index 0000000..8959cb8 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator.proto @@ -0,0 +1,31 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message FrameAnnotationToRectCalculatorOptions { + extend CalculatorOptions { + optional FrameAnnotationToRectCalculatorOptions ext = 338119067; + } + + // The threshold to use when top-view is off,to enable hysteresis, + // It's required that off_threshold <= on_threshold. + optional float off_threshold = 1 [default = 40.0]; + // The threshold to use when top-view is on. + optional float on_threshold = 2 [default = 41.0]; +} diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_to_timed_box_list_calculator.cc b/mediapipe/modules/objectron/calculators/frame_annotation_to_timed_box_list_calculator.cc new file mode 100644 index 0000000..7467880 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_to_timed_box_list_calculator.cc @@ -0,0 +1,115 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/box_util.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace { +constexpr char kInputStreamTag[] = "FRAME_ANNOTATION"; +constexpr char kOutputStreamTag[] = "BOXES"; +} // namespace + +namespace mediapipe { + +// Convert FrameAnnotation 3d bounding box detections to TimedBoxListProto +// 2d bounding boxes. +// +// Input: +// FRAME_ANNOTATION - 3d bounding box annotation. +// Output: +// BOXES - 2d bounding box enclosing the projection of 3d box. +// +// Usage example: +// node { +// calculator: "FrameAnnotationToTimedBoxListCalculator" +// input_stream: "FRAME_ANNOTATION:frame_annotation" +// output_stream: "BOXES:boxes" +// } +class FrameAnnotationToTimedBoxListCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc); + + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + absl::Status Close(CalculatorContext* cc) override; +}; +REGISTER_CALCULATOR(FrameAnnotationToTimedBoxListCalculator); + +absl::Status FrameAnnotationToTimedBoxListCalculator::GetContract( + CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + + if (cc->Inputs().HasTag(kInputStreamTag)) { + cc->Inputs().Tag(kInputStreamTag).Set(); + } + + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs().Tag(kOutputStreamTag).Set(); + } + return absl::OkStatus(); +} + +absl::Status FrameAnnotationToTimedBoxListCalculator::Open( + CalculatorContext* cc) { + return absl::OkStatus(); +} + +absl::Status FrameAnnotationToTimedBoxListCalculator::Process( + CalculatorContext* cc) { + if (cc->Inputs().HasTag(kInputStreamTag) && + !cc->Inputs().Tag(kInputStreamTag).IsEmpty()) { + const auto& frame_annotation = + cc->Inputs().Tag(kInputStreamTag).Get(); + auto output_objects = absl::make_unique(); + for (const auto& annotation : frame_annotation.annotations()) { + std::vector key_points; + for (const auto& keypoint : annotation.keypoints()) { + key_points.push_back( + cv::Point2f(keypoint.point_2d().x(), keypoint.point_2d().y())); + } + TimedBoxProto* added_box = output_objects->add_box(); + ComputeBoundingRect(key_points, added_box); + added_box->set_id(annotation.object_id()); + const int64 time_msec = + static_cast(std::round(frame_annotation.timestamp() / 1000)); + added_box->set_time_msec(time_msec); + } + + // Output + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs() + .Tag(kOutputStreamTag) + .Add(output_objects.release(), cc->InputTimestamp()); + } + } + + return absl::OkStatus(); +} + +absl::Status FrameAnnotationToTimedBoxListCalculator::Close( + CalculatorContext* cc) { + return 
absl::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_tracker.cc b/mediapipe/modules/objectron/calculators/frame_annotation_tracker.cc new file mode 100644 index 0000000..eebf885 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_tracker.cc @@ -0,0 +1,102 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/objectron/calculators/frame_annotation_tracker.h" + +#include "absl/container/flat_hash_set.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/box_util.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace mediapipe { + +void FrameAnnotationTracker::AddDetectionResult( + const FrameAnnotation& frame_annotation) { + const int64 time_us = + static_cast(std::round(frame_annotation.timestamp())); + for (const auto& object_annotation : frame_annotation.annotations()) { + detected_objects_[time_us + object_annotation.object_id()] = + object_annotation; + } +} + +FrameAnnotation FrameAnnotationTracker::ConsolidateTrackingResult( + const TimedBoxProtoList& tracked_boxes, + absl::flat_hash_set* cancel_object_ids) { + CHECK(cancel_object_ids != nullptr); + FrameAnnotation frame_annotation; + std::vector keys_to_be_deleted; + for (const auto& detected_obj : detected_objects_) { + const int object_id = detected_obj.second.object_id(); + if (cancel_object_ids->contains(object_id)) { + // Remember duplicated detections' keys. + keys_to_be_deleted.push_back(detected_obj.first); + continue; + } + TimedBoxProto ref_box; + for (const auto& box : tracked_boxes.box()) { + if (box.id() == object_id) { + ref_box = box; + break; + } + } + if (!ref_box.has_id() || ref_box.id() < 0) { + LOG(ERROR) << "Can't find matching tracked box for object id: " + << object_id << ". Likely lost tracking of it."; + keys_to_be_deleted.push_back(detected_obj.first); + continue; + } + + // Find duplicated boxes + for (const auto& box : tracked_boxes.box()) { + if (box.id() != object_id) { + if (ComputeBoxIoU(ref_box, box) > iou_threshold_) { + cancel_object_ids->insert(box.id()); + } + } + } + + // Map ObjectAnnotation from detection to tracked time. + // First, gather all keypoints from source detection. + std::vector key_points; + for (const auto& keypoint : detected_obj.second.keypoints()) { + key_points.push_back( + cv::Point2f(keypoint.point_2d().x(), keypoint.point_2d().y())); + } + // Second, find source box. + TimedBoxProto src_box; + ComputeBoundingRect(key_points, &src_box); + ObjectAnnotation* tracked_obj = frame_annotation.add_annotations(); + tracked_obj->set_object_id(ref_box.id()); + // Finally, map all keypoints in the source detection to tracked location. 
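+    // MapPoint (from box_util.h, included above) transfers each 2D keypoint
+    // from the coordinate frame of src_box (the rect around the original
+    // detection) to the corresponding location in ref_box (the rect reported
+    // by the tracker), so the annotation keeps its detected keypoint layout
+    // while following the tracker's box.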
+ for (const auto& keypoint : detected_obj.second.keypoints()) { + cv::Point2f dst = MapPoint( + src_box, ref_box, + cv::Point2f(keypoint.point_2d().x(), keypoint.point_2d().y()), + img_width_, img_height_); + auto* dst_point = tracked_obj->add_keypoints()->mutable_point_2d(); + dst_point->set_x(dst.x); + dst_point->set_y(dst.y); + } + } + + for (const auto& key : keys_to_be_deleted) { + detected_objects_.erase(key); + } + + return frame_annotation; +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_tracker.h b/mediapipe/modules/objectron/calculators/frame_annotation_tracker.h new file mode 100644 index 0000000..11a469c --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_tracker.h @@ -0,0 +1,62 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_FRAME_ANNOTATION_TRACKER_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_FRAME_ANNOTATION_TRACKER_H_ + +#include + +#include "absl/container/btree_map.h" +#include "absl/container/flat_hash_set.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace mediapipe { + +class FrameAnnotationTracker { + public: + // If two bounding boxes have IoU over iou_threshold, then we consider them + // describing the same object. + FrameAnnotationTracker(float iou_threshold, float img_width, float img_height) + : iou_threshold_(iou_threshold), + img_width_(img_width), + img_height_(img_height) {} + + // Adds detection results from an external detector. + void AddDetectionResult(const FrameAnnotation& frame_annotation); + + // Consolidates tracking result from an external tracker, associates with + // the detection result by the object id, and produces the corresponding + // result in FrameAnnotation. When there are duplicates, output the ids that + // need to be cancelled in cancel_object_ids. + // Note that the returned FrameAnnotation is missing timestamp. Need to fill + // that field. + FrameAnnotation ConsolidateTrackingResult( + const TimedBoxProtoList& tracked_boxes, + absl::flat_hash_set* cancel_object_ids); + + private: + float iou_threshold_; + float img_width_; + float img_height_; + // Cached detection results over time. + // Key is timestamp_us + object_id. + absl::btree_map> + detected_objects_; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_FRAME_ANNOTATION_TRACKER_H_ diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.cc b/mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.cc new file mode 100644 index 0000000..9079b9a --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.cc @@ -0,0 +1,134 @@ +// Copyright 2020 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/container/flat_hash_set.h" +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/frame_annotation_tracker.h" +#include "mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.pb.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace { +constexpr char kInputFrameAnnotationTag[] = "FRAME_ANNOTATION"; +constexpr char kInputTrackedBoxesTag[] = "TRACKED_BOXES"; +constexpr char kOutputTrackedFrameAnnotationTag[] = "TRACKED_FRAME_ANNOTATION"; +constexpr char kOutputCancelObjectIdTag[] = "CANCEL_OBJECT_ID"; +} // namespace + +namespace mediapipe { + +// Tracks frame annotations seeded/updated by FRAME_ANNOTATION input_stream. +// When using this calculator, make sure FRAME_ANNOTATION and TRACKED_BOXES +// are in different sync set. +// +// Input: +// FRAME_ANNOTATION - frame annotation. +// TRACKED_BOXES - 2d box tracking result +// Output: +// TRACKED_FRAME_ANNOTATION - annotation inferred from 2d tracking result. +// CANCEL_OBJECT_ID - object id that needs to be cancelled from the tracker. 
+// +// Usage example: +// node { +// calculator: "FrameAnnotationTrackerCalculator" +// input_stream: "FRAME_ANNOTATION:frame_annotation" +// input_stream: "TRACKED_BOXES:tracked_boxes" +// output_stream: "TRACKED_FRAME_ANNOTATION:tracked_frame_annotation" +// output_stream: "CANCEL_OBJECT_ID:cancel_object_id" +// } +class FrameAnnotationTrackerCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc); + + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + absl::Status Close(CalculatorContext* cc) override; + + private: + std::unique_ptr frame_annotation_tracker_; +}; +REGISTER_CALCULATOR(FrameAnnotationTrackerCalculator); + +absl::Status FrameAnnotationTrackerCalculator::GetContract( + CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + + if (cc->Inputs().HasTag(kInputFrameAnnotationTag)) { + cc->Inputs().Tag(kInputFrameAnnotationTag).Set(); + } + if (cc->Inputs().HasTag(kInputTrackedBoxesTag)) { + cc->Inputs().Tag(kInputTrackedBoxesTag).Set(); + } + if (cc->Outputs().HasTag(kOutputTrackedFrameAnnotationTag)) { + cc->Outputs().Tag(kOutputTrackedFrameAnnotationTag).Set(); + } + if (cc->Outputs().HasTag(kOutputCancelObjectIdTag)) { + cc->Outputs().Tag(kOutputCancelObjectIdTag).Set(); + } + return absl::OkStatus(); +} + +absl::Status FrameAnnotationTrackerCalculator::Open(CalculatorContext* cc) { + const auto& options = cc->Options(); + frame_annotation_tracker_ = absl::make_unique( + options.iou_threshold(), options.img_width(), options.img_height()); + return absl::OkStatus(); +} + +absl::Status FrameAnnotationTrackerCalculator::Process(CalculatorContext* cc) { + if (cc->Inputs().HasTag(kInputFrameAnnotationTag) && + !cc->Inputs().Tag(kInputFrameAnnotationTag).IsEmpty()) { + frame_annotation_tracker_->AddDetectionResult( + cc->Inputs().Tag(kInputFrameAnnotationTag).Get()); + } + if (cc->Inputs().HasTag(kInputTrackedBoxesTag) && + !cc->Inputs().Tag(kInputTrackedBoxesTag).IsEmpty() && + cc->Outputs().HasTag(kOutputTrackedFrameAnnotationTag)) { + absl::flat_hash_set cancel_object_ids; + auto output_frame_annotation = absl::make_unique(); + *output_frame_annotation = + frame_annotation_tracker_->ConsolidateTrackingResult( + cc->Inputs().Tag(kInputTrackedBoxesTag).Get(), + &cancel_object_ids); + output_frame_annotation->set_timestamp(cc->InputTimestamp().Microseconds()); + + cc->Outputs() + .Tag(kOutputTrackedFrameAnnotationTag) + .Add(output_frame_annotation.release(), cc->InputTimestamp()); + + if (cc->Outputs().HasTag(kOutputCancelObjectIdTag)) { + auto packet_timestamp = cc->InputTimestamp(); + for (const auto& id : cancel_object_ids) { + // The timestamp is incremented (by 1 us) because currently the box + // tracker calculator only accepts one cancel object ID for any given + // timestamp. 
+ cc->Outputs() + .Tag(kOutputCancelObjectIdTag) + .AddPacket(mediapipe::MakePacket(id).At(packet_timestamp++)); + } + } + } + + return absl::OkStatus(); +} + +absl::Status FrameAnnotationTrackerCalculator::Close(CalculatorContext* cc) { + return absl::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.proto b/mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.proto new file mode 100644 index 0000000..f37308a --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator.proto @@ -0,0 +1,36 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The option proto for the FrameAnnotationTrackerCalculatorOptions. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message FrameAnnotationTrackerCalculatorOptions { + extend CalculatorOptions { + optional FrameAnnotationTrackerCalculatorOptions ext = 291291253; + } + + // The threshold on intersection-over-union (IoU). We consider + // boxes with IoU larger than this threshold to be the duplicates. + optional float iou_threshold = 1 [default = 0.5]; + + // We need image dimension to properly compute annotation locations. + optional float img_width = 2; + + optional float img_height = 3; +} diff --git a/mediapipe/modules/objectron/calculators/frame_annotation_tracker_test.cc b/mediapipe/modules/objectron/calculators/frame_annotation_tracker_test.cc new file mode 100644 index 0000000..d155f8e --- /dev/null +++ b/mediapipe/modules/objectron/calculators/frame_annotation_tracker_test.cc @@ -0,0 +1,143 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/objectron/calculators/frame_annotation_tracker.h" + +#include "absl/container/flat_hash_set.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" + +namespace mediapipe { +namespace { + +// Create a new object annotation by shifting a reference +// object annotation. 
+ObjectAnnotation ShiftObject2d(const ObjectAnnotation& ref_obj, float dx, + float dy) { + ObjectAnnotation obj = ref_obj; + for (auto& keypoint : *(obj.mutable_keypoints())) { + const float ref_x = keypoint.point_2d().x(); + const float ref_y = keypoint.point_2d().y(); + keypoint.mutable_point_2d()->set_x(ref_x + dx); + keypoint.mutable_point_2d()->set_y(ref_y + dy); + } + return obj; +} + +TimedBoxProto ShiftBox(const TimedBoxProto& ref_box, float dx, float dy) { + TimedBoxProto box = ref_box; + box.set_top(ref_box.top() + dy); + box.set_bottom(ref_box.bottom() + dy); + box.set_left(ref_box.left() + dx); + box.set_right(ref_box.right() + dx); + return box; +} + +// Constructs a fixed ObjectAnnotation. +ObjectAnnotation ConstructFixedObject( + const std::vector>& points) { + ObjectAnnotation obj; + for (const auto& point : points) { + auto* keypoint = obj.add_keypoints(); + CHECK_EQ(2, point.size()); + keypoint->mutable_point_2d()->set_x(point[0]); + keypoint->mutable_point_2d()->set_y(point[1]); + } + return obj; +} + +TEST(FrameAnnotationTrackerTest, TestConsolidation) { + // Add 4 detections represented by FrameAnnotation, of which 3 correspond + // to the same object. + ObjectAnnotation object1, object2, object3, object4; + // The bounding rectangle for these object keypoints is: + // x: [0.2, 0.5], y: [0.1, 0.4] + object3 = ConstructFixedObject({{0.35f, 0.25f}, + {0.3f, 0.3f}, + {0.2f, 0.4f}, + {0.3f, 0.1f}, + {0.2f, 0.2f}, + {0.5f, 0.3f}, + {0.4f, 0.4f}, + {0.5f, 0.1f}, + {0.4f, 0.2f}}); + object3.set_object_id(3); + object1 = ShiftObject2d(object3, -0.05f, -0.05f); + object1.set_object_id(1); + object2 = ShiftObject2d(object3, 0.05f, 0.05f); + object2.set_object_id(2); + object4 = ShiftObject2d(object3, 0.2f, 0.2f); + object4.set_object_id(4); + FrameAnnotation frame_annotation_1; + frame_annotation_1.set_timestamp(30 * 1000); // 30ms + *(frame_annotation_1.add_annotations()) = object1; + *(frame_annotation_1.add_annotations()) = object4; + FrameAnnotation frame_annotation_2; + frame_annotation_2.set_timestamp(60 * 1000); // 60ms + *(frame_annotation_2.add_annotations()) = object2; + FrameAnnotation frame_annotation_3; + frame_annotation_3.set_timestamp(90 * 1000); // 90ms + *(frame_annotation_3.add_annotations()) = object3; + + FrameAnnotationTracker frame_annotation_tracker(/*iou_threshold*/ 0.5f, 1.0f, + 1.0f); + frame_annotation_tracker.AddDetectionResult(frame_annotation_1); + frame_annotation_tracker.AddDetectionResult(frame_annotation_2); + frame_annotation_tracker.AddDetectionResult(frame_annotation_3); + + TimedBoxProtoList timed_box_proto_list; + TimedBoxProto* timed_box_proto = timed_box_proto_list.add_box(); + timed_box_proto->set_top(0.4f); + timed_box_proto->set_bottom(0.7f); + timed_box_proto->set_left(0.6f); + timed_box_proto->set_right(0.9f); + timed_box_proto->set_id(3); + timed_box_proto->set_time_msec(150); + timed_box_proto = timed_box_proto_list.add_box(); + *timed_box_proto = ShiftBox(timed_box_proto_list.box(0), 0.01f, 0.01f); + timed_box_proto->set_id(1); + timed_box_proto->set_time_msec(150); + timed_box_proto = timed_box_proto_list.add_box(); + *timed_box_proto = ShiftBox(timed_box_proto_list.box(0), -0.01f, -0.01f); + timed_box_proto->set_id(2); + timed_box_proto->set_time_msec(150); + absl::flat_hash_set cancel_object_ids; + FrameAnnotation tracked_detection = + frame_annotation_tracker.ConsolidateTrackingResult(timed_box_proto_list, + &cancel_object_ids); + EXPECT_EQ(2, cancel_object_ids.size()); + EXPECT_EQ(1, cancel_object_ids.count(1)); + 
EXPECT_EQ(1, cancel_object_ids.count(2)); + EXPECT_EQ(1, tracked_detection.annotations_size()); + EXPECT_EQ(3, tracked_detection.annotations(0).object_id()); + EXPECT_EQ(object3.keypoints_size(), + tracked_detection.annotations(0).keypoints_size()); + const float x_offset = 0.4f; + const float y_offset = 0.3f; + const float tolerance = 1e-5f; + for (int i = 0; i < object3.keypoints_size(); ++i) { + const auto& point_2d = + tracked_detection.annotations(0).keypoints(i).point_2d(); + EXPECT_NEAR(point_2d.x(), object3.keypoints(i).point_2d().x() + x_offset, + tolerance); + EXPECT_NEAR(point_2d.y(), object3.keypoints(i).point_2d().y() + y_offset, + tolerance); + } +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/landmarks_to_frame_annotation_calculator.cc b/mediapipe/modules/objectron/calculators/landmarks_to_frame_annotation_calculator.cc new file mode 100644 index 0000000..60c4876 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/landmarks_to_frame_annotation_calculator.cc @@ -0,0 +1,112 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and + +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" + +namespace mediapipe { + +namespace { + +constexpr char kInputLandmarksTag[] = "LANDMARKS"; +constexpr char kInputMultiLandmarksTag[] = "MULTI_LANDMARKS"; +constexpr char kOutputFrameAnnotationTag[] = "FRAME_ANNOTATION"; + +} // namespace + +// A calculator that converts NormalizedLandmarkList to FrameAnnotation proto. 
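+//
+// Input (one of):
+//   LANDMARKS - A single NormalizedLandmarkList.
+//   MULTI_LANDMARKS - A std::vector<NormalizedLandmarkList>.
+// Output:
+//   FRAME_ANNOTATION - FrameAnnotation with one annotation per landmark list.
+//
+// Usage example (stream names here are illustrative):
+// node {
+//   calculator: "LandmarksToFrameAnnotationCalculator"
+//   input_stream: "MULTI_LANDMARKS:multi_box_landmarks"
+//   output_stream: "FRAME_ANNOTATION:box_annotations"
+// }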
+class LandmarksToFrameAnnotationCalculator : public CalculatorBase {
+ public:
+  static absl::Status GetContract(CalculatorContract* cc);
+  absl::Status Open(CalculatorContext* cc) override;
+  absl::Status Process(CalculatorContext* cc) override;
+
+ private:
+  void AddLandmarksToFrameAnnotation(const NormalizedLandmarkList& landmarks,
+                                     FrameAnnotation* frame_annotation);
+};
+REGISTER_CALCULATOR(LandmarksToFrameAnnotationCalculator);
+
+absl::Status LandmarksToFrameAnnotationCalculator::GetContract(
+    CalculatorContract* cc) {
+  RET_CHECK(!cc->Inputs().GetTags().empty());
+  RET_CHECK(!cc->Outputs().GetTags().empty());
+
+  if (cc->Inputs().HasTag(kInputLandmarksTag)) {
+    cc->Inputs().Tag(kInputLandmarksTag).Set<NormalizedLandmarkList>();
+  }
+  if (cc->Inputs().HasTag(kInputMultiLandmarksTag)) {
+    cc->Inputs()
+        .Tag(kInputMultiLandmarksTag)
+        .Set<std::vector<NormalizedLandmarkList>>();
+  }
+  if (cc->Outputs().HasTag(kOutputFrameAnnotationTag)) {
+    cc->Outputs().Tag(kOutputFrameAnnotationTag).Set<FrameAnnotation>();
+  }
+  return absl::OkStatus();
+}
+
+absl::Status LandmarksToFrameAnnotationCalculator::Open(CalculatorContext* cc) {
+  cc->SetOffset(TimestampDiff(0));
+  return absl::OkStatus();
+}
+
+absl::Status LandmarksToFrameAnnotationCalculator::Process(
+    CalculatorContext* cc) {
+  auto frame_annotation = absl::make_unique<FrameAnnotation>();
+
+  // Handle the case when the input has only one NormalizedLandmarkList.
+  if (cc->Inputs().HasTag(kInputLandmarksTag) &&
+      !cc->Inputs().Tag(kInputLandmarksTag).IsEmpty()) {
+    const auto& landmarks =
+        cc->Inputs().Tag(kInputLandmarksTag).Get<NormalizedLandmarkList>();
+    AddLandmarksToFrameAnnotation(landmarks, frame_annotation.get());
+  }
+
+  // Handle the case when the input has multiple NormalizedLandmarkLists.
+  if (cc->Inputs().HasTag(kInputMultiLandmarksTag) &&
+      !cc->Inputs().Tag(kInputMultiLandmarksTag).IsEmpty()) {
+    const auto& landmarks_list =
+        cc->Inputs()
+            .Tag(kInputMultiLandmarksTag)
+            .Get<std::vector<NormalizedLandmarkList>>();
+    for (const auto& landmarks : landmarks_list) {
+      AddLandmarksToFrameAnnotation(landmarks, frame_annotation.get());
+    }
+  }
+
+  // Output
+  if (cc->Outputs().HasTag(kOutputFrameAnnotationTag)) {
+    cc->Outputs()
+        .Tag(kOutputFrameAnnotationTag)
+        .Add(frame_annotation.release(), cc->InputTimestamp());
+  }
+  return absl::OkStatus();
+}
+
+void LandmarksToFrameAnnotationCalculator::AddLandmarksToFrameAnnotation(
+    const NormalizedLandmarkList& landmarks,
+    FrameAnnotation* frame_annotation) {
+  auto* new_annotation = frame_annotation->add_annotations();
+  for (const auto& landmark : landmarks.landmark()) {
+    auto* point2d = new_annotation->add_keypoints()->mutable_point_2d();
+    point2d->set_x(landmark.x());
+    point2d->set_y(landmark.y());
+  }
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.cc b/mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.cc
new file mode 100644
index 0000000..1405e5a
--- /dev/null
+++ b/mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.cc
@@ -0,0 +1,169 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "Eigen/Dense"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_format.h"
+#include "absl/types/span.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h"
+#include "mediapipe/modules/objectron/calculators/decoder.h"
+#include "mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.pb.h"
+#include "mediapipe/modules/objectron/calculators/tensor_util.h"
+
+namespace {
+constexpr char kInputStreamTag[] = "FRAME_ANNOTATION";
+constexpr char kOutputStreamTag[] = "LIFTED_FRAME_ANNOTATION";
+
+// Each detected object is assigned a unique id that starts from 1.
+static int object_id = 0;
+
+inline int GetNextObjectId() { return ++object_id; }
+}  // namespace
+
+namespace mediapipe {
+
+// Lifts the 2D points in a tracked frame annotation to 3D.
+//
+// Input:
+//   FRAME_ANNOTATION - Frame annotation with detected 2D points.
+// Output:
+//   LIFTED_FRAME_ANNOTATION - Result FrameAnnotation with lifted 3D points.
+//
+// Usage example:
+// node {
+//   calculator: "Lift2DFrameAnnotationTo3DCalculator"
+//   input_stream: "FRAME_ANNOTATION:tracked_annotations"
+//   output_stream: "LIFTED_FRAME_ANNOTATION:lifted_3d_annotations"
+// }
+class Lift2DFrameAnnotationTo3DCalculator : public CalculatorBase {
+ public:
+  static absl::Status GetContract(CalculatorContract* cc);
+
+  absl::Status Open(CalculatorContext* cc) override;
+  absl::Status Process(CalculatorContext* cc) override;
+  absl::Status Close(CalculatorContext* cc) override;
+
+ private:
+  absl::Status ProcessCPU(CalculatorContext* cc,
+                          FrameAnnotation* output_objects);
+  absl::Status LoadOptions(CalculatorContext* cc);
+
+  // Increment and assign object ID for each detected object.
+  // In a single MediaPipe session, the IDs are unique.
+  // Also assign timestamp for the FrameAnnotation to be the input packet
+  // timestamp.
+  void AssignObjectIdAndTimestamp(int64 timestamp_us,
+                                  FrameAnnotation* annotation);
+  std::unique_ptr<Decoder> decoder_;
+  Lift2DFrameAnnotationTo3DCalculatorOptions options_;
+  Eigen::Matrix<float, 4, 4, Eigen::RowMajor> projection_matrix_;
+};
+REGISTER_CALCULATOR(Lift2DFrameAnnotationTo3DCalculator);
+
+absl::Status Lift2DFrameAnnotationTo3DCalculator::GetContract(
+    CalculatorContract* cc) {
+  RET_CHECK(cc->Inputs().HasTag(kInputStreamTag));
+  RET_CHECK(cc->Outputs().HasTag(kOutputStreamTag));
+  cc->Inputs().Tag(kInputStreamTag).Set<FrameAnnotation>();
+  cc->Outputs().Tag(kOutputStreamTag).Set<FrameAnnotation>();
+
+  return absl::OkStatus();
+}
+
+absl::Status Lift2DFrameAnnotationTo3DCalculator::Open(CalculatorContext* cc) {
+  cc->SetOffset(TimestampDiff(0));
+  MP_RETURN_IF_ERROR(LoadOptions(cc));
+  // Load camera intrinsic matrix.
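+  // For reference, a minimal sketch of how this matrix is consumed downstream
+  // (it mirrors the non-portrait branch of Project3DTo2D in the
+  // tensors-to-objects calculators; x, y, z are illustrative camera-space
+  // coordinates):
+  //   Eigen::Vector4f q = projection_matrix_ * Eigen::Vector4f(x, y, z, 1.0f);
+  //   float u = (q(0) / q(3) + 1.0f) * 0.5f;  // normalized image x in [0, 1]
+  //   float v = (1.0f - q(1) / q(3)) * 0.5f;  // normalized image y in [0, 1]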
+ const float fx = options_.normalized_focal_x(); + const float fy = options_.normalized_focal_y(); + const float px = options_.normalized_principal_point_x(); + const float py = options_.normalized_principal_point_y(); + // clang-format off + projection_matrix_ << fx, 0., px, 0., + 0., fy, py, 0., + 0., 0., -1., 0., + 0., 0., -1., 0.; + // clang-format on + decoder_ = absl::make_unique( + BeliefDecoderConfig(options_.decoder_config())); + return absl::OkStatus(); +} + +absl::Status Lift2DFrameAnnotationTo3DCalculator::Process( + CalculatorContext* cc) { + if (cc->Inputs().Tag(kInputStreamTag).IsEmpty()) { + return absl::OkStatus(); + } + + auto output_objects = absl::make_unique(); + + MP_RETURN_IF_ERROR(ProcessCPU(cc, output_objects.get())); + + // Output + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs() + .Tag(kOutputStreamTag) + .Add(output_objects.release(), cc->InputTimestamp()); + } + + return absl::OkStatus(); +} + +absl::Status Lift2DFrameAnnotationTo3DCalculator::ProcessCPU( + CalculatorContext* cc, FrameAnnotation* output_objects) { + const auto& input_frame_annotations = + cc->Inputs().Tag(kInputStreamTag).Get(); + // Copy the input frame annotation to the output + *output_objects = input_frame_annotations; + + auto status = decoder_->Lift2DTo3D(projection_matrix_, /*portrait*/ false, + output_objects); + if (!status.ok()) { + LOG(ERROR) << status; + return status; + } + AssignObjectIdAndTimestamp(cc->InputTimestamp().Microseconds(), + output_objects); + + return absl::OkStatus(); +} + +absl::Status Lift2DFrameAnnotationTo3DCalculator::Close(CalculatorContext* cc) { + return absl::OkStatus(); +} + +absl::Status Lift2DFrameAnnotationTo3DCalculator::LoadOptions( + CalculatorContext* cc) { + // Get calculator options specified in the graph. + options_ = cc->Options(); + + return absl::OkStatus(); +} + +void Lift2DFrameAnnotationTo3DCalculator::AssignObjectIdAndTimestamp( + int64 timestamp_us, FrameAnnotation* annotation) { + for (auto& ann : *annotation->mutable_annotations()) { + ann.set_object_id(GetNextObjectId()); + } + annotation->set_timestamp(timestamp_us); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.proto b/mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.proto new file mode 100644 index 0000000..a3005c1 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator.proto @@ -0,0 +1,42 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The option proto for the Lift2DFrameAnnotationTo3DCalculatorOptions. 
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/modules/objectron/calculators/belief_decoder_config.proto"; + +message Lift2DFrameAnnotationTo3DCalculatorOptions { + extend CalculatorOptions { + optional Lift2DFrameAnnotationTo3DCalculatorOptions ext = 290166284; + } + + optional BeliefDecoderConfig decoder_config = 1; + + // Camera focal length along x, normalized by width/2. + optional float normalized_focal_x = 2 [default = 1.0]; + + // Camera focal length along y, normalized by height/2. + optional float normalized_focal_y = 3 [default = 1.0]; + + // Camera principle point x, normalized by width/2, origin is image center. + optional float normalized_principal_point_x = 4 [default = 0.0]; + + // Camera principle point y, normalized by height/2, origin is image center. + optional float normalized_principal_point_y = 5 [default = 0.0]; +} diff --git a/mediapipe/modules/objectron/calculators/model.cc b/mediapipe/modules/objectron/calculators/model.cc new file mode 100644 index 0000000..40aca39 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/model.cc @@ -0,0 +1,101 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/objectron/calculators/model.h" + +#include "mediapipe/framework/port/logging.h" + +namespace mediapipe { + +void Model::SetTransformation(const Eigen::Matrix4f& transform) { + transformation_ = transform; +} + +void Model::SetTranslation(const Eigen::Vector3f& translation) { + transformation_.col(3).template head<3>() = translation; +} + +void Model::SetRotation(float roll, float pitch, float yaw) { + // In our coordinate system, Y is up. We first rotate the object around Y + // (yaw), then around Z (pitch), and finally around X (roll). 
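+  // That is, the composed matrix is R = R_y(yaw) * R_z(pitch) * R_x(roll);
+  // applied to a point p, the roll rotation acts first:
+  // R * p = R_y * (R_z * (R_x * p)).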
+ Eigen::Matrix3f r; + r = Eigen::AngleAxisf(yaw, Eigen::Vector3f::UnitY()) * + Eigen::AngleAxisf(pitch, Eigen::Vector3f::UnitZ()) * + Eigen::AngleAxisf(roll, Eigen::Vector3f::UnitX()); + transformation_.topLeftCorner<3, 3>() = r; +} + +void Model::SetRotation(const Eigen::Matrix3f& rotation) { + transformation_.topLeftCorner<3, 3>() = rotation; +} + +void Model::SetScale(const Eigen::Vector3f& scale) { scale_ = scale; } + +void Model::SetCategory(const std::string& category) { category_ = category; } + +const Eigen::Vector3f Model::GetRotationAngles() const { + Vector3f ypr = transformation_.topLeftCorner<3, 3>().eulerAngles(1, 2, 0); + return Vector3f(ypr(2), ypr(1), ypr(0)); // swap YPR with RPY +} + +const Eigen::Matrix4f& Model::GetTransformation() const { + return transformation_; +} + +const Eigen::Vector3f& Model::GetScale() const { return scale_; } + +const Eigen::Ref Model::GetTranslation() const { + return transformation_.col(3).template head<3>(); +} + +const Eigen::Ref Model::GetRotation() const { + return transformation_.template topLeftCorner<3, 3>(); +} + +const std::string& Model::GetCategory() const { return category_; } + +void Model::Deserialize(const Object& obj) { + CHECK_EQ(obj.rotation_size(), 9); + CHECK_EQ(obj.translation_size(), 3); + CHECK_EQ(obj.scale_size(), 3); + category_ = obj.category(); + + using RotationMatrix = Eigen::Matrix; + transformation_.setIdentity(); + transformation_.topLeftCorner<3, 3>() = + Eigen::Map(obj.rotation().data()); + transformation_.col(3).head<3>() = + Eigen::Map(obj.translation().data()); + scale_ = Eigen::Map(obj.scale().data()); + Update(); +} + +void Model::Serialize(Object* obj) { + obj->set_category(category_); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + obj->add_rotation(transformation_(i, j)); + } + } + + for (int i = 0; i < 3; ++i) { + obj->add_translation(transformation_(i, 3)); + } + + for (int i = 0; i < 3; ++i) { + obj->add_scale(scale_[i]); + } +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/model.h b/mediapipe/modules/objectron/calculators/model.h new file mode 100644 index 0000000..72b5eb2 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/model.h @@ -0,0 +1,92 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_MODEL_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_MODEL_H_ + +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/object.pb.h" +#include "mediapipe/modules/objectron/calculators/types.h" + +namespace mediapipe { + +class Model { + public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + enum Type { + kVisualizationOnly = 0, + kBoundingBox, + kSkeleton, + kShape, // Shape is a virtual object. 
+ kNumModes, + }; + + virtual ~Model() = default; + + virtual void SetTransformation(const Eigen::Matrix4f& transform); + virtual void SetTranslation(const Eigen::Vector3f& translation); + + // Compute the rotation matrix from these angles and update the transformation + // matrix accordingly + virtual void SetRotation(float roll, float pitch, float yaw); + virtual void SetRotation(const Eigen::Matrix3f& rotation); + virtual void SetScale(const Eigen::Vector3f& scale); + virtual void SetCategory(const std::string& category); + virtual size_t GetNumberKeypoints() const { return number_keypoints_; } + + // Gets Euler angles in the order of roll, pitch, yaw. + virtual const Eigen::Vector3f GetRotationAngles() const; + virtual const Eigen::Matrix4f& GetTransformation() const; + virtual const Eigen::Vector3f& GetScale() const; + virtual const Eigen::Ref GetTranslation() const; + virtual const Eigen::Ref GetRotation() const; + virtual const std::string& GetCategory() const; + + // Update the model's keypoints in the world-coordinate system. + // The update includes transforming the model to the world-coordinate system + // as well as scaling the model. + // The user is expected to call this function after Setting the rotation, + // orientation or the scale of the model to get an updated model. + virtual void Update() = 0; + + // Update the model's parameters (orientation, position, and scale) from the + // user-provided variables. + virtual void Adjust(const std::vector& variables) = 0; + + // Returns a pointer to the model's keypoints. + // Use Eigen::Map to cast the pointer back to Vector3 or Vector4 + virtual const float* GetVertex(size_t id) const = 0; + virtual float* GetVertex(size_t id) = 0; + virtual void Deserialize(const Object& obj); + virtual void Serialize(Object* obj); + + // TODO: make member variables protected, and add public apis. + // 4x4 transformation matrix mapping the first keypoint to world coordinate + Eigen::Matrix4f transformation_; + Eigen::Vector3f scale_; // width, height, depth + Type model_type_; + size_t number_keypoints_; + std::string category_; + + protected: + Model(Type type, size_t number_keypoints, const std::string& category) + : model_type_(type), + number_keypoints_(number_keypoints), + category_(category) {} +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_MODEL_H_ diff --git a/mediapipe/modules/objectron/calculators/object.proto b/mediapipe/modules/objectron/calculators/object.proto new file mode 100644 index 0000000..a07e83f --- /dev/null +++ b/mediapipe/modules/objectron/calculators/object.proto @@ -0,0 +1,124 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package mediapipe; + +message KeyPoint { + // The position of the keypoint in the local coordinate system of the rigid + // object. + float x = 1; + float y = 2; + float z = 3; + + // Sphere around the keypoint, indiciating annotator's confidence of the + // position in meters. 
+ float confidence_radius = 4; + + // The name of the keypoint (e.g. legs, head, etc.). + // Does not have to be unique. + string name = 5; + + // Indicates whether the keypoint is hidden or not. + bool hidden = 6; +} + +message Object { + // Unique object id through a sequence. There might be multiple objects of + // the same label in this sequence. + int32 id = 1; + + // Describes what category an object is. E.g. object class, attribute, + // instance or person identity. This provides additional context for the + // object type. + string category = 2; + + enum Type { + UNDEFINED_TYPE = 0; + BOUNDING_BOX = 1; + SKELETON = 2; + } + + Type type = 3; + + // 3x3 row-major rotation matrix describing the orientation of the rigid + // object's frame of reference in the world-coordinate system. + repeated float rotation = 4; + + // 3x1 vector describing the translation of the rigid object's frame of + // reference in the world-coordinate system in meters. + repeated float translation = 5; + + // 3x1 vector describing the scale of the rigid object's frame of reference in + // the world-coordinate system in meters. + repeated float scale = 6; + + // List of all the key points associated with this object in the object + // coordinate system. + // The first keypoint is always the object's frame of reference, + // e.g. the centroid of the box. + // E.g. bounding box with its center as frame of reference, the 9 keypoints : + // {0., 0., 0.}, + // {-.5, -.5, -.5}, {-.5, -.5, +.5}, {-.5, +.5, -.5}, {-.5, +.5, +.5}, + // {+.5, -.5, -.5}, {+.5, -.5, +.5}, {+.5, +.5, -.5}, {+.5, +.5, +.5} + // To get the bounding box in the world-coordinate system, we first scale the + // box then transform the scaled box. + // For example, bounding box in the world coordinate system is + // rotation * scale * keypoints + translation + repeated KeyPoint keypoints = 7; + + // Enum to reflect how this object is created. + enum Method { + UNKNOWN_METHOD = 0; + ANNOTATION = 1; // Created by data annotation. + AUGMENTATION = 2; // Created by data augmentation. + } + Method method = 8; +} + +// The edge connecting two keypoints together +message Edge { + // keypoint id of the edge's source + int32 source = 1; + + // keypoint id of the edge's sink + int32 sink = 2; +} + +// The skeleton template for different objects (e.g. humans, chairs, hands, etc) +// The annotation tool reads the skeleton template dictionary. +message Skeleton { + // The origin keypoint in the object coordinate system. (i.e. Point 0, 0, 0) + int32 reference_keypoint = 1; + + // The skeleton's category (e.g. human, chair, hand.). Should be unique in the + // dictionary. + string category = 2; + + // Initialization value for all the keypoints in the skeleton in the object's + // local coordinate system. Pursuit will transform these points using object's + // transformation to get the keypoint in the world-cooridnate. + repeated KeyPoint keypoints = 3; + + // List of edges connecting keypoints + repeated Edge edges = 4; +} + +// The list of all the modeled skeletons in our library. These models can be +// objects (chairs, desks, etc), humans (full pose, hands, faces, etc), or box. +// We can have multiple skeletons in the same file. 
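+// Worked example of the transform described for Object.keypoints above, i.e.
+// world_point = rotation * (scale * keypoint) + translation with the scale
+// applied element-wise: with scale (0.2, 0.1, 0.3), identity rotation and
+// translation (0, 0, -1), the corner {+.5, -.5, +.5} maps to
+// (0.1, -0.05, -0.85). (Illustrative values only.)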
+message Skeletons { + repeated Skeleton object = 1; +} diff --git a/mediapipe/modules/objectron/calculators/tensor_util.cc b/mediapipe/modules/objectron/calculators/tensor_util.cc new file mode 100644 index 0000000..0004edd --- /dev/null +++ b/mediapipe/modules/objectron/calculators/tensor_util.cc @@ -0,0 +1,48 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/modules/objectron/calculators/tensor_util.h" + +#include "mediapipe/framework/port/logging.h" + +namespace mediapipe { + +cv::Mat ConvertTfliteTensorToCvMat(const TfLiteTensor& tensor) { + // Check tensor is BxCxWxH (size = 4) and the batch size is one(data[0] = 1) + CHECK(tensor.dims->size == 4 && tensor.dims->data[0] == 1); + CHECK_EQ(kTfLiteFloat32, tensor.type) << "tflite_tensor type is not float"; + + const size_t num_output_channels = tensor.dims->data[3]; + const int dims = 2; + const int sizes[] = {tensor.dims->data[1], tensor.dims->data[2]}; + const int type = CV_MAKETYPE(CV_32F, num_output_channels); + return cv::Mat(dims, sizes, type, reinterpret_cast(tensor.data.f)); +} + +cv::Mat ConvertTensorToCvMat(const mediapipe::Tensor& tensor) { + // Check tensor is BxCxWxH (size = 4) and the batch size is one(data[0] = 1) + CHECK(tensor.shape().dims.size() == 4 && tensor.shape().dims[0] == 1); + CHECK_EQ(mediapipe::Tensor::ElementType::kFloat32 == tensor.element_type(), + true) + << "tensor type is not float"; + + const size_t num_output_channels = tensor.shape().dims[3]; + const int dims = 2; + const int sizes[] = {tensor.shape().dims[1], tensor.shape().dims[2]}; + const int type = CV_MAKETYPE(CV_32F, num_output_channels); + auto cpu_view = tensor.GetCpuReadView(); + return cv::Mat(dims, sizes, type, const_cast(cpu_view.buffer())); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/tensor_util.h b/mediapipe/modules/objectron/calculators/tensor_util.h new file mode 100644 index 0000000..0b26209 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/tensor_util.h @@ -0,0 +1,31 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_TENSOR_UTIL_H_ +#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_TENSOR_UTIL_H_ + +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "tensorflow/lite/interpreter.h" + +namespace mediapipe { + +// Converts a single channel tflite tensor to a grayscale image +cv::Mat ConvertTfliteTensorToCvMat(const TfLiteTensor& tensor); + +// Converts a single channel tensor to grayscale image +cv::Mat ConvertTensorToCvMat(const mediapipe::Tensor& tensor); +} // namespace mediapipe + +#endif // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_TENSOR_UTIL_H_ diff --git a/mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.cc b/mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.cc new file mode 100644 index 0000000..6989c34 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.cc @@ -0,0 +1,209 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "Eigen/Dense" +#include "absl/memory/memory.h" +#include "absl/strings/str_format.h" +#include "absl/types/span.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/belief_decoder_config.pb.h" +#include "mediapipe/modules/objectron/calculators/decoder.h" +#include "mediapipe/modules/objectron/calculators/tensor_util.h" +#include "mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.pb.h" + +namespace { +constexpr char kInputStreamTag[] = "TENSORS"; +constexpr char kOutputStreamTag[] = "ANNOTATIONS"; + +// Each detection object will be assigned an unique id that starts from 1. +static int object_id = 0; + +inline int GetNextObjectId() { return ++object_id; } +} // namespace + +namespace mediapipe { + +// Convert result Tensors from deep pursuit 3d model into FrameAnnotation. +// +// Input: +// TENSORS - Vector of Tensor of type kFloat32. +// Output: +// ANNOTATIONS - Result FrameAnnotation. 
+// +// Usage example: +// node { +// calculator: "TensorsToObjectsCalculator" +// input_stream: "TENSORS:tensors" +// output_stream: "ANNOTATIONS:annotations" +// } +class TensorsToObjectsCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc); + + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + absl::Status Close(CalculatorContext* cc) override; + + private: + absl::Status ProcessCPU(CalculatorContext* cc, + FrameAnnotation* output_objects); + absl::Status LoadOptions(CalculatorContext* cc); + // Takes point_3d in FrameAnnotation, projects to 2D, and overwrite the + // point_2d field with the projection. + void Project3DTo2D(bool portrait, FrameAnnotation* annotation) const; + // Increment and assign object ID for each detected object. + // In a single MediaPipe session, the IDs are unique. + // Also assign timestamp for the FrameAnnotation to be the input packet + // timestamp. + void AssignObjectIdAndTimestamp(int64 timestamp_us, + FrameAnnotation* annotation); + + int num_classes_ = 0; + int num_keypoints_ = 0; + + ::mediapipe::TensorsToObjectsCalculatorOptions options_; + std::unique_ptr decoder_; + Eigen::Matrix projection_matrix_; +}; +REGISTER_CALCULATOR(TensorsToObjectsCalculator); + +absl::Status TensorsToObjectsCalculator::GetContract(CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + + if (cc->Inputs().HasTag(kInputStreamTag)) { + cc->Inputs().Tag(kInputStreamTag).Set>(); + } + + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs().Tag(kOutputStreamTag).Set(); + } + return absl::OkStatus(); +} + +absl::Status TensorsToObjectsCalculator::Open(CalculatorContext* cc) { + MP_RETURN_IF_ERROR(LoadOptions(cc)); + // clang-format off + projection_matrix_ << + 1.5731, 0, 0, 0, + 0, 2.0975, 0, 0, + 0, 0, -1.0002, -0.2, + 0, 0, -1, 0; + // clang-format on + decoder_ = absl::make_unique( + BeliefDecoderConfig(options_.decoder_config())); + + return absl::OkStatus(); +} + +absl::Status TensorsToObjectsCalculator::Process(CalculatorContext* cc) { + if (cc->Inputs().Tag(kInputStreamTag).IsEmpty()) { + return absl::OkStatus(); + } + + auto output_objects = absl::make_unique(); + + MP_RETURN_IF_ERROR(ProcessCPU(cc, output_objects.get())); + + // Output + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs() + .Tag(kOutputStreamTag) + .Add(output_objects.release(), cc->InputTimestamp()); + } + + return absl::OkStatus(); +} + +absl::Status TensorsToObjectsCalculator::ProcessCPU( + CalculatorContext* cc, FrameAnnotation* output_objects) { + const auto& input_tensors = + cc->Inputs().Tag(kInputStreamTag).Get>(); + + cv::Mat prediction_heatmap = ConvertTensorToCvMat(input_tensors[0]); + cv::Mat offsetmap = ConvertTensorToCvMat(input_tensors[1]); + + *output_objects = + decoder_->DecodeBoundingBoxKeypoints(prediction_heatmap, offsetmap); + auto status = decoder_->Lift2DTo3D(projection_matrix_, /*portrait*/ true, + output_objects); + if (!status.ok()) { + LOG(ERROR) << status; + return status; + } + Project3DTo2D(/*portrait*/ true, output_objects); + AssignObjectIdAndTimestamp(cc->InputTimestamp().Microseconds(), + output_objects); + + return absl::OkStatus(); +} + +absl::Status TensorsToObjectsCalculator::Close(CalculatorContext* cc) { + return absl::OkStatus(); +} + +absl::Status TensorsToObjectsCalculator::LoadOptions(CalculatorContext* cc) { + // Get calculator options specified in the graph. 
+ options_ = cc->Options<::mediapipe::TensorsToObjectsCalculatorOptions>(); + + num_classes_ = options_.num_classes(); + num_keypoints_ = options_.num_keypoints(); + + // Currently only support 2D when num_values_per_keypoint equals to 2. + CHECK_EQ(options_.num_values_per_keypoint(), 2); + + return absl::OkStatus(); +} + +void TensorsToObjectsCalculator::Project3DTo2D( + bool portrait, FrameAnnotation* annotation) const { + for (auto& ann : *annotation->mutable_annotations()) { + for (auto& key_point : *ann.mutable_keypoints()) { + Eigen::Vector4f point3d; + point3d << key_point.point_3d().x(), key_point.point_3d().y(), + key_point.point_3d().z(), 1.0f; + Eigen::Vector4f point3d_projection = projection_matrix_ * point3d; + float u, v; + const float inv_w = 1.0f / point3d_projection(3); + if (portrait) { + u = (point3d_projection(1) * inv_w + 1.0f) * 0.5f; + v = (point3d_projection(0) * inv_w + 1.0f) * 0.5f; + } else { + u = (point3d_projection(0) * inv_w + 1.0f) * 0.5f; + v = (1.0f - point3d_projection(1) * inv_w) * 0.5f; + } + key_point.mutable_point_2d()->set_x(u); + key_point.mutable_point_2d()->set_y(v); + } + } +} + +void TensorsToObjectsCalculator::AssignObjectIdAndTimestamp( + int64 timestamp_us, FrameAnnotation* annotation) { + for (auto& ann : *annotation->mutable_annotations()) { + ann.set_object_id(GetNextObjectId()); + } + annotation->set_timestamp(timestamp_us); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.proto b/mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.proto new file mode 100644 index 0000000..8d46fce --- /dev/null +++ b/mediapipe/modules/objectron/calculators/tensors_to_objects_calculator.proto @@ -0,0 +1,39 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The option proto for the TensorsToObjectsCalculatorOptions. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/modules/objectron/calculators/belief_decoder_config.proto"; + +message TensorsToObjectsCalculatorOptions { + extend CalculatorOptions { + optional TensorsToObjectsCalculatorOptions ext = 334361940; + } + + // The number of output classes predicted by the detection model. + optional int32 num_classes = 1; + + // The number of predicted keypoints. + optional int32 num_keypoints = 2; + // The dimension of each keypoint, e.g. number of values predicted for each + // keypoint. + optional int32 num_values_per_keypoint = 3 [default = 2]; + + optional BeliefDecoderConfig decoder_config = 4; +} diff --git a/mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.cc b/mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.cc new file mode 100644 index 0000000..e3686f6 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.cc @@ -0,0 +1,217 @@ +// Copyright 2020 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "Eigen/Dense" +#include "absl/memory/memory.h" +#include "absl/strings/str_format.h" +#include "absl/types/span.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/modules/objectron/calculators/annotation_data.pb.h" +#include "mediapipe/modules/objectron/calculators/belief_decoder_config.pb.h" +#include "mediapipe/modules/objectron/calculators/decoder.h" +#include "mediapipe/modules/objectron/calculators/tensor_util.h" +#include "mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.pb.h" +#include "tensorflow/lite/interpreter.h" + +namespace { +constexpr char kInputStreamTag[] = "TENSORS"; +constexpr char kOutputStreamTag[] = "ANNOTATIONS"; + +// Each detection object will be assigned an unique id that starts from 1. +static int object_id = 0; + +inline int GetNextObjectId() { return ++object_id; } +} // namespace + +namespace mediapipe { + +// Convert result TFLite tensors from deep pursuit 3d model into +// FrameAnnotation. +// +// Input: +// TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. +// Output: +// ANNOTATIONS - Result FrameAnnotation. +// +// Usage example: +// node { +// calculator: "TfLiteTensorsToObjectsCalculator" +// input_stream: "TENSORS:tensors" +// output_stream: "ANNOTATIONS:annotations" +// } +class TfLiteTensorsToObjectsCalculator : public CalculatorBase { + public: + static absl::Status GetContract(CalculatorContract* cc); + + absl::Status Open(CalculatorContext* cc) override; + absl::Status Process(CalculatorContext* cc) override; + absl::Status Close(CalculatorContext* cc) override; + + private: + absl::Status ProcessCPU(CalculatorContext* cc, + FrameAnnotation* output_objects); + absl::Status LoadOptions(CalculatorContext* cc); + // Takes point_3d in FrameAnnotation, projects to 2D, and overwrite the + // point_2d field with the projection. + void Project3DTo2D(bool portrait, FrameAnnotation* annotation) const; + // Increment and assign object ID for each detected object. + // In a single MediaPipe session, the IDs are unique. + // Also assign timestamp for the FrameAnnotation to be the input packet + // timestamp. 
+ void AssignObjectIdAndTimestamp(int64 timestamp_us, + FrameAnnotation* annotation); + + int num_classes_ = 0; + int num_keypoints_ = 0; + + ::mediapipe::TfLiteTensorsToObjectsCalculatorOptions options_; + std::unique_ptr decoder_; + Eigen::Matrix projection_matrix_; +}; +REGISTER_CALCULATOR(TfLiteTensorsToObjectsCalculator); + +absl::Status TfLiteTensorsToObjectsCalculator::GetContract( + CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + + if (cc->Inputs().HasTag(kInputStreamTag)) { + cc->Inputs().Tag(kInputStreamTag).Set>(); + } + + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs().Tag(kOutputStreamTag).Set(); + } + return absl::OkStatus(); +} + +absl::Status TfLiteTensorsToObjectsCalculator::Open(CalculatorContext* cc) { + MP_RETURN_IF_ERROR(LoadOptions(cc)); + // Load camera intrinsic matrix. + const float fx = options_.normalized_focal_x(); + const float fy = options_.normalized_focal_y(); + const float px = options_.normalized_principal_point_x(); + const float py = options_.normalized_principal_point_y(); + // clang-format off + projection_matrix_ << fx, 0., px, 0., + 0., fy, py, 0., + 0., 0., -1., 0., + 0., 0., -1., 0.; + // clang-format on + decoder_ = absl::make_unique( + BeliefDecoderConfig(options_.decoder_config())); + + return absl::OkStatus(); +} + +absl::Status TfLiteTensorsToObjectsCalculator::Process(CalculatorContext* cc) { + if (cc->Inputs().Tag(kInputStreamTag).IsEmpty()) { + return absl::OkStatus(); + } + + auto output_objects = absl::make_unique(); + + MP_RETURN_IF_ERROR(ProcessCPU(cc, output_objects.get())); + + // Output + if (cc->Outputs().HasTag(kOutputStreamTag)) { + cc->Outputs() + .Tag(kOutputStreamTag) + .Add(output_objects.release(), cc->InputTimestamp()); + } + + return absl::OkStatus(); +} + +absl::Status TfLiteTensorsToObjectsCalculator::ProcessCPU( + CalculatorContext* cc, FrameAnnotation* output_objects) { + const auto& input_tensors = + cc->Inputs().Tag(kInputStreamTag).Get>(); + + cv::Mat prediction_heatmap = ConvertTfliteTensorToCvMat(input_tensors[0]); + cv::Mat offsetmap = ConvertTfliteTensorToCvMat(input_tensors[1]); + + *output_objects = + decoder_->DecodeBoundingBoxKeypoints(prediction_heatmap, offsetmap); + auto status = decoder_->Lift2DTo3D(projection_matrix_, /*portrait*/ true, + output_objects); + if (!status.ok()) { + LOG(ERROR) << status; + return status; + } + Project3DTo2D(/*portrait*/ true, output_objects); + AssignObjectIdAndTimestamp(cc->InputTimestamp().Microseconds(), + output_objects); + + return absl::OkStatus(); +} + +absl::Status TfLiteTensorsToObjectsCalculator::Close(CalculatorContext* cc) { + return absl::OkStatus(); +} + +absl::Status TfLiteTensorsToObjectsCalculator::LoadOptions( + CalculatorContext* cc) { + // Get calculator options specified in the graph. + options_ = + cc->Options<::mediapipe::TfLiteTensorsToObjectsCalculatorOptions>(); + + num_classes_ = options_.num_classes(); + num_keypoints_ = options_.num_keypoints(); + + // Currently only support 2D when num_values_per_keypoint equals to 2. 
+ CHECK_EQ(options_.num_values_per_keypoint(), 2); + + return absl::OkStatus(); +} + +void TfLiteTensorsToObjectsCalculator::Project3DTo2D( + bool portrait, FrameAnnotation* annotation) const { + for (auto& ann : *annotation->mutable_annotations()) { + for (auto& key_point : *ann.mutable_keypoints()) { + Eigen::Vector4f point3d; + point3d << key_point.point_3d().x(), key_point.point_3d().y(), + key_point.point_3d().z(), 1.0f; + Eigen::Vector4f point3d_projection = projection_matrix_ * point3d; + float u, v; + const float inv_w = 1.0f / point3d_projection(3); + if (portrait) { + u = (point3d_projection(1) * inv_w + 1.0f) * 0.5f; + v = (point3d_projection(0) * inv_w + 1.0f) * 0.5f; + } else { + u = (point3d_projection(0) * inv_w + 1.0f) * 0.5f; + v = (1.0f - point3d_projection(1) * inv_w) * 0.5f; + } + key_point.mutable_point_2d()->set_x(u); + key_point.mutable_point_2d()->set_y(v); + } + } +} + +void TfLiteTensorsToObjectsCalculator::AssignObjectIdAndTimestamp( + int64 timestamp_us, FrameAnnotation* annotation) { + for (auto& ann : *annotation->mutable_annotations()) { + ann.set_object_id(GetNextObjectId()); + } + annotation->set_timestamp(timestamp_us); +} + +} // namespace mediapipe diff --git a/mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.proto b/mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.proto new file mode 100644 index 0000000..32520d9 --- /dev/null +++ b/mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator.proto @@ -0,0 +1,51 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The option proto for the TfLiteTensorsToObjectsCalculatorOptions. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/modules/objectron/calculators/belief_decoder_config.proto"; + +message TfLiteTensorsToObjectsCalculatorOptions { + extend CalculatorOptions { + optional TfLiteTensorsToObjectsCalculatorOptions ext = 263667646; + } + + // The number of output classes predicted by the detection model. + optional int32 num_classes = 1; + + // The number of predicted keypoints. + optional int32 num_keypoints = 2; + // The dimension of each keypoint, e.g. number of values predicted for each + // keypoint. + optional int32 num_values_per_keypoint = 3 [default = 2]; + + optional BeliefDecoderConfig decoder_config = 4; + + // Camera focal length along x, normalized by width/2. + optional float normalized_focal_x = 5 [default = 1.0]; + + // Camera focal length along y, normalized by height/2. + optional float normalized_focal_y = 6 [default = 1.0]; + + // Camera principle point x, normalized by width/2, origin is image center. + optional float normalized_principal_point_x = 7 [default = 0.0]; + + // Camera principle point y, normalized by height/2, origin is image center. 
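+  // Worked example with illustrative camera values: for a 640x480 image with
+  // focal length 500px and principal point (322, 241), normalized_focal_x =
+  // 500 / 320 = 1.5625, normalized_focal_y = 500 / 240 ~= 2.083,
+  // normalized_principal_point_x = (322 - 320) / 320 = 0.00625, and
+  // normalized_principal_point_y = (241 - 240) / 240 ~= 0.00417.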
+  optional float normalized_principal_point_y = 8 [default = 0.0];
+}
diff --git a/mediapipe/modules/objectron/calculators/types.h b/mediapipe/modules/objectron/calculators/types.h
new file mode 100644
index 0000000..dcc477d
--- /dev/null
+++ b/mediapipe/modules/objectron/calculators/types.h
@@ -0,0 +1,56 @@
+// Copyright 2020 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_TYPES_H_
+#define MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_TYPES_H_
+
+#include <array>
+
+#include "Eigen/Geometry"
+
+namespace mediapipe {
+
+using Eigen::Map;
+using Eigen::Vector2f;
+using Eigen::Vector3f;
+using Eigen::Vector4f;
+using Matrix4f_RM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
+using Matrix3f_RM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
+
+using Face = std::array<int, 3>;
+
+struct SuperPoint {
+  enum PointSourceType { kPointCloud = 0, kBoundingBox = 1, kSkeleton = 2 };
+  // The id of the point in the point cloud.
+  int reference_point;
+  // The source of the point.
+  PointSourceType source;
+  // The id of the point in the set of points in the current frame.
+  int id;
+  // If source is kBoundingBox or kSkeleton, object_id stores the id of the
+  // object this point belongs to.
+  int object_id;
+  // Projected u-v value.
+  Vector2f uv;
+  Vector2f pixel;
+  // The 3D point.
+  Vector3f point_3d;
+  // Color.
+  Eigen::Matrix<uint8_t, 3, 1> color;
+  bool rendered;
+};
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_MODULES_OBJECTRON_CALCULATORS_TYPES_H_
diff --git a/mediapipe/modules/objectron/object_detection_3d_camera.tflite b/mediapipe/modules/objectron/object_detection_3d_camera.tflite
new file mode 100644
index 0000000..14cb826
Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_3d_camera.tflite differ
diff --git a/mediapipe/modules/objectron/object_detection_3d_chair.tflite b/mediapipe/modules/objectron/object_detection_3d_chair.tflite
new file mode 100644
index 0000000..3a23dfd
Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_3d_chair.tflite differ
diff --git a/mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite b/mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite
new file mode 100644
index 0000000..718dc97
Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_3d_chair_1stage.tflite differ
diff --git a/mediapipe/modules/objectron/object_detection_3d_cup.tflite b/mediapipe/modules/objectron/object_detection_3d_cup.tflite
new file mode 100644
index 0000000..1a7a5d3
Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_3d_cup.tflite differ
diff --git a/mediapipe/modules/objectron/object_detection_3d_sneakers.tflite b/mediapipe/modules/objectron/object_detection_3d_sneakers.tflite
new file mode 100644
index 0000000..d64234d
Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_3d_sneakers.tflite differ
diff --git a/mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite b/mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite
new file mode 100644
index
0000000..2077114 Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_3d_sneakers_1stage.tflite differ diff --git a/mediapipe/modules/objectron/object_detection_oid_v4_cpu.pbtxt b/mediapipe/modules/objectron/object_detection_oid_v4_cpu.pbtxt new file mode 100644 index 0000000..f7a09fc --- /dev/null +++ b/mediapipe/modules/objectron/object_detection_oid_v4_cpu.pbtxt @@ -0,0 +1,134 @@ +# MediaPipe Objectron object detection CPU subgraph. + +type: "ObjectDetectionOidV4Subgraph" + +input_stream: "IMAGE:input_video" +input_side_packet: "LABELS_CSV:allowed_labels" +output_stream: "DETECTIONS:detections" + +# Crops, resizes, and converts the input video into tensor. +# Preserves aspect ratio of the images. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:input_video" + output_stream: "TENSORS:image_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 300 + output_tensor_height: 300 + keep_aspect_ratio: false + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite" + delegate { xnnpack {} } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 6 + min_scale: 0.2 + max_scale: 0.95 + input_size_height: 300 + input_size_width: 300 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 16 + strides: 32 + strides: 64 + strides: 128 + strides: 256 + strides: 512 + aspect_ratios: 1.0 + aspect_ratios: 2.0 + aspect_ratios: 0.5 + aspect_ratios: 3.0 + aspect_ratios: 0.3333 + reduce_boxes_in_lowest_layer: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:all_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 24 + num_boxes: 1917 + num_coords: 4 + ignore_classes: 0 + sigmoid_score: true + apply_exponential_on_box_size: true + x_scale: 10.0 + y_scale: 10.0 + h_scale: 5.0 + w_scale: 5.0 + min_score_thresh: 0.5 + } + } +} + +# Maps detection label IDs to the corresponding label text. The label map is +# provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "all_detections" + output_stream: "labeled_detections" + options: { + [mediapipe.DetectionLabelIdToTextCalculatorOptions.ext] { + label_map_path: "mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt" + } + } +} + +# Filters the detections to only those with valid scores +# for the specified allowed labels. 
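+# The "allowed_labels" side packet is a comma-separated label string, e.g.
+# "Footwear,Chair,Mug,Coffee cup,Camera"; labels must match entries in
+# object_detection_oidv4_labelmap.txt. A hosting application would typically
+# supply it when starting the graph, e.g. (illustrative C++):
+#   side_packets["allowed_labels"] =
+#       mediapipe::MakePacket<std::string>("Chair,Mug,Coffee cup");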
+node { + calculator: "FilterDetectionCalculator" + input_stream: "DETECTIONS:labeled_detections" + output_stream: "DETECTIONS:filtered_detections" + input_side_packet: "LABELS_CSV:allowed_labels" +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "filtered_detections" + output_stream: "detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.5 + max_num_detections: 100 + overlap_type: INTERSECTION_OVER_UNION + return_empty_detections: true + } + } +} diff --git a/mediapipe/modules/objectron/object_detection_oid_v4_gpu.pbtxt b/mediapipe/modules/objectron/object_detection_oid_v4_gpu.pbtxt new file mode 100644 index 0000000..7873e80 --- /dev/null +++ b/mediapipe/modules/objectron/object_detection_oid_v4_gpu.pbtxt @@ -0,0 +1,136 @@ +# MediaPipe Objectron object detection GPU subgraph. + +type: "ObjectDetectionOidV4Subgraph" + +input_stream: "IMAGE_GPU:input_video" +input_side_packet: "LABELS_CSV:allowed_labels" +output_stream: "DETECTIONS:detections" + +# Crops, resizes, and converts the input video into tensor. +# Preserves aspect ratio of the images. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "TENSORS:image_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 300 + output_tensor_height: 300 + keep_aspect_ratio: false + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + gpu_origin: TOP_LEFT + } + } +} + + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite" + delegate { gpu {} } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 6 + min_scale: 0.2 + max_scale: 0.95 + input_size_height: 300 + input_size_width: 300 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 16 + strides: 32 + strides: 64 + strides: 128 + strides: 256 + strides: 512 + aspect_ratios: 1.0 + aspect_ratios: 2.0 + aspect_ratios: 0.5 + aspect_ratios: 3.0 + aspect_ratios: 0.3333 + reduce_boxes_in_lowest_layer: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:all_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 24 + num_boxes: 1917 + num_coords: 4 + ignore_classes: 0 + sigmoid_score: true + apply_exponential_on_box_size: true + x_scale: 10.0 + y_scale: 10.0 + h_scale: 5.0 + w_scale: 5.0 + min_score_thresh: 0.5 + } + } +} + +# Maps detection label IDs to the corresponding label text. 
The label map is +# provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "all_detections" + output_stream: "labeled_detections" + options: { + [mediapipe.DetectionLabelIdToTextCalculatorOptions.ext] { + label_map_path: "object_detection_oidv4_labelmap.txt" + } + } +} + +# Filters the detections to only those with valid scores +# for the specified allowed labels. +node { + calculator: "FilterDetectionCalculator" + input_stream: "DETECTIONS:labeled_detections" + output_stream: "DETECTIONS:filtered_detections" + input_side_packet: "LABELS_CSV:allowed_labels" +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "filtered_detections" + output_stream: "detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.5 + max_num_detections: 100 + overlap_type: INTERSECTION_OVER_UNION + return_empty_detections: true + } + } +} diff --git a/mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt b/mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt new file mode 100644 index 0000000..ef9032c --- /dev/null +++ b/mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt @@ -0,0 +1,24 @@ +??? +Bicycle +Boot +Laptop +Person +Chair +Cattle +Desk +Cat +Computer mouse +Computer monitor +Box +Mug +Coffee cup +Stationary bicycle +Table +Bottle +High heels +Vehicle +Footwear +Dog +Book +Camera +Car diff --git a/mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite b/mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite new file mode 100644 index 0000000..3cb7291 Binary files /dev/null and b/mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite differ diff --git a/mediapipe/modules/objectron/objectron_cpu.pbtxt b/mediapipe/modules/objectron/objectron_cpu.pbtxt new file mode 100644 index 0000000..884da05 --- /dev/null +++ b/mediapipe/modules/objectron/objectron_cpu.pbtxt @@ -0,0 +1,224 @@ +# MediaPipe Objectron on CPU that produces 3D bounding boxes for objects. +type: "ObjectronCpuSubgraph" + +# Input/Output streams and input side packets. +input_stream: "IMAGE:image" +# Path to TfLite model for 3D bounding box landmark prediction +input_side_packet: "MODEL_PATH:box_landmark_model_path" +# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera +input_side_packet: "LABELS_CSV:allowed_labels" +# Max number of objects to detect/track. (int) +input_side_packet: "MAX_NUM_OBJECTS:max_num_objects" +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# Bounding box landmarks topology definition. +# The numbers are indices in the box_landmarks list. +# +# 3 + + + + + + + + 7 +# +\ +\ UP +# + \ + \ +# + \ + \ | +# + 4 + + + + + + + + 8 | y +# + + + + | +# + + + + | +# + + (0) + + .------- x +# + + + + \ +# 1 + + + + + + + + 5 + \ +# \ + \ + \ z +# \ + \ + \ +# \+ \+ +# 2 + + + + + + + + 6 + +# Collection of detected 3D objects, represented as a FrameAnnotation. +output_stream: "FRAME_ANNOTATION:detected_objects" +# Collection of box landmarks. (NormalizedLandmarkList) +output_stream: "MULTI_LANDMARKS:multi_box_landmarks" +# Crop rectangles derived from bounding box landmarks. +output_stream: "NORM_RECTS:multi_box_rects" + +# Loads the file in the specified path into a blob. 
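The label map added above (24 entries, with "???" marking the background class) is what DetectionLabelIdToTextCalculator reads through label_map_path, and the LABELS_CSV side packet ("Footwear, Coffee cup, Mug, Chair, Camera" in the comment) is what the filter keys on. A minimal sketch of that mapping and filtering, written against hypothetical (class_id, score, box) tuples rather than MediaPipe's Detection proto, could look as follows; the nodes below then load the box-landmark model itself from the side-packet path.

    def load_label_map(path="object_detection_oidv4_labelmap.txt"):
        """One label per line; line 0 is the '???' background entry."""
        with open(path) as f:
            return [line.strip() for line in f]

    def filter_allowed(detections, labels, allowed_csv):
        """Keep only detections whose mapped label is in the comma-separated allow list."""
        allowed = {name.strip() for name in allowed_csv.split(",")}
        named = [(labels[cid], score, box) for cid, score, box in detections]
        return [d for d in named if d[0] in allowed]

    # filter_allowed([(13, 0.8, (0.1, 0.1, 0.4, 0.4))], load_label_map(), "Coffee cup,Mug")
    # keeps the detection, since label id 13 maps to "Coffee cup" in the file above.
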
+node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:0:box_landmark_model_path" + output_side_packet: "CONTENTS:0:box_landmark_model_blob" +} + +# Converts the input blob into a TF Lite model. +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:box_landmark_model_blob" + output_side_packet: "MODEL:box_landmark_model" +} + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_box_rects_from_landmarks" + output_stream: "gated_prev_box_rects_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided max_num_objects. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks" + input_side_packet: "max_num_objects" + output_stream: "prev_has_enough_objects" +} + +# Drops the incoming image if BoxLandmarkSubgraph was able to identify box +# presence in the previous image. Otherwise, passes the incoming image through +# to trigger a new round of box detection in ObjectDetectionOidV4Subgraph. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_objects" + output_stream: "detection_image" + + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Subgraph that performs 2D object detection. +node { + calculator: "ObjectDetectionOidV4Subgraph" + input_stream: "IMAGE:detection_image" + input_side_packet: "LABELS_CSV:allowed_labels" + output_stream: "DETECTIONS:raw_detections" +} + +# Makes sure there are no more detections than provided max_num_objects. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "raw_detections" + output_stream: "detections" + input_side_packet: "max_num_objects" + +} + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "SIZE:image_size" +} + +# Converts results of box detection into rectangles (normalized by image size) +# that encloses the box. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTIONS:detections" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECTS:box_rects_from_detections" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + output_zero_rect_for_empty_detections: false + } + } +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on object detections from the current image. This +# calculator ensures that the output box_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "box_rects_from_detections" + input_stream: "gated_prev_box_rects_from_landmarks" + output_stream: "multi_box_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.2 + } + } +} + +# Outputs each element of box_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_box_rect at the fake timestamp. 
At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:multi_box_rects" + input_stream: "CLONE:image" + output_stream: "ITEM:single_box_rect" + output_stream: "CLONE:landmarks_image" + output_stream: "BATCH_END:box_rects_timestamp" +} + +# Subgraph that localizes box landmarks. +node { + calculator: "BoxLandmarkSubgraph" + input_stream: "IMAGE:landmarks_image" + input_side_packet: "MODEL:box_landmark_model" + input_stream: "NORM_RECT:single_box_rect" + output_stream: "NORM_LANDMARKS:single_box_landmarks" +} + +# Collects a set of landmarks for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:single_box_landmarks" + input_stream: "BATCH_END:box_rects_timestamp" + output_stream: "ITERABLE:multi_box_landmarks" +} + +# Convert box landmarks to frame annotations. +node { + calculator: "LandmarksToFrameAnnotationCalculator" + input_stream: "MULTI_LANDMARKS:multi_box_landmarks" + output_stream: "FRAME_ANNOTATION:box_annotations" +} + +# Lift the 2D landmarks to 3D using EPnP algorithm. +node { + name: "Lift2DFrameAnnotationTo3DCalculator" + calculator: "Lift2DFrameAnnotationTo3DCalculator" + input_stream: "FRAME_ANNOTATION:box_annotations" + output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects" + options: { + [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] { + normalized_focal_x: 1.0 + normalized_focal_y: 1.0 + } + } +} + +# Get rotated rectangle from detected box. +node { + calculator: "FrameAnnotationToRectCalculator" + input_stream: "FRAME_ANNOTATION:detected_objects" + output_stream: "NORM_RECTS:box_rects_from_landmarks" +} + +# Caches a box rectangle fed back from boxLandmarkSubgraph, and upon the +# arrival of the next input image sends out the cached rectangle with the +# timestamp replaced by that of the input image, essentially generating a packet +# that carries the previous box rectangle. Note that upon the arrival of the +# very first input image, an empty packet is sent out to jump start the +# feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:box_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_box_rects_from_landmarks" +} diff --git a/mediapipe/modules/objectron/objectron_detection_1stage_gpu.pbtxt b/mediapipe/modules/objectron/objectron_detection_1stage_gpu.pbtxt new file mode 100644 index 0000000..290b120 --- /dev/null +++ b/mediapipe/modules/objectron/objectron_detection_1stage_gpu.pbtxt @@ -0,0 +1,83 @@ +# MediaPipe Objectron detection gpu subgraph + +type: "ObjectronDetectionSubgraphGpu" + +input_stream: "IMAGE_GPU:input_video" +output_stream: "ANNOTATIONS:objects" + +# Transforms the input image on GPU to a 480x640 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. 
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:transformed_input_video" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 480 + output_height: 640 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "TENSORS_GPU:image_tensor" +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "opresolver" + options: { + [mediapipe.TfLiteCustomOpResolverCalculatorOptions.ext] { + use_gpu: true + } + } +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:image_tensor" + output_stream: "TENSORS:detection_tensors" + input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "object_detection_3d.tflite" + } + } +} + +# Decodes the model's output tensor (the heatmap and the distance fields) to 2D +# keypoints. There are nine 2D keypoints: one center keypoint and eight vertices +# for the 3D bounding box. The calculator parameters determine's the decoder's +# sensitivity. +node { + calculator: "TfLiteTensorsToObjectsCalculator" + input_stream: "TENSORS:detection_tensors" + output_stream: "ANNOTATIONS:objects" + options: { + [mediapipe.TfLiteTensorsToObjectsCalculatorOptions.ext] { + num_classes: 1 + num_keypoints: 9 + decoder_config { + heatmap_threshold: 0.6 + local_max_distance: 2 + offset_scale_coef: 1.0 + voting_radius: 2 + voting_allowance: 1 + voting_threshold: 0.2 + } + normalized_focal_x: 2.0975 + normalized_focal_y: 1.5731 + } + } +} diff --git a/mediapipe/modules/objectron/objectron_gpu.pbtxt b/mediapipe/modules/objectron/objectron_gpu.pbtxt new file mode 100644 index 0000000..7ef2b67 --- /dev/null +++ b/mediapipe/modules/objectron/objectron_gpu.pbtxt @@ -0,0 +1,186 @@ +# MediaPipe Objectron on GPU that produces 3D bounding boxes for objects. +type: "ObjectronGpuSubgraph" + +# Input/Output streams and input side packets. +# Note that the input image is assumed to have aspect ratio 3:4 (width:height). +input_stream: "IMAGE_GPU:image" +# Allowed category labels, e.g. Footwear, Coffee cup, Mug, Chair, Camera +input_side_packet: "LABELS_CSV:allowed_labels" +# Max number of objects to detect/track. (int) +input_side_packet: "MAX_NUM_OBJECTS:max_num_objects" +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Collection of detected 3D objects, represented as a FrameAnnotation. +output_stream: "FRAME_ANNOTATION:detected_objects" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. 
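The comment above describes the detect-or-track gate: together with NormalizedRectVectorHasMinSizeCalculator and the PreviousLoopbackCalculator at the bottom of this graph, it only runs the 2D detector when tracking from the previous frame did not already supply enough ROIs. A rough per-frame sketch of that control flow in plain Python (the real calculators work on packets, so this is only an analogy) is shown here, followed by the GateCalculator node itself.

    def need_detection(prev_rects, max_num_objects, use_prev_landmarks=True):
        """GateCalculator + NormalizedRectVectorHasMinSizeCalculator in one predicate:
        skip detection when the previous frame already produced enough box ROIs."""
        gated_prev = prev_rects if use_prev_landmarks else []
        return len(gated_prev) < max_num_objects

    prev_rects = []   # fed back across frames by PreviousLoopbackCalculator
    for frame_idx in range(3):
        if need_detection(prev_rects, max_num_objects=5):
            print(f"frame {frame_idx}: run ObjectDetectionOidV4Subgraph")
            prev_rects = ["rect"] * 5   # pretend detection plus landmarks found 5 boxes
        else:
            print(f"frame {frame_idx}: reuse ROIs derived from previous landmarks")
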
+node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_box_rects_from_landmarks" + output_stream: "gated_prev_box_rects_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided max_num_objects. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_box_rects_from_landmarks" + input_side_packet: "max_num_objects" + output_stream: "prev_has_enough_objects" +} + +# Drops the incoming image if BoxLandmarkSubgraph was able to identify box +# presence in the previous image. Otherwise, passes the incoming image through +# to trigger a new round of box detection in ObjectDetectionOidV4Subgraph. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_objects" + output_stream: "detection_image" + + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Subgraph that performs 2D object detection. +node { + calculator: "ObjectDetectionOidV4Subgraph" + input_stream: "IMAGE_GPU:detection_image" + input_side_packet: "LABELS_CSV:allowed_labels" + output_stream: "DETECTIONS:raw_detections" +} + +# Makes sure there are no more detections than provided max_num_objects. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "raw_detections" + output_stream: "detections" + input_side_packet: "max_num_objects" + +} + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Converts results of box detection into rectangles (normalized by image size) +# that encloses the box. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTIONS:detections" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECTS:box_rects_from_detections" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + output_zero_rect_for_empty_detections: false + } + } +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on object detections from the current image. This +# calculator ensures that the output box_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "box_rects_from_detections" + input_stream: "gated_prev_box_rects_from_landmarks" + output_stream: "box_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.2 + } + } +} + +# Outputs each element of box_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_box_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:box_rects" + input_stream: "CLONE:image" + output_stream: "ITEM:single_box_rect" + output_stream: "CLONE:landmarks_image" + output_stream: "BATCH_END:box_rects_timestamp" +} + +# Subgraph that localizes box landmarks. 
+node { + calculator: "BoxLandmarkSubgraph" + input_stream: "IMAGE:landmarks_image" + input_stream: "NORM_RECT:single_box_rect" + output_stream: "NORM_LANDMARKS:single_box_landmarks" +} + +# Collects a set of landmarks for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:single_box_landmarks" + input_stream: "BATCH_END:box_rects_timestamp" + output_stream: "ITERABLE:multi_box_landmarks" +} + +# Convert box landmarks to frame annotations. +node { + calculator: "LandmarksToFrameAnnotationCalculator" + input_stream: "MULTI_LANDMARKS:multi_box_landmarks" + output_stream: "FRAME_ANNOTATION:box_annotations" +} + +# Lift the 2D landmarks to 3D using EPnP algorithm. +node { + calculator: "Lift2DFrameAnnotationTo3DCalculator" + input_stream: "FRAME_ANNOTATION:box_annotations" + output_stream: "LIFTED_FRAME_ANNOTATION:detected_objects" + options: { + [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] { + normalized_focal_x: 2.0975 + normalized_focal_y: 1.5731 + } + } +} + +# Get rotated rectangle from detected box. +node { + calculator: "FrameAnnotationToRectCalculator" + input_stream: "FRAME_ANNOTATION:detected_objects" + output_stream: "NORM_RECTS:box_rects_from_landmarks" +} + +# Caches a box rectangle fed back from boxLandmarkSubgraph, and upon the +# arrival of the next input image sends out the cached rectangle with the +# timestamp replaced by that of the input image, essentially generating a packet +# that carries the previous box rectangle. Note that upon the arrival of the +# very first input image, an empty packet is sent out to jump start the +# feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:box_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_box_rects_from_landmarks" +} diff --git a/mediapipe/modules/objectron/objectron_tracking_1stage_gpu.pbtxt b/mediapipe/modules/objectron/objectron_tracking_1stage_gpu.pbtxt new file mode 100644 index 0000000..eb19a44 --- /dev/null +++ b/mediapipe/modules/objectron/objectron_tracking_1stage_gpu.pbtxt @@ -0,0 +1,176 @@ +# MediaPipe Objectron tracking gpu subgraph + +type: "ObjectronTrackingSubgraphGpu" + +input_stream: "FRAME_ANNOTATION:objects" +input_stream: "IMAGE_GPU:input_video" +output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects" + + +# Converts the detected keypoints to Boxes, used by the tracking subgraph. +node { + calculator: "FrameAnnotationToTimedBoxListCalculator" + input_stream: "FRAME_ANNOTATION:objects" + output_stream: "BOXES:start_pos" +} + +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:downscaled_input_video" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 240 + output_height: 320 + } + } +} + +# Converts GPU buffer to ImageFrame for processing tracking. +node: { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "downscaled_input_video" + output_stream: "downscaled_input_video_cpu" +} + +# Performs motion analysis on an incoming video stream. 
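Between detector runs, the chain introduced by the comment above (MotionAnalysisCalculator, FlowPackagerCalculator, BoxTrackerCalculator) keeps the annotated boxes moving with the image content. The snippet below only illustrates the underlying idea with OpenCV's pyramidal Lucas-Kanade tracker on a downscaled frame pair; it is not how MediaPipe's motion analysis or box tracker are implemented.

    import cv2
    import numpy as np

    def track_points(prev_gray, next_gray, points):
        """Track sparse points between two grayscale frames with pyramidal LK optical flow."""
        p0 = np.asarray(points, dtype=np.float32).reshape(-1, 1, 2)
        p1, status, _err = cv2.calcOpticalFlowPyrLK(prev_gray, next_gray, p0, None)
        return [tuple(pt.ravel()) for pt, ok in zip(p1, status.ravel()) if ok]

    # Toy frames of size 240x320 (the graph above downscales to 240x320 before tracking).
    prev_gray = np.zeros((320, 240), np.uint8)
    next_gray = np.zeros((320, 240), np.uint8)
    cv2.circle(prev_gray, (100, 100), 5, 255, -1)
    cv2.circle(next_gray, (103, 102), 5, 255, -1)
    print(track_points(prev_gray, next_gray, [(100, 100)]))
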
+node: { + calculator: "MotionAnalysisCalculator" + input_stream: "VIDEO:downscaled_input_video_cpu" + output_stream: "CAMERA:camera_motion" + output_stream: "FLOW:region_flow" + + options: { + [mediapipe.MotionAnalysisCalculatorOptions.ext]: { + analysis_options { + analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE + flow_options { + fast_estimation_min_block_size: 100 + top_inlier_sets: 1 + frac_inlier_error_threshold: 3e-3 + downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE + verification_distance: 5.0 + verify_long_feature_acceleration: true + verify_long_feature_trigger_ratio: 0.1 + tracking_options { + max_features: 500 + adaptive_extraction_levels: 2 + min_eig_val_settings { + adaptive_lowest_quality_level: 2e-4 + } + klt_tracker_implementation: KLT_OPENCV + } + } + } + } + } +} + +# Reads optical flow fields defined in +# mediapipe/framework/formats/motion/optical_flow_field.h, +# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized +# to 0-255. +node: { + calculator: "FlowPackagerCalculator" + input_stream: "FLOW:region_flow" + input_stream: "CAMERA:camera_motion" + output_stream: "TRACKING:tracking_data" + + options: { + [mediapipe.FlowPackagerCalculatorOptions.ext]: { + flow_packager_options: { + binary_tracking_data_support: false + } + } + } +} + +# Tracks box positions over time. +node: { + calculator: "BoxTrackerCalculator" + input_stream: "TRACKING:tracking_data" + input_stream: "TRACK_TIME:input_video" + input_stream: "START_POS:start_pos" + input_stream: "CANCEL_OBJECT_ID:cancel_object_id" + input_stream_info: { + tag_index: "CANCEL_OBJECT_ID" + back_edge: true + } + output_stream: "BOXES:boxes" + + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "TRACKING" + tag_index: "TRACK_TIME" + } + sync_set { + tag_index: "START_POS" + } + sync_set { + tag_index: "CANCEL_OBJECT_ID" + } + } + } + } + + options: { + [mediapipe.BoxTrackerCalculatorOptions.ext]: { + tracker_options: { + track_step_options { + track_object_and_camera: true + tracking_degrees: TRACKING_DEGREE_OBJECT_ROTATION_SCALE + inlier_spring_force: 0.0 + static_motion_temporal_ratio: 3e-2 + } + } + visualize_tracking_data: false + streaming_track_data_cache_size: 100 + } + } +} + +# Consolidates tracking and detection results. +node { + calculator: "FrameAnnotationTrackerCalculator" + input_stream: "FRAME_ANNOTATION:objects" + input_stream: "TRACKED_BOXES:boxes" + output_stream: "TRACKED_FRAME_ANNOTATION:tracked_objects" + output_stream: "CANCEL_OBJECT_ID:cancel_object_id" + options: { + [mediapipe.FrameAnnotationTrackerCalculatorOptions.ext] { + img_width: 240 + img_height: 320 + iou_threshold: 0.1 + } + } + + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "FRAME_ANNOTATION" + } + sync_set { + tag_index: "TRACKED_BOXES" + } + } + } + } +} + +# Lift the tracked 2D keypoints to 3D using EPnP algorithm. 
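Lifting the nine tracked 2D keypoints back to a 3D box, as the node below does, is a perspective-n-point problem for a pinhole camera whose focal lengths are given here in normalized form (2.0975 and 1.5731 for the 3:4 portrait input). The sketch uses OpenCV's EPnP solver on a unit-cube box model purely to illustrate the idea; the pixel-focal conversion (normalized focal times image width and height) and the box model are assumptions on my part, and the calculator's actual conventions live in Lift2DFrameAnnotationTo3DCalculator.

    import cv2
    import numpy as np

    def lift_box_to_3d(keypoints_px, image_w, image_h,
                       normalized_focal_x=2.0975, normalized_focal_y=1.5731):
        """Estimate rotation/translation of a unit box from its 9 projected keypoints (EPnP).

        keypoints_px: 9x2 array of pixel coordinates, box center first, then 8 corners."""
        fx = normalized_focal_x * image_w            # assumed convention, see note above
        fy = normalized_focal_y * image_h
        K = np.array([[fx, 0.0, image_w / 2.0],
                      [0.0, fy, image_h / 2.0],
                      [0.0, 0.0, 1.0]])
        model = np.array([[0.0, 0.0, 0.0]] + [[x, y, z] for x in (-0.5, 0.5)
                                              for y in (-0.5, 0.5) for z in (-0.5, 0.5)])
        ok, rvec, tvec = cv2.solvePnP(model, np.asarray(keypoints_px, dtype=np.float64),
                                      K, None, flags=cv2.SOLVEPNP_EPNP)
        return ok, rvec, tvec
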
+node { + calculator: "Lift2DFrameAnnotationTo3DCalculator" + input_stream: "FRAME_ANNOTATION:tracked_objects" + output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects" + options: { + [mediapipe.Lift2DFrameAnnotationTo3DCalculatorOptions.ext] { + normalized_focal_x: 2.0975 + normalized_focal_y: 1.5731 + } + } +} diff --git a/mediapipe/modules/palm_detection/BUILD b/mediapipe/modules/palm_detection/BUILD new file mode 100644 index 0000000..bed734b --- /dev/null +++ b/mediapipe/modules/palm_detection/BUILD @@ -0,0 +1,71 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "palm_detection_lite.tflite", + "palm_detection_full.tflite", +]) + +mediapipe_simple_subgraph( + name = "palm_detection_model_loader", + graph = "palm_detection_model_loader.pbtxt", + register_as = "PalmDetectionModelLoader", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "palm_detection_cpu", + graph = "palm_detection_cpu.pbtxt", + register_as = "PalmDetectionCpu", + deps = [ + ":palm_detection_model_loader", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "palm_detection_gpu", + graph = "palm_detection_gpu.pbtxt", + register_as = "PalmDetectionGpu", + deps = [ + ":palm_detection_model_loader", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) diff --git a/mediapipe/modules/palm_detection/README.md b/mediapipe/modules/palm_detection/README.md new file mode 100644 index 0000000..c7fd610 --- /dev/null +++ b/mediapipe/modules/palm_detection/README.md @@ -0,0 +1,7 @@ +# palm_detection + +Subgraphs|Details +:--- | :--- +[`PalmDetectionCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt)| Detects palms/hands. (CPU input.) 
+[`PalmDetectionGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt)| Detects palms/hands. (GPU input.) + diff --git a/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt b/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt new file mode 100644 index 0000000..32b3927 --- /dev/null +++ b/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt @@ -0,0 +1,147 @@ +# MediaPipe graph to detect palms with TensorFlow Lite on CPU. + +type: "PalmDetectionCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference +# latency generally go up with the model complexity. If unspecified, functions +# as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Detected palms. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of palms detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms an image into a 128x128 tensor while keeping the aspect ratio, and +# therefore may result in potential letterboxing. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + output_stream: "TENSORS:input_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } +} +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "opresolver" +} + +# Loads the palm detection TF Lite model. +node { + calculator: "PalmDetectionModelLoader" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + output_side_packet: "MODEL:model" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensor" + output_stream: "TENSORS:detection_tensors" + input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" + input_side_packet: "MODEL:model" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { xnnpack {} } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_width: 192 + input_size_height: 192 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. 
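For context on the `num_boxes: 2016` value in the decoder node that follows: with a 192x192 input and strides of 8, 16, 16 and 16, layers that share a stride share one grid of cells, and with a single aspect ratio plus the interpolated scale each layer contributes roughly two fixed-size anchors per cell. The arithmetic below is only a sanity check of that count under those assumptions, not a reimplementation of SsdAnchorsCalculator.

    from collections import Counter

    def approx_anchor_count(input_size, layer_strides, anchors_per_layer_cell=2):
        """Rough anchor count: same-stride layers share one (input_size // stride)^2 grid."""
        total = 0
        for stride, num_layers in Counter(layer_strides).items():
            cells = (input_size // stride) ** 2
            total += cells * num_layers * anchors_per_layer_cell
        return total

    # 24*24*2 + 12*12*(3*2) = 1152 + 864 = 2016, matching num_boxes below.
    print(approx_anchor_count(192, [8, 16, 16, 16]))
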
+node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 2016 + num_coords: 18 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 7 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + + x_scale: 192.0 + y_scale: 192.0 + w_scale: 192.0 + h_scale: 192.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/palm_detection/palm_detection_full.tflite b/mediapipe/modules/palm_detection/palm_detection_full.tflite new file mode 100755 index 0000000..aee76a9 Binary files /dev/null and b/mediapipe/modules/palm_detection/palm_detection_full.tflite differ diff --git a/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt b/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt new file mode 100644 index 0000000..73e4127 --- /dev/null +++ b/mediapipe/modules/palm_detection/palm_detection_gpu.pbtxt @@ -0,0 +1,153 @@ +# MediaPipe graph to detect palms with TensorFlow Lite on GPU. + +type: "PalmDetectionGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference +# latency generally go up with the model complexity. If unspecified, functions +# as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Detected palms. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of palms detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms an image into a 256x256 tensor while keeping the aspect ratio, and +# therefore may result in potential letterboxing. +node { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "TENSORS:input_tensor" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } +} +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. 
+node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "opresolver" + options: { + [mediapipe.TfLiteCustomOpResolverCalculatorOptions.ext] { + use_gpu: true + } + } +} + +# Loads the palm detection TF Lite model. +node { + calculator: "PalmDetectionModelLoader" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + output_side_packet: "MODEL:model" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensor" + output_stream: "TENSORS:detection_tensors" + input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" + input_side_packet: "MODEL:model" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { gpu {} } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_width: 192 + input_size_height: 192 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 2016 + num_coords: 18 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 7 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + + x_scale: 192.0 + y_scale: 192.0 + w_scale: 192.0 + h_scale: 192.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). 
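The adjustment described in the comment above undoes the zero padding that keep_aspect_ratio introduces when a non-square camera frame is fitted into the square 192x192 tensor. A simplified sketch of both the padding computation and its removal on normalized boxes (the real work is done by ImageToTensorCalculator and DetectionLetterboxRemovalCalculator) follows, then the removal node itself.

    def letterbox_padding(src_w, src_h, dst_w=192, dst_h=192):
        """Normalized (left, top, right, bottom) padding added when fitting src into dst."""
        scale = min(dst_w / src_w, dst_h / src_h)
        pad_x = (dst_w - src_w * scale) / dst_w / 2.0
        pad_y = (dst_h - src_h * scale) / dst_h / 2.0
        return (pad_x, pad_y, pad_x, pad_y)

    def remove_letterbox(box, padding):
        """Map a normalized (xmin, ymin, xmax, ymax) box on the padded image back to the original."""
        left, top, _right, _bottom = padding
        sx, sy = 1.0 - 2.0 * left, 1.0 - 2.0 * top
        xmin, ymin, xmax, ymax = box
        return ((xmin - left) / sx, (ymin - top) / sy, (xmax - left) / sx, (ymax - top) / sy)

    # A 640x480 frame fitted into 192x192 gets 12.5% padding on top and bottom.
    pad = letterbox_padding(640, 480)
    print(pad, remove_letterbox((0.25, 0.25, 0.75, 0.75), pad))
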
+node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/palm_detection/palm_detection_lite.tflite b/mediapipe/modules/palm_detection/palm_detection_lite.tflite new file mode 100755 index 0000000..a19339a Binary files /dev/null and b/mediapipe/modules/palm_detection/palm_detection_lite.tflite differ diff --git a/mediapipe/modules/palm_detection/palm_detection_model_loader.pbtxt b/mediapipe/modules/palm_detection/palm_detection_model_loader.pbtxt new file mode 100644 index 0000000..f33a76e --- /dev/null +++ b/mediapipe/modules/palm_detection/palm_detection_model_loader.pbtxt @@ -0,0 +1,63 @@ +# MediaPipe graph to load a selected palm detection TF Lite model. + +type: "PalmDetectionModelLoader" + +# Complexity of the palm detection model: 0 or 1. Accuracy as well as inference +# latency generally go up with the model complexity. If unspecified, functions +# as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# TF Lite model represented as a FlatBuffer. +# (std::unique_ptr>) +output_side_packet: "MODEL:model" + +# Determines path to the desired pose landmark model file. +node { + calculator: "SwitchContainer" + input_side_packet: "SELECT:model_complexity" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.SwitchContainerOptions.ext] { + select: 1 + contained_node: { + calculator: "ConstantSidePacketCalculator" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/modules/palm_detection/palm_detection_lite.tflite" + } + } + } + } + contained_node: { + calculator: "ConstantSidePacketCalculator" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/modules/palm_detection/palm_detection_full.tflite" + } + } + } + } + } + } +} + +# Loads the file in the specified path into a blob. +node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + options: { + [mediapipe.LocalFileContentsCalculatorOptions.ext]: { + text_mode: false + } + } +} + +# Converts the input blob into a TF Lite model. +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" +} diff --git a/mediapipe/modules/pose_detection/BUILD b/mediapipe/modules/pose_detection/BUILD new file mode 100644 index 0000000..f460300 --- /dev/null +++ b/mediapipe/modules/pose_detection/BUILD @@ -0,0 +1,56 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
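The palm_detection_model_loader graph above turns MODEL_COMPLEXITY into a concrete model: index 0 selects the lite model, index 1 (the default) the full model, the file is read as a binary blob, and the blob becomes a TF Lite model side packet. A loose Python analogue using TensorFlow's TF Lite interpreter (not what the C++ calculators use internally) might look like this; the pose_detection BUILD targets that follow then wire up the corresponding detection subgraphs for pose.

    import tensorflow as tf

    def load_palm_detection_model(model_complexity=1):
        """Pick the model by complexity (0 = lite, 1 = full), read it as a blob, and
        build an interpreter from the blob, loosely mirroring SwitchContainer +
        LocalFileContentsCalculator + TfLiteModelCalculator."""
        paths = [
            "mediapipe/modules/palm_detection/palm_detection_lite.tflite",
            "mediapipe/modules/palm_detection/palm_detection_full.tflite",
        ]
        with open(paths[model_complexity], "rb") as f:   # binary read, like text_mode: false
            blob = f.read()
        interpreter = tf.lite.Interpreter(model_content=blob)
        interpreter.allocate_tensors()
        return interpreter
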
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "pose_detection_cpu", + graph = "pose_detection_cpu.pbtxt", + register_as = "PoseDetectionCpu", + deps = [ + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_detection_gpu", + graph = "pose_detection_gpu.pbtxt", + register_as = "PoseDetectionGpu", + deps = [ + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +exports_files( + srcs = [ + "pose_detection.tflite", + ], +) diff --git a/mediapipe/modules/pose_detection/README.md b/mediapipe/modules/pose_detection/README.md new file mode 100644 index 0000000..e2e3b2f --- /dev/null +++ b/mediapipe/modules/pose_detection/README.md @@ -0,0 +1,7 @@ +# pose_detection + +Subgraphs|Details +:--- | :--- +[`PoseDetectionCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt)| Detects poses. (CPU input, and inference is executed on CPU.) +[`PoseDetectionGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt)| Detects poses. (GPU input, and inference is executed on GPU.) + diff --git a/mediapipe/modules/pose_detection/pose_detection.tflite b/mediapipe/modules/pose_detection/pose_detection.tflite new file mode 100755 index 0000000..4f1c521 Binary files /dev/null and b/mediapipe/modules/pose_detection/pose_detection.tflite differ diff --git a/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt b/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt new file mode 100644 index 0000000..79ee1ac --- /dev/null +++ b/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt @@ -0,0 +1,159 @@ +# MediaPipe graph to detect poses. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseDetectionCpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:pose_detections" +# } + +type: "PoseDetectionCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Detected poses. (std::vector) +# Bounding box in each pose detection is currently set to the bounding box of +# the detected face. However, 4 additional key points are available in each +# detection, which are used to further calculate a (rotated) bounding box that +# encloses the body region of interest. 
Among the 4 key points, the first two +# are for identifying the full-body region, and the second two for upper body +# only: +# +# Key point 0 - mid hip center +# Key point 1 - point that encodes size & rotation (for full body) +# Key point 2 - mid shoulder center +# Key point 3 - point that encodes size & rotation (for upper body) +# +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of poses detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms the input image into a 224x224 one while keeping the aspect ratio +# (what is expected by the corresponding model), resulting in potential +# letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + output_stream: "TENSORS:input_tensors" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 224 + output_tensor_height: 224 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + # If this calculator truly operates in the CPU, then gpu_origin is + # ignored, but if some build switch insists on GPU inference, then we will + # still need to set this. + gpu_origin: TOP_LEFT + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/pose_detection/pose_detection.tflite" + delegate { + xnnpack {} + } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 5 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 224 + input_size_width: 224 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 32 + strides: 32 + strides: 32 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 2254 + num_coords: 12 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 4 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 224.0 + y_scale: 224.0 + h_scale: 224.0 + w_scale: 224.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. 
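The suppression configured in the node below uses `algorithm: WEIGHTED`, so candidates overlapping a kept detection beyond the threshold are not merely discarded but folded into it, weighted by score. The following is a much-simplified sketch of that idea on plain (score, box) tuples and is not the NonMaxSuppressionCalculator implementation.

    def iou(a, b):
        ax1, ay1, ax2, ay2 = a
        bx1, by1, bx2, by2 = b
        iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))
        ih = max(0.0, min(ay2, by2) - max(ay1, by1))
        inter = iw * ih
        union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
        return inter / union if union > 0.0 else 0.0

    def weighted_nms(detections, overlap_thresh=0.3):
        """detections: list of (score, (x1, y1, x2, y2)). Overlapping boxes are averaged,
        weighted by score, into the highest-scoring box of their cluster."""
        remaining = sorted(detections, key=lambda d: d[0], reverse=True)
        kept = []
        while remaining:
            best_score, best_box = remaining.pop(0)
            cluster = [(best_score, best_box)]
            rest = []
            for score, box in remaining:
                (cluster if iou(box, best_box) > overlap_thresh else rest).append((score, box))
            total = sum(s for s, _ in cluster)
            merged = tuple(sum(s * b[i] for s, b in cluster) / total for i in range(4))
            kept.append((best_score, merged))
            remaining = rest
        return kept

    print(weighted_nms([(0.9, (0.1, 0.1, 0.5, 0.5)), (0.6, (0.12, 0.12, 0.52, 0.52)),
                        (0.7, (0.6, 0.6, 0.9, 0.9))]))
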
+node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt new file mode 100644 index 0000000..b95a117 --- /dev/null +++ b/mediapipe/modules/pose_detection/pose_detection_gpu.pbtxt @@ -0,0 +1,155 @@ +# MediaPipe graph to detect poses. (GPU input, and inference is executed on +# GPU.) +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "PoseDetectionGpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:pose_detections" +# } + +type: "PoseDetectionGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Detected poses. (std::vector) +# Bounding box in each pose detection is currently set to the bounding box of +# the detected face. However, 4 additional key points are available in each +# detection, which are used to further calculate a (rotated) bounding box that +# encloses the body region of interest. Among the 4 key points, the first two +# are for identifying the full-body region, and the second two for upper body +# only: +# +# Key point 0 - mid hip center +# Key point 1 - point that encodes size & rotation (for full body) +# Key point 2 - mid shoulder center +# Key point 3 - point that encodes size & rotation (for upper body) +# +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of poses detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms the input image into a 224x224 one while keeping the aspect ratio +# (what is expected by the corresponding model), resulting in potential +# letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "TENSORS:input_tensors" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 224 + output_tensor_height: 224 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. 
+node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/pose_detection/pose_detection.tflite" + # + delegate: { gpu { use_advanced_gpu_api: true } } + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 5 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 224 + input_size_width: 224 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 32 + strides: 32 + strides: 32 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 2254 + num_coords: 12 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 4 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 224.0 + y_scale: 224.0 + h_scale: 224.0 + w_scale: 224.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/pose_landmark/BUILD b/mediapipe/modules/pose_landmark/BUILD new file mode 100644 index 0000000..787f0e2 --- /dev/null +++ b/mediapipe/modules/pose_landmark/BUILD @@ -0,0 +1,189 @@ +# Copyright 2020 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "pose_landmark_model_loader", + graph = "pose_landmark_model_loader.pbtxt", + register_as = "PoseLandmarkModelLoader", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_by_roi_gpu", + graph = "pose_landmark_by_roi_gpu.pbtxt", + register_as = "PoseLandmarkByRoiGpu", + deps = [ + ":pose_landmark_model_loader", + ":pose_landmarks_and_segmentation_inverse_projection", + ":tensors_to_pose_landmarks_and_segmentation", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_by_roi_cpu", + graph = "pose_landmark_by_roi_cpu.pbtxt", + register_as = "PoseLandmarkByRoiCpu", + deps = [ + ":pose_landmark_model_loader", + ":pose_landmarks_and_segmentation_inverse_projection", + ":tensors_to_pose_landmarks_and_segmentation", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "tensors_to_pose_landmarks_and_segmentation", + graph = "tensors_to_pose_landmarks_and_segmentation.pbtxt", + register_as = "TensorsToPoseLandmarksAndSegmentation", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_landmarks_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/tensor:tensors_to_segmentation_calculator", + "//mediapipe/calculators/util:refine_landmarks_from_heatmap_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/calculators/util:visibility_copy_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmarks_and_segmentation_inverse_projection", + graph = "pose_landmarks_and_segmentation_inverse_projection.pbtxt", + register_as = "PoseLandmarksAndSegmentationInverseProjection", + deps = [ + "//mediapipe/calculators/image:warp_affine_calculator", + "//mediapipe/calculators/util:inverse_matrix_calculator", + "//mediapipe/calculators/util:landmark_letterbox_removal_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:world_landmark_projection_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_filtering", + graph = "pose_landmark_filtering.pbtxt", + register_as = "PoseLandmarkFiltering", + deps = [ + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_smoothing_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:visibility_smoothing_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "pose_segmentation_filtering", + graph = "pose_segmentation_filtering.pbtxt", + 
register_as = "PoseSegmentationFiltering", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:segmentation_smoothing_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_gpu", + graph = "pose_landmark_gpu.pbtxt", + register_as = "PoseLandmarkGpu", + deps = [ + ":pose_detection_to_roi", + ":pose_landmark_by_roi_gpu", + ":pose_landmark_filtering", + ":pose_landmarks_to_roi", + ":pose_segmentation_filtering", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:merge_calculator", + "//mediapipe/calculators/core:packet_presence_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/modules/pose_detection:pose_detection_gpu", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmark_cpu", + graph = "pose_landmark_cpu.pbtxt", + register_as = "PoseLandmarkCpu", + deps = [ + ":pose_detection_to_roi", + ":pose_landmark_by_roi_cpu", + ":pose_landmark_filtering", + ":pose_landmarks_to_roi", + ":pose_segmentation_filtering", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:merge_calculator", + "//mediapipe/calculators/core:packet_presence_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/modules/pose_detection:pose_detection_cpu", + ], +) + +exports_files( + srcs = [ + "pose_landmark_full.tflite", + "pose_landmark_heavy.tflite", + "pose_landmark_lite.tflite", + ], +) + +mediapipe_simple_subgraph( + name = "pose_detection_to_roi", + graph = "pose_detection_to_roi.pbtxt", + register_as = "PoseDetectionToRoi", + deps = [ + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "pose_landmarks_to_roi", + graph = "pose_landmarks_to_roi.pbtxt", + register_as = "PoseLandmarksToRoi", + deps = [ + "//mediapipe/calculators/util:alignment_points_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) diff --git a/mediapipe/modules/pose_landmark/README.md b/mediapipe/modules/pose_landmark/README.md new file mode 100644 index 0000000..5752838 --- /dev/null +++ b/mediapipe/modules/pose_landmark/README.md @@ -0,0 +1,8 @@ +# pose_landmark + +Subgraphs|Details +:--- | :--- +[`PoseLandmarkByRoiCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_by_roi_cpu.pbtxt)| Detects landmarks of a single body pose. See landmarks (aka keypoints) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_topology.svg). (CPU input, and inference is executed on CPU.) +[`PoseLandmarkByRoiGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_by_roi_gpu.pbtxt)| Detects landmarks of a single body pose. 
See landmarks (aka keypoints) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_topology.svg). (GPU input, and inference is executed on GPU.)
+[`PoseLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_cpu.pbtxt)| Detects landmarks of a single body pose. See landmarks (aka keypoints) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_topology.svg). (CPU input, and inference is executed on CPU.)
+[`PoseLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt)| Detects landmarks of a single body pose. See landmarks (aka keypoints) [scheme](https://github.com/google/mediapipe/tree/master/mediapipe/modules/pose_landmark/pose_landmark_topology.svg). (GPU input, and inference is executed on GPU.)
diff --git a/mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt b/mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt
new file mode 100644
index 0000000..47f82bb
--- /dev/null
+++ b/mediapipe/modules/pose_landmark/pose_detection_to_roi.pbtxt
@@ -0,0 +1,45 @@
+# MediaPipe graph to calculate pose region of interest (ROI) from a detection
+# provided by "PoseDetectionCpu" or "PoseDetectionGpu".
+#
+# NOTE: this graph is subject to change and should not be used directly.
+
+type: "PoseDetectionToRoi"
+
+# Pose detection. (Detection)
+input_stream: "DETECTION:detection"
+# Frame size (width and height). (std::pair<int, int>)
+input_stream: "IMAGE_SIZE:image_size"
+
+# ROI according to the first detection of input detections. (NormalizedRect)
+output_stream: "ROI:roi"
+
+# Converts pose detection into a rectangle based on center and scale alignment
+# points.
+node {
+  calculator: "AlignmentPointsRectsCalculator"
+  input_stream: "DETECTION:detection"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "NORM_RECT:raw_roi"
+  options: {
+    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
+      rotation_vector_start_keypoint_index: 0
+      rotation_vector_end_keypoint_index: 1
+      rotation_vector_target_angle_degrees: 90
+    }
+  }
+}
+
+# Expands pose rect with margin used during training.
+node {
+  calculator: "RectTransformationCalculator"
+  input_stream: "NORM_RECT:raw_roi"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "roi"
+  options: {
+    [mediapipe.RectTransformationCalculatorOptions.ext] {
+      scale_x: 1.25
+      scale_y: 1.25
+      square_long: true
+    }
+  }
+}
diff --git a/mediapipe/modules/pose_landmark/pose_landmark_by_roi_cpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_by_roi_cpu.pbtxt
new file mode 100644
index 0000000..b674894
--- /dev/null
+++ b/mediapipe/modules/pose_landmark/pose_landmark_by_roi_cpu.pbtxt
@@ -0,0 +1,178 @@
+# MediaPipe graph to detect/predict pose landmarks and optionally segmentation
+# within an ROI. (CPU input, and inference is executed on CPU.)
+#
+# It is required that "pose_landmark_lite.tflite" or
+# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at
+# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or
+# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or
+# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
+# path respectively during execution, depending on the specification in the
+# MODEL_COMPLEXITY input side packet.
+# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkByRoiCpu" +# input_side_packet: "MODEL_COMPLEXITY:model_complexity" +# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "LANDMARKS:landmarks" +# output_stream: "SEGMENTATION_MASK:segmentation_mask" +# } + +type: "PoseLandmarkByRoiCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a pose is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# Whether to predict the segmentation mask. If unspecified, functions as set to +# false. (bool) +input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + +# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Pose landmarks within the given ROI. (NormalizedLandmarkList) +# We have 33 landmarks (see pose_landmark_topology.svg) and there are other +# auxiliary key points. +# 0 - nose +# 1 - left eye (inner) +# 2 - left eye +# 3 - left eye (outer) +# 4 - right eye (inner) +# 5 - right eye +# 6 - right eye (outer) +# 7 - left ear +# 8 - right ear +# 9 - mouth (left) +# 10 - mouth (right) +# 11 - left shoulder +# 12 - right shoulder +# 13 - left elbow +# 14 - right elbow +# 15 - left wrist +# 16 - right wrist +# 17 - left pinky +# 18 - right pinky +# 19 - left index +# 20 - right index +# 21 - left thumb +# 22 - right thumb +# 23 - left hip +# 24 - right hip +# 25 - left knee +# 26 - right knee +# 27 - left ankle +# 28 - right ankle +# 29 - left heel +# 30 - right heel +# 31 - left foot index +# 32 - right foot index +# +# NOTE: If a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:landmarks" +# Auxiliary landmarks for deriving the ROI in the subsequent image. +# (NormalizedLandmarkList) +output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" + +# Pose world landmarks within the given ROI. (LandmarkList) +# World landmarks are real-world 3D coordinates in meters with the origin at the +# center between hips. WORLD_LANDMARKS shares the same landmark topology as +# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object +# projected onto the 2D image surface, while WORLD_LANDMARKS provides +# coordinates (in meters) of the 3D object itself. +output_stream: "WORLD_LANDMARKS:world_landmarks" + +# Segmentation mask on CPU in ImageFormat::VEC32F1. (Image) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Retrieves the image size. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "SIZE:image_size" +} + +# Crops and transforms the specified ROI in the input image into an image patch +# represented as a tensor of dimension expected by the corresponding ML model, +# while maintaining the aspect ratio of the ROI (which can be different from +# that of the image patch). Therefore, there can be letterboxing around the ROI +# in the generated tensor representation. 
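+# For a rough illustration (the numbers below are hypothetical and not taken
+# from this graph): an ROI of 384x512 pixels fitted into the 256x256 tensor
+# requested below is scaled down to 192x256 and padded by 32 pixels on the
+# left and right, so LETTERBOX_PADDING would be approximately
+# [32/256, 0, 32/256, 0] = [0.125, 0, 0.125, 0] (normalized
+# left/top/right/bottom padding).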
+node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "MATRIX:transformation_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 256 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + } + } +} + +# Loads the pose landmark TF Lite model. +node { + calculator: "PoseLandmarkModelLoader" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + output_side_packet: "MODEL:model" +} + +# Runs model inference on CPU. +node { + calculator: "InferenceCalculator" + input_side_packet: "MODEL:model" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { xnnpack {} } + } + } +} + +# Decodes the tensors into the corresponding landmark and segmentation mask +# representation. +node { + calculator: "TensorsToPoseLandmarksAndSegmentation" + input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + input_stream: "TENSORS:output_tensors" + output_stream: "LANDMARKS:roi_landmarks" + output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" + output_stream: "WORLD_LANDMARKS:roi_world_landmarks" + output_stream: "SEGMENTATION_MASK:roi_segmentation_mask" +} + +# Projects the landmarks and segmentation mask in the local coordinates of the +# (potentially letterboxed) ROI back to the global coordinates of the full input +# image. +node { + calculator: "PoseLandmarksAndSegmentationInverseProjection" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "NORM_RECT:roi" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + input_stream: "MATRIX:transformation_matrix" + input_stream: "LANDMARKS:roi_landmarks" + input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" + input_stream: "WORLD_LANDMARKS:roi_world_landmarks" + input_stream: "SEGMENTATION_MASK:roi_segmentation_mask" + output_stream: "LANDMARKS:landmarks" + output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" + output_stream: "WORLD_LANDMARKS:world_landmarks" + output_stream: "SEGMENTATION_MASK:segmentation_mask" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_by_roi_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_by_roi_gpu.pbtxt new file mode 100644 index 0000000..7a2acce --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_by_roi_gpu.pbtxt @@ -0,0 +1,174 @@ +# MediaPipe graph to detect/predict pose landmarks and optionally segmentation +# within an ROI. (GPU input, and inference is executed on GPU.) +# +# It is required that "pose_landmark_lite.tflite" or +# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite" +# path respectively during execution, depending on the specification in the +# MODEL_COMPLEXITY input side packet. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkByRoiGpu" +# input_side_packet: "MODEL_COMPLEXITY:model_complexity" +# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "LANDMARKS:landmarks" +# output_stream: "SEGMENTATION_MASK:segmentation_mask" +# } + +type: "PoseLandmarkByRoiGpu" + +# GPU image. 
(GpuBuffer) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a pose is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# Whether to predict the segmentation mask. If unspecified, functions as set to +# false. (bool) +input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + +# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Pose landmarks within the given ROI. (NormalizedLandmarkList) +# We have 33 landmarks (see pose_landmark_topology.svg), and there are other +# auxiliary key points. +# 0 - nose +# 1 - left eye (inner) +# 2 - left eye +# 3 - left eye (outer) +# 4 - right eye (inner) +# 5 - right eye +# 6 - right eye (outer) +# 7 - left ear +# 8 - right ear +# 9 - mouth (left) +# 10 - mouth (right) +# 11 - left shoulder +# 12 - right shoulder +# 13 - left elbow +# 14 - right elbow +# 15 - left wrist +# 16 - right wrist +# 17 - left pinky +# 18 - right pinky +# 19 - left index +# 20 - right index +# 21 - left thumb +# 22 - right thumb +# 23 - left hip +# 24 - right hip +# 25 - left knee +# 26 - right knee +# 27 - left ankle +# 28 - right ankle +# 29 - left heel +# 30 - right heel +# 31 - left foot index +# 32 - right foot index +# +# NOTE: If a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:landmarks" +# Auxiliary landmarks for deriving the ROI in the subsequent image. +# (NormalizedLandmarkList) +output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" + +# Pose world landmarks within the given ROI. (LandmarkList) +# World landmarks are real-world 3D coordinates in meters with the origin at the +# center between hips. WORLD_LANDMARKS shares the same landmark topology as +# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object +# projected onto the 2D image surface, while WORLD_LANDMARKS provides +# coordinates (in meters) of the 3D object itself. +output_stream: "WORLD_LANDMARKS:world_landmarks" + +# Segmentation mask on GPU in RGBA with the same mask values in R and A. (Image) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Retrieves the image size. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Crops and transforms the specified ROI in the input image into an image patch +# represented as a tensor of dimension expected by the corresponding ML model, +# while maintaining the aspect ratio of the ROI (which can be different from +# that of the image patch). Therefore, there can be letterboxing around the ROI +# in the generated tensor representation. 
+node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "MATRIX:transformation_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 256 + keep_aspect_ratio: true + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + gpu_origin: TOP_LEFT + } + } +} + +# Loads the pose landmark TF Lite model. +node { + calculator: "PoseLandmarkModelLoader" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + output_side_packet: "MODEL:model" +} + +# Runs model inference on GPU. +node { + calculator: "InferenceCalculator" + input_side_packet: "MODEL:model" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" +} + +# Decodes the tensors into the corresponding landmark and segmentation mask +# representation. +node { + calculator: "TensorsToPoseLandmarksAndSegmentation" + input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + input_stream: "TENSORS:output_tensors" + output_stream: "LANDMARKS:roi_landmarks" + output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" + output_stream: "WORLD_LANDMARKS:roi_world_landmarks" + output_stream: "SEGMENTATION_MASK:roi_segmentation_mask" +} + +# Projects the landmarks and segmentation mask in the local coordinates of the +# (potentially letterboxed) ROI back to the global coordinates of the full input +# image. +node { + calculator: "PoseLandmarksAndSegmentationInverseProjection" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "NORM_RECT:roi" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + input_stream: "MATRIX:transformation_matrix" + input_stream: "LANDMARKS:roi_landmarks" + input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" + input_stream: "WORLD_LANDMARKS:roi_world_landmarks" + input_stream: "SEGMENTATION_MASK:roi_segmentation_mask" + output_stream: "LANDMARKS:landmarks" + output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" + output_stream: "WORLD_LANDMARKS:world_landmarks" + output_stream: "SEGMENTATION_MASK:segmentation_mask" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_cpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_cpu.pbtxt new file mode 100644 index 0000000..5faf08a --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_cpu.pbtxt @@ -0,0 +1,268 @@ +# MediaPipe graph to detect/predict pose landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip pose detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "pose_detection.tflite" is available at +# "mediapipe/modules/pose_detection/pose_detection.tflite" +# path during execution. +# +# It is required that "pose_landmark_lite.tflite" or +# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite" +# path respectively during execution, depending on the specification in the +# MODEL_COMPLEXITY input side packet. 
+# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkCpu" +# input_side_packet: "MODEL_COMPLEXITY:model_complexity" +# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" +# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" +# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" +# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# input_stream: "IMAGE:image" +# output_stream: "LANDMARKS:pose_landmarks" +# output_stream: "SEGMENTATION_MASK:segmentation_mask" +# } + +type: "PoseLandmarkCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Whether to filter landmarks across different input images to reduce jitter. +# If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" + +# Whether to predict the segmentation mask. If unspecified, functions as set to +# false. (bool) +input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + +# Whether to filter segmentation mask across different input images to reduce +# jitter. If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" + +# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Pose landmarks. (NormalizedLandmarkList) +# We have 33 landmarks (see pose_landmark_topology.svg), and there are other +# auxiliary key points. +# 0 - nose +# 1 - left eye (inner) +# 2 - left eye +# 3 - left eye (outer) +# 4 - right eye (inner) +# 5 - right eye +# 6 - right eye (outer) +# 7 - left ear +# 8 - right ear +# 9 - mouth (left) +# 10 - mouth (right) +# 11 - left shoulder +# 12 - right shoulder +# 13 - left elbow +# 14 - right elbow +# 15 - left wrist +# 16 - right wrist +# 17 - left pinky +# 18 - right pinky +# 19 - left index +# 20 - right index +# 21 - left thumb +# 22 - right thumb +# 23 - left hip +# 24 - right hip +# 25 - left knee +# 26 - right knee +# 27 - left ankle +# 28 - right ankle +# 29 - left heel +# 30 - right heel +# 31 - left foot index +# 32 - right foot index +# +# NOTE: if a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:pose_landmarks" + +# Pose world landmarks. (LandmarkList) +# World landmarks are real-world 3D coordinates in meters with the origin at the +# center between hips. WORLD_LANDMARKS shares the same landmark topology as +# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object +# projected onto the 2D image surface, while WORLD_LANDMARKS provides +# coordinates (in meters) of the 3D object itself. +output_stream: "WORLD_LANDMARKS:pose_world_landmarks" + +# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Extra outputs (for debugging, for instance). +# Detected poses. (Detection) +output_stream: "DETECTION:pose_detection" +# Regions of interest calculated based on landmarks. 
(NormalizedRect) +output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks" +# Regions of interest calculated based on pose detections. (NormalizedRect) +output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_pose_rect_from_landmarks" + output_stream: "gated_prev_pose_rect_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Checks if there's previous pose rect calculated from landmarks. +node: { + calculator: "PacketPresenceCalculator" + input_stream: "PACKET:gated_prev_pose_rect_from_landmarks" + output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present" +} + +# Calculates size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "SIZE:image_size" +} + +# Drops the incoming image if the pose has already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of pose detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "image_size" + input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present" + output_stream: "image_for_pose_detection" + output_stream: "image_size_for_pose_detection" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects poses. +node { + calculator: "PoseDetectionCpu" + input_stream: "IMAGE:image_for_pose_detection" + output_stream: "DETECTIONS:pose_detections" +} + +# Gets the very first detection from "pose_detections" vector. +node { + calculator: "SplitDetectionVectorCalculator" + input_stream: "pose_detections" + output_stream: "pose_detection" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + element_only: true + } + } +} + +# Calculates region of interest based on pose detection, so that can be used +# to detect landmarks. +node { + calculator: "PoseDetectionToRoi" + input_stream: "DETECTION:pose_detection" + input_stream: "IMAGE_SIZE:image_size_for_pose_detection" + output_stream: "ROI:pose_rect_from_detection" +} + +# Selects either pose rect (or ROI) calculated from detection or from previously +# detected landmarks if available (in this case, calculation of pose rect from +# detection is skipped). +node { + calculator: "MergeCalculator" + input_stream: "pose_rect_from_detection" + input_stream: "gated_prev_pose_rect_from_landmarks" + output_stream: "pose_rect" +} + +# Detects pose landmarks within specified region of interest of the image. +node { + calculator: "PoseLandmarkByRoiCpu" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + input_stream: "IMAGE:image" + input_stream: "ROI:pose_rect" + output_stream: "LANDMARKS:unfiltered_pose_landmarks" + output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks" + output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" + output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" +} + +# Smoothes landmarks to reduce jitter. 
+node { + calculator: "PoseLandmarkFiltering" + input_side_packet: "ENABLE:smooth_landmarks" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks" + input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks" + input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" + output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks" + output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks" + output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks" +} + +# Calculates region of interest based on the auxiliary landmarks, to be used in +# the subsequent image. +node { + calculator: "PoseLandmarksToRoi" + input_stream: "LANDMARKS:auxiliary_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:pose_rect_from_landmarks" +} + +# Caches pose rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# pose rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:pose_rect_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks" +} + +# Smoothes segmentation to reduce jitter. +node { + calculator: "PoseSegmentationFiltering" + input_side_packet: "ENABLE:smooth_segmentation" + input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" + output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask" +} + +# Converts the incoming segmentation mask represented as an Image into the +# corresponding ImageFrame type. +node: { + calculator: "FromImageCalculator" + input_stream: "IMAGE:filtered_segmentation_mask" + output_stream: "IMAGE_CPU:segmentation_mask" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_filtering.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_filtering.pbtxt new file mode 100644 index 0000000..bb3665f --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_filtering.pbtxt @@ -0,0 +1,247 @@ +# MediaPipe graph to filter landmarks temporally (across packets with +# incremental timestamps) to reduce jitter. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkFiltering" +# input_side_packet: "ENABLE:enable" +# input_stream: "IMAGE_SIZE:image_size" +# input_stream: "NORM_LANDMARKS:landmarks" +# input_stream: "AUX_NORM_LANDMARKS:aux_landmarks" +# input_stream: "WORLD_LANDMARKS:world_landmarks" +# output_stream: "FILTERED_NORM_LANDMARKS:filtered_landmarks" +# output_stream: "FILTERED_AUX_NORM_LANDMARKS:filtered_aux_landmarks" +# output_stream: "FILTERED_WORLD_LANDMARKS:filtered_world_landmarks" +# } + +type: "PoseLandmarkFiltering" + +# Whether to enable filtering. If unspecified, functions as enabled. (bool) +input_side_packet: "ENABLE:enable" + +# Size of the image (width & height) where the landmarks are estimated from. +# (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# Normalized landmarks. (NormalizedLandmarkList) +input_stream: "NORM_LANDMARKS:landmarks" +# Auxiliary set of normalized landmarks. (NormalizedLandmarkList) +input_stream: "AUX_NORM_LANDMARKS:aux_landmarks" +# World landmarks. (LandmarkList) +input_stream: "WORLD_LANDMARKS:world_landmarks" +# Filtered normalized landmarks. 
(NormalizedLandmarkList) +output_stream: "FILTERED_NORM_LANDMARKS:filtered_landmarks" +# Filtered auxiliary set of normalized landmarks. (NormalizedLandmarkList) +output_stream: "FILTERED_AUX_NORM_LANDMARKS:filtered_aux_landmarks" +# Filtered world landmarks. (LandmarkList) +output_stream: "FILTERED_WORLD_LANDMARKS:filtered_world_landmarks" + +# Converts landmarks to a detection that tightly encloses all landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:aux_landmarks" + output_stream: "DETECTION:aux_detection" +} + +# Converts detection into a rectangle based on center and scale alignment +# points. +node { + calculator: "AlignmentPointsRectsCalculator" + input_stream: "DETECTION:aux_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 + rotation_vector_end_keypoint_index: 1 + rotation_vector_target_angle_degrees: 90 + } + } +} + +# Smoothes pose landmark visibilities to reduce jitter. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:enable" + input_stream: "NORM_LANDMARKS:landmarks" + output_stream: "NORM_FILTERED_LANDMARKS:filtered_visibility" + options: { + [mediapipe.SwitchContainerOptions.ext] { + enable: true + contained_node: { + calculator: "VisibilitySmoothingCalculator" + options: { + [mediapipe.VisibilitySmoothingCalculatorOptions.ext] { + no_filter: {} + } + } + } + contained_node: { + calculator: "VisibilitySmoothingCalculator" + options: { + [mediapipe.VisibilitySmoothingCalculatorOptions.ext] { + low_pass_filter { + alpha: 0.1 + } + } + } + } + } + } +} + +# Smoothes pose landmark coordinates to reduce jitter. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:enable" + input_stream: "NORM_LANDMARKS:filtered_visibility" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "OBJECT_SCALE_ROI:roi" + output_stream: "NORM_FILTERED_LANDMARKS:filtered_landmarks" + options: { + [mediapipe.SwitchContainerOptions.ext] { + enable: true + contained_node: { + calculator: "LandmarksSmoothingCalculator" + options: { + [mediapipe.LandmarksSmoothingCalculatorOptions.ext] { + no_filter: {} + } + } + } + contained_node: { + calculator: "LandmarksSmoothingCalculator" + options: { + [mediapipe.LandmarksSmoothingCalculatorOptions.ext] { + one_euro_filter { + # Min cutoff 0.1 results into ~0.01 alpha in landmark EMA filter + # when landmark is static. + min_cutoff: 0.05 + # Beta 80.0 in combintation with min_cutoff 0.05 results into + # ~0.94 alpha in landmark EMA filter when landmark is moving fast. + beta: 80.0 + # Derivative cutoff 1.0 results into ~0.17 alpha in landmark + # velocity EMA filter. + derivate_cutoff: 1.0 + } + } + } + } + } + } +} + +# Smoothes world landmark visibilities to reduce jitter. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:enable" + input_stream: "LANDMARKS:world_landmarks" + output_stream: "FILTERED_LANDMARKS:filtered_world_visibility" + options: { + [mediapipe.SwitchContainerOptions.ext] { + enable: true + contained_node: { + calculator: "VisibilitySmoothingCalculator" + options: { + [mediapipe.VisibilitySmoothingCalculatorOptions.ext] { + no_filter: {} + } + } + } + contained_node: { + calculator: "VisibilitySmoothingCalculator" + options: { + [mediapipe.VisibilitySmoothingCalculatorOptions.ext] { + low_pass_filter { + alpha: 0.1 + } + } + } + } + } + } +} + +# Smoothes world landmark coordinates to reduce jitter. 
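+# As with the filters above, the node below uses a one-euro filter. As a rough
+# sanity check of the alpha values quoted in its comments (assuming, for
+# illustration only, a ~30 FPS input stream; the actual time step comes from
+# packet timestamps): the EMA coefficient of a one-euro filter is
+#   alpha = 2*pi*f_cutoff*dt / (2*pi*f_cutoff*dt + 1),
+# so with dt = 1/30 s, f_cutoff = 0.05 gives alpha ~= 0.01, f_cutoff = 0.1
+# gives ~0.02, and f_cutoff = 1.0 gives ~0.17.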
+node {
+  calculator: "SwitchContainer"
+  input_side_packet: "ENABLE:enable"
+  input_stream: "LANDMARKS:filtered_world_visibility"
+  output_stream: "FILTERED_LANDMARKS:filtered_world_landmarks"
+  options: {
+    [mediapipe.SwitchContainerOptions.ext] {
+      enable: true
+      contained_node: {
+        calculator: "LandmarksSmoothingCalculator"
+        options: {
+          [mediapipe.LandmarksSmoothingCalculatorOptions.ext] {
+            no_filter: {}
+          }
+        }
+      }
+      contained_node: {
+        calculator: "LandmarksSmoothingCalculator"
+        options: {
+          [mediapipe.LandmarksSmoothingCalculatorOptions.ext] {
+            one_euro_filter {
+              # Min cutoff 0.1 results in ~0.02 alpha in landmark EMA filter
+              # when landmark is static.
+              min_cutoff: 0.1
+              # Beta 40.0 in combination with min_cutoff 0.1 results in ~0.8
+              # alpha in landmark EMA filter when landmark is moving fast.
+              beta: 40.0
+              # Derivative cutoff 1.0 results in ~0.17 alpha in landmark
+              # velocity EMA filter.
+              derivate_cutoff: 1.0
+              # As world landmarks are predicted in real-world 3D coordinates
+              # in meters (rather than in pixels of the input image), the
+              # prediction scale does not depend on the pose size in the image.
+              disable_value_scaling: true
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+# Smoothes auxiliary landmark visibilities to reduce jitter.
+node {
+  calculator: "VisibilitySmoothingCalculator"
+  input_stream: "NORM_LANDMARKS:aux_landmarks"
+  output_stream: "NORM_FILTERED_LANDMARKS:filtered_aux_visibility"
+  options: {
+    [mediapipe.VisibilitySmoothingCalculatorOptions.ext] {
+      low_pass_filter {
+        alpha: 0.1
+      }
+    }
+  }
+}
+
+# Smoothes auxiliary landmarks to reduce jitter.
+node {
+  calculator: "LandmarksSmoothingCalculator"
+  input_stream: "NORM_LANDMARKS:filtered_aux_visibility"
+  input_stream: "IMAGE_SIZE:image_size"
+  input_stream: "OBJECT_SCALE_ROI:roi"
+  output_stream: "NORM_FILTERED_LANDMARKS:filtered_aux_landmarks"
+  options: {
+    [mediapipe.LandmarksSmoothingCalculatorOptions.ext] {
+      # Auxiliary landmarks are smoothed more heavily than the main landmarks
+      # to keep the ROI crop for pose landmark prediction very stable when the
+      # object is not moving, while staying responsive enough to sudden
+      # movements.
+      one_euro_filter {
+        # Min cutoff 0.01 results in ~0.002 alpha in landmark EMA
+        # filter when landmark is static.
+        min_cutoff: 0.01
+        # Beta 10.0 in combination with min_cutoff 0.01 results in ~0.68
+        # alpha in landmark EMA filter when landmark is moving fast.
+        beta: 10.0
+        # Derivative cutoff 1.0 results in ~0.17 alpha in landmark
+        # velocity EMA filter.
+        derivate_cutoff: 1.0
+      }
+    }
+  }
+}
diff --git a/mediapipe/modules/pose_landmark/pose_landmark_full.tflite b/mediapipe/modules/pose_landmark/pose_landmark_full.tflite
new file mode 100755
index 0000000..e2ee84f
Binary files /dev/null and b/mediapipe/modules/pose_landmark/pose_landmark_full.tflite differ
diff --git a/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
new file mode 100644
index 0000000..3ff9ac9
--- /dev/null
+++ b/mediapipe/modules/pose_landmark/pose_landmark_gpu.pbtxt
@@ -0,0 +1,268 @@
+# MediaPipe graph to detect/predict pose landmarks. (GPU input, and inference is
+# executed on GPU.) This graph tries to skip pose detection as much as possible
+# by using previously detected/predicted landmarks for new images.
+#
+# It is required that "pose_detection.tflite" is available at
+# "mediapipe/modules/pose_detection/pose_detection.tflite"
+# path during execution.
+# +# It is required that "pose_landmark_lite.tflite" or +# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at +# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or +# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite" +# path respectively during execution, depending on the specification in the +# MODEL_COMPLEXITY input side packet. +# +# EXAMPLE: +# node { +# calculator: "PoseLandmarkGpu" +# input_side_packet: "MODEL_COMPLEXITY:model_complexity" +# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" +# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" +# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" +# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" +# input_stream: "IMAGE:image" +# output_stream: "LANDMARKS:pose_landmarks" +# output_stream: "SEGMENTATION_MASK:segmentation_mask" +# } + +type: "PoseLandmarkGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Whether to filter landmarks across different input images to reduce jitter. +# If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" + +# Whether to predict the segmentation mask. If unspecified, functions as set to +# false. (bool) +input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + +# Whether to filter segmentation mask across different input images to reduce +# jitter. If unspecified, functions as set to true. (bool) +input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" + +# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as +# inference latency generally go up with the model complexity. If unspecified, +# functions as set to 1. (int) +input_side_packet: "MODEL_COMPLEXITY:model_complexity" + +# Whether landmarks on the previous image should be used to help localize +# landmarks on the current image. (bool) +input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" + +# Pose landmarks. (NormalizedLandmarkList) +# We have 33 landmarks (see pose_landmark_topology.svg), and there are other +# auxiliary key points. +# 0 - nose +# 1 - left eye (inner) +# 2 - left eye +# 3 - left eye (outer) +# 4 - right eye (inner) +# 5 - right eye +# 6 - right eye (outer) +# 7 - left ear +# 8 - right ear +# 9 - mouth (left) +# 10 - mouth (right) +# 11 - left shoulder +# 12 - right shoulder +# 13 - left elbow +# 14 - right elbow +# 15 - left wrist +# 16 - right wrist +# 17 - left pinky +# 18 - right pinky +# 19 - left index +# 20 - right index +# 21 - left thumb +# 22 - right thumb +# 23 - left hip +# 24 - right hip +# 25 - left knee +# 26 - right knee +# 27 - left ankle +# 28 - right ankle +# 29 - left heel +# 30 - right heel +# 31 - left foot index +# 32 - right foot index +# +# NOTE: if a pose is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:pose_landmarks" + +# Pose world landmarks. (LandmarkList) +# World landmarks are real-world 3D coordinates in meters with the origin at the +# center between hips. WORLD_LANDMARKS shares the same landmark topology as +# LANDMARKS. 
However, LANDMARKS provides coordinates (in pixels) of a 3D object +# projected onto the 2D image surface, while WORLD_LANDMARKS provides +# coordinates (in meters) of the 3D object itself. +output_stream: "WORLD_LANDMARKS:pose_world_landmarks" + +# Segmentation mask. (GpuBuffer in RGBA, with the same mask values in R and A) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Extra outputs (for debugging, for instance). +# Detected poses. (Detection) +output_stream: "DETECTION:pose_detection" +# Regions of interest calculated based on landmarks. (NormalizedRect) +output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks" +# Regions of interest calculated based on pose detections. (NormalizedRect) +output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection" + +# When the optional input side packet "use_prev_landmarks" is either absent or +# set to true, uses the landmarks on the previous image to help localize +# landmarks on the current image. +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_pose_rect_from_landmarks" + output_stream: "gated_prev_pose_rect_from_landmarks" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: true + } + } +} + +# Checks if there's previous pose rect calculated from landmarks. +node: { + calculator: "PacketPresenceCalculator" + input_stream: "PACKET:gated_prev_pose_rect_from_landmarks" + output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present" +} + +# Calculates size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Drops the incoming image if the pose has already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of pose detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "image_size" + input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present" + output_stream: "image_for_pose_detection" + output_stream: "image_size_for_pose_detection" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects poses. +node { + calculator: "PoseDetectionGpu" + input_stream: "IMAGE:image_for_pose_detection" + output_stream: "DETECTIONS:pose_detections" +} + +# Gets the very first detection from "pose_detections" vector. +node { + calculator: "SplitDetectionVectorCalculator" + input_stream: "pose_detections" + output_stream: "pose_detection" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + element_only: true + } + } +} + +# Calculates region of interest based on pose detection, so that can be used +# to detect landmarks. +node { + calculator: "PoseDetectionToRoi" + input_stream: "DETECTION:pose_detection" + input_stream: "IMAGE_SIZE:image_size_for_pose_detection" + output_stream: "ROI:pose_rect_from_detection" +} + +# Selects either pose rect (or ROI) calculated from detection or from previously +# detected landmarks if available (in this case, calculation of pose rect from +# detection is skipped). +node { + calculator: "MergeCalculator" + input_stream: "pose_rect_from_detection" + input_stream: "gated_prev_pose_rect_from_landmarks" + output_stream: "pose_rect" +} + +# Detects pose landmarks within specified region of interest of the image. 
+node { + calculator: "PoseLandmarkByRoiGpu" + input_side_packet: "MODEL_COMPLEXITY:model_complexity" + input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" + input_stream: "IMAGE:image" + input_stream: "ROI:pose_rect" + output_stream: "LANDMARKS:unfiltered_pose_landmarks" + output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks" + output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" + output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" +} + +# Smoothes landmarks to reduce jitter. +node { + calculator: "PoseLandmarkFiltering" + input_side_packet: "ENABLE:smooth_landmarks" + input_stream: "IMAGE_SIZE:image_size" + input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks" + input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks" + input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" + output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks" + output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks" + output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks" +} + +# Calculates region of interest based on the auxiliary landmarks, to be used in +# the subsequent image. +node { + calculator: "PoseLandmarksToRoi" + input_stream: "LANDMARKS:auxiliary_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "ROI:pose_rect_from_landmarks" +} + +# Caches pose rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# pose rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:pose_rect_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks" +} + +# Smoothes segmentation to reduce jitter. +node { + calculator: "PoseSegmentationFiltering" + input_side_packet: "ENABLE:smooth_segmentation" + input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" + output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask" +} + +# Converts the incoming segmentation mask represented as an Image into the +# corresponding GpuBuffer type. +node: { + calculator: "FromImageCalculator" + input_stream: "IMAGE:filtered_segmentation_mask" + output_stream: "IMAGE_GPU:segmentation_mask" +} diff --git a/mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite b/mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite new file mode 100755 index 0000000..9b767e7 Binary files /dev/null and b/mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite differ diff --git a/mediapipe/modules/pose_landmark/pose_landmark_lite.tflite b/mediapipe/modules/pose_landmark/pose_landmark_lite.tflite new file mode 100755 index 0000000..280cc72 Binary files /dev/null and b/mediapipe/modules/pose_landmark/pose_landmark_lite.tflite differ diff --git a/mediapipe/modules/pose_landmark/pose_landmark_model_loader.pbtxt b/mediapipe/modules/pose_landmark/pose_landmark_model_loader.pbtxt new file mode 100644 index 0000000..ce7036e --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_landmark_model_loader.pbtxt @@ -0,0 +1,73 @@ +# MediaPipe graph to load a selected pose landmark TF Lite model. + +type: "PoseLandmarkModelLoader" + +# Complexity of the pose landmark model: 0, 1 or 2. 
Landmark accuracy as well as
+# inference latency generally go up with the model complexity. If unspecified,
+# functions as set to 1. (int)
+input_side_packet: "MODEL_COMPLEXITY:model_complexity"
+
+# TF Lite model represented as a FlatBuffer.
+# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
+output_side_packet: "MODEL:model"
+
+# Determines path to the desired pose landmark model file.
+node {
+  calculator: "SwitchContainer"
+  input_side_packet: "SELECT:model_complexity"
+  output_side_packet: "PACKET:model_path"
+  options: {
+    [mediapipe.SwitchContainerOptions.ext] {
+      select: 1
+      contained_node: {
+        calculator: "ConstantSidePacketCalculator"
+        options: {
+          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+            packet {
+              string_value: "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite"
+            }
+          }
+        }
+      }
+      contained_node: {
+        calculator: "ConstantSidePacketCalculator"
+        options: {
+          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+            packet {
+              string_value: "mediapipe/modules/pose_landmark/pose_landmark_full.tflite"
+            }
+          }
+        }
+      }
+      contained_node: {
+        calculator: "ConstantSidePacketCalculator"
+        options: {
+          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+            packet {
+              string_value: "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+# Loads the file in the specified path into a blob.
+node {
+  calculator: "LocalFileContentsCalculator"
+  input_side_packet: "FILE_PATH:model_path"
+  output_side_packet: "CONTENTS:model_blob"
+  options: {
+    [mediapipe.LocalFileContentsCalculatorOptions.ext]: {
+      text_mode: false
+    }
+  }
+}
+
+# Converts the input blob into a TF Lite model.
+node {
+  calculator: "TfLiteModelCalculator"
+  input_side_packet: "MODEL_BLOB:model_blob"
+  output_side_packet: "MODEL:model"
+}
diff --git a/mediapipe/modules/pose_landmark/pose_landmark_topology.svg b/mediapipe/modules/pose_landmark/pose_landmark_topology.svg
new file mode 100644
index 0000000..a57269d
--- /dev/null
+++ b/mediapipe/modules/pose_landmark/pose_landmark_topology.svg
@@ -0,0 +1,651 @@
[SVG markup not reproduced in this extract: a 651-line vector drawing of the 33-point pose landmark topology, with landmark indices 0-32 labeled.]
diff --git a/mediapipe/modules/pose_landmark/pose_landmarks_and_segmentation_inverse_projection.pbtxt b/mediapipe/modules/pose_landmark/pose_landmarks_and_segmentation_inverse_projection.pbtxt
new file mode 100644
index 0000000..eec3b9b
--- /dev/null
+++ b/mediapipe/modules/pose_landmark/pose_landmarks_and_segmentation_inverse_projection.pbtxt
@@ -0,0 +1,125 @@
+# MediaPipe graph projecting the landmarks and segmentation mask defined in a
+# local coordinate system within a (potentially letterboxed) ROI back to the
+# global coordinate system of the full image that contains the ROI.
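+# In simplified terms (a sketch of the idea, not the exact calculator
+# implementations): removing the letterbox rescales each normalized coordinate
+# from the padded ROI back to the unpadded ROI, e.g. for x:
+#   x_roi = (x_padded - pad_left) / (1.0 - pad_left - pad_right)
+# with pad_left/pad_right taken from LETTERBOX_PADDING (and similarly for y
+# with the top/bottom padding); the result is then mapped back to full-image
+# coordinates using the ROI center, size and rotation from NORM_RECT.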
+# +# EXAMPLE: +# node { +# calculator: "PoseLandmarksAndSegmentationInverseProjection" +# input_stream: "IMAGE_SIZE:image_size" +# input_stream: "NORM_RECT:roi" +# input_stream: "LETTERBOX_PADDING:letterbox_padding" +# input_stream: "MATRIX:transformation_matrix" +# input_stream: "LANDMARKS:roi_landmarks" +# input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" +# input_stream: "WORLD_LANDMARKS:roi_world_landmarks" +# input_stream: "SEGMENTATION_MASK:roi_segmentation_mask" +# output_stream: "LANDMARKS:landmarks" +# output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" +# output_stream: "WORLD_LANDMARKS:world_landmarks" +# output_stream: "SEGMENTATION_MASK:segmentation_mask" +# } + +type: "PoseLandmarksAndSegmentationInverseProjection" + +# Size of the full image (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" + +# ROI within the full image. (NormalizedRect) +input_stream: "NORM_RECT:roi" + +# An array representing the letterbox padding around the ROI from the 4 sides: +# [left, top, right, bottom]. The padding is normalized to [0.f, 1.f] by the +# dimensions of the letterboxed/padded ROI. (std::array) +input_stream: "LETTERBOX_PADDING:letterbox_padding" + +# An array representing a 4x4 row-major-order matrix that maps a point within +# the ROI from the global coordinates of the full image to the local coordinates +# within the letterboxed ROI. (std::array) +input_stream: "MATRIX:transformation_matrix" + +# Input landmarks and segmentation mask in local coordinates within the +# letterboxed ROI, and the corresponding outputs in global coordinates of the +# full image. +# LANDMARKS & AUXILIARY_LANDMARKS (NormalizedLandmarkList) +# WORLD_LANDMARKS (LandmarkList) +# SEGMENTATION_MASK (Image) +input_stream: "LANDMARKS:roi_landmarks" +input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" +input_stream: "WORLD_LANDMARKS:roi_world_landmarks" +input_stream: "SEGMENTATION_MASK:roi_segmentation_mask" +output_stream: "LANDMARKS:landmarks" +output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" +output_stream: "WORLD_LANDMARKS:world_landmarks" +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# ----------------------------------------------------------------------------- +# LANDMARKS +# ----------------------------------------------------------------------------- + +# Adjusts landmarks (already normalized to [0.f, 1.f]) in the letterboxed ROI +# to the corresponding coordinates with the letterbox removed. +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:roi_landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:adjusted_landmarks" +} +node { + calculator: "LandmarkLetterboxRemovalCalculator" + input_stream: "LANDMARKS:roi_auxiliary_landmarks" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "LANDMARKS:adjusted_auxiliary_landmarks" +} + +# Projects the landmarks from the letterbox-removed ROI back to the full image. 
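+# Ignoring the ROI rotation for simplicity (an illustrative sketch only; the
+# calculator also applies the rotation about the ROI center), this projection
+# maps a normalized ROI-local landmark (x, y) to full-image coordinates as
+#   x_image = roi.x_center + (x - 0.5) * roi.width
+#   y_image = roi.y_center + (y - 0.5) * roi.height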
+node {
+  calculator: "LandmarkProjectionCalculator"
+  input_stream: "NORM_LANDMARKS:adjusted_landmarks"
+  input_stream: "NORM_RECT:roi"
+  output_stream: "NORM_LANDMARKS:landmarks"
+}
+node {
+  calculator: "LandmarkProjectionCalculator"
+  input_stream: "NORM_LANDMARKS:adjusted_auxiliary_landmarks"
+  input_stream: "NORM_RECT:roi"
+  output_stream: "NORM_LANDMARKS:auxiliary_landmarks"
+}
+
+# -----------------------------------------------------------------------------
+# WORLD_LANDMARKS
+# -----------------------------------------------------------------------------
+
+# Projects the world landmarks from the letterboxed ROI to the full image.
+node {
+  calculator: "WorldLandmarkProjectionCalculator"
+  input_stream: "LANDMARKS:roi_world_landmarks"
+  input_stream: "NORM_RECT:roi"
+  output_stream: "LANDMARKS:world_landmarks"
+}
+
+# -----------------------------------------------------------------------------
+# SEGMENTATION_MASK
+# -----------------------------------------------------------------------------
+
+# Calculates the inverse transformation matrix.
+node {
+  calculator: "InverseMatrixCalculator"
+  input_stream: "MATRIX:transformation_matrix"
+  output_stream: "MATRIX:inverse_transformation_matrix"
+}
+
+# Projects the segmentation mask from the letterboxed ROI back to the full
+# image.
+node {
+  calculator: "WarpAffineCalculator"
+  input_stream: "IMAGE:roi_segmentation_mask"
+  input_stream: "MATRIX:inverse_transformation_matrix"
+  input_stream: "OUTPUT_SIZE:image_size"
+  output_stream: "IMAGE:segmentation_mask"
+  options: {
+    [mediapipe.WarpAffineCalculatorOptions.ext] {
+      border_mode: BORDER_ZERO
+      gpu_origin: TOP_LEFT
+    }
+  }
+}
diff --git a/mediapipe/modules/pose_landmark/pose_landmarks_to_roi.pbtxt b/mediapipe/modules/pose_landmark/pose_landmarks_to_roi.pbtxt
new file mode 100644
index 0000000..b1fe0e3
--- /dev/null
+++ b/mediapipe/modules/pose_landmark/pose_landmarks_to_roi.pbtxt
@@ -0,0 +1,51 @@
+# MediaPipe graph to calculate pose region of interest (ROI) from landmarks
+# detected by "PoseLandmarkByRoiCpu" or "PoseLandmarkByRoiGpu".
+#
+# NOTE: this graph is subject to change and should not be used directly.
+
+type: "PoseLandmarksToRoi"
+
+# Normalized landmarks. (NormalizedLandmarkList)
+input_stream: "LANDMARKS:landmarks"
+# Image size (width & height). (std::pair<int, int>)
+input_stream: "IMAGE_SIZE:image_size"
+# ROI according to landmarks. (NormalizedRect)
+output_stream: "ROI:roi"
+
+# Converts landmarks to a detection that tightly encloses all landmarks.
+node {
+  calculator: "LandmarksToDetectionCalculator"
+  input_stream: "NORM_LANDMARKS:landmarks"
+  output_stream: "DETECTION:detection"
+}
+
+# Converts detection into a rectangle based on center and scale alignment
+# points.
+node {
+  calculator: "AlignmentPointsRectsCalculator"
+  input_stream: "DETECTION:detection"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "NORM_RECT:raw_roi"
+  options: {
+    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
+      rotation_vector_start_keypoint_index: 0
+      rotation_vector_end_keypoint_index: 1
+      rotation_vector_target_angle_degrees: 90
+    }
+  }
+}
+
+# Expands pose rect with margin used during training.
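+# For example (illustrative numbers only; the exact behavior is defined by
+# RectTransformationCalculator): a raw ROI of 200x300 pixels is first scaled
+# by 1.25 to 250x375 pixels, and square_long then expands the shorter side so
+# that the final ROI is a 375x375-pixel square around the same center.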
+node {
+  calculator: "RectTransformationCalculator"
+  input_stream: "NORM_RECT:raw_roi"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "roi"
+  options: {
+    [mediapipe.RectTransformationCalculatorOptions.ext] {
+      scale_x: 1.25
+      scale_y: 1.25
+      square_long: true
+    }
+  }
+} diff --git a/mediapipe/modules/pose_landmark/pose_segmentation_filtering.pbtxt b/mediapipe/modules/pose_landmark/pose_segmentation_filtering.pbtxt new file mode 100644 index 0000000..c3882ad --- /dev/null +++ b/mediapipe/modules/pose_landmark/pose_segmentation_filtering.pbtxt @@ -0,0 +1,61 @@ +# MediaPipe graph to filter segmentation masks temporally (across packets with
+# incremental timestamps) to reduce jitter.
+#
+# EXAMPLE:
+# node {
+#   calculator: "PoseSegmentationFiltering"
+#   input_side_packet: "ENABLE:enable"
+#   input_stream: "SEGMENTATION_MASK:segmentation_mask"
+#   output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
+# }
+
+type: "PoseSegmentationFiltering"
+
+# Whether to enable filtering. If unspecified, functions as enabled. (bool)
+input_side_packet: "ENABLE:enable"
+
+# Segmentation mask. (Image)
+input_stream: "SEGMENTATION_MASK:segmentation_mask"
+
+# Filtered segmentation mask. (Image)
+output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
+
+# Drops the filtered segmentation mask from the previous frame if filtering is
+# not enabled. In that case, the downstream SegmentationSmoothingCalculator
+# becomes a simple passthrough.
+node {
+  calculator: "GateCalculator"
+  input_side_packet: "ALLOW:enable"
+  input_stream: "prev_filtered_segmentation_mask"
+  output_stream: "gated_prev_filtered_segmentation_mask"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      allow: true
+    }
+  }
+}
+
+# Smoothes segmentation to reduce jitter.
+node {
+  calculator: "SegmentationSmoothingCalculator"
+  input_stream: "MASK:segmentation_mask"
+  input_stream: "MASK_PREVIOUS:gated_prev_filtered_segmentation_mask"
+  output_stream: "MASK_SMOOTHED:filtered_segmentation_mask"
+  options {
+    [mediapipe.SegmentationSmoothingCalculatorOptions.ext] {
+      combine_with_previous_ratio: 0.7
+    }
+  }
+}
+
+# Caches the filtered segmentation mask so that it can be fed back as the
+# previous mask for the next frame.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:segmentation_mask"
+  input_stream: "LOOP:filtered_segmentation_mask"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_filtered_segmentation_mask"
+} diff --git a/mediapipe/modules/pose_landmark/tensors_to_pose_landmarks_and_segmentation.pbtxt b/mediapipe/modules/pose_landmark/tensors_to_pose_landmarks_and_segmentation.pbtxt new file mode 100644 index 0000000..ac86233 --- /dev/null +++ b/mediapipe/modules/pose_landmark/tensors_to_pose_landmarks_and_segmentation.pbtxt @@ -0,0 +1,265 @@ +# MediaPipe graph performing tensor post-processing to detect/predict pose
+# landmarks and segmentation mask.
+#
+# EXAMPLE:
+# node {
+#   calculator: "TensorsToPoseLandmarksAndSegmentation"
+#   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
+#   input_stream: "TENSORS:tensors"
+#   output_stream: "LANDMARKS:landmarks"
+#   output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
+#   output_stream: "WORLD_LANDMARKS:world_landmarks"
+#   output_stream: "SEGMENTATION_MASK:segmentation_mask"
+# }
+
+type: "TensorsToPoseLandmarksAndSegmentation"
+
+# Whether to predict segmentation mask. If unspecified, functions as set to
+# false. (bool)
+input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
+
+# Tensors from model inference of
+# "mediapipe/modules/pose_landmark/pose_landmark_lite|full|heavy.tflite".
+# (std::vector<Tensor>)
+# tensors[0]: landmarks
+# tensors[1]: pose flag
+# tensors[2]: segmentation
+# tensors[3]: heatmap
+# tensors[4]: world landmarks
+input_stream: "TENSORS:tensors"
+
+# Pose landmarks. (NormalizedLandmarkList)
+# We have 33 landmarks (see pose_landmark_topology.svg) and there are other
+# auxiliary key points.
+# 0 - nose
+# 1 - left eye (inner)
+# 2 - left eye
+# 3 - left eye (outer)
+# 4 - right eye (inner)
+# 5 - right eye
+# 6 - right eye (outer)
+# 7 - left ear
+# 8 - right ear
+# 9 - mouth (left)
+# 10 - mouth (right)
+# 11 - left shoulder
+# 12 - right shoulder
+# 13 - left elbow
+# 14 - right elbow
+# 15 - left wrist
+# 16 - right wrist
+# 17 - left pinky
+# 18 - right pinky
+# 19 - left index
+# 20 - right index
+# 21 - left thumb
+# 22 - right thumb
+# 23 - left hip
+# 24 - right hip
+# 25 - left knee
+# 26 - right knee
+# 27 - left ankle
+# 28 - right ankle
+# 29 - left heel
+# 30 - right heel
+# 31 - left foot index
+# 32 - right foot index
+#
+# NOTE: If a pose is not present, for this particular timestamp there will not
+# be an output packet in the LANDMARKS stream. However, the MediaPipe framework
+# will internally inform the downstream calculators of the absence of this
+# packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:landmarks"
+# Auxiliary landmarks (e.g., for deriving the ROI in the subsequent image).
+# (NormalizedLandmarkList)
+output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
+
+# Pose world landmarks. (LandmarkList)
+# World landmarks are real-world 3D coordinates in meters with the origin at the
+# center between hips. WORLD_LANDMARKS shares the same landmark topology as
+# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
+# projected onto the 2D image surface, while WORLD_LANDMARKS provides
+# coordinates (in meters) of the 3D object itself.
+output_stream: "WORLD_LANDMARKS:world_landmarks"
+
+# Segmentation mask. (Image)
+output_stream: "SEGMENTATION_MASK:segmentation_mask"
+
+# Splits a vector of tensors to multiple vectors according to the ranges
+# specified in the option.
+node {
+  calculator: "SplitTensorVectorCalculator"
+  input_stream: "tensors"
+  output_stream: "landmark_tensor"
+  output_stream: "pose_flag_tensor"
+  output_stream: "segmentation_tensor"
+  output_stream: "heatmap_tensor"
+  output_stream: "world_landmark_tensor"
+  options: {
+    [mediapipe.SplitVectorCalculatorOptions.ext] {
+      ranges: { begin: 0 end: 1 }
+      ranges: { begin: 1 end: 2 }
+      ranges: { begin: 2 end: 3 }
+      ranges: { begin: 3 end: 4 }
+      ranges: { begin: 4 end: 5 }
+    }
+  }
+}
+
+# Converts the pose-flag tensor into a float that represents the confidence
+# score of pose presence.
+node {
+  calculator: "TensorsToFloatsCalculator"
+  input_stream: "TENSORS:pose_flag_tensor"
+  output_stream: "FLOAT:pose_presence_score"
+}
+
+# Applies a threshold to the confidence score to determine whether a pose is
+# present.
+node {
+  calculator: "ThresholdingCalculator"
+  input_stream: "FLOAT:pose_presence_score"
+  output_stream: "FLAG:pose_presence"
+  options: {
+    [mediapipe.ThresholdingCalculatorOptions.ext] {
+      threshold: 0.5
+    }
+  }
+}
+
+# Drops input tensors if pose is not present.
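+# A single GateCalculator gates all four tensor streams with the same
+# ALLOW:pose_presence flag, so when the presence score falls below the
+# threshold no packets are emitted for this timestamp on any of them.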
+node { + calculator: "GateCalculator" + input_stream: "landmark_tensor" + input_stream: "world_landmark_tensor" + input_stream: "segmentation_tensor" + input_stream: "heatmap_tensor" + input_stream: "ALLOW:pose_presence" + output_stream: "ensured_landmark_tensor" + output_stream: "ensured_world_landmark_tensor" + output_stream: "ensured_segmentation_tensor" + output_stream: "ensured_heatmap_tensor" +} + +# ----------------------------------------------------------------------------- +# LANDMARKS +# ----------------------------------------------------------------------------- + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the spatial dimensions of the tensor. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensor" + output_stream: "NORM_LANDMARKS:raw_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 39 + input_image_width: 256 + input_image_height: 256 + visibility_activation: SIGMOID + presence_activation: SIGMOID + } + } +} + +# Refines landmarks with the heatmap tensor. +node { + calculator: "RefineLandmarksFromHeatmapCalculator" + input_stream: "NORM_LANDMARKS:raw_landmarks" + input_stream: "TENSORS:ensured_heatmap_tensor" + output_stream: "NORM_LANDMARKS:all_landmarks" + options: { + [mediapipe.RefineLandmarksFromHeatmapCalculatorOptions.ext] { + kernel_size: 7 + } + } +} + +# Splits the landmarks into two sets: the actual pose landmarks and the +# auxiliary landmarks. +node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "all_landmarks" + output_stream: "landmarks" + output_stream: "auxiliary_landmarks" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 33 } + ranges: { begin: 33 end: 35 } + } + } +} + +# ----------------------------------------------------------------------------- +# WORLD_LANDMARKS +# ----------------------------------------------------------------------------- + +# Decodes the world-landmark tensors into a vector of world landmarks. +node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_world_landmark_tensor" + output_stream: "LANDMARKS:all_world_landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 39 + } + } +} + +# Keeps only the actual world landmarks. +node { + calculator: "SplitLandmarkListCalculator" + input_stream: "all_world_landmarks" + output_stream: "world_landmarks_without_visibility" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 33 } + } + } +} + +# Reuses the visibility and presence field in pose landmarks for the world +# landmarks. +node { + calculator: "VisibilityCopyCalculator" + input_stream: "NORM_LANDMARKS_FROM:landmarks" + input_stream: "LANDMARKS_TO:world_landmarks_without_visibility" + output_stream: "LANDMARKS_TO:world_landmarks" + options: { + [mediapipe.VisibilityCopyCalculatorOptions.ext] { + copy_visibility: true + copy_presence: true + } + } +} + +# ----------------------------------------------------------------------------- +# SEGMENTATION_MASK +# ----------------------------------------------------------------------------- + +# Drops segmentation tensors if segmentation is not enabled. 
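+# The gate is driven by the ENABLE_SEGMENTATION input side packet; when it is
+# not enabled, the segmentation tensor never reaches the decoding step below
+# and no SEGMENTATION_MASK packets are produced.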
+node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:enable_segmentation" + input_stream: "ensured_segmentation_tensor" + output_stream: "enabled_segmentation_tensor" + options: { + [mediapipe.GateCalculatorOptions.ext] { + allow: false + } + } +} + +# Decodes the segmentation tensor into a mask image with pixel values in [0, 1] +# (1 for person and 0 for background). +node { + calculator: "TensorsToSegmentationCalculator" + input_stream: "TENSORS:enabled_segmentation_tensor" + output_stream: "MASK:segmentation_mask" + options: { + [mediapipe.TensorsToSegmentationCalculatorOptions.ext] { + activation: SIGMOID + gpu_origin: TOP_LEFT + } + } +} diff --git a/mediapipe/modules/selfie_segmentation/BUILD b/mediapipe/modules/selfie_segmentation/BUILD new file mode 100644 index 0000000..7fc271a --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/BUILD @@ -0,0 +1,99 @@ +# Copyright 2021 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "selfie_segmentation_model_loader", + graph = "selfie_segmentation_model_loader.pbtxt", + register_as = "SelfieSegmentationModelLoader", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "selfie_segmentation_cpu", + graph = "selfie_segmentation_cpu.pbtxt", + register_as = "SelfieSegmentationCpu", + deps = [ + ":selfie_segmentation_model_loader", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_segmentation_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "selfie_segmentation_gpu", + graph = "selfie_segmentation_gpu.pbtxt", + register_as = "SelfieSegmentationGpu", + deps = [ + ":selfie_segmentation_model_loader", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_segmentation_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "selfie_segmentation_cpu_image", + graph = "selfie_segmentation_cpu_image.pbtxt", + register_as = "SelfieSegmentationCpuImage", + deps = 
[ + ":selfie_segmentation_cpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "selfie_segmentation_gpu_image", + graph = "selfie_segmentation_gpu_image.pbtxt", + register_as = "SelfieSegmentationGpuImage", + deps = [ + ":selfie_segmentation_gpu", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/util:from_image_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +exports_files( + srcs = [ + "selfie_segmentation.tflite", + "selfie_segmentation_landscape.tflite", + ], +) diff --git a/mediapipe/modules/selfie_segmentation/README.md b/mediapipe/modules/selfie_segmentation/README.md new file mode 100644 index 0000000..cd6c5e0 --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/README.md @@ -0,0 +1,6 @@ +# selfie_segmentation + +Subgraphs|Details +:--- | :--- +[`SelfieSegmentationCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.pbtxt)| Segments the person from background in a selfie image. (CPU input, and inference is executed on CPU.) +[`SelfieSegmentationGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu.pbtxt)| Segments the person from background in a selfie image. (GPU input, and inference is executed on GPU.) diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite b/mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite new file mode 100644 index 0000000..374c072 Binary files /dev/null and b/mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite differ diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.pbtxt b/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.pbtxt new file mode 100644 index 0000000..5918248 --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.pbtxt @@ -0,0 +1,132 @@ +# MediaPipe graph to perform selfie segmentation. (CPU input, and all processing +# and inference are also performed on CPU) +# +# It is required that "selfie_segmentation.tflite" or +# "selfie_segmentation_landscape.tflite" is available at +# "mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite" +# or +# "mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite" +# path respectively during execution, depending on the specification in the +# MODEL_SELECTION input side packet. +# +# EXAMPLE: +# node { +# calculator: "SelfieSegmentationCpu" +# input_side_packet: "MODEL_SELECTION:model_selection" +# input_stream: "IMAGE:image" +# output_stream: "SEGMENTATION_MASK:segmentation_mask" +# } + +type: "SelfieSegmentationCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# An integer 0 or 1. Use 0 to select a general-purpose model (operating on a +# 256x256 tensor), and 1 to select a model (operating on a 256x144 tensor) more +# optimized for landscape images. If unspecified, functions as set to 0. (int) +input_side_packet: "MODEL_SELECTION:model_selection" + +# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) +output_stream: "SEGMENTATION_MASK:segmentation_mask" + +# Resizes the input image into a tensor with a dimension desired by the model. 
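+# A SwitchContainer selects between two ImageToTensorCalculator configurations
+# based on MODEL_SELECTION: 256x256 for the general-purpose model (0) and
+# 256x144 for the landscape model (1), with pixel values scaled to [0.0, 1.0].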
+node { + calculator: "SwitchContainer" + input_side_packet: "SELECT:model_selection" + input_stream: "IMAGE:image" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.SwitchContainerOptions.ext] { + select: 0 + contained_node: { + calculator: "ImageToTensorCalculator" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 256 + keep_aspect_ratio: false + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } + } + contained_node: { + calculator: "ImageToTensorCalculator" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 144 + keep_aspect_ratio: false + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } + } + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" +} + +# Loads the selfie segmentation TF Lite model. +node { + calculator: "SelfieSegmentationModelLoader" + input_side_packet: "MODEL_SELECTION:model_selection" + output_side_packet: "MODEL:model" +} + +# Runs model inference on CPU. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" + input_side_packet: "MODEL:model" + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { + xnnpack {} + } + } + } +} + +# Retrieves the size of the input image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "SIZE:input_size" +} + +# Processes the output tensors into a segmentation mask that has the same size +# as the input image into the graph. +node { + calculator: "TensorsToSegmentationCalculator" + input_stream: "TENSORS:output_tensors" + input_stream: "OUTPUT_SIZE:input_size" + output_stream: "MASK:mask_image" + options: { + [mediapipe.TensorsToSegmentationCalculatorOptions.ext] { + activation: NONE + } + } +} + +# Converts the incoming Image into the corresponding ImageFrame type. +node: { + calculator: "FromImageCalculator" + input_stream: "IMAGE:mask_image" + output_stream: "IMAGE_CPU:segmentation_mask" +} diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu_image.pbtxt b/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu_image.pbtxt new file mode 100644 index 0000000..a35ff0e --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu_image.pbtxt @@ -0,0 +1,67 @@ +# MediaPipe graph to perform selfie segmentation. + +type: "SelfieSegmentationCpuImage" + +# Input image. (Image) +input_stream: "IMAGE:image" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" + +# An integer 0 or 1. Use 0 to select a general-purpose model (operating on a +# 256x256 tensor), and 1 to select a model (operating on a 256x144 tensor) more +# optimized for landscape images. If unspecified, functions as set to 0. (int) +input_side_packet: "MODEL_SELECTION:model_selection" + +# Segmentation mask. 
(Image)
+output_stream: "SEGMENTATION_MASK:segmentation_mask"
+
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "image"
+  input_stream: "FINISHED:segmentation_mask"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_image"
+  options: {
+    [mediapipe.FlowLimiterCalculatorOptions.ext] {
+      max_in_flight: 1
+      max_in_queue: 1
+    }
+  }
+}
+
+# Converts Image to ImageFrame for SelfieSegmentationCpu to consume.
+node {
+  calculator: "FromImageCalculator"
+  input_stream: "IMAGE:throttled_image"
+  output_stream: "IMAGE_CPU:raw_image_frame"
+  output_stream: "SOURCE_ON_GPU:is_gpu_image"
+}
+
+# TODO: Remove the extra flipping once adopting MlImage.
+# If the source images are on GPU, flip the data vertically before sending them
+# into SelfieSegmentationCpu. This may be needed because OpenGL represents
+# images assuming the image origin is at the bottom-left corner, whereas
+# MediaPipe in general assumes the image origin is at the top-left corner.
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:raw_image_frame"
+  input_stream: "FLIP_VERTICALLY:is_gpu_image"
+  output_stream: "IMAGE:image_frame"
+}
+
+node {
+  calculator: "SelfieSegmentationCpu"
+  input_side_packet: "MODEL_SELECTION:model_selection"
+  input_stream: "IMAGE:image_frame"
+  output_stream: "SEGMENTATION_MASK:segmentation_mask_image_frame"
+}
+
+node {
+  calculator: "ToImageCalculator"
+  input_stream: "IMAGE_CPU:segmentation_mask_image_frame"
+  output_stream: "IMAGE:segmentation_mask"
+} diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu.pbtxt b/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu.pbtxt new file mode 100644 index 0000000..5f9e55e --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu.pbtxt @@ -0,0 +1,133 @@ +# MediaPipe graph to perform selfie segmentation. (GPU input, and all processing
+# and inference are also performed on GPU)
+#
+# It is required that "selfie_segmentation.tflite" or
+# "selfie_segmentation_landscape.tflite" is available at
+# "mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite"
+# or
+# "mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite"
+# path respectively during execution, depending on the specification in the
+# MODEL_SELECTION input side packet.
+#
+# EXAMPLE:
+# node {
+#   calculator: "SelfieSegmentationGpu"
+#   input_side_packet: "MODEL_SELECTION:model_selection"
+#   input_stream: "IMAGE:image"
+#   output_stream: "SEGMENTATION_MASK:segmentation_mask"
+# }
+
+type: "SelfieSegmentationGpu"
+
+# GPU image. (GpuBuffer)
+input_stream: "IMAGE:image"
+
+# An integer 0 or 1. Use 0 to select a general-purpose model (operating on a
+# 256x256 tensor), and 1 to select a model (operating on a 256x144 tensor) more
+# optimized for landscape images. If unspecified, functions as set to 0. (int)
+input_side_packet: "MODEL_SELECTION:model_selection"
+
+# Segmentation mask. (GpuBuffer in RGBA, with the same mask values in R and A)
+output_stream: "SEGMENTATION_MASK:segmentation_mask"
+
+# Resizes the input image into a tensor with a dimension desired by the model.
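+# As in the CPU graph, a SwitchContainer picks the 256x256 or 256x144
+# ImageToTensorCalculator configuration based on MODEL_SELECTION; here the
+# contained calculators additionally set gpu_origin: TOP_LEFT to match OpenGL
+# conventions.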
+node { + calculator: "SwitchContainer" + input_side_packet: "SELECT:model_selection" + input_stream: "IMAGE_GPU:image" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.SwitchContainerOptions.ext] { + select: 0 + contained_node: { + calculator: "ImageToTensorCalculator" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 256 + keep_aspect_ratio: false + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } + } + contained_node: { + calculator: "ImageToTensorCalculator" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 256 + output_tensor_height: 144 + keep_aspect_ratio: false + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + border_mode: BORDER_ZERO + gpu_origin: TOP_LEFT + } + } + } + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" + options: { + [mediapipe.TfLiteCustomOpResolverCalculatorOptions.ext] { + use_gpu: true + } + } +} + +# Loads the selfie segmentation TF Lite model. +node { + calculator: "SelfieSegmentationModelLoader" + input_side_packet: "MODEL_SELECTION:model_selection" + output_side_packet: "MODEL:model" +} + +# Runs model inference on GPU. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" + input_side_packet: "MODEL:model" + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" +} + +# Retrieves the size of the input image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:input_size" +} + +# Processes the output tensors into a segmentation mask that has the same size +# as the input image into the graph. +node { + calculator: "TensorsToSegmentationCalculator" + input_stream: "TENSORS:output_tensors" + input_stream: "OUTPUT_SIZE:input_size" + output_stream: "MASK:mask_image" + options: { + [mediapipe.TensorsToSegmentationCalculatorOptions.ext] { + activation: NONE + gpu_origin: TOP_LEFT + } + } +} + +# Converts the incoming Image into the corresponding GpuBuffer type. +node: { + calculator: "FromImageCalculator" + input_stream: "IMAGE:mask_image" + output_stream: "IMAGE_GPU:segmentation_mask" +} diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu_image.pbtxt b/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu_image.pbtxt new file mode 100644 index 0000000..d5c0935 --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/selfie_segmentation_gpu_image.pbtxt @@ -0,0 +1,67 @@ +# MediaPipe graph to perform selfie segmentation. + +type: "SelfieSegmentationGpuImage" + +# Input image. (Image) +input_stream: "IMAGE:image" + +# The throttled input image. (Image) +output_stream: "IMAGE:throttled_image" + +# An integer 0 or 1. Use 0 to select a general-purpose model (operating on a +# 256x256 tensor), and 1 to select a model (operating on a 256x144 tensor) more +# optimized for landscape images. If unspecified, functions as set to 0. (int) +input_side_packet: "MODEL_SELECTION:model_selection" + +# Segmentation mask. 
(Image)
+output_stream: "SEGMENTATION_MASK:segmentation_mask"
+
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "image"
+  input_stream: "FINISHED:segmentation_mask"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_image"
+  options: {
+    [mediapipe.FlowLimiterCalculatorOptions.ext] {
+      max_in_flight: 1
+      max_in_queue: 1
+    }
+  }
+}
+
+# Converts Image to GpuBuffer for SelfieSegmentationGpu to consume.
+node {
+  calculator: "FromImageCalculator"
+  input_stream: "IMAGE:throttled_image"
+  output_stream: "IMAGE_GPU:raw_gpu_buffer"
+  output_stream: "SOURCE_ON_GPU:is_gpu_image"
+}
+
+# TODO: Remove the extra flipping once adopting MlImage.
+# If the source images are on GPU, flip the data vertically before sending them
+# into SelfieSegmentationGpu. This may be needed because OpenGL represents
+# images assuming the image origin is at the bottom-left corner, whereas
+# MediaPipe in general assumes the image origin is at the top-left corner.
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:raw_gpu_buffer"
+  input_stream: "FLIP_VERTICALLY:is_gpu_image"
+  output_stream: "IMAGE_GPU:gpu_buffer"
+}
+
+node {
+  calculator: "SelfieSegmentationGpu"
+  input_side_packet: "MODEL_SELECTION:model_selection"
+  input_stream: "IMAGE:gpu_buffer"
+  output_stream: "SEGMENTATION_MASK:segmentation_mask_gpu_buffer"
+}
+
+node {
+  calculator: "ToImageCalculator"
+  input_stream: "IMAGE_GPU:segmentation_mask_gpu_buffer"
+  output_stream: "IMAGE:segmentation_mask"
+} diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite b/mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite new file mode 100755 index 0000000..4ea3f8a Binary files /dev/null and b/mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite differ diff --git a/mediapipe/modules/selfie_segmentation/selfie_segmentation_model_loader.pbtxt b/mediapipe/modules/selfie_segmentation/selfie_segmentation_model_loader.pbtxt new file mode 100644 index 0000000..39495f8 --- /dev/null +++ b/mediapipe/modules/selfie_segmentation/selfie_segmentation_model_loader.pbtxt @@ -0,0 +1,63 @@ +# MediaPipe graph to load a selected selfie segmentation TF Lite model.
+
+type: "SelfieSegmentationModelLoader"
+
+# An integer 0 or 1. Use 0 to select a general-purpose model (operating on a
+# 256x256 tensor), and 1 to select a model (operating on a 256x144 tensor) more
+# optimized for landscape images. If unspecified, functions as set to 0. (int)
+input_side_packet: "MODEL_SELECTION:model_selection"
+
+# TF Lite model represented as a FlatBuffer.
+# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
+output_side_packet: "MODEL:model"
+
+# Determines the path to the desired selfie segmentation model file.
+node {
+  calculator: "SwitchContainer"
+  input_side_packet: "SELECT:model_selection"
+  output_side_packet: "PACKET:model_path"
+  options: {
+    [mediapipe.SwitchContainerOptions.ext] {
+      select: 0
+      contained_node: {
+        calculator: "ConstantSidePacketCalculator"
+        options: {
+          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+            packet {
+              string_value: "mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite"
+            }
+          }
+        }
+      }
+      contained_node: {
+        calculator: "ConstantSidePacketCalculator"
+        options: {
+          [mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
+            packet {
+              string_value: "mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite"
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+# Loads the file in the specified path into a blob.
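+# LocalFileContentsCalculator reads the selected .tflite file as raw binary
+# data (text_mode: false) and emits it as the "model_blob" side packet.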
+node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + options: { + [mediapipe.LocalFileContentsCalculatorOptions.ext]: { + text_mode: false + } + } +} + +# Converts the input blob into a TF Lite model. +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" +}
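+
+# An example of how a consuming graph might embed this subgraph; the
+# side-packet names below are illustrative, not part of this graph:
+# node {
+#   calculator: "SelfieSegmentationModelLoader"
+#   input_side_packet: "MODEL_SELECTION:model_selection"
+#   output_side_packet: "MODEL:model"
+# }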