git ignore mediapipe and update readme to use symlink

Jules Youngberg 2022-06-20 19:55:03 -07:00
parent bc2c8bb86c
commit c470510a19
212 changed files with 3 additions and 29077 deletions

.gitignore vendored
View File

@@ -9,3 +9,4 @@ Cargo.lock
 # These are backup files generated by rustfmt
 **/*.rs.bk
 /refs/
+mediapipe

View File

@@ -47,11 +47,10 @@ Add the following to your dependencies list in `Cargo.toml`:
 mediapipe = { git = "https://github.com/julesyoungberg/mediapipe-rs" }
 ```
-Mediapipe relies on tflite files which must be available at `./mediapipe/modules/`. The easiest way to do this is
+Mediapipe relies on tflite files which must be available at `./mediapipe/modules/`. The easiest way to satisfy this is by creating a symbolic link to mediapipe. Run the following command from the project directory.
 ```shell
-mkdir mediapipe
-cp -R ../mediapipe/mediapipe/modules ./mediapipe/modules
+ln -s ../mediapipe/mediapipe .
 ```
 The path to mediapipe may be different depending on where you have cloned it to.

View File

@@ -1,18 +0,0 @@
# Modules
Each module (represented as a subfolder) provides subgraphs and corresponding resources (e.g. tflite models) to perform domain-specific tasks (e.g. detect faces, detect face landmarks).
*Modules listed below are already used in some of `mediapipe/graphs` and more graphs are being migrated to use existing and upcoming modules.*
| Module | Description |
| :--- | :--- |
| [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. |
| [`face_geometry`](face_geometry/README.md) | Subgraphs to extract face geometry. |
| [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. |
| [`hand_landmark`](hand_landmark/README.md) | Subgraphs to detect and track hand landmarks. |
| [`holistic_landmark`](holistic_landmark/README.md) | Subgraphs to detect and track holistic pose which consists of pose, face and hand landmarks. |
| [`iris_landmark`](iris_landmark/README.md) | Subgraphs to detect iris landmarks. |
| [`palm_detection`](palm_detection/README.md) | Subgraphs to detect palms/hands. |
| [`pose_detection`](pose_detection/README.md) | Subgraphs to detect poses. |
| [`pose_landmark`](pose_landmark/README.md) | Subgraphs to detect and track pose landmarks. |
| [`objectron`](objectron/README.md) | Subgraphs to detect and track 3D objects. |

View File

@@ -1,150 +0,0 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "face_detection_short_range_by_roi_cpu",
graph = "face_detection_short_range_by_roi_cpu.pbtxt",
register_as = "FaceDetectionShortRangeByRoiCpu",
deps = [
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_by_roi_gpu",
graph = "face_detection_short_range_by_roi_gpu.pbtxt",
register_as = "FaceDetectionShortRangeByRoiGpu",
deps = [
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_cpu",
graph = "face_detection_short_range_cpu.pbtxt",
register_as = "FaceDetectionShortRangeCpu",
deps = [
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_gpu",
graph = "face_detection_short_range_gpu.pbtxt",
register_as = "FaceDetectionShortRangeGpu",
deps = [
":face_detection_short_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_common",
graph = "face_detection_short_range_common.pbtxt",
register_as = "FaceDetectionShortRangeCommon",
deps = [
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/util:detection_projection_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_cpu",
graph = "face_detection_full_range_cpu.pbtxt",
register_as = "FaceDetectionFullRangeCpu",
deps = [
":face_detection_full_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_gpu",
graph = "face_detection_full_range_gpu.pbtxt",
register_as = "FaceDetectionFullRangeGpu",
deps = [
":face_detection_full_range_common",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/util:to_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_common",
graph = "face_detection_full_range_common.pbtxt",
register_as = "FaceDetectionFullRangeCommon",
deps = [
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/util:detection_projection_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_short_range_image",
graph = "face_detection_short_range_image.pbtxt",
register_as = "FaceDetectionShortRangeImage",
deps = [
":face_detection_short_range_common",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_full_range_image",
graph = "face_detection_full_range_image.pbtxt",
register_as = "FaceDetectionFullRangeImage",
deps = [
":face_detection_full_range_common",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
],
)
exports_files(
srcs = [
"face_detection_full_range.tflite",
"face_detection_full_range_sparse.tflite",
"face_detection_short_range.tflite",
],
)

View File

@@ -1,8 +0,0 @@
# face_detection
Subgraphs|Details
:--- | :---
[`FaceDetectionFullRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (CPU input, and inference is executed on CPU.)
[`FaceDetectionFullRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (GPU input, and inference is executed on GPU.)
[`FaceDetectionShortRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (CPU input, and inference is executed on CPU.)
[`FaceDetectionShortRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (GPU input, and inference is executed on GPU.)
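Each of these variants is registered as a reusable subgraph by the `mediapipe_simple_subgraph` rules in the BUILD file above, so a host graph can invoke it like an ordinary calculator. The sketch below is illustrative rather than part of this commit: it assumes the `face_detection_short_range_cpu` target is linked into the binary and that the tflite model is reachable at the path the graph expects; the wrapper graph, frame size, and function name are made up for the example.

```cpp
#include <memory>
#include <vector>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

// Wraps the FaceDetectionShortRangeCpu subgraph in a tiny host graph, feeds it
// one (blank) frame, and logs how many faces come back.
absl::Status RunFaceDetectionOnce() {
  mediapipe::CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
        input_stream: "image"
        output_stream: "face_detections"
        node {
          calculator: "FaceDetectionShortRangeCpu"
          input_stream: "IMAGE:image"
          output_stream: "DETECTIONS:face_detections"
        }
      )pb");

  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  MP_RETURN_IF_ERROR(graph.ObserveOutputStream(
      "face_detections", [](const mediapipe::Packet& packet) {
        // Per the NOTE in the subgraph: no packet is emitted at timestamps
        // where no faces are detected, so this callback may never fire.
        const auto& detections =
            packet.Get<std::vector<mediapipe::Detection>>();
        LOG(INFO) << "faces detected: " << detections.size();
        return absl::OkStatus();
      }));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  // A zeroed 640x480 RGB frame stands in for real camera input.
  auto frame = absl::make_unique<mediapipe::ImageFrame>(
      mediapipe::ImageFormat::SRGB, 640, 480);
  frame->SetToZero();
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "image",
      mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.CloseInputStream("image"));
  return graph.WaitUntilDone();
}
```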

View File

@@ -1,102 +0,0 @@
# MediaPipe graph performing common processing to detect faces using
# face_detection_full_range_sparse.tflite model, currently consisting of tensor
# post processing.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFullRangeCommon"
# input_stream: "TENSORS:detection_tensors"
# input_stream: "MATRIX:transform_matrix"
# output_stream: "DETECTIONS:detections"
# }
type: "FaceDetectionShortRangeCommon"
# Detection tensors. (std::vector<Tensor>)
input_stream: "TENSORS:detection_tensors"
# A 4x4 row-major-order matrix that maps a point represented in the detection
# tensors to a desired coordinate system, e.g., in the original input image
# before scaling/cropping. (std::array<float, 16>)
input_stream: "MATRIX:transform_matrix"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 1
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 192
input_size_width: 192
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 4
aspect_ratios: 1.0
fixed_anchor_size: true
interpolated_scale_aspect_ratio: 0.0
}
}
}
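# With a single anchor layer of stride 4 over the 192x192 input, the anchor
# grid is (192 / 4) x (192 / 4) = 48 x 48 cells. A single aspect ratio and
# interpolated_scale_aspect_ratio 0.0 (which disables the extra
# interpolated-scale anchor) give one anchor per cell, i.e.
# 48 * 48 = 2304 anchors -- matching num_boxes in the decoder below.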
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 2304
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 192.0
y_scale: 192.0
h_scale: 192.0
w_scale: 192.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
# Projects the detections from input tensor to the corresponding locations on
# the original image (input to the graph).
node {
calculator: "DetectionProjectionCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "PROJECTION_MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,80 +0,0 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFullRangeCpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionFullRangeCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
delegate {
xnnpack {}
}
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionFullRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,80 +0,0 @@
# MediaPipe graph to detect faces. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionFullRangeGpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionFullRangeGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
gpu_origin: TOP_LEFT
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
#
delegate: { gpu { use_advanced_gpu_api: true } }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionFullRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,86 +0,0 @@
# MediaPipe graph to detect faces. (GPU/CPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_full_range_sparse.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
# path during execution.
type: "FaceDetectionFullRangeImage"
# Image. (Image)
input_stream: "IMAGE:image"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_image"
options: {
[mediapipe.FlowLimiterCalculatorOptions.ext] {
max_in_flight: 1
max_in_queue: 1
}
}
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:throttled_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
gpu_origin: CONVENTIONAL
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
# TODO: Use GraphOptions to modify the delegate field to be
# `delegate { xnnpack {} }` for the CPU only use cases.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
#
delegate: { gpu { use_advanced_gpu_api: true } }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionFullRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,83 +0,0 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeByRoiCpu"
# input_stream: "IMAGE:image"
# input_stream: "ROI:roi"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionShortRangeByRoiCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where faces should be
# detected. (NormalizedRect)
input_stream: "ROI:roi"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms specified region of image into 128x128 tensor keeping aspect ratio
# (padding tensor if needed).
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
delegate { xnnpack {} }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionShortRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,83 +0,0 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeByRoiGpu"
# input_stream: "IMAGE:image"
# input_stream: "ROI:roi"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionShortRangeByRoiGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where faces should be
# detected. (NormalizedRect)
input_stream: "ROI:roi"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms specified region of image into 128x128 tensor keeping aspect ratio
# (padding tensor if needed).
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
gpu_origin: TOP_LEFT
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionShortRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,103 +0,0 @@
# MediaPipe graph performing common processing to detect faces, currently
# consisting of tensor post processing.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeCommon"
# input_stream: "TENSORS:detection_tensors"
# input_stream: "MATRIX:transform_matrix"
# output_stream: "DETECTIONS:detections"
# }
type: "FaceDetectionShortRangeCommon"
# Detection tensors. (std::vector<Tensor>)
input_stream: "TENSORS:detection_tensors"
# A 4x4 row-major-order matrix that maps a point represented in the detection
# tensors to a desired coordinate system, e.g., in the original input image
# before scaling/cropping. (std::array<float, 16>)
input_stream: "MATRIX:transform_matrix"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 4
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 128
input_size_width: 128
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
strides: 16
strides: 16
strides: 16
aspect_ratios: 1.0
fixed_anchor_size: true
}
}
}
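# For the 128x128 input these strides give one 16x16 grid (stride 8) and three
# 8x8 grids (stride 16). With one explicit aspect ratio plus the default
# interpolated-scale anchor, each cell contributes 2 anchors per layer:
# 2 * (16 * 16) + 3 * 2 * (8 * 8) = 512 + 384 = 896 -- matching num_boxes in
# the decoder below.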
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 896
num_coords: 16
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 6
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 128.0
y_scale: 128.0
h_scale: 128.0
w_scale: 128.0
min_score_thresh: 0.5
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
# Projects the detections from input tensor to the corresponding locations on
# the original image (input to the graph).
node {
calculator: "DetectionProjectionCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "PROJECTION_MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,78 +0,0 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeCpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionShortRangeCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 128x128 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
}
}
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
delegate { xnnpack {} }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionShortRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,78 +0,0 @@
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
# CPU.)
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeGpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionShortRangeGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 128x128 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
gpu_origin: TOP_LEFT
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionShortRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,94 +0,0 @@
# MediaPipe graph to detect faces. (GPU/CPU input, and inference is executed on
# GPU.)
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceDetectionShortRangeCpu"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:face_detections"
# }
type: "FaceDetectionShortRangeCpu"
# Image. (Image)
input_stream: "IMAGE:image"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_image"
options: {
[mediapipe.FlowLimiterCalculatorOptions.ext] {
max_in_flight: 1
max_in_queue: 1
}
}
}
# Transforms the input image into a 128x128 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:throttled_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 128
output_tensor_height: 128
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
gpu_origin: CONVENTIONAL
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
# TODO: Use GraphOptions to modify the delegate field to be
# `delegate { xnnpack {} }` for the CPU only use cases.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
#
delegate: { gpu { use_advanced_gpu_api: true } }
}
}
}
# Performs tensor post processing to generate face detections.
node {
calculator: "FaceDetectionShortRangeCommon"
input_stream: "TENSORS:detection_tensors"
input_stream: "MATRIX:transform_matrix"
output_stream: "DETECTIONS:detections"
}

View File

@@ -1,137 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "face_geometry",
graph = "face_geometry.pbtxt",
register_as = "FaceGeometry",
deps = [
":geometry_pipeline_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_geometry_from_detection",
graph = "face_geometry_from_detection.pbtxt",
register_as = "FaceGeometryFromDetection",
deps = [
":geometry_pipeline_calculator",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/util:detection_to_landmarks_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_geometry_from_landmarks",
graph = "face_geometry_from_landmarks.pbtxt",
register_as = "FaceGeometryFromLandmarks",
deps = [
":geometry_pipeline_calculator",
],
)
mediapipe_proto_library(
name = "effect_renderer_calculator_proto",
srcs = ["effect_renderer_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
],
)
cc_library(
name = "effect_renderer_calculator",
srcs = ["effect_renderer_calculator.cc"],
deps = [
":effect_renderer_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgcodecs",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:gpu_buffer",
"//mediapipe/modules/face_geometry/libs:effect_renderer",
"//mediapipe/modules/face_geometry/libs:validation_utils",
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
"//mediapipe/util:resource_util",
"@com_google_absl//absl/types:optional",
],
alwayslink = 1,
)
mediapipe_proto_library(
name = "env_generator_calculator_proto",
srcs = ["env_generator_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/modules/face_geometry/protos:environment_proto",
],
)
cc_library(
name = "env_generator_calculator",
srcs = ["env_generator_calculator.cc"],
deps = [
":env_generator_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:status",
"//mediapipe/modules/face_geometry/libs:validation_utils",
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
],
alwayslink = 1,
)
mediapipe_proto_library(
name = "geometry_pipeline_calculator_proto",
srcs = ["geometry_pipeline_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
],
)
cc_library(
name = "geometry_pipeline_calculator",
srcs = ["geometry_pipeline_calculator.cc"],
deps = [
":geometry_pipeline_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/modules/face_geometry/libs:geometry_pipeline",
"//mediapipe/modules/face_geometry/libs:validation_utils",
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto",
"//mediapipe/util:resource_util",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)

View File

@@ -1,20 +0,0 @@
# face_geometry
Protos|Details
:--- | :---
[`face_geometry.Environment`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/environment.proto)| Describes an environment; includes the camera frame origin point location as well as virtual camera parameters.
[`face_geometry.GeometryPipelineMetadata`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.proto)| Describes metadata needed to estimate face geometry based on the face landmark module result.
[`face_geometry.FaceGeometry`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto)| Describes geometry data for a single face; includes a face mesh surface and a face pose in a given environment.
[`face_geometry.Mesh3d`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/mesh_3d.proto)| Describes a 3D mesh surface.
Calculators|Details
:--- | :---
[`FaceGeometryEnvGeneratorCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/env_generator_calculator.cc)| Generates an environment that describes a virtual scene.
[`FaceGeometryPipelineCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc)| Extracts face geometry for multiple faces from a vector of landmark lists.
[`FaceGeometryEffectRendererCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/effect_renderer_calculator.cc)| Renders a face effect.
Subgraphs|Details
:--- | :---
[`FaceGeometryFromDetection`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_detection.pbtxt)| Extracts geometry from face detection for multiple faces.
[`FaceGeometryFromLandmarks`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt)| Extracts geometry from face landmarks for multiple faces.
[`FaceGeometry`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry.pbtxt)| Extracts geometry from face landmarks for multiple faces. Deprecated, please use `FaceGeometryFromLandmarks` in the new code.
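These subgraphs compose with the face landmark graphs in the same way as the face detection ones above. As a rough sketch only — the `ENVIRONMENT` and `MULTI_FACE_GEOMETRY` tags match the calculator code later in this diff, while the `MULTI_FACE_LANDMARKS` and `IMAGE_SIZE` input tags are assumptions to verify against the subgraph's pbtxt — a host-graph node using `FaceGeometryFromLandmarks` might look like:

```cpp
// Hypothetical host-graph fragment, e.g. for ParseTextProtoOrDie. The input
// stream tags are assumptions; ENVIRONMENT and MULTI_FACE_GEOMETRY follow the
// env_generator and effect_renderer calculators shown further down.
constexpr char kFaceGeometryNode[] = R"pb(
  node {
    calculator: "FaceGeometryFromLandmarks"
    input_side_packet: "ENVIRONMENT:environment"
    input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
    input_stream: "IMAGE_SIZE:input_image_size"
    output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
  }
)pb";
```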

View File

@@ -1,59 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
encode_binary_proto(
name = "geometry_pipeline_metadata_detection",
input = "geometry_pipeline_metadata_detection.pbtxt",
message_type = "mediapipe.face_geometry.GeometryPipelineMetadata",
output = "geometry_pipeline_metadata_detection.binarypb",
deps = [
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto",
],
)
encode_binary_proto(
name = "geometry_pipeline_metadata_landmarks",
input = "geometry_pipeline_metadata_landmarks.pbtxt",
message_type = "mediapipe.face_geometry.GeometryPipelineMetadata",
output = "geometry_pipeline_metadata_landmarks.binarypb",
deps = [
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto",
],
)
# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from
# the `geometry_pipeline_metadata_landmarks.pbtxt` definition.
encode_binary_proto(
name = "geometry_pipeline_metadata",
input = "geometry_pipeline_metadata_landmarks.pbtxt",
message_type = "mediapipe.face_geometry.GeometryPipelineMetadata",
output = "geometry_pipeline_metadata.binarypb",
deps = [
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto",
],
)
# These canonical face model files are not meant to be used in runtime, but rather for asset
# creation and/or reference.
exports_files([
"canonical_face_model.fbx",
"canonical_face_model.obj",
"canonical_face_model_uv_visualization.png",
])

File diff suppressed because it is too large.

Binary file not shown.

Before: 731 KiB

View File

@@ -1,78 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
input_source: FACE_DETECTION_PIPELINE
procrustes_landmark_basis { landmark_id: 0 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 1 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 2 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 3 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 4 weight: 1.0 }
procrustes_landmark_basis { landmark_id: 5 weight: 1.0 }
# NOTE: the triangular topology of the face meshes is only useful when derived
# from the 468 face landmarks, not from the 6 face detection landmarks
# (keypoints). The latter don't cover the entire face and this mesh is
# defined here only to comply with the API. It should be considered as
# a placeholder and/or for debugging purposes.
#
# Use the face geometry derived from the face detection landmarks
# (keypoints) for the face pose transformation matrix, not the mesh.
canonical_mesh: {
vertex_type: VERTEX_PT
primitive_type: TRIANGLE
vertex_buffer: -3.1511454582214355
vertex_buffer: 2.6246179342269897
vertex_buffer: 3.4656630754470825
vertex_buffer: 0.349575996398926
vertex_buffer: 0.38137748837470997
vertex_buffer: 3.1511454582214355
vertex_buffer: 2.6246179342269897
vertex_buffer: 3.4656630754470825
vertex_buffer: 0.650443494319916
vertex_buffer: 0.38137999176979054
vertex_buffer: 0.0
vertex_buffer: -1.126865029335022
vertex_buffer: 7.475604057312012
vertex_buffer: 0.500025987625122
vertex_buffer: 0.547487020492554
vertex_buffer: 0.0
vertex_buffer: -4.304508209228516
vertex_buffer: 4.162498950958252
vertex_buffer: 0.499989986419678
vertex_buffer: 0.694203019142151
vertex_buffer: -7.664182186126709
vertex_buffer: 0.673132002353668
vertex_buffer: -2.435867071151733
vertex_buffer: 0.007561000064015
vertex_buffer: 0.480777025222778
vertex_buffer: 7.664182186126709
vertex_buffer: 0.673132002353668
vertex_buffer: -2.435867071151733
vertex_buffer: 0.992439985275269
vertex_buffer: 0.480777025222778
index_buffer: 0
index_buffer: 1
index_buffer: 2
index_buffer: 1
index_buffer: 5
index_buffer: 2
index_buffer: 4
index_buffer: 0
index_buffer: 2
index_buffer: 4
index_buffer: 2
index_buffer: 3
index_buffer: 2
index_buffer: 5
index_buffer: 3
}
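# Buffer layout note: VERTEX_PT packs 5 floats per vertex (XYZ position plus
# UV texture coordinate), so the 30 vertex_buffer values above describe the
# 6 face detection keypoints, and the 15 index_buffer values form 5 triangles.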

View File

@@ -1,284 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "absl/types/optional.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_core_inc.h" // NOTYPO
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h" // NOTYPO
#include "mediapipe/framework/port/opencv_imgproc_inc.h" // NOTYPO
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/gpu_buffer.h"
#include "mediapipe/modules/face_geometry/effect_renderer_calculator.pb.h"
#include "mediapipe/modules/face_geometry/libs/effect_renderer.h"
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
#include "mediapipe/util/resource_util.h"
namespace mediapipe {
namespace {
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
static constexpr char kImageGpuTag[] = "IMAGE_GPU";
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
// A calculator that renders a visual effect for multiple faces.
//
// Inputs:
// IMAGE_GPU (`GpuBuffer`, required):
// A buffer containing input image.
//
// MULTI_FACE_GEOMETRY (`std::vector<face_geometry::FaceGeometry>`, optional):
// A vector of face geometry data.
//
// If absent, the input GPU buffer is copied over into the output GPU buffer
// without any effect being rendered.
//
// Input side packets:
// ENVIRONMENT (`face_geometry::Environment`, required)
// Describes an environment; includes the camera frame origin point location
// as well as virtual camera parameters.
//
// Output:
// IMAGE_GPU (`GpuBuffer`, required):
// A buffer with a visual effect being rendered for multiple faces.
//
// Options:
// effect_texture_path (`string`, required):
// Defines a path for the visual effect texture file. The effect texture is
// later rendered on top of the effect mesh.
//
// The texture file format must be supported by the OpenCV image decoder. It
// must also define either an RGB or an RGBA texture.
//
// effect_mesh_3d_path (`string`, optional):
// Defines a path for the visual effect mesh 3D file. The effect mesh is
// later "attached" to the face and is driven by the face pose
// transformation matrix.
//
// The mesh 3D file format must be the binary `face_geometry.Mesh3d` proto.
//
// If it is not present, the runtime face mesh will be used as the effect mesh
// - this mode is handy for facepaint effects.
//
class EffectRendererCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc))
<< "Failed to update contract for the GPU helper!";
cc->InputSidePackets()
.Tag(kEnvironmentTag)
.Set<face_geometry::Environment>();
cc->Inputs().Tag(kImageGpuTag).Set<GpuBuffer>();
cc->Inputs()
.Tag(kMultiFaceGeometryTag)
.Set<std::vector<face_geometry::FaceGeometry>>();
cc->Outputs().Tag(kImageGpuTag).Set<GpuBuffer>();
return mediapipe::GlCalculatorHelper::UpdateContract(cc);
}
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(mediapipe::TimestampDiff(0));
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc))
<< "Failed to open the GPU helper!";
return gpu_helper_.RunInGlContext([&]() -> absl::Status {
const auto& options =
cc->Options<FaceGeometryEffectRendererCalculatorOptions>();
const auto& environment = cc->InputSidePackets()
.Tag(kEnvironmentTag)
.Get<face_geometry::Environment>();
MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment))
<< "Invalid environment!";
absl::optional<face_geometry::Mesh3d> effect_mesh_3d;
if (options.has_effect_mesh_3d_path()) {
ASSIGN_OR_RETURN(effect_mesh_3d,
ReadMesh3dFromFile(options.effect_mesh_3d_path()),
_ << "Failed to read the effect 3D mesh from file!");
MP_RETURN_IF_ERROR(face_geometry::ValidateMesh3d(*effect_mesh_3d))
<< "Invalid effect 3D mesh!";
}
ASSIGN_OR_RETURN(ImageFrame effect_texture,
ReadTextureFromFile(options.effect_texture_path()),
_ << "Failed to read the effect texture from file!");
ASSIGN_OR_RETURN(effect_renderer_,
CreateEffectRenderer(environment, effect_mesh_3d,
std::move(effect_texture)),
_ << "Failed to create the effect renderer!");
return absl::OkStatus();
});
}
absl::Status Process(CalculatorContext* cc) override {
// The `IMAGE_GPU` stream is required to have a non-empty packet. In case
// this requirement is not met, there's nothing to be processed at the
// current timestamp.
if (cc->Inputs().Tag(kImageGpuTag).IsEmpty()) {
return absl::OkStatus();
}
return gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
const auto& input_gpu_buffer =
cc->Inputs().Tag(kImageGpuTag).Get<GpuBuffer>();
GlTexture input_gl_texture =
gpu_helper_.CreateSourceTexture(input_gpu_buffer);
GlTexture output_gl_texture = gpu_helper_.CreateDestinationTexture(
input_gl_texture.width(), input_gl_texture.height());
std::vector<face_geometry::FaceGeometry> empty_multi_face_geometry;
const auto& multi_face_geometry =
cc->Inputs().Tag(kMultiFaceGeometryTag).IsEmpty()
? empty_multi_face_geometry
: cc->Inputs()
.Tag(kMultiFaceGeometryTag)
.Get<std::vector<face_geometry::FaceGeometry>>();
// Validate input multi face geometry data.
for (const face_geometry::FaceGeometry& face_geometry :
multi_face_geometry) {
MP_RETURN_IF_ERROR(face_geometry::ValidateFaceGeometry(face_geometry))
<< "Invalid face geometry!";
}
MP_RETURN_IF_ERROR(effect_renderer_->RenderEffect(
multi_face_geometry, input_gl_texture.width(),
input_gl_texture.height(), input_gl_texture.target(),
input_gl_texture.name(), output_gl_texture.target(),
output_gl_texture.name()))
<< "Failed to render the effect!";
std::unique_ptr<GpuBuffer> output_gpu_buffer =
output_gl_texture.GetFrame<GpuBuffer>();
cc->Outputs()
.Tag(kImageGpuTag)
.AddPacket(mediapipe::Adopt<GpuBuffer>(output_gpu_buffer.release())
.At(cc->InputTimestamp()));
output_gl_texture.Release();
input_gl_texture.Release();
return absl::OkStatus();
});
}
~EffectRendererCalculator() {
gpu_helper_.RunInGlContext([this]() { effect_renderer_.reset(); });
}
private:
static absl::StatusOr<ImageFrame> ReadTextureFromFile(
const std::string& texture_path) {
ASSIGN_OR_RETURN(std::string texture_blob,
ReadContentBlobFromFile(texture_path),
_ << "Failed to read texture blob from file!");
// Use OpenCV image decoding functionality to finish reading the texture.
std::vector<char> texture_blob_vector(texture_blob.begin(),
texture_blob.end());
cv::Mat decoded_mat =
cv::imdecode(texture_blob_vector, cv::IMREAD_UNCHANGED);
RET_CHECK(decoded_mat.type() == CV_8UC3 || decoded_mat.type() == CV_8UC4)
<< "Texture must have `char` as the underlying type and "
"must have either 3 or 4 channels!";
ImageFormat::Format image_format = ImageFormat::UNKNOWN;
cv::Mat output_mat;
switch (decoded_mat.channels()) {
case 3:
image_format = ImageFormat::SRGB;
cv::cvtColor(decoded_mat, output_mat, cv::COLOR_BGR2RGB);
break;
case 4:
image_format = ImageFormat::SRGBA;
cv::cvtColor(decoded_mat, output_mat, cv::COLOR_BGRA2RGBA);
break;
default:
RET_CHECK_FAIL()
<< "Unexpected number of channels; expected 3 or 4, got "
<< decoded_mat.channels() << "!";
}
ImageFrame output_image_frame(image_format, output_mat.size().width,
output_mat.size().height,
ImageFrame::kGlDefaultAlignmentBoundary);
output_mat.copyTo(formats::MatView(&output_image_frame));
return output_image_frame;
}
static absl::StatusOr<face_geometry::Mesh3d> ReadMesh3dFromFile(
const std::string& mesh_3d_path) {
ASSIGN_OR_RETURN(std::string mesh_3d_blob,
ReadContentBlobFromFile(mesh_3d_path),
_ << "Failed to read mesh 3D blob from file!");
face_geometry::Mesh3d mesh_3d;
RET_CHECK(mesh_3d.ParseFromString(mesh_3d_blob))
<< "Failed to parse a mesh 3D proto from a binary blob!";
return mesh_3d;
}
static absl::StatusOr<std::string> ReadContentBlobFromFile(
const std::string& unresolved_path) {
ASSIGN_OR_RETURN(std::string resolved_path,
mediapipe::PathToResourceAsFile(unresolved_path),
_ << "Failed to resolve path! Path = " << unresolved_path);
std::string content_blob;
MP_RETURN_IF_ERROR(
mediapipe::GetResourceContents(resolved_path, &content_blob))
<< "Failed to read content blob! Resolved path = " << resolved_path;
return content_blob;
}
mediapipe::GlCalculatorHelper gpu_helper_;
std::unique_ptr<face_geometry::EffectRenderer> effect_renderer_;
};
} // namespace
using FaceGeometryEffectRendererCalculator = EffectRendererCalculator;
REGISTER_CALCULATOR(FaceGeometryEffectRendererCalculator);
} // namespace mediapipe

View File

@@ -1,46 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator_options.proto";
message FaceGeometryEffectRendererCalculatorOptions {
extend CalculatorOptions {
optional FaceGeometryEffectRendererCalculatorOptions ext = 323693808;
}
// Defines a path for the visual effect texture file. The effect texture is
// later rendered on top of the effect mesh.
//
// Please be aware about the difference between the CPU texture memory layout
// and the GPU texture sampler coordinate space. This renderer follows
// conventions discussed here: https://open.gl/textures
//
// The texture file format must be supported by the OpenCV image decoder. It
// must also define either an RGB or an RGBA texture.
optional string effect_texture_path = 1;
// Defines a path for the visual effect mesh 3D file. The effect mesh is later
// "attached" to the face and is driven by the face pose transformation
// matrix.
//
// The mesh 3D file format must be the binary `face_geometry.Mesh3d` proto.
//
// If it is not present, the runtime face mesh will be used as the effect mesh
// - this mode is handy for facepaint effects.
optional string effect_mesh_3d_path = 2;
}
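
A minimal sketch of populating these options from C++, under the assumption that the glasses demo assets are used (both paths are illustrative, not something this proto ships):

```cpp
#include "mediapipe/modules/face_geometry/effect_renderer_calculator.pb.h"

mediapipe::FaceGeometryEffectRendererCalculatorOptions MakeGlassesOptions() {
  mediapipe::FaceGeometryEffectRendererCalculatorOptions options;
  // Hypothetical asset paths; substitute your own texture and mesh files.
  options.set_effect_texture_path(
      "mediapipe/graphs/face_effect/data/glasses.pngblob");
  // Omitting `effect_mesh_3d_path` instead selects the facepaint mode.
  options.set_effect_mesh_3d_path(
      "mediapipe/graphs/face_effect/data/glasses.binarypb");
  return options;
}
```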

View File

@ -1,81 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/modules/face_geometry/env_generator_calculator.pb.h"
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
namespace mediapipe {
namespace {
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
// A calculator that generates an environment, which describes a virtual scene.
//
// Output side packets:
// ENVIRONMENT (`face_geometry::Environment`, required)
// Describes an environment; includes the camera frame origin point location
// as well as virtual camera parameters.
//
// Options:
// environment (`face_geometry.Environment`, required):
// Defines an environment to be packed as the output side packet.
//
// Must be valid (for details, please refer to the proto message definition
// comments and/or `modules/face_geometry/libs/validation_utils.h/cc`)
//
class EnvGeneratorCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->OutputSidePackets()
.Tag(kEnvironmentTag)
.Set<face_geometry::Environment>();
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(mediapipe::TimestampDiff(0));
const face_geometry::Environment& environment =
cc->Options<FaceGeometryEnvGeneratorCalculatorOptions>().environment();
MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment))
<< "Invalid environment!";
cc->OutputSidePackets()
.Tag(kEnvironmentTag)
.Set(mediapipe::MakePacket<face_geometry::Environment>(environment));
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
return absl::OkStatus();
}
absl::Status Close(CalculatorContext* cc) override {
return absl::OkStatus();
}
};
} // namespace
using FaceGeometryEnvGeneratorCalculator = EnvGeneratorCalculator;
REGISTER_CALCULATOR(FaceGeometryEnvGeneratorCalculator);
} // namespace mediapipe

View File

@ -1,32 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator_options.proto";
import "mediapipe/modules/face_geometry/protos/environment.proto";
message FaceGeometryEnvGeneratorCalculatorOptions {
extend CalculatorOptions {
optional FaceGeometryEnvGeneratorCalculatorOptions ext = 323693810;
}
// Defines an environment to be packed as the output side packet.
//
// Must be valid (for details, please refer to the proto message definition
// comments and/or `modules/face_geometry/libs/validation_utils.h/cc`)
optional face_geometry.Environment environment = 1;
}
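
A hedged sketch of constructing a valid environment in C++ (the camera values are assumptions; see `modules/face_geometry/libs/validation_utils.h/cc` for the actual constraints):

```cpp
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"

mediapipe::face_geometry::Environment MakeEnvironment() {
  mediapipe::face_geometry::Environment environment;
  environment.set_origin_point_location(
      mediapipe::face_geometry::OriginPointLocation::TOP_LEFT_CORNER);
  auto* camera = environment.mutable_perspective_camera();
  camera->set_vertical_fov_degrees(63.f);  // assumed vertical field of view
  camera->set_near(1.f);                   // assumed near-plane distance
  camera->set_far(10000.f);                // assumed far-plane distance
  return environment;
}
```

In graphs, the same message is normally embedded directly in the node's `options` block rather than built in C++.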

View File

@ -1,48 +0,0 @@
# MediaPipe graph to extract geometry from face landmarks for multiple faces.
#
# It is required that "geometry_pipeline_metadata.binarypb" is available at
# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata.binarypb"
# path during execution.
#
# This is a deprecated subgraph kept for backward-compatibility reasons. Please
# be explicit and use the `FaceGeometryFromLandmarks` subgraph in the new code
# to enable the same runtime behaviour.
type: "FaceGeometry"
# The size of the input frame. The first element of the pair is the frame width;
# the other one is the frame height.
#
# The face landmarks should have been detected on a frame with the same
# aspect ratio. If used as-is, the resulting face geometry should likewise be
# visualized on a frame with the same aspect ratio.
#
# (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Collection of detected/predicted faces, each represented as a list of face
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"
# A list of geometry data for each detected face.
# (std::vector<face_geometry::FaceGeometry>)
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Extracts face geometry for multiple faces from a vector of face landmark
# lists.
node {
calculator: "FaceGeometryPipelineCalculator"
input_side_packet: "ENVIRONMENT:environment"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
options: {
[mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata.binarypb"
}
}
}

View File

@ -1,87 +0,0 @@
# MediaPipe graph to extract geometry from face detection for multiple faces.
#
# It is required that "geometry_pipeline_metadata_detection.binarypb" is
# available at
# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.binarypb"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceGeometryFromDetection"
# input_stream: "IMAGE_SIZE:image_size"
# input_stream: "MULTI_FACE_DETECTION:multi_face_detection"
# input_side_packet: "ENVIRONMENT:environment"
# output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# }
type: "FaceGeometryFromDetection"
# The size of the input frame. The first element of the pair is the frame width;
# the other one is the frame height.
#
# The face landmarks should have been detected on a frame with the same
# aspect ratio. If used as-is, the resulting face geometry should likewise be
# visualized on a frame with the same aspect ratio.
#
# (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Collection of detected/predicted faces, each represented as a detection.
# (std::vector<Detection>)
input_stream: "MULTI_FACE_DETECTION:multi_face_detection"
# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"
# A list of geometry data for each detected face.
# (std::vector<face_geometry::FaceGeometry>)
#
# NOTE: the triangular topology of the face meshes is only useful when derived
# from the 468 face landmarks, not from the 6 face detection landmarks
# (keypoints). The latter don't cover the entire face, and this mesh is
# defined here only to comply with the API. It should be considered a
# placeholder and/or used for debugging purposes.
#
# Use the face geometry derived from the face detection landmarks
# (keypoints) for the face pose transformation matrix, not the mesh.
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Begin iterating over a vector of the face detections.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:multi_face_detection"
output_stream: "ITEM:face_detection"
output_stream: "BATCH_END:detection_timestamp"
}
# Extracts face detection keypoints as a normalized landmarks.
node {
calculator: "DetectionToLandmarksCalculator"
input_stream: "DETECTION:face_detection"
output_stream: "LANDMARKS:face_landmarks"
}
# End iterating over a vector of the face detections and receive a vector of
# face landmark lists as a result.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:detection_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Extracts face geometry for multiple faces from a vector of face detection
# landmark lists.
node {
calculator: "FaceGeometryPipelineCalculator"
input_side_packet: "ENVIRONMENT:environment"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
options: {
[mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.binarypb"
}
}
}

View File

@ -1,54 +0,0 @@
# MediaPipe graph to extract geometry from face landmarks for multiple faces.
#
# It is required that "geometry_pipeline_metadata_landmarks.binarypb" is
# available at
# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.binarypb"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "FaceGeometryFromLandmarks"
# input_stream: "IMAGE_SIZE:image_size"
# input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
# input_side_packet: "ENVIRONMENT:environment"
# output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# }
type: "FaceGeometryFromLandmarks"
# The size of the input frame. The first element of the pair is the frame width;
# the other one is the frame height.
#
# The face landmarks should have been detected on a frame with the same
# aspect ratio. If used as-is, the resulting face geometry should likewise be
# visualized on a frame with the same aspect ratio.
#
# (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Collection of detected/predicted faces, each represented as a list of face
# landmarks. (std::vector<NormalizedLandmarkList>)
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
# Environment that describes the current virtual scene.
# (face_geometry::Environment)
input_side_packet: "ENVIRONMENT:environment"
# A list of geometry data for each detected face.
# (std::vector<face_geometry::FaceGeometry>)
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
# Extracts face geometry for multiple faces from a vector of face landmark
# lists.
node {
calculator: "FaceGeometryPipelineCalculator"
input_side_packet: "ENVIRONMENT:environment"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
options: {
[mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.binarypb"
}
}
}

View File

@ -1,197 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/modules/face_geometry/geometry_pipeline_calculator.pb.h"
#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/util/resource_util.h"
namespace mediapipe {
namespace {
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
static constexpr char kImageSizeTag[] = "IMAGE_SIZE";
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS";
// A calculator that extracts face geometry for multiple faces.
//
// Inputs:
// IMAGE_SIZE (`std::pair<int, int>`, required):
// The size of the current frame. The first element of the pair is the frame
// width; the other one is the frame height.
//
// The face landmarks should have been detected on a frame with the same
// aspect ratio. If used as-is, the resulting face geometry should likewise
// be visualized on a frame with the same aspect ratio.
//
// MULTI_FACE_LANDMARKS (`std::vector<NormalizedLandmarkList>`, required):
// A vector of face landmark lists.
//
// Input side packets:
// ENVIRONMENT (`face_geometry::Environment`, required)
// Describes an environment; includes the camera frame origin point location
// as well as virtual camera parameters.
//
// Output:
// MULTI_FACE_GEOMETRY (`std::vector<face_geometry::FaceGeometry>`, required):
// A vector of face geometry data.
//
// Options:
// metadata_path (`string`, optional):
// Defines a path for the geometry pipeline metadata file.
//
// The geometry pipeline metadata file format must be the binary
// `face_geometry.GeometryPipelineMetadata` proto.
//
class GeometryPipelineCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->InputSidePackets()
.Tag(kEnvironmentTag)
.Set<face_geometry::Environment>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
cc->Inputs()
.Tag(kMultiFaceLandmarksTag)
.Set<std::vector<NormalizedLandmarkList>>();
cc->Outputs()
.Tag(kMultiFaceGeometryTag)
.Set<std::vector<face_geometry::FaceGeometry>>();
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(mediapipe::TimestampDiff(0));
const auto& options = cc->Options<FaceGeometryPipelineCalculatorOptions>();
ASSIGN_OR_RETURN(
face_geometry::GeometryPipelineMetadata metadata,
ReadMetadataFromFile(options.metadata_path()),
_ << "Failed to read the geometry pipeline metadata from file!");
MP_RETURN_IF_ERROR(
face_geometry::ValidateGeometryPipelineMetadata(metadata))
<< "Invalid geometry pipeline metadata!";
const face_geometry::Environment& environment =
cc->InputSidePackets()
.Tag(kEnvironmentTag)
.Get<face_geometry::Environment>();
MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment))
<< "Invalid environment!";
ASSIGN_OR_RETURN(
geometry_pipeline_,
face_geometry::CreateGeometryPipeline(environment, metadata),
_ << "Failed to create a geometry pipeline!");
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
// Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required
// to have a non-empty packet. In case this requirement is not met, there's
// nothing to be processed at the current timestamp.
if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() ||
cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) {
return absl::OkStatus();
}
const auto& image_size =
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
const auto& multi_face_landmarks =
cc->Inputs()
.Tag(kMultiFaceLandmarksTag)
.Get<std::vector<NormalizedLandmarkList>>();
auto multi_face_geometry =
absl::make_unique<std::vector<face_geometry::FaceGeometry>>();
ASSIGN_OR_RETURN(
*multi_face_geometry,
geometry_pipeline_->EstimateFaceGeometry(
multi_face_landmarks, //
/*frame_width*/ image_size.first,
/*frame_height*/ image_size.second),
_ << "Failed to estimate face geometry for multiple faces!");
cc->Outputs()
.Tag(kMultiFaceGeometryTag)
.AddPacket(mediapipe::Adopt<std::vector<face_geometry::FaceGeometry>>(
multi_face_geometry.release())
.At(cc->InputTimestamp()));
return absl::OkStatus();
}
absl::Status Close(CalculatorContext* cc) override {
return absl::OkStatus();
}
private:
static absl::StatusOr<face_geometry::GeometryPipelineMetadata>
ReadMetadataFromFile(const std::string& metadata_path) {
ASSIGN_OR_RETURN(std::string metadata_blob,
ReadContentBlobFromFile(metadata_path),
_ << "Failed to read a metadata blob from file!");
face_geometry::GeometryPipelineMetadata metadata;
RET_CHECK(metadata.ParseFromString(metadata_blob))
<< "Failed to parse a metadata proto from a binary blob!";
return metadata;
}
static absl::StatusOr<std::string> ReadContentBlobFromFile(
const std::string& unresolved_path) {
ASSIGN_OR_RETURN(std::string resolved_path,
mediapipe::PathToResourceAsFile(unresolved_path),
_ << "Failed to resolve path! Path = " << unresolved_path);
std::string content_blob;
MP_RETURN_IF_ERROR(
mediapipe::GetResourceContents(resolved_path, &content_blob))
<< "Failed to read content blob! Resolved path = " << resolved_path;
return content_blob;
}
std::unique_ptr<face_geometry::GeometryPipeline> geometry_pipeline_;
};
} // namespace
using FaceGeometryPipelineCalculator = GeometryPipelineCalculator;
REGISTER_CALCULATOR(FaceGeometryPipelineCalculator);
} // namespace mediapipe
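
For context, a minimal sketch of wiring this calculator into a standalone graph from C++; the stream names and metadata path mirror the subgraphs above, but treat this as an assumption-laden sketch rather than a shipped config:

```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_macros.h"

absl::Status BuildGeometryGraph() {
  mediapipe::CalculatorGraphConfig config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
        input_stream: "image_size"
        input_stream: "multi_face_landmarks"
        input_side_packet: "environment"
        output_stream: "multi_face_geometry"
        node {
          calculator: "FaceGeometryPipelineCalculator"
          input_side_packet: "ENVIRONMENT:environment"
          input_stream: "IMAGE_SIZE:image_size"
          input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
          output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
          options {
            [mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
              metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.binarypb"
            }
          }
        }
      )pb");
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  // The graph is now ready for StartRun() / AddPacketToInputStream().
  return absl::OkStatus();
}
```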

View File

@ -1,27 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator_options.proto";
message FaceGeometryPipelineCalculatorOptions {
extend CalculatorOptions {
optional FaceGeometryPipelineCalculatorOptions ext = 323693812;
}
optional string metadata_path = 1;
}

View File

@ -1,103 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "effect_renderer",
srcs = ["effect_renderer.cc"],
hdrs = ["effect_renderer.h"],
deps = [
":mesh_3d_utils",
":validation_utils",
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:matrix_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/gpu:gl_base",
"//mediapipe/gpu:shader_util",
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/types:optional",
],
)
cc_library(
name = "geometry_pipeline",
srcs = ["geometry_pipeline.cc"],
hdrs = ["geometry_pipeline.h"],
deps = [
":mesh_3d_utils",
":procrustes_solver",
":validation_utils",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:matrix",
"//mediapipe/framework/formats:matrix_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto",
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
"@com_google_absl//absl/memory",
"@eigen_archive//:eigen3",
],
)
cc_library(
name = "mesh_3d_utils",
srcs = ["mesh_3d_utils.cc"],
hdrs = ["mesh_3d_utils.h"],
deps = [
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:statusor",
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
],
)
cc_library(
name = "procrustes_solver",
srcs = ["procrustes_solver.cc"],
hdrs = ["procrustes_solver.h"],
deps = [
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/port:statusor",
"@com_google_absl//absl/memory",
"@eigen_archive//:eigen3",
],
)
cc_library(
name = "validation_utils",
srcs = ["validation_utils.cc"],
hdrs = ["validation_utils.h"],
deps = [
":mesh_3d_utils",
"//mediapipe/framework/formats:matrix_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto",
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
],
)

View File

@ -1,733 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/modules/face_geometry/libs/effect_renderer.h"
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "absl/types/optional.h"
#include "mediapipe/framework/formats/image_format.pb.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/shader_util.h"
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
namespace {
struct RenderableMesh3d {
static absl::StatusOr<RenderableMesh3d> CreateFromProtoMesh3d(
const Mesh3d& proto_mesh_3d) {
Mesh3d::VertexType vertex_type = proto_mesh_3d.vertex_type();
RenderableMesh3d renderable_mesh_3d;
renderable_mesh_3d.vertex_size = GetVertexSize(vertex_type);
ASSIGN_OR_RETURN(
renderable_mesh_3d.vertex_position_size,
GetVertexComponentSize(vertex_type, VertexComponent::POSITION),
_ << "Failed to get the position vertex size!");
ASSIGN_OR_RETURN(
renderable_mesh_3d.tex_coord_position_size,
GetVertexComponentSize(vertex_type, VertexComponent::TEX_COORD),
_ << "Failed to get the tex coord vertex size!");
ASSIGN_OR_RETURN(
renderable_mesh_3d.vertex_position_offset,
GetVertexComponentOffset(vertex_type, VertexComponent::POSITION),
_ << "Failed to get the position vertex offset!");
ASSIGN_OR_RETURN(
renderable_mesh_3d.tex_coord_position_offset,
GetVertexComponentOffset(vertex_type, VertexComponent::TEX_COORD),
_ << "Failed to get the tex coord vertex offset!");
switch (proto_mesh_3d.primitive_type()) {
case Mesh3d::TRIANGLE:
renderable_mesh_3d.primitive_type = GL_TRIANGLES;
break;
default:
RET_CHECK_FAIL() << "Only triangle primitive types are supported!";
}
renderable_mesh_3d.vertex_buffer.reserve(
proto_mesh_3d.vertex_buffer_size());
for (float vertex_element : proto_mesh_3d.vertex_buffer()) {
renderable_mesh_3d.vertex_buffer.push_back(vertex_element);
}
renderable_mesh_3d.index_buffer.reserve(proto_mesh_3d.index_buffer_size());
for (uint32_t index_element : proto_mesh_3d.index_buffer()) {
RET_CHECK_LE(index_element, std::numeric_limits<uint16_t>::max())
<< "Index buffer elements must fit into the `uint16` type in order "
"to be renderable!";
renderable_mesh_3d.index_buffer.push_back(
static_cast<uint16_t>(index_element));
}
return renderable_mesh_3d;
}
uint32_t vertex_size;
uint32_t vertex_position_size;
uint32_t tex_coord_position_size;
uint32_t vertex_position_offset;
uint32_t tex_coord_position_offset;
uint32_t primitive_type;
std::vector<float> vertex_buffer;
std::vector<uint16_t> index_buffer;
};
class Texture {
public:
static absl::StatusOr<std::unique_ptr<Texture>> WrapExternalTexture(
GLuint handle, GLenum target, int width, int height) {
RET_CHECK(handle) << "External texture must have a non-null handle!";
return absl::WrapUnique(new Texture(handle, target, width, height,
/*is_owned*/ false));
}
static absl::StatusOr<std::unique_ptr<Texture>> CreateFromImageFrame(
const ImageFrame& image_frame) {
RET_CHECK(image_frame.IsAligned(ImageFrame::kGlDefaultAlignmentBoundary))
<< "Image frame memory must be aligned for GL usage!";
RET_CHECK(image_frame.Width() > 0 && image_frame.Height() > 0)
<< "Image frame must have positive dimensions!";
RET_CHECK(image_frame.Format() == ImageFormat::SRGB ||
image_frame.Format() == ImageFormat::SRGBA)
<< "Image frame format must be either SRGB or SRGBA!";
GLint image_format;
switch (image_frame.NumberOfChannels()) {
case 3:
image_format = GL_RGB;
break;
case 4:
image_format = GL_RGBA;
break;
default:
RET_CHECK_FAIL()
<< "Unexpected number of channels; expected 3 or 4, got "
<< image_frame.NumberOfChannels() << "!";
}
GLuint handle;
glGenTextures(1, &handle);
RET_CHECK(handle) << "Failed to initialize an OpenGL texture!";
glBindTexture(GL_TEXTURE_2D, handle);
    // Set a mipmap-aware minification filter; mipmaps are generated below.
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
                    GL_LINEAR_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, image_format, image_frame.Width(),
image_frame.Height(), 0, image_format, GL_UNSIGNED_BYTE,
image_frame.PixelData());
glGenerateMipmap(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, 0);
return absl::WrapUnique(new Texture(
handle, GL_TEXTURE_2D, image_frame.Width(), image_frame.Height(),
/*is_owned*/ true));
}
~Texture() {
if (is_owned_) {
      // `handle_` names a texture, not a shader program.
      glDeleteTextures(1, &handle_);
}
}
GLuint handle() const { return handle_; }
GLenum target() const { return target_; }
int width() const { return width_; }
int height() const { return height_; }
private:
Texture(GLuint handle, GLenum target, int width, int height, bool is_owned)
: handle_(handle),
target_(target),
width_(width),
height_(height),
is_owned_(is_owned) {}
GLuint handle_;
GLenum target_;
int width_;
int height_;
bool is_owned_;
};
class RenderTarget {
public:
static absl::StatusOr<std::unique_ptr<RenderTarget>> Create() {
GLuint framebuffer_handle;
glGenFramebuffers(1, &framebuffer_handle);
RET_CHECK(framebuffer_handle)
<< "Failed to initialize an OpenGL framebuffer!";
return absl::WrapUnique(new RenderTarget(framebuffer_handle));
}
~RenderTarget() {
glDeleteFramebuffers(1, &framebuffer_handle_);
// Renderbuffer handle might have never been created if this render target
// is destroyed before `SetColorbuffer()` is called for the first time.
if (renderbuffer_handle_) {
      glDeleteRenderbuffers(1, &renderbuffer_handle_);
}
}
absl::Status SetColorbuffer(const Texture& colorbuffer_texture) {
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_handle_);
glViewport(0, 0, colorbuffer_texture.width(), colorbuffer_texture.height());
glActiveTexture(GL_TEXTURE0);
glBindTexture(colorbuffer_texture.target(), colorbuffer_texture.handle());
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
colorbuffer_texture.target(),
colorbuffer_texture.handle(),
/*level*/ 0);
glBindTexture(colorbuffer_texture.target(), 0);
// If the existing depth buffer has different dimensions, delete it.
if (renderbuffer_handle_ &&
(viewport_width_ != colorbuffer_texture.width() ||
viewport_height_ != colorbuffer_texture.height())) {
glDeleteRenderbuffers(1, &renderbuffer_handle_);
renderbuffer_handle_ = 0;
}
// If there is no depth buffer, create one.
if (!renderbuffer_handle_) {
glGenRenderbuffers(1, &renderbuffer_handle_);
RET_CHECK(renderbuffer_handle_)
<< "Failed to initialize an OpenGL renderbuffer!";
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_handle_);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
colorbuffer_texture.width(),
colorbuffer_texture.height());
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_handle_);
glBindRenderbuffer(GL_RENDERBUFFER, 0);
}
viewport_width_ = colorbuffer_texture.width();
viewport_height_ = colorbuffer_texture.height();
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glFlush();
return absl::OkStatus();
}
void Bind() const {
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_handle_);
glViewport(0, 0, viewport_width_, viewport_height_);
}
void Unbind() const { glBindFramebuffer(GL_FRAMEBUFFER, 0); }
void Clear() const {
Bind();
glEnable(GL_DEPTH_TEST);
glDepthMask(GL_TRUE);
glClearColor(0.f, 0.f, 0.f, 0.f);
glClearDepthf(1.f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDepthMask(GL_FALSE);
glDisable(GL_DEPTH_TEST);
Unbind();
glFlush();
}
private:
explicit RenderTarget(GLuint framebuffer_handle)
: framebuffer_handle_(framebuffer_handle),
renderbuffer_handle_(0),
viewport_width_(-1),
viewport_height_(-1) {}
GLuint framebuffer_handle_;
GLuint renderbuffer_handle_;
int viewport_width_;
int viewport_height_;
};
class Renderer {
public:
enum class RenderMode { OPAQUE, OVERDRAW, OCCLUSION };
static absl::StatusOr<std::unique_ptr<Renderer>> Create() {
static const GLint kAttrLocation[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
};
static const GLchar* kAttrName[NUM_ATTRIBUTES] = {
"position",
"tex_coord",
};
static const GLchar* kVertSrc = R"(
uniform mat4 projection_mat;
uniform mat4 model_mat;
attribute vec4 position;
attribute vec4 tex_coord;
varying vec2 v_tex_coord;
void main() {
v_tex_coord = tex_coord.xy;
gl_Position = projection_mat * model_mat * position;
}
)";
static const GLchar* kFragSrc = R"(
precision mediump float;
varying vec2 v_tex_coord;
uniform sampler2D texture;
void main() {
gl_FragColor = texture2D(texture, v_tex_coord);
}
)";
GLuint program_handle = 0;
GlhCreateProgram(kVertSrc, kFragSrc, NUM_ATTRIBUTES,
(const GLchar**)&kAttrName[0], kAttrLocation,
&program_handle);
RET_CHECK(program_handle) << "Problem initializing the texture program!";
GLint projection_mat_uniform =
glGetUniformLocation(program_handle, "projection_mat");
GLint model_mat_uniform = glGetUniformLocation(program_handle, "model_mat");
GLint texture_uniform = glGetUniformLocation(program_handle, "texture");
RET_CHECK_NE(projection_mat_uniform, -1)
<< "Failed to find `projection_mat` uniform!";
RET_CHECK_NE(model_mat_uniform, -1)
<< "Failed to find `model_mat` uniform!";
RET_CHECK_NE(texture_uniform, -1) << "Failed to find `texture` uniform!";
return absl::WrapUnique(new Renderer(program_handle, projection_mat_uniform,
model_mat_uniform, texture_uniform));
}
~Renderer() { glDeleteProgram(program_handle_); }
absl::Status Render(const RenderTarget& render_target, const Texture& texture,
const RenderableMesh3d& mesh_3d,
const std::array<float, 16>& projection_mat,
const std::array<float, 16>& model_mat,
RenderMode render_mode) const {
glUseProgram(program_handle_);
// Set up the GL state.
glEnable(GL_BLEND);
glFrontFace(GL_CCW);
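    // The three modes below configure blend and depth state per pass:
    //   OPAQUE    - alpha-blend over the frame with depth test/write
    //               (main effect pass).
    //   OVERDRAW  - overwrite color, ignore depth (full-frame source copy).
    //   OCCLUSION - write depth only, keep color (invisible face occluder).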
switch (render_mode) {
case RenderMode::OPAQUE:
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_DEPTH_TEST);
glDepthMask(GL_TRUE);
break;
case RenderMode::OVERDRAW:
glBlendFunc(GL_ONE, GL_ZERO);
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
break;
case RenderMode::OCCLUSION:
glBlendFunc(GL_ZERO, GL_ONE);
glEnable(GL_DEPTH_TEST);
glDepthMask(GL_TRUE);
break;
}
render_target.Bind();
// Set up vertex attributes.
glVertexAttribPointer(
ATTRIB_VERTEX, mesh_3d.vertex_position_size, GL_FLOAT, 0,
mesh_3d.vertex_size * sizeof(float),
mesh_3d.vertex_buffer.data() + mesh_3d.vertex_position_offset);
glEnableVertexAttribArray(ATTRIB_VERTEX);
glVertexAttribPointer(
ATTRIB_TEXTURE_POSITION, mesh_3d.tex_coord_position_size, GL_FLOAT, 0,
mesh_3d.vertex_size * sizeof(float),
mesh_3d.vertex_buffer.data() + mesh_3d.tex_coord_position_offset);
glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
// Set up textures and uniforms.
glActiveTexture(GL_TEXTURE1);
glBindTexture(texture.target(), texture.handle());
glUniform1i(texture_uniform_, 1);
glUniformMatrix4fv(projection_mat_uniform_, 1, GL_FALSE,
projection_mat.data());
glUniformMatrix4fv(model_mat_uniform_, 1, GL_FALSE, model_mat.data());
// Draw the mesh.
glDrawElements(mesh_3d.primitive_type, mesh_3d.index_buffer.size(),
GL_UNSIGNED_SHORT, mesh_3d.index_buffer.data());
// Unbind textures and uniforms.
glActiveTexture(GL_TEXTURE1);
glBindTexture(texture.target(), 0);
render_target.Unbind();
// Unbind vertex attributes.
glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
glDisableVertexAttribArray(ATTRIB_VERTEX);
// Restore the GL state.
glDepthMask(GL_FALSE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_BLEND);
glUseProgram(0);
glFlush();
return absl::OkStatus();
}
private:
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
Renderer(GLuint program_handle, GLint projection_mat_uniform,
GLint model_mat_uniform, GLint texture_uniform)
: program_handle_(program_handle),
projection_mat_uniform_(projection_mat_uniform),
model_mat_uniform_(model_mat_uniform),
texture_uniform_(texture_uniform) {}
GLuint program_handle_;
GLint projection_mat_uniform_;
GLint model_mat_uniform_;
GLint texture_uniform_;
};
class EffectRendererImpl : public EffectRenderer {
public:
EffectRendererImpl(
const Environment& environment,
std::unique_ptr<RenderTarget> render_target,
std::unique_ptr<Renderer> renderer,
RenderableMesh3d&& renderable_quad_mesh_3d,
absl::optional<RenderableMesh3d>&& renderable_effect_mesh_3d,
std::unique_ptr<Texture> empty_color_texture,
std::unique_ptr<Texture> effect_texture)
: environment_(environment),
render_target_(std::move(render_target)),
renderer_(std::move(renderer)),
renderable_quad_mesh_3d_(std::move(renderable_quad_mesh_3d)),
renderable_effect_mesh_3d_(std::move(renderable_effect_mesh_3d)),
empty_color_texture_(std::move(empty_color_texture)),
effect_texture_(std::move(effect_texture)),
identity_matrix_(Create4x4IdentityMatrix()) {}
absl::Status RenderEffect(
const std::vector<FaceGeometry>& multi_face_geometry,
int frame_width, //
int frame_height, //
GLenum src_texture_target, //
GLuint src_texture_name, //
GLenum dst_texture_target, //
GLuint dst_texture_name) {
// Validate input arguments.
MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
<< "Invalid frame dimensions!";
RET_CHECK(src_texture_name > 0 && dst_texture_name > 0)
<< "Both source and destination texture names must be non-null!";
RET_CHECK_NE(src_texture_name, dst_texture_name)
<< "Source and destination texture names must be different!";
// Validate all input face geometries.
for (const FaceGeometry& face_geometry : multi_face_geometry) {
MP_RETURN_IF_ERROR(ValidateFaceGeometry(face_geometry))
<< "Invalid face geometry!";
}
// Wrap both source and destination textures.
ASSIGN_OR_RETURN(
std::unique_ptr<Texture> src_texture,
Texture::WrapExternalTexture(src_texture_name, src_texture_target,
frame_width, frame_height),
_ << "Failed to wrap the external source texture");
ASSIGN_OR_RETURN(
std::unique_ptr<Texture> dst_texture,
Texture::WrapExternalTexture(dst_texture_name, dst_texture_target,
frame_width, frame_height),
_ << "Failed to wrap the external destination texture");
// Set the destination texture as the color buffer. Then, clear both the
// color and the depth buffers for the render target.
MP_RETURN_IF_ERROR(render_target_->SetColorbuffer(*dst_texture))
<< "Failed to set the destination texture as the colorbuffer!";
render_target_->Clear();
// Render the source texture on top of the quad mesh (i.e. make a copy)
// into the render target.
MP_RETURN_IF_ERROR(renderer_->Render(
*render_target_, *src_texture, renderable_quad_mesh_3d_,
identity_matrix_, identity_matrix_, Renderer::RenderMode::OVERDRAW))
<< "Failed to render the source texture on top of the quad mesh!";
    // Extract pose transform matrices and meshes from the face geometry data.
const int num_faces = multi_face_geometry.size();
std::vector<std::array<float, 16>> face_pose_transform_matrices(num_faces);
std::vector<RenderableMesh3d> renderable_face_meshes(num_faces);
for (int i = 0; i < num_faces; ++i) {
const FaceGeometry& face_geometry = multi_face_geometry[i];
// Extract the face pose transformation matrix.
ASSIGN_OR_RETURN(
face_pose_transform_matrices[i],
Convert4x4MatrixDataToArrayFormat(
face_geometry.pose_transform_matrix()),
_ << "Failed to extract the face pose transformation matrix!");
// Extract the face mesh as a renderable.
ASSIGN_OR_RETURN(
renderable_face_meshes[i],
RenderableMesh3d::CreateFromProtoMesh3d(face_geometry.mesh()),
_ << "Failed to extract a renderable face mesh!");
}
// Create a perspective matrix using the frame aspect ratio.
std::array<float, 16> perspective_matrix = CreatePerspectiveMatrix(
/*aspect_ratio*/ static_cast<float>(frame_width) / frame_height);
// Render a face mesh occluder for each face.
for (int i = 0; i < num_faces; ++i) {
const std::array<float, 16>& face_pose_transform_matrix =
face_pose_transform_matrices[i];
const RenderableMesh3d& renderable_face_mesh = renderable_face_meshes[i];
// Render the face mesh using the empty color texture, i.e. the face
// mesh occluder.
//
// For occlusion, the pose transformation is moved ~1mm away from camera
// in order to allow the face mesh texture to be rendered without
// failing the depth test.
std::array<float, 16> occlusion_face_pose_transform_matrix =
face_pose_transform_matrix;
occlusion_face_pose_transform_matrix[14] -= 0.1f; // ~ 1mm
MP_RETURN_IF_ERROR(renderer_->Render(
*render_target_, *empty_color_texture_, renderable_face_mesh,
perspective_matrix, occlusion_face_pose_transform_matrix,
Renderer::RenderMode::OCCLUSION))
<< "Failed to render the face mesh occluder!";
}
// Render the main face mesh effect component for each face.
for (int i = 0; i < num_faces; ++i) {
const std::array<float, 16>& face_pose_transform_matrix =
face_pose_transform_matrices[i];
// If there is no effect 3D mesh provided, then the face mesh itself is
// used as a topology for rendering (for example, this can be used for
// facepaint effects or AR makeup).
const RenderableMesh3d& main_effect_mesh_3d =
renderable_effect_mesh_3d_ ? *renderable_effect_mesh_3d_
: renderable_face_meshes[i];
MP_RETURN_IF_ERROR(renderer_->Render(
*render_target_, *effect_texture_, main_effect_mesh_3d,
perspective_matrix, face_pose_transform_matrix,
Renderer::RenderMode::OPAQUE))
<< "Failed to render the main effect pass!";
}
// At this point in the code, the destination texture must contain the
    // correctly rendered effect, so we should just return.
return absl::OkStatus();
}
private:
std::array<float, 16> CreatePerspectiveMatrix(float aspect_ratio) const {
static constexpr float kDegreesToRadians = M_PI / 180.f;
std::array<float, 16> perspective_matrix;
perspective_matrix.fill(0.f);
const auto& env_camera = environment_.perspective_camera();
// Standard perspective projection matrix calculations.
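    // Laid out as a matrix (the array below is OpenGL column-major), with
    // c = 1 / tan(vertical_fov / 2), n = near and f = far:
    //
    //   | c/aspect  0     0              0            |
    //   | 0         c     0              0            |
    //   | 0         0    (n+f)/(n-f)    2*f*n/(n-f)   |
    //   | 0         0    -1              0            |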
const float f = 1.0f / std::tan(kDegreesToRadians *
env_camera.vertical_fov_degrees() / 2.f);
const float denom = 1.0f / (env_camera.near() - env_camera.far());
perspective_matrix[0] = f / aspect_ratio;
perspective_matrix[5] = f;
perspective_matrix[10] = (env_camera.near() + env_camera.far()) * denom;
perspective_matrix[11] = -1.f;
perspective_matrix[14] = 2.f * env_camera.far() * env_camera.near() * denom;
// If the environment's origin point location is in the top left corner,
    // then an additional flip along the Y-axis is required to render correctly.
if (environment_.origin_point_location() ==
OriginPointLocation::TOP_LEFT_CORNER) {
perspective_matrix[5] *= -1.f;
}
return perspective_matrix;
}
static std::array<float, 16> Create4x4IdentityMatrix() {
return {1.f, 0.f, 0.f, 0.f, //
0.f, 1.f, 0.f, 0.f, //
0.f, 0.f, 1.f, 0.f, //
0.f, 0.f, 0.f, 1.f};
}
static absl::StatusOr<std::array<float, 16>>
Convert4x4MatrixDataToArrayFormat(const MatrixData& matrix_data) {
RET_CHECK(matrix_data.rows() == 4 && //
matrix_data.cols() == 4 && //
matrix_data.packed_data_size() == 16)
<< "The matrix data must define a 4x4 matrix!";
std::array<float, 16> matrix_array;
for (int i = 0; i < 16; i++) {
matrix_array[i] = matrix_data.packed_data(i);
}
// Matrix array must be in the OpenGL-friendly column-major order. If
// `matrix_data` is in the row-major order, then transpose.
if (matrix_data.layout() == MatrixData::ROW_MAJOR) {
std::swap(matrix_array[1], matrix_array[4]);
std::swap(matrix_array[2], matrix_array[8]);
std::swap(matrix_array[3], matrix_array[12]);
std::swap(matrix_array[6], matrix_array[9]);
std::swap(matrix_array[7], matrix_array[13]);
std::swap(matrix_array[11], matrix_array[14]);
}
return matrix_array;
}
Environment environment_;
std::unique_ptr<RenderTarget> render_target_;
std::unique_ptr<Renderer> renderer_;
RenderableMesh3d renderable_quad_mesh_3d_;
absl::optional<RenderableMesh3d> renderable_effect_mesh_3d_;
std::unique_ptr<Texture> empty_color_texture_;
std::unique_ptr<Texture> effect_texture_;
std::array<float, 16> identity_matrix_;
};
Mesh3d CreateQuadMesh3d() {
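  // Each row below is one vertex in the `VERTEX_PT` layout: an XYZ position
  // spanning the full viewport, followed by UV texture coordinates.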
static constexpr float kQuadMesh3dVertexBuffer[] = {
-1.f, -1.f, 0.f, 0.f, 0.f, //
1.f, -1.f, 0.f, 1.f, 0.f, //
-1.f, 1.f, 0.f, 0.f, 1.f, //
1.f, 1.f, 0.f, 1.f, 1.f, //
};
static constexpr uint16_t kQuadMesh3dIndexBuffer[] = {0, 1, 2, 1, 3, 2};
static constexpr int kQuadMesh3dVertexBufferSize =
sizeof(kQuadMesh3dVertexBuffer) / sizeof(float);
static constexpr int kQuadMesh3dIndexBufferSize =
sizeof(kQuadMesh3dIndexBuffer) / sizeof(uint16_t);
Mesh3d quad_mesh_3d;
quad_mesh_3d.set_vertex_type(Mesh3d::VERTEX_PT);
quad_mesh_3d.set_primitive_type(Mesh3d::TRIANGLE);
for (int i = 0; i < kQuadMesh3dVertexBufferSize; ++i) {
quad_mesh_3d.add_vertex_buffer(kQuadMesh3dVertexBuffer[i]);
}
for (int i = 0; i < kQuadMesh3dIndexBufferSize; ++i) {
quad_mesh_3d.add_index_buffer(kQuadMesh3dIndexBuffer[i]);
}
return quad_mesh_3d;
}
ImageFrame CreateEmptyColorTexture() {
static constexpr ImageFormat::Format kEmptyColorTextureFormat =
ImageFormat::SRGBA;
static constexpr int kEmptyColorTextureWidth = 1;
static constexpr int kEmptyColorTextureHeight = 1;
ImageFrame empty_color_texture(
kEmptyColorTextureFormat, kEmptyColorTextureWidth,
kEmptyColorTextureHeight, ImageFrame::kGlDefaultAlignmentBoundary);
empty_color_texture.SetToZero();
return empty_color_texture;
}
} // namespace
absl::StatusOr<std::unique_ptr<EffectRenderer>> CreateEffectRenderer(
const Environment& environment, //
const absl::optional<Mesh3d>& effect_mesh_3d, //
ImageFrame&& effect_texture) {
MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
<< "Invalid environment!";
if (effect_mesh_3d) {
MP_RETURN_IF_ERROR(ValidateMesh3d(*effect_mesh_3d))
<< "Invalid effect 3D mesh!";
}
ASSIGN_OR_RETURN(std::unique_ptr<RenderTarget> render_target,
RenderTarget::Create(),
_ << "Failed to create a render target!");
ASSIGN_OR_RETURN(std::unique_ptr<Renderer> renderer, Renderer::Create(),
_ << "Failed to create a renderer!");
ASSIGN_OR_RETURN(RenderableMesh3d renderable_quad_mesh_3d,
RenderableMesh3d::CreateFromProtoMesh3d(CreateQuadMesh3d()),
_ << "Failed to create a renderable quad mesh!");
absl::optional<RenderableMesh3d> renderable_effect_mesh_3d;
if (effect_mesh_3d) {
ASSIGN_OR_RETURN(renderable_effect_mesh_3d,
RenderableMesh3d::CreateFromProtoMesh3d(*effect_mesh_3d),
_ << "Failed to create a renderable effect mesh!");
}
ASSIGN_OR_RETURN(std::unique_ptr<Texture> empty_color_gl_texture,
Texture::CreateFromImageFrame(CreateEmptyColorTexture()),
_ << "Failed to create an empty color texture!");
ASSIGN_OR_RETURN(std::unique_ptr<Texture> effect_gl_texture,
Texture::CreateFromImageFrame(effect_texture),
_ << "Failed to create an effect texture!");
std::unique_ptr<EffectRenderer> result =
absl::make_unique<EffectRendererImpl>(
environment, std::move(render_target), std::move(renderer),
std::move(renderable_quad_mesh_3d),
std::move(renderable_effect_mesh_3d),
std::move(empty_color_gl_texture), std::move(effect_gl_texture));
return result;
}
} // namespace mediapipe::face_geometry

View File

@ -1,92 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_
#define MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
// Encapsulates a stateful face effect renderer.
class EffectRenderer {
public:
virtual ~EffectRenderer() = default;
// Renders a face effect based on the multiple facial geometries.
//
// Must be called in the same GL context as was used upon initialization.
//
// Each of the `multi_face_geometry` entries must be valid (for details, refer
// to the proto message definition comments and/or `validation_utils.h/cc`).
// Additionally, all face mesh index buffer elements must fit into the
// `uint16` type in order to be renderable.
//
// Both `frame_width` and `frame_height` must be positive.
//
// Both `src_texture_name` and `dst_texture_name` must be positive and
// reference existing OpenGL textures in the current context. They should also
// reference different textures, as in-place effect rendering is not yet
// supported.
virtual absl::Status RenderEffect(
const std::vector<FaceGeometry>& multi_face_geometry,
int frame_width, //
int frame_height, //
GLenum src_texture_target, //
GLuint src_texture_name, //
GLenum dst_texture_target, //
GLuint dst_texture_name) = 0;
};
// Creates an instance of `EffectRenderer`.
//
// `effect_mesh_3d` defines a rigid 3d mesh which is "attached" to the face and
// is driven by the face pose transformation matrix. If it is not present, the
// runtime face mesh will be used as the effect mesh - this mode is handy for
// facepaint effects. In both rendering modes, the face mesh is first rendered
// as an occluder straight into the depth buffer. This step helps to create a
// more believable effect by hiding invisible elements behind the face surface.
//
// `effect_texture` defines the color texture to be rendered on top of the
// effect mesh. Please be aware of the difference between the CPU texture
// memory layout and the GPU texture sampler coordinate space. This renderer
// follows conventions discussed here: https://open.gl/textures
//
// Must be called in the same GL context as will be used for rendering.
//
// Both `environment` and `effect_mesh_3d` (if present) must be valid (for
// details, please refer to the proto message definition comments and/or
// `validation_utils.h/cc`). Additionally, the `effect_mesh_3d` index buffer
// elements must fit into the `uint16` type in order to be renderable.
//
// `effect_texture` must have positive dimensions. Its format must be either
// `SRGB` or `SRGBA`. Its memory must be aligned for GL usage.
absl::StatusOr<std::unique_ptr<EffectRenderer>> CreateEffectRenderer(
const Environment& environment, //
const absl::optional<Mesh3d>& effect_mesh_3d, //
ImageFrame&& effect_texture);
} // namespace mediapipe::face_geometry
#endif // MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_
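
A hedged usage sketch of this API; the helper name, frame size, and texture handles are assumptions for illustration, and the call must run on the GL context used for rendering:

```cpp
#include <memory>
#include <utility>
#include <vector>

#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/modules/face_geometry/libs/effect_renderer.h"

absl::Status RenderFacepaint(
    const mediapipe::face_geometry::Environment& environment,
    mediapipe::ImageFrame&& effect_texture,
    const std::vector<mediapipe::face_geometry::FaceGeometry>& faces,
    GLuint src_texture, GLuint dst_texture) {
  // No effect mesh: the runtime face mesh itself is used (facepaint mode).
  ASSIGN_OR_RETURN(
      std::unique_ptr<mediapipe::face_geometry::EffectRenderer> renderer,
      mediapipe::face_geometry::CreateEffectRenderer(
          environment, absl::nullopt, std::move(effect_texture)));
  // Frame dimensions are assumed; they must match the wrapped textures.
  return renderer->RenderEffect(faces, /*frame_width=*/1280,
                                /*frame_height=*/720, GL_TEXTURE_2D,
                                src_texture, GL_TEXTURE_2D, dst_texture);
}
```

The `EffectRendererCalculator` shown earlier wraps this same setup behind a calculator interface.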

View File

@ -1,466 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"
#include <cmath>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "Eigen/Core"
#include "absl/memory/memory.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h"
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
namespace {
struct PerspectiveCameraFrustum {
// NOTE: all arguments must be validated prior to calling this constructor.
PerspectiveCameraFrustum(const PerspectiveCamera& perspective_camera,
int frame_width, int frame_height) {
static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;
const float height_at_near =
2.f * perspective_camera.near() *
std::tan(0.5f * kDegreesToRadians *
perspective_camera.vertical_fov_degrees());
const float width_at_near = frame_width * height_at_near / frame_height;
left = -0.5f * width_at_near;
right = 0.5f * width_at_near;
bottom = -0.5f * height_at_near;
top = 0.5f * height_at_near;
near = perspective_camera.near();
far = perspective_camera.far();
}
float left;
float right;
float bottom;
float top;
float near;
float far;
};
class ScreenToMetricSpaceConverter {
public:
ScreenToMetricSpaceConverter(
OriginPointLocation origin_point_location, //
InputSource input_source, //
Eigen::Matrix3Xf&& canonical_metric_landmarks, //
Eigen::VectorXf&& landmark_weights, //
std::unique_ptr<ProcrustesSolver> procrustes_solver)
: origin_point_location_(origin_point_location),
input_source_(input_source),
canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
landmark_weights_(std::move(landmark_weights)),
procrustes_solver_(std::move(procrustes_solver)) {}
// Converts `screen_landmark_list` into `metric_landmark_list` and estimates
// the `pose_transform_mat`.
//
// Here's the algorithm summary:
//
// (1) Project X- and Y- screen landmark coordinates at the Z near plane.
//
// (2) Estimate a canonical-to-runtime landmark set scale by running the
// Procrustes solver using the screen runtime landmarks.
//
// On this iteration, screen landmarks are used instead of unprojected
// metric landmarks as it is not safe to unproject due to the relative
// nature of the input screen landmark Z coordinate.
//
// (3) Use the canonical-to-runtime scale from (2) to unproject the screen
// landmarks. The result is referenced as "intermediate landmarks" because
  // they are the first estimation of the resulting metric landmarks, but are
// not quite there yet.
//
// (4) Estimate a canonical-to-runtime landmark set scale by running the
// Procrustes solver using the intermediate runtime landmarks.
//
// (5) Use the product of the scale factors from (2) and (4) to unproject
// the screen landmarks the second time. This is the second and the final
// estimation of the metric landmarks.
//
// (6) Multiply each of the metric landmarks by the inverse pose
// transformation matrix to align the runtime metric face landmarks with
// the canonical metric face landmarks.
//
// Note: the input screen landmarks are in the left-handed coordinate system,
// however any metric landmarks - including the canonical metric
// landmarks, the final runtime metric landmarks and any intermediate
// runtime metric landmarks - are in the right-handed coordinate system.
//
// To keep the logic correct, the landmark set handedness is changed any
// time the screen-to-metric semantic barrier is passed.
absl::Status Convert(const NormalizedLandmarkList& screen_landmark_list, //
const PerspectiveCameraFrustum& pcf, //
LandmarkList& metric_landmark_list, //
Eigen::Matrix4f& pose_transform_mat) const {
RET_CHECK_EQ(screen_landmark_list.landmark_size(),
canonical_metric_landmarks_.cols())
<< "The number of landmarks doesn't match the number passed upon "
"initialization!";
Eigen::Matrix3Xf screen_landmarks;
ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);
ProjectXY(pcf, screen_landmarks);
const float depth_offset = screen_landmarks.row(2).mean();
// 1st iteration: don't unproject XY because it's unsafe to do so due to
// the relative nature of the Z coordinate. Instead, run the
// first estimation on the projected XY and use that scale to
// unproject for the 2nd iteration.
Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
ChangeHandedness(intermediate_landmarks);
ASSIGN_OR_RETURN(const float first_iteration_scale,
EstimateScale(intermediate_landmarks),
_ << "Failed to estimate first iteration scale!");
// 2nd iteration: unproject XY using the scale from the 1st iteration.
intermediate_landmarks = screen_landmarks;
MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
intermediate_landmarks);
UnprojectXY(pcf, intermediate_landmarks);
ChangeHandedness(intermediate_landmarks);
// For face detection input landmarks, re-write Z-coord from the canonical
// landmarks.
if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {
Eigen::Matrix4f intermediate_pose_transform_mat;
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, intermediate_landmarks,
landmark_weights_, intermediate_pose_transform_mat))
<< "Failed to estimate pose transform matrix!";
intermediate_landmarks.row(2) =
(intermediate_pose_transform_mat *
canonical_metric_landmarks_.colwise().homogeneous())
.row(2);
}
ASSIGN_OR_RETURN(const float second_iteration_scale,
EstimateScale(intermediate_landmarks),
_ << "Failed to estimate second iteration scale!");
// Use the total scale to unproject the screen landmarks.
const float total_scale = first_iteration_scale * second_iteration_scale;
MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
UnprojectXY(pcf, screen_landmarks);
ChangeHandedness(screen_landmarks);
// At this point, screen landmarks are converted into metric landmarks.
Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
pose_transform_mat))
<< "Failed to estimate pose transform matrix!";
// For face detection input landmarks, re-write Z-coord from the canonical
// landmarks and run the pose transform estimation again.
if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {
metric_landmarks.row(2) =
(pose_transform_mat *
canonical_metric_landmarks_.colwise().homogeneous())
.row(2);
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
pose_transform_mat))
<< "Failed to estimate pose transform matrix!";
}
// Multiply each of the metric landmarks by the inverse pose
// transformation matrix to align the runtime metric face landmarks with
// the canonical metric face landmarks.
metric_landmarks = (pose_transform_mat.inverse() *
metric_landmarks.colwise().homogeneous())
.topRows(3);
ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);
return absl::OkStatus();
}
private:
void ProjectXY(const PerspectiveCameraFrustum& pcf,
Eigen::Matrix3Xf& landmarks) const {
float x_scale = pcf.right - pcf.left;
float y_scale = pcf.top - pcf.bottom;
float x_translation = pcf.left;
float y_translation = pcf.bottom;
if (origin_point_location_ == OriginPointLocation::TOP_LEFT_CORNER) {
landmarks.row(1) = 1.f - landmarks.row(1).array();
}
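// Note: Z is deliberately scaled by `x_scale` (the same factor as X), which
// matches the weak perspective projection assumption under which the relative
// Z coordinate is scaled as the X coordinate.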
landmarks =
landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
}
absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
Eigen::Matrix4f transform_mat;
MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
canonical_metric_landmarks_, landmarks, landmark_weights_,
transform_mat))
<< "Failed to estimate canonical-to-runtime landmark set transform!";
return transform_mat.col(0).norm();
}
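// Re-anchors the landmark depths at the near clipping plane (subtract the
// mean depth, add `pcf.near`) and divides by the estimated scale to bring Z
// into the metric space.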
static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
float depth_offset, float scale,
Eigen::Matrix3Xf& landmarks) {
landmarks.row(2) =
(landmarks.array().row(2) - depth_offset + pcf.near) / scale;
}
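// Unprojects X and Y with the pinhole camera model: a screen-plane point is
// scaled by its depth relative to the near plane distance, i.e.
// x_metric = x * z / near (and likewise for y).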
static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
Eigen::Matrix3Xf& landmarks) {
landmarks.row(0) =
landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
landmarks.row(1) =
landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
}
static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
landmarks.row(2) *= -1.f;
}
static void ConvertLandmarkListToEigenMatrix(
const NormalizedLandmarkList& landmark_list,
Eigen::Matrix3Xf& eigen_matrix) {
eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
for (int i = 0; i < landmark_list.landmark_size(); ++i) {
const auto& landmark = landmark_list.landmark(i);
eigen_matrix(0, i) = landmark.x();
eigen_matrix(1, i) = landmark.y();
eigen_matrix(2, i) = landmark.z();
}
}
static void ConvertEigenMatrixToLandmarkList(
const Eigen::Matrix3Xf& eigen_matrix, LandmarkList& landmark_list) {
landmark_list.Clear();
for (int i = 0; i < eigen_matrix.cols(); ++i) {
auto& landmark = *landmark_list.add_landmark();
landmark.set_x(eigen_matrix(0, i));
landmark.set_y(eigen_matrix(1, i));
landmark.set_z(eigen_matrix(2, i));
}
}
const OriginPointLocation origin_point_location_;
const InputSource input_source_;
Eigen::Matrix3Xf canonical_metric_landmarks_;
Eigen::VectorXf landmark_weights_;
std::unique_ptr<ProcrustesSolver> procrustes_solver_;
};
class GeometryPipelineImpl : public GeometryPipeline {
public:
GeometryPipelineImpl(
const PerspectiveCamera& perspective_camera, //
const Mesh3d& canonical_mesh, //
uint32_t canonical_mesh_vertex_size, //
uint32_t canonical_mesh_num_vertices,
uint32_t canonical_mesh_vertex_position_offset,
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
: perspective_camera_(perspective_camera),
canonical_mesh_(canonical_mesh),
canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
canonical_mesh_vertex_position_offset_(
canonical_mesh_vertex_position_offset),
space_converter_(std::move(space_converter)) {}
absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(
const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
int frame_width, int frame_height) const override {
MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
<< "Invalid frame dimensions!";
// Create a perspective camera frustum to be shared for geometry estimation
// per each face.
PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
frame_height);
std::vector<FaceGeometry> multi_face_geometry;
// From this point, the meaning of "face landmarks" is clarified further as
// "screen face landmarks". This is done to distinguish them from "metric face
// landmarks" derived during the face geometry estimation process.
for (const NormalizedLandmarkList& screen_face_landmarks :
multi_face_landmarks) {
// An overly compact screen landmark list will result in numerical
// instabilities; therefore, such faces are filtered out.
if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
continue;
}
// Convert the screen landmarks into the metric landmarks and get the pose
// transformation matrix.
LandmarkList metric_face_landmarks;
Eigen::Matrix4f pose_transform_mat;
MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
metric_face_landmarks,
pose_transform_mat))
<< "Failed to convert landmarks from the screen to the metric space!";
// Pack geometry data for this face.
FaceGeometry face_geometry;
Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
// Copy the canonical face mesh as the face geometry mesh.
mutable_mesh->CopyFrom(canonical_mesh_);
// Replace the XYZ vertex mesh coordinates with the metric landmark positions.
for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
canonical_mesh_vertex_position_offset_;
mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
metric_face_landmarks.landmark(i).x());
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
metric_face_landmarks.landmark(i).y());
mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
metric_face_landmarks.landmark(i).z());
}
// Populate the face pose transformation matrix.
mediapipe::MatrixDataProtoFromMatrix(
pose_transform_mat, face_geometry.mutable_pose_transform_matrix());
multi_face_geometry.push_back(face_geometry);
}
return multi_face_geometry;
}
private:
static bool IsScreenLandmarkListTooCompact(
const NormalizedLandmarkList& screen_landmarks) {
float mean_x = 0.f;
float mean_y = 0.f;
for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
const auto& landmark = screen_landmarks.landmark(i);
mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
}
float max_sq_dist = 0.f;
for (const auto& landmark : screen_landmarks.landmark()) {
const float d_x = landmark.x() - mean_x;
const float d_y = landmark.y() - mean_y;
max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
}
static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
}
const PerspectiveCamera perspective_camera_;
const Mesh3d canonical_mesh_;
const uint32_t canonical_mesh_vertex_size_;
const uint32_t canonical_mesh_num_vertices_;
const uint32_t canonical_mesh_vertex_position_offset_;
std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
};
} // namespace
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
const Environment& environment, const GeometryPipelineMetadata& metadata) {
MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
<< "Invalid environment!";
MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
<< "Invalid geometry pipeline metadata!";
const auto& canonical_mesh = metadata.canonical_mesh();
RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
VertexComponent::POSITION))
<< "Canonical face mesh must have the `POSITION` vertex component!";
RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
VertexComponent::TEX_COORD))
<< "Canonical face mesh must have the `TEX_COORD` vertex component!";
uint32_t canonical_mesh_vertex_size =
GetVertexSize(canonical_mesh.vertex_type());
uint32_t canonical_mesh_num_vertices =
canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
uint32_t canonical_mesh_vertex_position_offset =
GetVertexComponentOffset(canonical_mesh.vertex_type(),
VertexComponent::POSITION)
.value();
// Put the Procrustes landmark basis into Eigen matrices for easier access.
Eigen::Matrix3Xf canonical_metric_landmarks =
Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
Eigen::VectorXf landmark_weights =
Eigen::VectorXf::Zero(canonical_mesh_num_vertices);
for (int i = 0; i < canonical_mesh_num_vertices; ++i) {
uint32_t vertex_buffer_offset =
canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;
canonical_metric_landmarks(0, i) =
canonical_mesh.vertex_buffer(vertex_buffer_offset);
canonical_metric_landmarks(1, i) =
canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
canonical_metric_landmarks(2, i) =
canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
}
for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {
uint32_t landmark_id = wlr.landmark_id();
landmark_weights(landmark_id) = wlr.weight();
}
std::unique_ptr<GeometryPipeline> result =
absl::make_unique<GeometryPipelineImpl>(
environment.perspective_camera(), canonical_mesh,
canonical_mesh_vertex_size, canonical_mesh_num_vertices,
canonical_mesh_vertex_position_offset,
absl::make_unique<ScreenToMetricSpaceConverter>(
environment.origin_point_location(),
metadata.input_source() == InputSource::DEFAULT
? InputSource::FACE_LANDMARK_PIPELINE
: metadata.input_source(),
std::move(canonical_metric_landmarks),
std::move(landmark_weights),
CreateFloatPrecisionProcrustesSolver()));
return result;
}
} // namespace mediapipe::face_geometry

View File

@ -1,67 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
#include <memory>
#include <vector>
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
namespace mediapipe::face_geometry {
// Encapsulates a stateless estimator of facial geometry in a Metric space based
// on the normalized face landmarks in the Screen space.
class GeometryPipeline {
public:
virtual ~GeometryPipeline() = default;
// Estimates geometry data for multiple faces.
//
// Returns an error status if any of the passed arguments is invalid.
//
// The result includes face geometry data for a subset of the input faces;
// geometry data for some faces might be missing. This may happen if
// estimating the facial geometry from a corresponding face landmark list
// would be unstable for any reason (for example, if the landmark list is too
// compact).
//
// Each face landmark list must have the same number of landmarks as was
// passed upon initialization via the canonical face mesh (as a part of the
// geometry pipeline metadata).
//
// Both `frame_width` and `frame_height` must be positive.
virtual absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(
const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
int frame_width, int frame_height) const = 0;
};
// Creates an instance of `GeometryPipeline`.
//
// Both `environment` and `metadata` must be valid (for details, please refer to
// the proto message definition comments and/or `validation_utils.h/cc`).
//
// Canonical face mesh (defined as a part of `metadata`) must have the
// `POSITION` and the `TEX_COORD` vertex components.
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
const Environment& environment, const GeometryPipelineMetadata& metadata);
} // namespace mediapipe::face_geometry
#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_

View File

@ -1,103 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include <cstdint>
#include <cstdlib>
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
namespace {
bool HasVertexComponentVertexPT(VertexComponent vertex_component) {
switch (vertex_component) {
case VertexComponent::POSITION:
case VertexComponent::TEX_COORD:
return true;
default:
return false;
}
}
uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) {
switch (vertex_component) {
case VertexComponent::POSITION:
return 3;
case VertexComponent::TEX_COORD:
return 2;
}
}
uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) {
switch (vertex_component) {
case VertexComponent::POSITION:
return 0;
case VertexComponent::TEX_COORD:
return GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
}
}
} // namespace
std::size_t GetVertexSize(Mesh3d::VertexType vertex_type) {
switch (vertex_type) {
case Mesh3d::VERTEX_PT:
return GetVertexComponentSizeVertexPT(VertexComponent::POSITION) +
GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD);
}
}
std::size_t GetPrimitiveSize(Mesh3d::PrimitiveType primitive_type) {
switch (primitive_type) {
case Mesh3d::TRIANGLE:
return 3;
}
}
bool HasVertexComponent(Mesh3d::VertexType vertex_type,
VertexComponent vertex_component) {
switch (vertex_type) {
case Mesh3d::VERTEX_PT:
return HasVertexComponentVertexPT(vertex_component);
}
}
absl::StatusOr<uint32_t> GetVertexComponentOffset(
Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
RET_CHECK(HasVertexComponentVertexPT(vertex_component))
<< "A given vertex type doesn't have the requested component!";
switch (vertex_type) {
case Mesh3d::VERTEX_PT:
return GetVertexComponentOffsetVertexPT(vertex_component);
}
}
absl::StatusOr<uint32_t> GetVertexComponentSize(
Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
RET_CHECK(HasVertexComponentVertexPT(vertex_component))
<< "A given vertex type doesn't have the requested component!";
switch (vertex_type) {
case Mesh3d::VERTEX_PT:
return GetVertexComponentSizeVertexPT(vertex_component);
}
}
} // namespace mediapipe::face_geometry

View File

@ -1,51 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
#include <cstdint>
#include <cstdlib>
#include "mediapipe/framework/port/statusor.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
enum class VertexComponent { POSITION, TEX_COORD };
std::size_t GetVertexSize(Mesh3d::VertexType vertex_type);
std::size_t GetPrimitiveSize(Mesh3d::PrimitiveType primitive_type);
bool HasVertexComponent(Mesh3d::VertexType vertex_type,
VertexComponent vertex_component);
// Computes the vertex component offset.
//
// Returns an error status if a given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentOffset(
Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
// Computes the vertex component size.
//
// Returns an error status if a given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentSize(
Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
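// A minimal usage sketch (hypothetical `mesh` and `i` variables; assumes a
// VERTEX_PT buffer, where each vertex packs 5 floats - XYZ position followed
// by UV):
//
//   const std::size_t vertex_size = GetVertexSize(mesh.vertex_type());  // 5
//   ASSIGN_OR_RETURN(uint32_t position_offset,
//                    GetVertexComponentOffset(mesh.vertex_type(),
//                                             VertexComponent::POSITION));
//   float x = mesh.vertex_buffer(vertex_size * i + position_offset);
//   float y = mesh.vertex_buffer(vertex_size * i + position_offset + 1);
//   float z = mesh.vertex_buffer(vertex_size * i + position_offset + 2);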
} // namespace mediapipe::face_geometry
#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_

View File

@ -1,266 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h"
#include <cmath>
#include <memory>
#include "Eigen/Dense"
#include "absl/memory/memory.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"
namespace mediapipe {
namespace face_geometry {
namespace {
class FloatPrecisionProcrustesSolver : public ProcrustesSolver {
public:
FloatPrecisionProcrustesSolver() = default;
absl::Status SolveWeightedOrthogonalProblem(
const Eigen::Matrix3Xf& source_points, //
const Eigen::Matrix3Xf& target_points, //
const Eigen::VectorXf& point_weights,
Eigen::Matrix4f& transform_mat) const override {
// Validate inputs.
MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points))
<< "Failed to validate weighted orthogonal problem input points!";
MP_RETURN_IF_ERROR(
ValidatePointWeights(source_points.cols(), point_weights))
<< "Failed to validate weighted orthogonal problem point weights!";
// Extract square root from the point weights.
Eigen::VectorXf sqrt_weights = ExtractSquareRoot(point_weights);
// Try to solve the WEOP problem.
MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem(
source_points, target_points, sqrt_weights, transform_mat))
<< "Failed to solve the WEOP problem!";
return absl::OkStatus();
}
private:
static constexpr float kAbsoluteErrorEps = 1e-9f;
static absl::Status ValidateInputPoints(
const Eigen::Matrix3Xf& source_points,
const Eigen::Matrix3Xf& target_points) {
RET_CHECK_GT(source_points.cols(), 0)
<< "The number of source points must be positive!";
RET_CHECK_EQ(source_points.cols(), target_points.cols())
<< "The number of source and target points must be equal!";
return absl::OkStatus();
}
static absl::Status ValidatePointWeights(
int num_points, const Eigen::VectorXf& point_weights) {
RET_CHECK_GT(point_weights.size(), 0)
<< "The number of point weights must be positive!";
RET_CHECK_EQ(point_weights.size(), num_points)
<< "The number of points and point weights must be equal!";
float total_weight = 0.f;
for (int i = 0; i < num_points; ++i) {
RET_CHECK_GE(point_weights(i), 0.f)
<< "Each point weight must be non-negative!";
total_weight += point_weights(i);
}
RET_CHECK_GT(total_weight, kAbsoluteErrorEps)
<< "The total point weight is too small!";
return absl::OkStatus();
}
static Eigen::VectorXf ExtractSquareRoot(
const Eigen::VectorXf& point_weights) {
Eigen::VectorXf sqrt_weights(point_weights);
for (int i = 0; i < sqrt_weights.size(); ++i) {
sqrt_weights(i) = std::sqrt(sqrt_weights(i));
}
return sqrt_weights;
}
// Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into
// a single 4x4 transformation matrix.
static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s,
const Eigen::Vector3f& t) {
Eigen::Matrix4f result = Eigen::Matrix4f::Identity();
result.leftCols(3).topRows(3) = r_and_s;
result.col(3).topRows(3) = t;
return result;
}
// The weighted problem is thoroughly addressed in Section 2.4 of:
// D. Akca, Generalized Procrustes analysis and its applications
// in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648
//
// Notable differences in the code presented here are:
//
// * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q.
// Our W_p is diagonal (equal to diag(sqrt_weights^2)),
// so we can just set Q = diag(sqrt_weights) instead.
//
// * In the paper, the problem is presented as
// (for W_k = I and W_p = transposed(Q) Q):
// || Q (c A T + j transposed(t) - B) || -> min.
//
// We reformulate it as an equivalent minimization of the transpose's
// norm:
// || (c transposed(T) transposed(A) - transposed(B)) transposed(Q) || -> min,
// where transposed(A) and transposed(B) are the source and the target point
// clouds, respectively, c transposed(T) is the rotation+scaling R sought
// for, and Q is diag(sqrt_weights).
//
// Most of the derivations are therefore transposed.
//
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
// return type in order to avoid Eigen memory alignment issues. Details:
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
static absl::Status InternalSolveWeightedOrthogonalProblem(
const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets,
const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) {
// transposed(A_w).
Eigen::Matrix3Xf weighted_sources =
sources.array().rowwise() * sqrt_weights.array().transpose();
// transposed(B_w).
Eigen::Matrix3Xf weighted_targets =
targets.array().rowwise() * sqrt_weights.array().transpose();
// w = transposed(j_w) j_w.
float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum();
// Let C = (j_w transposed(j_w)) / (transposed(j_w) j_w).
// Note that C = transposed(C), hence (I - C) = transposed(I - C).
//
// transposed(A_w) C = transposed(A_w) j_w transposed(j_w) / w =
// (transposed(A_w) j_w) transposed(j_w) / w = c_w transposed(j_w),
//
// where c_w = transposed(A_w) j_w / w is a k x 1 vector calculated here:
Eigen::Matrix3Xf twice_weighted_sources =
weighted_sources.array().rowwise() * sqrt_weights.array().transpose();
Eigen::Vector3f source_center_of_mass =
twice_weighted_sources.rowwise().sum() / total_weight;
// transposed((I - C) A_w) = transposed(A_w) (I - C) =
// transposed(A_w) - transposed(A_w) C = transposed(A_w) - c_w transposed(j_w).
Eigen::Matrix3Xf centered_weighted_sources =
weighted_sources - source_center_of_mass * sqrt_weights.transpose();
Eigen::Matrix3f rotation;
MP_RETURN_IF_ERROR(ComputeOptimalRotation(
weighted_targets * centered_weighted_sources.transpose(), rotation))
<< "Failed to compute the optimal rotation!";
ASSIGN_OR_RETURN(
float scale,
ComputeOptimalScale(centered_weighted_sources, weighted_sources,
weighted_targets, rotation),
_ << "Failed to compute the optimal scale!");
// R = c transposed(T).
Eigen::Matrix3f rotation_and_scale = scale * rotation;
// Compute optimal translation for the weighted problem.
// transposed(B_w - c A_w T) = transposed(B_w) - R transposed(A_w) in (54).
const auto pointwise_diffs =
weighted_targets - rotation_and_scale * weighted_sources;
// Multiplication by j_w is a respectively weighted column sum.
// (54) from the paper.
const auto weighted_pointwise_diffs =
pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose();
Eigen::Vector3f translation =
weighted_pointwise_diffs.rowwise().sum() / total_weight;
transform_mat = CombineTransformMatrix(rotation_and_scale, translation);
return absl::OkStatus();
}
// `design_matrix` is a transposed LHS of (51) in the paper.
//
// Note: the output `rotation` argument is used instead of `StatusOr<>`
// return type in order to avoid Eigen memory alignment issues. Details:
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
static absl::Status ComputeOptimalRotation(
const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) {
RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps)
<< "Design matrix norm is too small!";
Eigen::JacobiSVD<Eigen::Matrix3f> svd(
design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV);
Eigen::Matrix3f postrotation = svd.matrixU();
Eigen::Matrix3f prerotation = svd.matrixV().transpose();
// Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1),
// see "4.6 Constrained orthogonal Procrustes problems"
// in the Gower & Dijksterhuis's book "Procrustes Analysis".
// We flip the sign of the least singular value along with a column in W.
//
// Note that now the sum of singular values doesn't work for scale
// estimation due to this sign flip.
if (postrotation.determinant() * prerotation.determinant() <
static_cast<float>(0)) {
postrotation.col(2) *= static_cast<float>(-1);
}
// Transposed (52) from the paper.
rotation = postrotation * prerotation;
return absl::OkStatus();
}
static absl::StatusOr<float> ComputeOptimalScale(
const Eigen::Matrix3Xf& centered_weighted_sources,
const Eigen::Matrix3Xf& weighted_sources,
const Eigen::Matrix3Xf& weighted_targets,
const Eigen::Matrix3f& rotation) {
// transposed(T) transposed(A_w) (I - C).
const auto rotated_centered_weighted_sources =
rotation * centered_weighted_sources;
// Use the identity trace(A B) = sum(A * B^T)
// to avoid building large intermediate matrices (* is Hadamard product).
// (53) from the paper.
float numerator =
rotated_centered_weighted_sources.cwiseProduct(weighted_targets).sum();
float denominator =
centered_weighted_sources.cwiseProduct(weighted_sources).sum();
RET_CHECK_GT(denominator, kAbsoluteErrorEps)
<< "Scale expression denominator is too small!";
RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps)
<< "Scale is too small!";
return numerator / denominator;
}
};
} // namespace
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver() {
return absl::make_unique<FloatPrecisionProcrustesSolver>();
}
} // namespace face_geometry
} // namespace mediapipe

View File

@ -1,70 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
#include <memory>
#include "Eigen/Dense"
#include "mediapipe/framework/port/status.h"
namespace mediapipe::face_geometry {
// Encapsulates a stateless solver for the Weighted Extended Orthogonal
// Procrustes (WEOP) Problem, as defined in Section 2.4 of
// https://doi.org/10.3929/ethz-a-004656648.
//
// Given the source and the target point clouds, the algorithm estimates
// a 4x4 transformation matrix featuring the following semantic components:
//
// * Uniform scale
// * Rotation
// * Translation
//
// The matrix maps the source point cloud into the target point cloud minimizing
// the Mean Squared Error.
class ProcrustesSolver {
public:
virtual ~ProcrustesSolver() = default;
// Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem.
//
// All `source_points`, `target_points` and `point_weights` must define the
// same number of points. Elements of `point_weights` must be non-negative.
//
// Too small a diameter of either point cloud will likely lead to numerical
// instabilities and failure to estimate the transformation.
//
// Too small a total point weight will likewise lead to numerical
// instabilities and failure to estimate the transformation.
//
// A small point coordinate deviation in either point cloud will likely
// result in a failure, as it makes the solution unstable even when one
// exists.
//
// Note: the output `transform_mat` argument is used instead of `StatusOr<>`
// return type in order to avoid Eigen memory alignment issues. Details:
// https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
virtual absl::Status SolveWeightedOrthogonalProblem(
const Eigen::Matrix3Xf& source_points, //
const Eigen::Matrix3Xf& target_points, //
const Eigen::VectorXf& point_weights, //
Eigen::Matrix4f& transform_mat) const = 0;
};
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver();
} // namespace mediapipe::face_geometry
#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_

View File

@ -1,126 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
#include <cstdint>
#include <cstdlib>
#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
absl::Status ValidatePerspectiveCamera(
const PerspectiveCamera& perspective_camera) {
static constexpr float kAbsoluteErrorEps = 1e-9f;
RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps)
<< "Near Z must be greater than 0 with a margin of 10^{-9}!";
RET_CHECK_GT(perspective_camera.far(),
perspective_camera.near() + kAbsoluteErrorEps)
<< "Far Z must be greater than Near Z with a margin of 10^{-9}!";
RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps)
<< "Vertical FOV must be positive with a margin of 10^{-9}!";
RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps,
180.f)
<< "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}";
return absl::OkStatus();
}
absl::Status ValidateEnvironment(const Environment& environment) {
MP_RETURN_IF_ERROR(
ValidatePerspectiveCamera(environment.perspective_camera()))
<< "Invalid perspective camera!";
return absl::OkStatus();
}
absl::Status ValidateMesh3d(const Mesh3d& mesh_3d) {
const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type());
const std::size_t primitive_size = GetPrimitiveSize(mesh_3d.primitive_type());
RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % vertex_size, 0)
<< "Vertex buffer size must be a multiple of the vertex size!";
RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_size, 0)
<< "Index buffer size must be a multiple of the primitive size!";
const int num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
for (uint32_t idx : mesh_3d.index_buffer()) {
RET_CHECK_LT(idx, num_vertices)
<< "All mesh indices must refer to an existing vertex!";
}
return absl::OkStatus();
}
absl::Status ValidateFaceGeometry(const FaceGeometry& face_geometry) {
MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";
static constexpr char kInvalid4x4MatrixMessage[] =
"Pose transformation matrix must be a 4x4 matrix!";
const MatrixData& pose_transform_matrix =
face_geometry.pose_transform_matrix();
RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
<< kInvalid4x4MatrixMessage;
return absl::OkStatus();
}
absl::Status ValidateGeometryPipelineMetadata(
const GeometryPipelineMetadata& metadata) {
MP_RETURN_IF_ERROR(ValidateMesh3d(metadata.canonical_mesh()))
<< "Invalid canonical mesh!";
RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
<< "Procrustes landmark basis must be non-empty!";
const int num_vertices =
metadata.canonical_mesh().vertex_buffer_size() /
GetVertexSize(metadata.canonical_mesh().vertex_type());
for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {
RET_CHECK_LT(wlr.landmark_id(), num_vertices)
<< "All Procrustes basis indices must refer to an existing canonical "
"mesh vertex!";
RET_CHECK_GE(wlr.weight(), 0.f)
<< "All Procrustes basis landmarks must have a non-negative weight!";
}
return absl::OkStatus();
}
absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";
return absl::OkStatus();
}
} // namespace mediapipe::face_geometry

View File

@ -1,70 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
#include "mediapipe/framework/port/status.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
namespace mediapipe::face_geometry {
// Validates `perspective_camera`.
//
// Near Z must be greater than 0 with a margin of `1e-9`.
// Far Z must be greater than Near Z with a margin of `1e-9`.
// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range
// edges.
absl::Status ValidatePerspectiveCamera(
const PerspectiveCamera& perspective_camera);
// Validates `environment`.
//
// Environment's perspective camera must be valid.
absl::Status ValidateEnvironment(const Environment& environment);
// Validates `mesh_3d`.
//
// Mesh vertex buffer size must be a multiple of the vertex size.
// Mesh index buffer size must be a multiple of the primitive size.
// All mesh indices must reference an existing mesh vertex.
absl::Status ValidateMesh3d(const Mesh3d& mesh_3d);
// Validates `face_geometry`.
//
// Face mesh must be valid.
// Face pose transformation matrix must be a 4x4 matrix.
absl::Status ValidateFaceGeometry(const FaceGeometry& face_geometry);
// Validates `metadata`.
//
// Canonical face mesh must be valid.
// Procrustes landmark basis must be non-empty.
// All Procrustes basis indices must reference an existing canonical mesh
// vertex.
// All Procrustes basis landmarks must have a non-negative weight.
absl::Status ValidateGeometryPipelineMetadata(
const GeometryPipelineMetadata& metadata);
// Validates frame dimensions.
//
// Both frame width and frame height must be positive.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height);
} // namespace mediapipe::face_geometry
#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_

View File

@ -1,46 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_proto_library(
name = "environment_proto",
srcs = ["environment.proto"],
)
mediapipe_proto_library(
name = "face_geometry_proto",
srcs = ["face_geometry.proto"],
deps = [
":mesh_3d_proto",
"//mediapipe/framework/formats:matrix_data_proto",
],
)
mediapipe_proto_library(
name = "geometry_pipeline_metadata_proto",
srcs = ["geometry_pipeline_metadata.proto"],
deps = [
":mesh_3d_proto",
],
)
mediapipe_proto_library(
name = "mesh_3d_proto",
srcs = ["mesh_3d.proto"],
)

View File

@ -1,84 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.face_geometry;
option java_package = "com.google.mediapipe.modules.facegeometry";
option java_outer_classname = "EnvironmentProto";
// Defines the (0, 0) origin point location of the environment.
//
// The variation in the origin point location can be traced back to the memory
// layout of the camera video frame buffers.
//
// Usually, the memory layout for most CPU (and also some GPU) camera video
// frame buffers results in having the (0, 0) origin point located in the
// Top Left corner.
//
// On the contrary, the memory layout for most GPU camera video frame buffers
// results in having the (0, 0) origin point located in the Bottom Left corner.
//
// Let's consider the following example:
//
// (A) ---------------+
// ___ |
// | (1) | | |
// | / \ | | |
// | |---|===|-| |
// | |---| | | |
// | / \ | | |
// | | | | | |
// | | (2) |=| | |
// | | | | | |
// | |_______| |_| |
// | |@| |@| | | |
// | ___________|_|_ |
// |
// (B) ---------------+
//
// In this example, (1) and (2) have the same X coordinate regardless of the
// origin point location. However, having the origin point located at (A)
// (Top Left corner) results in (1) having a smaller Y coordinate if compared to
// (2). Similarly, having the origin point located at (B) (Bottom Left corner)
// results in (1) having a greater Y coordinate if compared to (2).
//
// Providing the correct origin point location for your environment and making
// sure all the input landmarks are in-sync with this location is crucial
// for receiving the correct output face geometry and visual renders.
enum OriginPointLocation {
BOTTOM_LEFT_CORNER = 1;
TOP_LEFT_CORNER = 2;
}
// The perspective camera is defined through its vertical FOV angle and the
// Z-clipping planes. The aspect ratio is a runtime variable for the face
// geometry module and should be provided alongside the face landmarks in order
// to estimate the face geometry on a given frame.
//
// More info on Perspective Cameras:
// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective
message PerspectiveCamera {
// `0 < vertical_fov_degrees < 180`.
optional float vertical_fov_degrees = 1;
// `0 < near < far`.
optional float near = 2;
optional float far = 3;
}
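// For reference, the near-plane frustum extents follow from these parameters
// (a sketch, not part of the message; `aspect_ratio` = frame_width /
// frame_height is a runtime input):
//
//   height_at_near = 2 * near * tan(0.5 * vertical_fov_degrees * pi / 180)
//   width_at_near  = height_at_near * aspect_ratio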
message Environment {
optional OriginPointLocation origin_point_location = 1;
optional PerspectiveCamera perspective_camera = 2;
}

View File

@ -1,60 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.face_geometry;
import "mediapipe/framework/formats/matrix_data.proto";
import "mediapipe/modules/face_geometry/protos/mesh_3d.proto";
option java_package = "com.google.mediapipe.modules.facegeometry";
option java_outer_classname = "FaceGeometryProto";
// Defines the face geometry pipeline estimation result format.
message FaceGeometry {
// Defines a mesh surface for a face. The face mesh vertex IDs are the same as
// the face landmark IDs.
//
// XYZ coordinates exist in the right-handed Metric 3D space configured by an
// environment. UV coordinates are taken from the canonical face mesh model.
//
// XY coordinates are guaranteed to match the screen positions of
// the input face landmarks after (1) being multiplied by the face pose
// transformation matrix and then (2) being projected with a perspective
// camera matrix of the same environment.
//
// NOTE: the triangular topology of the face mesh is only useful when derived
// from the 468 face landmarks, not from the 6 face detection landmarks
// (keypoints). The latter don't cover the entire face, and this mesh is
// defined here only to comply with the API. It should be considered a
// placeholder and/or used for debugging purposes.
//
// Use the face geometry derived from the face detection landmarks
// (keypoints) for the face pose transformation matrix, not the mesh.
optional Mesh3d mesh = 1;
// Defines a face pose transformation matrix, which provides mapping from
// the static canonical face model to the runtime face. Tries to distinguish
// a head pose change from a facial expression change and to only reflect the
// former.
//
// Is a 4x4 matrix and contains only the following components:
// * Uniform scale
// * Rotation
// * Translation
//
// The last row is guaranteed to be `[0 0 0 1]`.
optional MatrixData pose_transform_matrix = 2;
}

View File

@ -1,63 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.face_geometry;
import "mediapipe/modules/face_geometry/protos/mesh_3d.proto";
option java_package = "com.google.mediapipe.modules.facegeometry";
option java_outer_classname = "GeometryPipelineMetadataProto";
enum InputSource {
DEFAULT = 0; // FACE_LANDMARK_PIPELINE
FACE_LANDMARK_PIPELINE = 1;
FACE_DETECTION_PIPELINE = 2;
}
message WeightedLandmarkRef {
// Defines the landmark ID. References an existing face landmark ID.
optional uint32 landmark_id = 1;
// Defines the landmark weight. The larger the weight the more influence this
// landmark has in the basis.
//
// Is positive.
optional float weight = 2;
}
// Next field ID: 4
message GeometryPipelineMetadata {
// Defines the source of the input landmarks so that the underlying geometry
// pipeline can adjust itself to produce the best results.
//
// Face landmark pipeline is expected to produce 3D landmarks with relative Z
// coordinate, which is scaled as the X coordinate assuming the weak
// perspective projection camera model.
//
// Face detection pipeline is expected to produce 2D landmarks with Z
// coordinate being equal to 0.
optional InputSource input_source = 3;
// Defines a mesh surface for a canonical face. The canonical face mesh vertex
// IDs are the same as the face landmark IDs.
//
// XYZ coordinates are defined in centimeter units.
optional Mesh3d canonical_mesh = 1;
// Defines a weighted landmark basis for running the Procrustes solver
// algorithm inside the geometry pipeline.
//
// A good basis sets face landmark weights in a way that distinguishes a head
// pose change from a facial expression change, responding only to the former.
repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
}

View File

@ -1,41 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe.face_geometry;
option java_package = "com.google.mediapipe.modules.facegeometry";
option java_outer_classname = "Mesh3dProto";
message Mesh3d {
enum VertexType {
// Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV).
VERTEX_PT = 0;
}
enum PrimitiveType {
// Is defined by 3 indices: triangle vertex IDs.
TRIANGLE = 0;
}
optional VertexType vertex_type = 1;
optional PrimitiveType primitive_type = 2;
// Vertex buffer size is a multiple of the vertex size (e.g., 5 for
// VERTEX_PT).
repeated float vertex_buffer = 3;
// Index buffer size is a multiple of the primitive size (e.g., 3 for
// TRIANGLE).
repeated uint32 index_buffer = 4;
}

View File

@ -1,190 +0,0 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "face_landmark_cpu",
graph = "face_landmark_cpu.pbtxt",
register_as = "FaceLandmarkCpu",
deps = [
":face_landmarks_model_loader",
":tensors_to_face_landmarks",
":tensors_to_face_landmarks_with_attention",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/framework/tool:switch_container",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_gpu",
graph = "face_landmark_gpu.pbtxt",
register_as = "FaceLandmarkGpu",
deps = [
":face_landmarks_model_loader",
":tensors_to_face_landmarks",
":tensors_to_face_landmarks_with_attention",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/framework/tool:switch_container",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_cpu",
graph = "face_landmark_front_cpu.pbtxt",
register_as = "FaceLandmarkFrontCpu",
deps = [
":face_detection_front_detection_to_roi",
":face_landmark_cpu",
":face_landmark_landmarks_to_roi",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:association_norm_rect_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_gpu",
graph = "face_landmark_front_gpu.pbtxt",
register_as = "FaceLandmarkFrontGpu",
deps = [
":face_detection_front_detection_to_roi",
":face_landmark_gpu",
":face_landmark_landmarks_to_roi",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:association_norm_rect_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_cpu_image",
graph = "face_landmark_front_cpu_image.pbtxt",
register_as = "FaceLandmarkFrontCpuImage",
deps = [
":face_landmark_front_cpu",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/util:from_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_front_gpu_image",
graph = "face_landmark_front_gpu_image.pbtxt",
register_as = "FaceLandmarkFrontGpuImage",
deps = [
":face_landmark_front_gpu",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/util:from_image_calculator",
],
)
exports_files(
srcs = [
"face_landmark.tflite",
"face_landmark_with_attention.tflite",
],
)
mediapipe_simple_subgraph(
name = "face_detection_front_detection_to_roi",
graph = "face_detection_front_detection_to_roi.pbtxt",
register_as = "FaceDetectionFrontDetectionToRoi",
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_landmark_landmarks_to_roi",
graph = "face_landmark_landmarks_to_roi.pbtxt",
register_as = "FaceLandmarkLandmarksToRoi",
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_landmarks_model_loader",
graph = "face_landmarks_model_loader.pbtxt",
register_as = "FaceLandmarksModelLoader",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/tflite:tflite_model_calculator",
"//mediapipe/calculators/util:local_file_contents_calculator",
"//mediapipe/framework/tool:switch_container",
],
)
mediapipe_simple_subgraph(
name = "tensors_to_face_landmarks",
graph = "tensors_to_face_landmarks.pbtxt",
register_as = "TensorsToFaceLandmarks",
deps = [
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
],
)
mediapipe_simple_subgraph(
name = "tensors_to_face_landmarks_with_attention",
graph = "tensors_to_face_landmarks_with_attention.pbtxt",
register_as = "TensorsToFaceLandmarksWithAttention",
deps = [
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/util:landmarks_refinement_calculator",
],
)

View File

@ -1,9 +0,0 @@
# face_landmark
Subgraphs | Details
:--- | :---
[`FaceLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt) | Detects landmarks on a single face. (CPU input, and inference is executed on CPU.)
[`FaceLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt) | Detects landmarks on a single face. (GPU input, and inference is executed on GPU.)
[`FaceLandmarkFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt) | Detects and tracks landmarks on multiple faces. (CPU input, and inference is executed on CPU.)
[`FaceLandmarkFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt) | Detects and tracks landmarks on multiple faces. (GPU input, and inference is executed on GPU.)

View File

@ -1,47 +0,0 @@
# MediaPipe graph to calculate face region of interest (ROI) from the very
# first face detection in the vector of detections provided by
# "FaceDetectionShortRangeCpu" or "FaceDetectionShortRangeGpu"
#
# NOTE: this graph is subject to change and should not be used directly.
type: "FaceDetectionFrontDetectionToRoi"
# Face detection. (Detection)
input_stream: "DETECTION:detection"
# Frame size (width and height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI according to the first detection of input detections. (NormalizedRect)
output_stream: "ROI:roi"
# Converts results of face detection into a rectangle (normalized by image size)
# that encloses the face and is rotated such that the line connecting left eye
# and right eye is aligned with the X-axis of the rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:initial_roi"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
rotation_vector_start_keypoint_index: 0 # Left eye.
rotation_vector_end_keypoint_index: 1 # Right eye.
rotation_vector_target_angle_degrees: 0
}
}
}
# Expands and shifts the rectangle that contains the face so that it's likely
# to cover the entire face.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:initial_roi"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 1.5
scale_y: 1.5
square_long: true
}
}
}

View File

@ -1,184 +0,0 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
# executed on CPU.)
#
# It is required that "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkCpu"
# input_stream: "IMAGE:image"
# input_stream: "ROI:face_roi"
# input_side_packet: "WITH_ATTENTION:with_attention"
# output_stream: "LANDMARKS:face_landmarks"
# }
type: "FaceLandmarkCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a face is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
#
# The number of landmarks depends on the WITH_ATTENTION flag. If it is `true`,
# there will be 478 landmarks with refined lips, eyes and irises (10 extra
# landmarks are for the irises); otherwise, 468 non-refined landmarks are
# returned.
#
# NOTE: if a face is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:face_landmarks"
# Transforms the input image into a 192x192 tensor.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
output_tensor_float_range {
min: 0.0
max: 1.0
}
}
}
}
# Loads the face landmarks TF Lite model.
node {
calculator: "FaceLandmarksModelLoader"
input_side_packet: "WITH_ATTENTION:with_attention"
output_side_packet: "MODEL:model"
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "op_resolver"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
input_side_packet: "MODEL:model"
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
delegate { xnnpack {} }
}
}
}
# Splits a vector of tensors into landmark tensors and face flag tensor.
node {
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "face_flag_tensor"
options: {
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
}
}
}
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 6 }
ranges: { begin: 6 end: 7 }
}
}
}
}
}
}
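# Without attention the model outputs 2 tensors (1 landmark tensor + 1 face
# flag tensor), so the first contained node splits at index 1; with attention
# it outputs 7 tensors (6 landmark tensors + 1 face flag tensor), so the
# second contained node splits at index 6.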
# Converts the face-flag tensor into a float that represents the confidence
# score of face presence.
node {
calculator: "TensorsToFloatsCalculator"
input_stream: "TENSORS:face_flag_tensor"
output_stream: "FLOAT:face_presence_score"
options {
[mediapipe.TensorsToFloatsCalculatorOptions.ext] {
activation: SIGMOID
}
}
}
# Applies a threshold to the confidence score to determine whether a face is
# present.
node {
calculator: "ThresholdingCalculator"
input_stream: "FLOAT:face_presence_score"
output_stream: "FLAG:face_presence"
options: {
[mediapipe.ThresholdingCalculatorOptions.ext] {
threshold: 0.5
}
}
}
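# Taken together, the two nodes above implement (a sketch, not part of the
# graph):
#   face_presence_score = sigmoid(face_flag) = 1 / (1 + exp(-face_flag))
#   face_presence = (face_presence_score > 0.5)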
# Drops landmark tensors if a face is not present.
node {
calculator: "GateCalculator"
input_stream: "landmark_tensors"
input_stream: "ALLOW:face_presence"
output_stream: "ensured_landmark_tensors"
}
# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "LANDMARKS:landmarks"
options: {
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "TensorsToFaceLandmarks"
}
contained_node: {
calculator: "TensorsToFaceLandmarksWithAttention"
}
}
}
}
# Projects the landmarks from the cropped face image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "LandmarkProjectionCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
input_stream: "NORM_RECT:roi"
output_stream: "NORM_LANDMARKS:face_landmarks"
}

View File

@ -1,247 +0,0 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
# executed on CPU.) This graph tries to skip face detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# It is required that "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkFrontCpu"
# input_stream: "IMAGE:image"
# input_side_packet: "NUM_FACES:num_faces"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# input_side_packet: "WITH_ATTENTION:with_attention"
# output_stream: "LANDMARKS:multi_face_landmarks"
# }
type: "FaceLandmarkFrontCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# Collection of detected/predicted faces, each represented as a list of 468
# (or 478, when attention is enabled) face landmarks.
# (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "gated_prev_face_rects_from_landmarks"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
input_side_packet: "num_faces"
output_stream: "prev_has_enough_faces"
}
# Drops the incoming image if enough faces have already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of face detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_faces"
output_stream: "gated_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects faces.
node {
calculator: "FaceDetectionShortRangeCpu"
input_stream: "IMAGE:gated_image"
output_stream: "DETECTIONS:all_face_detections"
}
# Makes sure there are no more detections than the provided num_faces.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_face_detections"
output_stream: "face_detections"
input_side_packet: "num_faces"
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:gated_image"
output_stream: "SIZE:gated_image_size"
}
# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:face_detections"
input_stream: "CLONE:gated_image_size"
output_stream: "ITEM:face_detection"
output_stream: "CLONE:detections_loop_image_size"
output_stream: "BATCH_END:detections_loop_end_timestamp"
}
# Calculates the region of interest based on face detections, so that it can
# be used to detect landmarks.
node {
calculator: "FaceDetectionFrontDetectionToRoi"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:detections_loop_image_size"
output_stream: "ROI:face_rect_from_detection"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_detection"
input_stream: "BATCH_END:detections_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "face_rects_from_detections"
input_stream: "gated_prev_face_rects_from_landmarks"
output_stream: "face_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:face_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:face_rect"
output_stream: "CLONE:0:landmarks_loop_image"
output_stream: "CLONE:1:landmarks_loop_image_size"
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}
# Detects face landmarks within specified region of interest of the image.
node {
calculator: "FaceLandmarkCpu"
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:face_landmarks"
}
# Calculates the region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
calculator: "FaceLandmarkLandmarksToRoi"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
output_stream: "ROI:face_rect_from_landmarks"
}
# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_landmarks"
}
# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump-start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:face_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}
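# The resulting tracking loop, sketched as pseudocode (an informal summary,
# not part of the graph):
#   prev_rects = rects cached from the previous frame (empty on the first one)
#   if use_prev_landmarks and len(prev_rects) >= num_faces:
#     skip detection and reuse prev_rects
#   else:
#     detect faces and convert each detection to a rect
#   face_rects = associate(detected_rects, prev_rects)  # drops overlapping rects
#   for rect in face_rects: landmarks = FaceLandmarkCpu(image, rect)
#   cache rects recomputed from landmarks for the next frame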

View File

@ -1,87 +0,0 @@
# MediaPipe graph to detect/predict face landmarks on CPU.
type: "FaceLandmarkFrontCpuImage"
# Input image. (Image)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Collection of detected/predicted faces, each represented as a list of 468
# (or 478, when attention is enabled) face landmarks.
# (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "FINISHED:multi_face_landmarks"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_image"
options: {
[mediapipe.FlowLimiterCalculatorOptions.ext] {
max_in_flight: 1
max_in_queue: 1
}
}
}
# Converts Image to ImageFrame for FaceLandmarkFrontCpu to consume.
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:throttled_image"
output_stream: "IMAGE_CPU:raw_image_frame"
output_stream: "SOURCE_ON_GPU:is_gpu_image"
}
# TODO: Remove the extra flipping once adopting MlImage.
# If the source images are on GPU, flip the data vertically before sending them
# into FaceLandmarkFrontCpu. This may be needed because OpenGL represents images
# assuming the image origin is at the bottom-left corner, whereas MediaPipe in
# general assumes the image origin is at the top-left corner.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:raw_image_frame"
input_stream: "FLIP_VERTICALLY:is_gpu_image"
output_stream: "IMAGE:image_frame"
}
node {
calculator: "FaceLandmarkFrontCpu"
input_stream: "IMAGE:image_frame"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}

View File

@ -1,247 +0,0 @@
# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is
# executed on GPU.) This graph tries to skip face detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# It is required that "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
# path during execution.
#
# It is required that "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkFrontGpu"
# input_stream: "IMAGE:image"
# input_side_packet: "NUM_FACES:num_faces"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# input_side_packet: "WITH_ATTENTION:with_attention"
# output_stream: "LANDMARKS:multi_face_landmarks"
# }
type: "FaceLandmarkFrontGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# Collection of detected/predicted faces, each represented as a list of 468
# (or 478, when attention is enabled) face landmarks.
# (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "gated_prev_face_rects_from_landmarks"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
input_side_packet: "num_faces"
output_stream: "prev_has_enough_faces"
}
# Drops the incoming image if enough faces have already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of face detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_faces"
output_stream: "gated_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects faces.
node {
calculator: "FaceDetectionShortRangeGpu"
input_stream: "IMAGE:gated_image"
output_stream: "DETECTIONS:all_face_detections"
}
# Makes sure there are no more detections than the provided num_faces.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_face_detections"
output_stream: "face_detections"
input_side_packet: "num_faces"
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:gated_image"
output_stream: "SIZE:gated_image_size"
}
# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:face_detections"
input_stream: "CLONE:gated_image_size"
output_stream: "ITEM:face_detection"
output_stream: "CLONE:detections_loop_image_size"
output_stream: "BATCH_END:detections_loop_end_timestamp"
}
# Calculates the region of interest based on face detections, so that it can
# be used to detect landmarks.
node {
calculator: "FaceDetectionFrontDetectionToRoi"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:detections_loop_image_size"
output_stream: "ROI:face_rect_from_detection"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_detection"
input_stream: "BATCH_END:detections_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "face_rects_from_detections"
input_stream: "gated_prev_face_rects_from_landmarks"
output_stream: "face_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:face_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:face_rect"
output_stream: "CLONE:0:landmarks_loop_image"
output_stream: "CLONE:1:landmarks_loop_image_size"
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}
# Detects face landmarks within specified region of interest of the image.
node {
calculator: "FaceLandmarkGpu"
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:face_landmarks"
}
# Calculates the region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
calculator: "FaceLandmarkLandmarksToRoi"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
output_stream: "ROI:face_rect_from_landmarks"
}
# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_landmarks"
}
# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump-start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:face_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}

View File

@ -1,87 +0,0 @@
# MediaPipe graph to detect/predict face landmarks on GPU.
type: "FaceLandmarkFrontGpuImage"
# Input image. (Image)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Collection of detected/predicted faces, each represented as a list of 468
# (or 478, when attention is enabled) face landmarks.
# (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "FINISHED:multi_face_landmarks"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_image"
options: {
[mediapipe.FlowLimiterCalculatorOptions.ext] {
max_in_flight: 1
max_in_queue: 1
}
}
}
# Converts Image to GpuBuffer for FaceLandmarkFrontGpu to consume.
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:throttled_image"
output_stream: "IMAGE_GPU:raw_gpu_buffer"
output_stream: "SOURCE_ON_GPU:is_gpu_image"
}
# TODO: Remove the extra flipping once adopting MlImage.
# If the source images are on GPU, flip the data vertically before sending them
# into FaceLandmarkFrontGpu. This may be needed because OpenGL represents images
# assuming the image origin is at the bottom-left corner, whereas MediaPipe in
# general assumes the image origin is at the top-left corner.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:raw_gpu_buffer"
input_stream: "FLIP_VERTICALLY:is_gpu_image"
output_stream: "IMAGE_GPU:gpu_buffer"
}
node {
calculator: "FaceLandmarkFrontGpu"
input_stream: "IMAGE:gpu_buffer"
input_side_packet: "NUM_FACES:num_faces"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
input_side_packet: "WITH_ATTENTION:with_attention"
output_stream: "LANDMARKS:multi_face_landmarks"
output_stream: "DETECTIONS:face_detections"
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
}

View File

@ -1,224 +0,0 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
# executed on CPU.) This graph tries to skip face detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkFrontSideModelCpu"
# input_stream: "IMAGE:image"
# input_side_packet: "NUM_FACES:num_faces"
# input_side_packet: "MODEL:0:face_detection_model"
# input_side_packet: "MODEL:1:face_landmark_model"
# output_stream: "LANDMARKS:multi_face_landmarks"
# }
type: "FaceLandmarkFrontSideModelCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# TfLite model to detect faces.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_detection/face_detection_short_range.tflite
# model can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:0:face_detection_model"
# TfLite model to detect face landmarks.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite model can
# be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:1:face_landmark_model"
# Collection of detected/predicted faces, each represented as a list of 468 face
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:prev_face_rects_from_landmarks"
input_side_packet: "num_faces"
output_stream: "prev_has_enough_faces"
}
# Drops the incoming image if FaceLandmarkCpu was able to identify face presence
# in the previous image. Otherwise, passes the incoming image through to trigger
# a new round of face detection in FaceDetectionShortRangeCpu.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_faces"
output_stream: "gated_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects faces.
node {
calculator: "FaceDetectionShortRangeSideModelCpu"
input_stream: "IMAGE:gated_image"
input_side_packet: "MODEL:face_detection_model"
output_stream: "DETECTIONS:all_face_detections"
}
# Makes sure there are no more detections than the provided num_faces.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_face_detections"
output_stream: "face_detections"
input_side_packet: "num_faces"
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:gated_image"
output_stream: "SIZE:gated_image_size"
}
# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:face_detections"
input_stream: "CLONE:gated_image_size"
output_stream: "ITEM:face_detection"
output_stream: "CLONE:detections_loop_image_size"
output_stream: "BATCH_END:detections_loop_end_timestamp"
}
# Calculates the region of interest based on face detections, so that it can
# be used to detect landmarks.
node {
calculator: "FaceDetectionFrontDetectionToRoi"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:detections_loop_image_size"
output_stream: "ROI:face_rect_from_detection"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_detection"
input_stream: "BATCH_END:detections_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "face_rects_from_detections"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "face_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:face_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:face_rect"
output_stream: "CLONE:0:landmarks_loop_image"
output_stream: "CLONE:1:landmarks_loop_image_size"
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}
# Detects face landmarks within specified region of interest of the image.
node {
calculator: "FaceLandmarkSideModelCpu"
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
input_side_packet: "MODEL:face_landmark_model"
output_stream: "LANDMARKS:face_landmarks"
}
# Calculates the region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
calculator: "FaceLandmarkLandmarksToRoi"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
output_stream: "ROI:face_rect_from_landmarks"
}
# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_landmarks"
}
# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump-start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:face_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}

View File

@ -1,224 +0,0 @@
# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is
# executed on GPU.) This graph tries to skip face detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkFrontSideModelGpu"
# input_stream: "IMAGE:image"
# input_side_packet: "NUM_FACES:num_faces"
# input_side_packet: "MODEL:0:face_detection_model"
# input_side_packet: "MODEL:1:face_landmark_model"
# output_stream: "LANDMARKS:multi_face_landmarks"
# }
type: "FaceLandmarkFrontSideModelGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"
# TfLite model to detect faces.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_detection/face_detection_short_range.tflite
# model can be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:0:face_detection_model"
# TfLite model to detect face landmarks.
# (std::unique_ptr<tflite::FlatBufferModel,
# std::function<void(tflite::FlatBufferModel*)>>)
# NOTE: only the mediapipe/modules/face_landmark/face_landmark.tflite model can
# be passed here; otherwise, results are undefined.
input_side_packet: "MODEL:1:face_landmark_model"
# Collection of detected/predicted faces, each represented as a list of 468 face
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"
# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:prev_face_rects_from_landmarks"
input_side_packet: "num_faces"
output_stream: "prev_has_enough_faces"
}
# Drops the incoming image if FaceLandmarkGpu was able to identify face presence
# in the previous image. Otherwise, passes the incoming image through to trigger
# a new round of face detection in FaceDetectionShortRangeGpu.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_faces"
output_stream: "gated_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects faces.
node {
calculator: "FaceDetectionShortRangeSideModelGpu"
input_stream: "IMAGE:gated_image"
input_side_packet: "MODEL:face_detection_model"
output_stream: "DETECTIONS:all_face_detections"
}
# Makes sure there are no more detections than the provided num_faces.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_face_detections"
output_stream: "face_detections"
input_side_packet: "num_faces"
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:gated_image"
output_stream: "SIZE:gated_image_size"
}
# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:face_detections"
input_stream: "CLONE:gated_image_size"
output_stream: "ITEM:face_detection"
output_stream: "CLONE:detections_loop_image_size"
output_stream: "BATCH_END:detections_loop_end_timestamp"
}
# Calculates the region of interest based on face detections, so that it can
# be used to detect landmarks.
node {
calculator: "FaceDetectionFrontDetectionToRoi"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:detections_loop_image_size"
output_stream: "ROI:face_rect_from_detection"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_detection"
input_stream: "BATCH_END:detections_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "face_rects_from_detections"
input_stream: "prev_face_rects_from_landmarks"
output_stream: "face_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Calculates the size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:face_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:face_rect"
output_stream: "CLONE:0:landmarks_loop_image"
output_stream: "CLONE:1:landmarks_loop_image_size"
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}
# Detects face landmarks within specified region of interest of the image.
node {
calculator: "FaceLandmarkSideModelGpu"
input_stream: "IMAGE:landmarks_loop_image"
input_stream: "ROI:face_rect"
input_side_packet: "MODEL:face_landmark_model"
output_stream: "LANDMARKS:face_landmarks"
}
# Calculates the region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
calculator: "FaceLandmarkLandmarksToRoi"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
output_stream: "ROI:face_rect_from_landmarks"
}
# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:face_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:multi_face_landmarks"
}
# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:face_rect_from_landmarks"
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
output_stream: "ITERABLE:face_rects_from_landmarks"
}
# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump-start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:face_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}

View File

@ -1,185 +0,0 @@
# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is
# executed on GPU.)
#
# It is required that "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
# node {
# calculator: "FaceLandmarkGpu"
# input_stream: "IMAGE:image"
# input_stream: "ROI:face_roi"
# input_side_packet: "WITH_ATTENTION:with_attention"
# output_stream: "LANDMARKS:face_landmarks"
# }
type: "FaceLandmarkGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a face is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
#
# The number of landmarks depends on the WITH_ATTENTION flag. If it is `true`,
# there will be 478 landmarks with refined lips, eyes and irises (10 extra
# landmarks are for the irises); otherwise, 468 non-refined landmarks are
# returned.
#
# NOTE: if a face is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:face_landmarks"
# Transforms the input image into a 192x192 tensor.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE_GPU:image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
output_tensor_float_range {
min: 0.0
max: 1.0
}
gpu_origin: TOP_LEFT
}
}
}
# Loads the face landmarks TF Lite model.
node {
calculator: "FaceLandmarksModelLoader"
input_side_packet: "WITH_ATTENTION:with_attention"
output_side_packet: "MODEL:model"
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "op_resolver"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of GPU tensors representing, for instance, detection boxes/keypoints
# and scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
input_side_packet: "MODEL:model"
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
# Do not remove. Used for generation of XNNPACK/NNAPI graphs.
}
}
}
# Splits a vector of tensors into landmark tensors and face flag tensor.
node {
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "face_flag_tensor"
options {
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
}
}
}
contained_node: {
calculator: "SplitTensorVectorCalculator"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 6 }
ranges: { begin: 6 end: 7 }
}
}
}
}
}
}
# Converts the face-flag tensor into a float that represents the confidence
# score of face presence.
node {
calculator: "TensorsToFloatsCalculator"
input_stream: "TENSORS:face_flag_tensor"
output_stream: "FLOAT:face_presence_score"
options: {
[mediapipe.TensorsToFloatsCalculatorOptions.ext] {
activation: SIGMOID
}
}
}
# Applies a threshold to the confidence score to determine whether a face is
# present.
node {
calculator: "ThresholdingCalculator"
input_stream: "FLOAT:face_presence_score"
output_stream: "FLAG:face_presence"
options: {
[mediapipe.ThresholdingCalculatorOptions.ext] {
threshold: 0.5
}
}
}
# Drops landmark tensors if a face is not present.
node {
calculator: "GateCalculator"
input_stream: "landmark_tensors"
input_stream: "ALLOW:face_presence"
output_stream: "ensured_landmark_tensors"
}
# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "LANDMARKS:landmarks"
options: {
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "TensorsToFaceLandmarks"
}
contained_node: {
calculator: "TensorsToFaceLandmarksWithAttention"
}
}
}
}
# Projects the landmarks from the cropped face image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "LandmarkProjectionCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
input_stream: "NORM_RECT:roi"
output_stream: "NORM_LANDMARKS:face_landmarks"
}

View File

@ -1,54 +0,0 @@
# MediaPipe graph to calculate face region of interest (ROI) from landmarks
# detected by "FaceLandmarkCpu" or "FaceLandmarkGpu".
#
# NOTE: this graph is subject to change and should not be used directly.
type: "FaceLandmarkLandmarksToRoi"
# Normalized landmarks. (NormalizedLandmarkList)
input_stream: "LANDMARKS:landmarks"
# Frame size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI according to landmarks. (NormalizedRect)
output_stream: "ROI:roi"
# Converts face landmarks to a detection that tightly encloses all landmarks.
node {
calculator: "LandmarksToDetectionCalculator"
input_stream: "NORM_LANDMARKS:landmarks"
output_stream: "DETECTION:face_detection"
}
# Converts the face detection into a rectangle (normalized by image size)
# that encloses the face and is rotated such that the line connecting the left
# side of the left eye and the right side of the right eye is aligned with the
# X-axis of the rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:face_rect_from_landmarks"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
rotation_vector_start_keypoint_index: 33 # Left side of left eye.
rotation_vector_end_keypoint_index: 263 # Right side of right eye.
rotation_vector_target_angle_degrees: 0
}
}
}
# Expands the face rectangle so that in the next video image it's likely to
# still contain the face even with some motion.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:face_rect_from_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 1.5
scale_y: 1.5
square_long: true
}
}
}
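# EXAMPLE (a hypothetical instantiation, mirroring how the front graphs use
# this subgraph):
# node {
#   calculator: "FaceLandmarkLandmarksToRoi"
#   input_stream: "LANDMARKS:face_landmarks"
#   input_stream: "IMAGE_SIZE:image_size"
#   output_stream: "ROI:face_rect_from_landmarks"
# }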

View File

@ -1,58 +0,0 @@
# MediaPipe graph to load a selected face landmarks TF Lite model.
type: "FaceLandmarksModelLoader"
# Whether to run face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"
# TF Lite model represented as a FlatBuffer.
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
output_side_packet: "MODEL:model"
# Determines the path to the desired face landmark model file based on the
# specification in the input side packet.
node {
calculator: "SwitchContainer"
input_side_packet: "ENABLE:with_attention"
output_side_packet: "PACKET:model_path"
options: {
[mediapipe.SwitchContainerOptions.ext] {
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/face_landmark/face_landmark.tflite"
}
}
}
}
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
}
}
}
}
}
}
}
# Loads the file at the specified path into a blob.
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:model_path"
output_side_packet: "CONTENTS:model_blob"
}
# Converts the input blob into a TF Lite model.
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:model_blob"
output_side_packet: "MODEL:model"
}
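# EXAMPLE (how FaceLandmarkCpu and FaceLandmarkGpu instantiate this subgraph):
# node {
#   calculator: "FaceLandmarksModelLoader"
#   input_side_packet: "WITH_ATTENTION:with_attention"
#   output_side_packet: "MODEL:model"
# }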

View File

@ -1,24 +0,0 @@
# MediaPipe graph to transform a single tensor into 468 facial landmarks.
type: "TensorsToFaceLandmarks"
# Vector with a single tensor that contains 468 landmarks. (std::vector<Tensor>)
input_stream: "TENSORS:tensors"
# 468 facial landmarks (NormalizedLandmarkList)
output_stream: "LANDMARKS:landmarks"
# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:tensors"
output_stream: "NORM_LANDMARKS:landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 468
input_image_width: 192
input_image_height: 192
}
}
}
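# A sketch of the normalization performed above: each decoded coordinate is
# divided by the model input size, e.g. x_norm = x_raw / 192 and
# y_norm = y_raw / 192, so landmarks are expressed relative to the input crop.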

View File

@ -1,299 +0,0 @@
# MediaPipe graph to transform model output tensors into 478 facial landmarks
# with refined lips, eyes and irises.
type: "TensorsToFaceLandmarksWithAttention"
# Vector with six tensors to parse landmarks from. (std::vector<Tensor>)
# Landmark tensors order:
# - mesh_tensor
# - lips_tensor
# - left_eye_tensor
# - right_eye_tensor
# - left_iris_tensor
# - right_iris_tensor
input_stream: "TENSORS:tensors"
# 478 facial landmarks (NormalizedLandmarkList)
output_stream: "LANDMARKS:landmarks"
# Splits a vector of tensors into multiple vectors.
node {
calculator: "SplitTensorVectorCalculator"
input_stream: "tensors"
output_stream: "mesh_tensor"
output_stream: "lips_tensor"
output_stream: "left_eye_tensor"
output_stream: "right_eye_tensor"
output_stream: "left_iris_tensor"
output_stream: "right_iris_tensor"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
ranges: { begin: 4 end: 5 }
ranges: { begin: 5 end: 6 }
}
}
}
# Decodes the mesh landmarks tensor into a vector of normalized landmarks.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:mesh_tensor"
output_stream: "NORM_LANDMARKS:mesh_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 468
input_image_width: 192
input_image_height: 192
}
}
}
# Decodes the lips landmarks tensor into a vector of normalized landmarks.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:lips_tensor"
output_stream: "NORM_LANDMARKS:lips_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 80
input_image_width: 192
input_image_height: 192
}
}
}
# Decodes the left eye landmarks tensor into a vector of normalized landmarks.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:left_eye_tensor"
output_stream: "NORM_LANDMARKS:left_eye_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 71
input_image_width: 192
input_image_height: 192
}
}
}
# Decodes the right eye landmarks tensor into a vector of normalized landmarks.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:right_eye_tensor"
output_stream: "NORM_LANDMARKS:right_eye_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 71
input_image_width: 192
input_image_height: 192
}
}
}
# Decodes the left iris landmarks tensor into a vector of normalized landmarks.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:left_iris_tensor"
output_stream: "NORM_LANDMARKS:left_iris_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 5
input_image_width: 192
input_image_height: 192
}
}
}
# Decodes the right iris landmarks tensor into a vector of normalized landmarks.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:right_iris_tensor"
output_stream: "NORM_LANDMARKS:right_iris_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 5
input_image_width: 192
input_image_height: 192
}
}
}
# Refines mesh landmarks with lips, eyes and irises.
node {
calculator: "LandmarksRefinementCalculator"
input_stream: "LANDMARKS:0:mesh_landmarks"
input_stream: "LANDMARKS:1:lips_landmarks"
input_stream: "LANDMARKS:2:left_eye_landmarks"
input_stream: "LANDMARKS:3:right_eye_landmarks"
input_stream: "LANDMARKS:4:left_iris_landmarks"
input_stream: "LANDMARKS:5:right_iris_landmarks"
output_stream: "REFINED_LANDMARKS:landmarks"
options: {
[mediapipe.LandmarksRefinementCalculatorOptions.ext] {
# 0 - mesh
refinement: {
indexes_mapping: [
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187,
188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215,
216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243,
244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257,
258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271,
272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313,
314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327,
328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341,
342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355,
356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369,
370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383,
384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397,
398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411,
412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425,
426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439,
440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453,
454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467
]
z_refinement: { copy {} }
}
# 1 - lips
refinement: {
indexes_mapping: [
# Lower outer.
61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
# Upper outer (excluding corners).
185, 40, 39, 37, 0, 267, 269, 270, 409,
# Lower inner.
78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
# Upper inner (excluding corners).
191, 80, 81, 82, 13, 312, 311, 310, 415,
# Lower semi-outer.
76, 77, 90, 180, 85, 16, 315, 404, 320, 307, 306,
# Upper semi-outer (excluding corners).
184, 74, 73, 72, 11, 302, 303, 304, 408,
# Lower semi-inner.
62, 96, 89, 179, 86, 15, 316, 403, 319, 325, 292,
# Upper semi-inner (excluding corners).
183, 42, 41, 38, 12, 268, 271, 272, 407
]
z_refinement: { none {} }
}
# 2 - left eye
refinement: {
indexes_mapping: [
# Lower contour.
33, 7, 163, 144, 145, 153, 154, 155, 133,
# upper contour (excluding corners).
246, 161, 160, 159, 158, 157, 173,
# Halo x2 lower contour.
130, 25, 110, 24, 23, 22, 26, 112, 243,
# Halo x2 upper contour (excluding corners).
247, 30, 29, 27, 28, 56, 190,
# Halo x3 lower contour.
226, 31, 228, 229, 230, 231, 232, 233, 244,
# Halo x3 upper contour (excluding corners).
113, 225, 224, 223, 222, 221, 189,
# Halo x4 upper contour (no lower because of mesh structure) or
# eyebrow inner contour.
35, 124, 46, 53, 52, 65,
# Halo x5 lower contour.
143, 111, 117, 118, 119, 120, 121, 128, 245,
# Halo x5 upper contour (excluding corners) or eyebrow outer contour.
156, 70, 63, 105, 66, 107, 55, 193
]
z_refinement: { none {} }
}
# 3 - right eye
refinement: {
indexes_mapping: [
# Lower contour.
263, 249, 390, 373, 374, 380, 381, 382, 362,
# Upper contour (excluding corners).
466, 388, 387, 386, 385, 384, 398,
# Halo x2 lower contour.
359, 255, 339, 254, 253, 252, 256, 341, 463,
# Halo x2 upper contour (excluding corners).
467, 260, 259, 257, 258, 286, 414,
# Halo x3 lower contour.
446, 261, 448, 449, 450, 451, 452, 453, 464,
# Halo x3 upper contour (excluding corners).
342, 445, 444, 443, 442, 441, 413,
# Halo x4 upper contour (no lower because of mesh structure) or
# eyebrow inner contour.
265, 353, 276, 283, 282, 295,
# Halo x5 lower contour.
372, 340, 346, 347, 348, 349, 350, 357, 465,
# Halo x5 upper contour (excluding corners) or eyebrow outer contour.
383, 300, 293, 334, 296, 336, 285, 417
]
z_refinement: { none {} }
}
# 4 - left iris
refinement: {
indexes_mapping: [
# Center.
468,
# Iris right edge.
469,
# Iris top edge.
470,
# Iris left edge.
471,
# Iris bottom edge.
472
]
z_refinement: {
assign_average: {
indexes_for_average: [
# Lower contour.
33, 7, 163, 144, 145, 153, 154, 155, 133,
# Upper contour (excluding corners).
246, 161, 160, 159, 158, 157, 173
]
}
}
}
# 5 - right iris
refinement: {
indexes_mapping: [
# Center.
473,
# Iris right edge.
474,
# Iris top edge.
475,
# Iris left edge.
476,
# Iris bottom edge.
477
]
z_refinement: {
assign_average: {
indexes_for_average: [
# Lower contour.
263, 249, 390, 373, 374, 380, 381, 382, 362,
# Upper contour (excluding corners).
466, 388, 387, 386, 385, 384, 398
]
}
}
}
}
}
}
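
Note the `assign_average` z-refinement for the irises above: the iris landmarks carry no depth of their own, so each one inherits the mean z of the listed eye-contour mesh landmarks. A small hedged sketch of that rule (types and naming are illustrative):

#include <vector>

struct Landmark {
  float x = 0.f, y = 0.f, z = 0.f;
};

// Mirrors the assign_average z_refinement above: every refined landmark
// (e.g. the five iris points) gets the mean z of the listed mesh indexes.
void AssignAverageZ(const std::vector<Landmark>& mesh,
                    const std::vector<int>& indexes_for_average,
                    std::vector<Landmark>& refined) {
  float sum = 0.f;
  for (int idx : indexes_for_average) sum += mesh[idx].z;
  const float average = sum / static_cast<float>(indexes_for_average.size());
  for (Landmark& lm : refined) lm.z = average;
}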


@ -1,171 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
exports_files([
"hand_landmark_full.tflite",
"hand_landmark_lite.tflite",
"handedness.txt",
])
mediapipe_simple_subgraph(
name = "hand_landmark_model_loader",
graph = "hand_landmark_model_loader.pbtxt",
register_as = "HandLandmarkModelLoader",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/tflite:tflite_model_calculator",
"//mediapipe/calculators/util:local_file_contents_calculator",
"//mediapipe/framework/tool:switch_container",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_cpu",
graph = "hand_landmark_cpu.pbtxt",
register_as = "HandLandmarkCpu",
deps = [
":hand_landmark_model_loader",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_classification_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/calculators/util:world_landmark_projection_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_gpu",
graph = "hand_landmark_gpu.pbtxt",
register_as = "HandLandmarkGpu",
deps = [
":hand_landmark_model_loader",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_classification_calculator",
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
"//mediapipe/calculators/util:world_landmark_projection_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_tracking_gpu",
graph = "hand_landmark_tracking_gpu.pbtxt",
register_as = "HandLandmarkTrackingGpu",
deps = [
":hand_landmark_gpu",
":hand_landmark_landmarks_to_roi",
":palm_detection_detection_to_roi",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:association_norm_rect_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:filter_collection_calculator",
"//mediapipe/modules/palm_detection:palm_detection_gpu",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_tracking_cpu_image",
graph = "hand_landmark_tracking_cpu_image.pbtxt",
register_as = "HandLandmarkTrackingCpuImage",
deps = [
":hand_landmark_tracking_cpu",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/util:from_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_tracking_gpu_image",
graph = "hand_landmark_tracking_gpu_image.pbtxt",
register_as = "HandLandmarkTrackingGpuImage",
deps = [
":hand_landmark_tracking_gpu",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/util:from_image_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_tracking_cpu",
graph = "hand_landmark_tracking_cpu.pbtxt",
register_as = "HandLandmarkTrackingCpu",
deps = [
":hand_landmark_cpu",
":hand_landmark_landmarks_to_roi",
":palm_detection_detection_to_roi",
"//mediapipe/calculators/core:begin_loop_calculator",
"//mediapipe/calculators/core:clip_vector_size_calculator",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:end_loop_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:association_norm_rect_calculator",
"//mediapipe/calculators/util:collection_has_min_size_calculator",
"//mediapipe/calculators/util:filter_collection_calculator",
"//mediapipe/modules/palm_detection:palm_detection_cpu",
],
)
mediapipe_simple_subgraph(
name = "palm_detection_detection_to_roi",
graph = "palm_detection_detection_to_roi.pbtxt",
register_as = "PalmDetectionDetectionToRoi",
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmark_landmarks_to_roi",
graph = "hand_landmark_landmarks_to_roi.pbtxt",
register_as = "HandLandmarkLandmarksToRoi",
deps = [
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
],
)


@ -1,8 +0,0 @@
# hand_landmark
Subgraphs|Details
:--- | :---
[`HandLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt)| Detects landmarks of a single hand. (CPU input.)
[`HandLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt)| Detects landmarks of a single hand. (GPU input.)
[`HandLandmarkTrackingCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt)| Detects and tracks landmarks of multiple hands. (CPU input.)
[`HandLandmarkTrackingGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)| Detects and tracks landmarks of multiple hands. (GPU input.)


@ -1,33 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "hand_landmarks_to_rect_calculator",
srcs = ["hand_landmarks_to_rect_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)


@ -1,167 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {
constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS";
constexpr char kNormRectTag[] = "NORM_RECT";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr int kWristJoint = 0;
constexpr int kMiddleFingerPIPJoint = 6;
constexpr int kIndexFingerPIPJoint = 4;
constexpr int kRingFingerPIPJoint = 8;
constexpr float kTargetAngle = M_PI * 0.5f;
inline float NormalizeRadians(float angle) {
return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
}
float ComputeRotation(const NormalizedLandmarkList& landmarks,
const std::pair<int, int>& image_size) {
const float x0 = landmarks.landmark(kWristJoint).x() * image_size.first;
const float y0 = landmarks.landmark(kWristJoint).y() * image_size.second;
float x1 = (landmarks.landmark(kIndexFingerPIPJoint).x() +
landmarks.landmark(kRingFingerPIPJoint).x()) /
2.f;
float y1 = (landmarks.landmark(kIndexFingerPIPJoint).y() +
landmarks.landmark(kRingFingerPIPJoint).y()) /
2.f;
x1 = (x1 + landmarks.landmark(kMiddleFingerPIPJoint).x()) / 2.f *
image_size.first;
y1 = (y1 + landmarks.landmark(kMiddleFingerPIPJoint).y()) / 2.f *
image_size.second;
const float rotation =
NormalizeRadians(kTargetAngle - std::atan2(-(y1 - y0), x1 - x0));
return rotation;
}
absl::Status NormalizedLandmarkListToRect(
const NormalizedLandmarkList& landmarks,
const std::pair<int, int>& image_size, NormalizedRect* rect) {
const float rotation = ComputeRotation(landmarks, image_size);
const float reverse_angle = NormalizeRadians(-rotation);
// Find boundaries of landmarks.
float max_x = std::numeric_limits<float>::min();
float max_y = std::numeric_limits<float>::min();
float min_x = std::numeric_limits<float>::max();
float min_y = std::numeric_limits<float>::max();
for (int i = 0; i < landmarks.landmark_size(); ++i) {
max_x = std::max(max_x, landmarks.landmark(i).x());
max_y = std::max(max_y, landmarks.landmark(i).y());
min_x = std::min(min_x, landmarks.landmark(i).x());
min_y = std::min(min_y, landmarks.landmark(i).y());
}
const float axis_aligned_center_x = (max_x + min_x) / 2.f;
const float axis_aligned_center_y = (max_y + min_y) / 2.f;
// Find boundaries of rotated landmarks.
max_x = std::numeric_limits<float>::min();
max_y = std::numeric_limits<float>::min();
min_x = std::numeric_limits<float>::max();
min_y = std::numeric_limits<float>::max();
for (int i = 0; i < landmarks.landmark_size(); ++i) {
const float original_x =
(landmarks.landmark(i).x() - axis_aligned_center_x) * image_size.first;
const float original_y =
(landmarks.landmark(i).y() - axis_aligned_center_y) * image_size.second;
const float projected_x = original_x * std::cos(reverse_angle) -
original_y * std::sin(reverse_angle);
const float projected_y = original_x * std::sin(reverse_angle) +
original_y * std::cos(reverse_angle);
max_x = std::max(max_x, projected_x);
max_y = std::max(max_y, projected_y);
min_x = std::min(min_x, projected_x);
min_y = std::min(min_y, projected_y);
}
const float projected_center_x = (max_x + min_x) / 2.f;
const float projected_center_y = (max_y + min_y) / 2.f;
const float center_x = projected_center_x * std::cos(rotation) -
projected_center_y * std::sin(rotation) +
image_size.first * axis_aligned_center_x;
const float center_y = projected_center_x * std::sin(rotation) +
projected_center_y * std::cos(rotation) +
image_size.second * axis_aligned_center_y;
const float width = (max_x - min_x) / image_size.first;
const float height = (max_y - min_y) / image_size.second;
rect->set_x_center(center_x / image_size.first);
rect->set_y_center(center_y / image_size.second);
rect->set_width(width);
rect->set_height(height);
rect->set_rotation(rotation);
return absl::OkStatus();
}
} // namespace
// A calculator that converts a subset of hand landmarks to a bounding box
// NormalizedRect. The rotation angle of the bounding box is computed based on
// 1) the wrist joint and 2) the average of PIP joints of index finger, middle
// finger and ring finger. After rotation, the vector from the wrist to the mean
// of PIP joints is expected to be vertical with wrist at the bottom and the
// mean of PIP joints at the top.
class HandLandmarksToRectCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kNormalizedLandmarksTag).Set<NormalizedLandmarkList>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
cc->Outputs().Tag(kNormRectTag).Set<NormalizedRect>();
return absl::OkStatus();
}
absl::Status Open(CalculatorContext* cc) override {
cc->SetOffset(TimestampDiff(0));
return absl::OkStatus();
}
absl::Status Process(CalculatorContext* cc) override {
if (cc->Inputs().Tag(kNormalizedLandmarksTag).IsEmpty()) {
return absl::OkStatus();
}
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
std::pair<int, int> image_size =
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
const auto& landmarks =
cc->Inputs().Tag(kNormalizedLandmarksTag).Get<NormalizedLandmarkList>();
auto output_rect = absl::make_unique<NormalizedRect>();
MP_RETURN_IF_ERROR(
NormalizedLandmarkListToRect(landmarks, image_size, output_rect.get()));
cc->Outputs()
.Tag(kNormRectTag)
.Add(output_rect.release(), cc->InputTimestamp());
return absl::OkStatus();
}
};
REGISTER_CALCULATOR(HandLandmarksToRectCalculator);
} // namespace mediapipe
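
A quick sanity check of the rotation convention in the calculator above: with the PIP-joint mean directly above the wrist (image y grows downward), atan2(-(y1 - y0), x1 - x0) evaluates to pi/2, so the computed rotation is exactly 0 and the ROI stays upright. A standalone sketch of that check:

#include <cmath>
#include <iostream>

constexpr float kTargetAngle = M_PI * 0.5f;

float NormalizeRadians(float angle) {
  return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
}

int main() {
  // Wrist at (0.5, 0.8), PIP-joint mean at (0.5, 0.4): hand pointing up.
  const float x0 = 0.5f, y0 = 0.8f;
  const float x1 = 0.5f, y1 = 0.4f;
  const float rotation =
      NormalizeRadians(kTargetAngle - std::atan2(-(y1 - y0), x1 - x0));
  std::cout << "rotation: " << rotation << " rad\n";  // prints 0
}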


@ -1,219 +0,0 @@
# MediaPipe graph to detect/predict hand landmarks on CPU.
type: "HandLandmarkCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# behaves as if set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"
# Transforms a region of the image into a 224x224 tensor while keeping the
# aspect ratio, which may result in letterboxing.
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
input_stream: "NORM_RECT:hand_rect"
output_stream: "TENSORS:input_tensor"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 224
output_tensor_height: 224
keep_aspect_ratio: true
output_tensor_float_range {
min: 0.0
max: 1.0
}
}
}
}
# Loads the hand landmark TF Lite model.
node {
calculator: "HandLandmarkModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_side_packet: "MODEL:model"
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
delegate {
xnnpack {}
}
}
}
}
# Splits a vector of tensors into multiple vectors according to the ranges
# specified in the options.
node {
calculator: "SplitTensorVectorCalculator"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor"
output_stream: "handedness_tensor"
output_stream: "world_landmark_tensor"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
}
}
}
# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
calculator: "TensorsToFloatsCalculator"
input_stream: "TENSORS:hand_flag_tensor"
output_stream: "FLOAT:hand_presence_score"
}
# Applies a threshold to the confidence score to determine whether a hand is
# present.
node {
calculator: "ThresholdingCalculator"
input_stream: "FLOAT:hand_presence_score"
output_stream: "FLAG:hand_presence"
options: {
[mediapipe.ThresholdingCalculatorOptions.ext] {
threshold: 0.5
}
}
}
# Drops the handedness tensor if the hand is not present.
node {
calculator: "GateCalculator"
input_stream: "handedness_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_handedness_tensor"
}
# Converts the handedness tensor into a ClassificationList that holds the
# handedness label and its score.
node {
calculator: "TensorsToClassificationCalculator"
input_stream: "TENSORS:ensured_handedness_tensor"
output_stream: "CLASSIFICATIONS:handedness"
options: {
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
top_k: 1
label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
binary_classification: true
}
}
}
# Drops the landmark tensors if the hand is not present.
node {
calculator: "GateCalculator"
input_stream: "landmark_tensors"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_landmark_tensors"
}
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "NORM_LANDMARKS:landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
input_image_width: 224
input_image_height: 224
# The additional scaling factor is used to account for the Z coordinate
# distribution in the training data.
normalize_z: 0.4
}
}
}
# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
calculator: "LandmarkLetterboxRemovalCalculator"
input_stream: "LANDMARKS:landmarks"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "LANDMARKS:scaled_landmarks"
}
# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "LandmarkProjectionCalculator"
input_stream: "NORM_LANDMARKS:scaled_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks"
}
# Drops the world-landmark tensor if the hand is not present.
node {
calculator: "GateCalculator"
input_stream: "world_landmark_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_world_landmark_tensor"
}
# Decodes the world-landmark tensor into a list of landmarks with real-world
# 3D coordinates; no image-size normalization is applied.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_world_landmark_tensor"
output_stream: "LANDMARKS:unprojected_world_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
}
}
}
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "WorldLandmarkProjectionCalculator"
input_stream: "LANDMARKS:unprojected_world_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_world_landmarks"
}
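
The LandmarkLetterboxRemovalCalculator step above undoes the padding added by the FIT scale mode: each normalized coordinate is shifted by the leading padding and rescaled by the unpadded fraction. A hedged C++ sketch of the adjustment (the left/top/right/bottom padding order is an assumption about the LETTERBOX_PADDING convention):

#include <array>
#include <vector>

struct NormalizedLandmark {
  float x = 0.f, y = 0.f, z = 0.f;
};

// Undoes letterbox padding for landmarks normalized to the padded image.
// padding = {left, top, right, bottom} as fractions of the padded size.
void RemoveLetterbox(const std::array<float, 4>& padding,
                     std::vector<NormalizedLandmark>& landmarks) {
  const float left = padding[0];
  const float top = padding[1];
  const float width_scale = 1.f - padding[0] - padding[2];
  const float height_scale = 1.f - padding[1] - padding[3];
  for (NormalizedLandmark& lm : landmarks) {
    lm.x = (lm.x - left) / width_scale;
    lm.y = (lm.y - top) / height_scale;
    lm.z = lm.z / width_scale;  // keep z consistent with the x rescale
  }
}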


@ -1,213 +0,0 @@
# MediaPipe graph to detect/predict hand landmarks on GPU.
type: "HandLandmarkGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# behaves as if set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"
# Transforms a region of the image into a 224x224 tensor while keeping the
# aspect ratio, which may result in letterboxing.
node {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE_GPU:image"
input_stream: "NORM_RECT:hand_rect"
output_stream: "TENSORS:input_tensor"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 224
output_tensor_height: 224
keep_aspect_ratio: true
output_tensor_float_range {
min: 0.0
max: 1.0
}
gpu_origin: TOP_LEFT
}
}
}
# Loads the hand landmark TF Lite model.
node {
calculator: "HandLandmarkModelLoader"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
output_side_packet: "MODEL:model"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "InferenceCalculator"
input_side_packet: "MODEL:model"
input_stream: "TENSORS:input_tensor"
output_stream: "TENSORS:output_tensors"
}
# Splits a vector of tensors into multiple vectors according to the ranges
# specified in the options.
node {
calculator: "SplitTensorVectorCalculator"
input_stream: "output_tensors"
output_stream: "landmark_tensors"
output_stream: "hand_flag_tensor"
output_stream: "handedness_tensor"
output_stream: "world_landmark_tensor"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
ranges: { begin: 1 end: 2 }
ranges: { begin: 2 end: 3 }
ranges: { begin: 3 end: 4 }
}
}
}
# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
calculator: "TensorsToFloatsCalculator"
input_stream: "TENSORS:hand_flag_tensor"
output_stream: "FLOAT:hand_presence_score"
}
# Applies a threshold to the confidence score to determine whether a hand is
# present.
node {
calculator: "ThresholdingCalculator"
input_stream: "FLOAT:hand_presence_score"
output_stream: "FLAG:hand_presence"
options: {
[mediapipe.ThresholdingCalculatorOptions.ext] {
threshold: 0.5
}
}
}
# Drops the handedness tensor if the hand is not present.
node {
calculator: "GateCalculator"
input_stream: "handedness_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_handedness_tensor"
}
# Converts the handedness tensor into a ClassificationList that holds the
# handedness label and its score.
node {
calculator: "TensorsToClassificationCalculator"
input_stream: "TENSORS:ensured_handedness_tensor"
output_stream: "CLASSIFICATIONS:handedness"
options: {
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
top_k: 1
label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
binary_classification: true
}
}
}
# Drops the landmark tensors if the hand is not present.
node {
calculator: "GateCalculator"
input_stream: "landmark_tensors"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_landmark_tensors"
}
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_landmark_tensors"
output_stream: "NORM_LANDMARKS:landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
input_image_width: 224
input_image_height: 224
# The additional scaling factor is used to account for the Z coordinate
# distribution in the training data.
normalize_z: 0.4
}
}
}
# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
calculator: "LandmarkLetterboxRemovalCalculator"
input_stream: "LANDMARKS:landmarks"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "LANDMARKS:scaled_landmarks"
}
# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "LandmarkProjectionCalculator"
input_stream: "NORM_LANDMARKS:scaled_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "NORM_LANDMARKS:hand_landmarks"
}
# Drops the world-landmark tensor if the hand is not present.
node {
calculator: "GateCalculator"
input_stream: "world_landmark_tensor"
input_stream: "ALLOW:hand_presence"
output_stream: "ensured_world_landmark_tensor"
}
# Decodes the world-landmark tensor into a list of landmarks with real-world
# 3D coordinates; no image-size normalization is applied.
node {
calculator: "TensorsToLandmarksCalculator"
input_stream: "TENSORS:ensured_world_landmark_tensor"
output_stream: "LANDMARKS:unprojected_world_landmarks"
options: {
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
num_landmarks: 21
}
}
}
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
calculator: "WorldLandmarkProjectionCalculator"
input_stream: "LANDMARKS:unprojected_world_landmarks"
input_stream: "NORM_RECT:hand_rect"
output_stream: "LANDMARKS:hand_world_landmarks"
}


@ -1,63 +0,0 @@
# MediaPipe graph to calculate hand region of interest (ROI) from landmarks
# detected by "HandLandmarkCpu" or "HandLandmarkGpu".
type: "HandLandmarkLandmarksToRoi"
# Normalized landmarks. (NormalizedLandmarkList)
input_stream: "LANDMARKS:landmarks"
# Image size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI according to landmarks. (NormalizedRect)
output_stream: "ROI:roi"
# Extracts a subset of the hand landmarks that are relatively more stable
# across frames (e.g. compared to finger tips) for computing the bounding box.
# The box will later be expanded to contain the entire hand. This approach is
# more robust to drastically changing hand sizes.
# The landmarks extracted are the wrist and the MCP/PIP joints of five fingers.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "landmarks"
output_stream: "partial_landmarks"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 4 }
ranges: { begin: 5 end: 7 }
ranges: { begin: 9 end: 11 }
ranges: { begin: 13 end: 15 }
ranges: { begin: 17 end: 19 }
combine_outputs: true
}
}
}
# Converts the hand landmarks into a rectangle (normalized by image size)
# that encloses the hand. The calculator uses a subset of all hand landmarks
# extracted from SplitNormalizedLandmarkListCalculator above to
# calculate the bounding box and the rotation of the output rectangle. Please
# see the comments in the calculator for more detail.
node {
calculator: "HandLandmarksToRectCalculator"
input_stream: "NORM_LANDMARKS:partial_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:hand_rect_from_landmarks"
}
# Expands the hand rectangle so that the box contains the entire hand and is
# big enough to likely still contain the hand even with some motion in the
# next video frame.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:hand_rect_from_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 2.0
scale_y: 2.0
shift_y: -0.1
square_long: true
}
}
}
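
The RectTransformationCalculator options above double the rect in both dimensions, nudge it up by a tenth of its height, and square it on the long side so the crop comfortably contains the whole hand. A hedged sketch of that transform for an unrotated rect (the real calculator also handles rect rotation):

#include <algorithm>

struct NormalizedRect {
  float x_center, y_center, width, height;
};

// Roughly mirrors RectTransformationCalculator with scale 2.0, shift_y -0.1
// and square_long, for an unrotated rect.
NormalizedRect ExpandHandRect(NormalizedRect rect, int image_width,
                              int image_height) {
  // shift_y is relative to the rect height; negative moves the rect up.
  rect.y_center += -0.1f * rect.height;
  rect.width *= 2.0f;
  rect.height *= 2.0f;
  // square_long: square the rect on its longer side, measured in pixels so
  // the result is square on the image rather than in normalized units.
  const float long_side =
      std::max(rect.width * image_width, rect.height * image_height);
  rect.width = long_side / image_width;
  rect.height = long_side / image_height;
  return rect;
}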


@ -1,63 +0,0 @@
# MediaPipe graph to load a selected hand landmark TF Lite model.
type: "HandLandmarkModelLoader"
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# behaves as if set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# TF Lite model represented as a FlatBuffer.
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
output_side_packet: "MODEL:model"
# Determines the path to the desired hand landmark model file.
node {
calculator: "SwitchContainer"
input_side_packet: "SELECT:model_complexity"
output_side_packet: "PACKET:model_path"
options: {
[mediapipe.SwitchContainerOptions.ext] {
select: 1
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/hand_landmark/hand_landmark_lite.tflite"
}
}
}
}
contained_node: {
calculator: "ConstantSidePacketCalculator"
options: {
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
packet {
string_value: "mediapipe/modules/hand_landmark/hand_landmark_full.tflite"
}
}
}
}
}
}
}
# Loads the file in the specified path into a blob.
node {
calculator: "LocalFileContentsCalculator"
input_side_packet: "FILE_PATH:model_path"
output_side_packet: "CONTENTS:model_blob"
options: {
[mediapipe.LocalFileContentsCalculatorOptions.ext]: {
text_mode: false
}
}
}
# Converts the input blob into a TF Lite model.
node {
calculator: "TfLiteModelCalculator"
input_side_packet: "MODEL_BLOB:model_blob"
output_side_packet: "MODEL:model"
}


@ -1,271 +0,0 @@
# MediaPipe graph to detect/predict hand landmarks on CPU.
#
# The procedure is done in two steps:
# - locate palms/hands
# - detect landmarks for each palm/hand.
# This graph tries to skip palm detection as much as possible by reusing
# previously detected/predicted landmarks for new images.
type: "HandLandmarkTrackingCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no hands are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
output_stream: "HANDEDNESS:multi_handedness"
# Extra outputs (for debugging, for instance).
# Detected palms. (std::vector<Detection>)
output_stream: "PALM_DETECTIONS:palm_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_hand_rects_from_landmarks"
output_stream: "gated_prev_hand_rects_from_landmarks"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_hands.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:gated_prev_hand_rects_from_landmarks"
input_side_packet: "num_hands"
output_stream: "prev_has_enough_hands"
}
# Drops the incoming image if enough hands have already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of palm detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_hands"
output_stream: "palm_detection_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects palms.
node {
calculator: "PalmDetectionCpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:palm_detection_image"
output_stream: "DETECTIONS:all_palm_detections"
}
# Makes sure there are no more detections than the provided num_hands.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_palm_detections"
output_stream: "palm_detections"
input_side_packet: "num_hands"
}
# Extracts image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:palm_detection_image"
output_stream: "SIZE:palm_detection_image_size"
}
# Outputs each element of palm_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each palm_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:palm_detections"
input_stream: "CLONE:palm_detection_image_size"
output_stream: "ITEM:palm_detection"
output_stream: "CLONE:image_size_for_palms"
output_stream: "BATCH_END:palm_detections_timestamp"
}
# Calculates region of interest (ROI) based on the specified palm.
node {
calculator: "PalmDetectionDetectionToRoi"
input_stream: "DETECTION:palm_detection"
input_stream: "IMAGE_SIZE:image_size_for_palms"
output_stream: "ROI:hand_rect_from_palm_detection"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:hand_rect_from_palm_detection"
input_stream: "BATCH_END:palm_detections_timestamp"
output_stream: "ITERABLE:hand_rects_from_palm_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on palm detections from the current image. This
# calculator ensures that the output hand_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "hand_rects_from_palm_detections"
input_stream: "gated_prev_hand_rects_from_landmarks"
output_stream: "hand_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Extracts image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of hand_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_hand_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:hand_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:single_hand_rect"
output_stream: "CLONE:0:image_for_landmarks"
output_stream: "CLONE:1:image_size_for_landmarks"
output_stream: "BATCH_END:hand_rects_timestamp"
}
# Detect hand landmarks for the specific hand rect.
node {
calculator: "HandLandmarkCpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:image_for_landmarks"
input_stream: "ROI:single_hand_rect"
output_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
output_stream: "HANDEDNESS:single_handedness"
}
# Collects the handedness for each single hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
# timestamp.
node {
calculator: "EndLoopClassificationListCalculator"
input_stream: "ITEM:single_handedness"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_handedness"
}
# Calculates region of interest (ROI) based on detected hand landmarks, for
# reuse on subsequent runs of the graph.
node {
calculator: "HandLandmarkLandmarksToRoi"
input_stream: "IMAGE_SIZE:image_size_for_landmarks"
input_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "ROI:single_hand_rect_from_landmarks"
}
# Collects a set of landmarks for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_landmarks"
}
# Collects a set of world landmarks for each hand into a vector. Upon receiving
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_world_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_world_landmarks"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:single_hand_rect_from_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:hand_rects_from_landmarks"
}
# Caches hand rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# hand rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:hand_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_hand_rects_from_landmarks"
}
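
Stripped of the calculator plumbing, the tracking graph above runs this control flow per frame: run palm detection only if the previous frame tracked fewer hands than requested, merge fresh detections with the carried-over ROIs, run the landmark model per ROI, and feed the landmark-derived ROIs back for the next frame. A hedged pseudo-C++ sketch (every type and function here is an illustrative stand-in for a subgraph above, with trivial stub bodies to keep it compilable):

#include <utility>
#include <vector>

struct Image {};
struct NormalizedRect {};
struct HandResult {};  // landmarks, world landmarks, handedness

// Stand-ins for PalmDetectionCpu, HandLandmarkCpu, HandLandmarkLandmarksToRoi
// and AssociationNormRectCalculator.
std::vector<NormalizedRect> DetectPalmRois(const Image&) { return {}; }
HandResult DetectLandmarks(const Image&, const NormalizedRect&) { return {}; }
NormalizedRect RoiFromLandmarks(const HandResult&) { return {}; }
std::vector<NormalizedRect> MergeNonOverlapping(std::vector<NormalizedRect> a,
                                                std::vector<NormalizedRect> b) {
  a.insert(a.end(), b.begin(), b.end());  // the real graph drops IoU overlaps
  return a;
}

std::vector<HandResult> TrackFrame(const Image& image, int num_hands,
                                   std::vector<NormalizedRect>& prev_rois) {
  std::vector<NormalizedRect> rois = prev_rois;
  // Palm detection only runs when tracking lost hands (the DISALLOW gate on
  // prev_has_enough_hands above); otherwise the previous ROIs are reused.
  if (static_cast<int>(rois.size()) < num_hands) {
    rois = MergeNonOverlapping(DetectPalmRois(image), rois);
  }
  std::vector<HandResult> results;
  std::vector<NormalizedRect> next_rois;
  // The BeginLoop/EndLoop pair above: one landmark pass per ROI.
  for (const NormalizedRect& roi : rois) {
    HandResult result = DetectLandmarks(image, roi);
    next_rois.push_back(RoiFromLandmarks(result));
    results.push_back(result);
  }
  // PreviousLoopbackCalculator's role: carry ROIs to the next frame.
  prev_rois = std::move(next_rois);
  return results;
}

int main() { return 0; }  // the sketch is meant to be read, not run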


@ -1,116 +0,0 @@
# MediaPipe graph to detect/predict hand landmarks on CPU.
#
# The procedure is done in two steps:
# - locate palms/hands
# - detect landmarks for each palm/hand.
# This graph tries to skip palm detection as much as possible by reusing
# previously detected/predicted landmarks for new images.
type: "HandLandmarkTrackingCpuImage"
# Input image. (Image)
input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no hands are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
output_stream: "HANDEDNESS:multi_handedness"
# Extra outputs (for debugging, for instance).
# Detected palms. (std::vector<Detection>)
output_stream: "PALM_DETECTIONS:palm_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "FINISHED:multi_hand_landmarks"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_image"
options: {
[mediapipe.FlowLimiterCalculatorOptions.ext] {
max_in_flight: 1
max_in_queue: 1
}
}
}
# Converts Image to ImageFrame for HandLandmarkTrackingCpu to consume.
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:throttled_image"
output_stream: "IMAGE_CPU:raw_image_frame"
output_stream: "SOURCE_ON_GPU:is_gpu_image"
}
# TODO: Remove the extra flipping once adopting MlImage.
# If the source images are on gpu, flip the data vertically before sending them
# into HandLandmarkTrackingCpu. This may be needed because OpenGL represents
# images assuming the image origin is at the bottom-left corner, whereas
# MediaPipe in general assumes the image origin is at the top-left corner.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:raw_image_frame"
input_stream: "FLIP_VERTICALLY:is_gpu_image"
output_stream: "IMAGE:image_frame"
}
node {
calculator: "HandLandmarkTrackingCpu"
input_stream: "IMAGE:image_frame"
input_side_packet: "NUM_HANDS:num_hands"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
output_stream: "LANDMARKS:multi_hand_landmarks"
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
output_stream: "HANDEDNESS:multi_handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}
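
The FlowLimiterCalculator above provides back-pressure: at most one frame is in flight through the graph (max_in_flight: 1) plus one pending frame (max_in_queue: 1), and newer arrivals replace the pending frame so a slow consumer always sees fresh input instead of a growing backlog. A hedged, single-threaded sketch of that policy (the real calculator operates on timestamped packets and is driven by the FINISHED back edge shown above):

#include <deque>
#include <optional>
#include <utility>

// Toy flow limiter with max_in_flight = 1 and max_in_queue = 1.
template <typename Frame>
class FlowLimiter {
 public:
  // Offers a new frame; returns a frame to start processing now, if any.
  std::optional<Frame> Offer(Frame frame) {
    if (!in_flight_) {
      in_flight_ = true;
      return frame;  // nothing in flight: process immediately
    }
    if (queue_.empty()) {
      queue_.push_back(std::move(frame));  // one pending slot
    } else {
      queue_.back() = std::move(frame);  // drop the stale pending frame
    }
    return std::nullopt;
  }

  // Called when processing finishes (the FINISHED back edge); returns the
  // next queued frame to process, if any.
  std::optional<Frame> Finish() {
    if (queue_.empty()) {
      in_flight_ = false;
      return std::nullopt;
    }
    Frame next = std::move(queue_.front());
    queue_.pop_front();
    return next;  // still in flight with the queued frame
  }

 private:
  bool in_flight_ = false;
  std::deque<Frame> queue_;
};

Dropping frames in Offer rather than blocking is what keeps end-to-end latency bounded when the landmark graph cannot keep up with the camera.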


@ -1,272 +0,0 @@
# MediaPipe graph to detect/predict hand landmarks on GPU.
#
# The procedure is done in two steps:
# - locate palms/hands
# - detect landmarks for each palm/hand.
# This graph tries to skip palm detection as much as possible by reusing
# previously detected/predicted landmarks for new images.
type: "HandLandmarkTrackingGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no hands are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
output_stream: "HANDEDNESS:multi_handedness"
# Extra outputs (for debugging, for instance).
# Detected palms. (std::vector<Detection>)
output_stream: "PALM_DETECTIONS:palm_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_hand_rects_from_landmarks"
output_stream: "gated_prev_hand_rects_from_landmarks"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
}
# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_hands.
node {
calculator: "NormalizedRectVectorHasMinSizeCalculator"
input_stream: "ITERABLE:gated_prev_hand_rects_from_landmarks"
input_side_packet: "num_hands"
output_stream: "prev_has_enough_hands"
}
# Drops the incoming image if enough hands have already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of palm detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "DISALLOW:prev_has_enough_hands"
output_stream: "palm_detection_image"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects palms.
node {
calculator: "PalmDetectionGpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:palm_detection_image"
output_stream: "DETECTIONS:all_palm_detections"
}
# Makes sure there are no more detections than provided num_hands.
node {
calculator: "ClipDetectionVectorSizeCalculator"
input_stream: "all_palm_detections"
output_stream: "palm_detections"
input_side_packet: "num_hands"
}
# Extracts image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:palm_detection_image"
output_stream: "SIZE:palm_detection_image_size"
}
# Outputs each element of palm_detections at a fake timestamp for the rest of
# the graph to process. Clones the image_size packet for each palm_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector have
# been processed.
node {
calculator: "BeginLoopDetectionCalculator"
input_stream: "ITERABLE:palm_detections"
input_stream: "CLONE:palm_detection_image_size"
output_stream: "ITEM:palm_detection"
output_stream: "CLONE:image_size_for_palms"
output_stream: "BATCH_END:palm_detections_timestamp"
}
# Calculates region of interest (ROI) based on the specified palm.
node {
calculator: "PalmDetectionDetectionToRoi"
input_stream: "DETECTION:palm_detection"
input_stream: "IMAGE_SIZE:image_size_for_palms"
output_stream: "ROI:hand_rect_from_palm_detection"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
name: "EndLoopForPalmDetections"
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:hand_rect_from_palm_detection"
input_stream: "BATCH_END:palm_detections_timestamp"
output_stream: "ITERABLE:hand_rects_from_palm_detections"
}
# Performs association between NormalizedRect vector elements from previous
# image and rects based on palm detections from the current image. This
# calculator ensures that the output hand_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
calculator: "AssociationNormRectCalculator"
input_stream: "hand_rects_from_palm_detections"
input_stream: "gated_prev_hand_rects_from_landmarks"
output_stream: "hand_rects"
options: {
[mediapipe.AssociationCalculatorOptions.ext] {
min_similarity_threshold: 0.5
}
}
}
# Extracts image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:image"
output_stream: "SIZE:image_size"
}
# Outputs each element of hand_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_hand_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
calculator: "BeginLoopNormalizedRectCalculator"
input_stream: "ITERABLE:hand_rects"
input_stream: "CLONE:0:image"
input_stream: "CLONE:1:image_size"
output_stream: "ITEM:single_hand_rect"
output_stream: "CLONE:0:image_for_landmarks"
output_stream: "CLONE:1:image_size_for_landmarks"
output_stream: "BATCH_END:hand_rects_timestamp"
}
# Detects hand landmarks for the specific hand rect.
node {
calculator: "HandLandmarkGpu"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_stream: "IMAGE:image_for_landmarks"
input_stream: "ROI:single_hand_rect"
output_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
output_stream: "HANDEDNESS:single_handedness"
}
# Collects the handedness for each single hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
# timestamp.
node {
calculator: "EndLoopClassificationListCalculator"
input_stream: "ITEM:single_handedness"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_handedness"
}
# Calculates region of interest (ROI) based on detected hand landmarks to reuse
# on subsequent runs of the graph.
node {
calculator: "HandLandmarkLandmarksToRoi"
input_stream: "IMAGE_SIZE:image_size_for_landmarks"
input_stream: "LANDMARKS:single_hand_landmarks"
output_stream: "ROI:single_hand_rect_from_landmarks"
}
# Collects a set of landmarks for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_landmarks"
}
# Collects a set of world landmarks for each hand into a vector. Upon receiving
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
calculator: "EndLoopLandmarkListVectorCalculator"
input_stream: "ITEM:single_hand_world_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:multi_hand_world_landmarks"
}
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
calculator: "EndLoopNormalizedRectCalculator"
input_stream: "ITEM:single_hand_rect_from_landmarks"
input_stream: "BATCH_END:hand_rects_timestamp"
output_stream: "ITERABLE:hand_rects_from_landmarks"
}
# Caches hand rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# hand rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:hand_rects_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_hand_rects_from_landmarks"
}
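
The gating logic above is the heart of the tracking optimization: palm detection only runs when the previous frame yielded fewer rects than `num_hands`. A minimal standalone C++ sketch of that decision (plain structs and an illustrative function name, not the MediaPipe API):

```cpp
#include <vector>

struct NormalizedRect { float x_center, y_center, width, height, rotation; };

// Mirrors the GateCalculator + NormalizedRectVectorHasMinSizeCalculator pair:
// palm detection runs only when fewer than num_hands rects survived from the
// previous frame's landmarks.
bool ShouldRunPalmDetection(const std::vector<NormalizedRect>& prev_hand_rects,
                            int num_hands) {
  // "prev_has_enough_hands" in the graph above.
  const bool prev_has_enough_hands =
      static_cast<int>(prev_hand_rects.size()) >= num_hands;
  // The image is dropped (DISALLOW) when enough hands are already tracked.
  return !prev_has_enough_hands;
}
```

With `num_hands = 2` and two rects surviving from the previous frame, the image is dropped at the gate and landmark prediction proceeds purely from tracked ROIs.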

View File

@ -1,115 +0,0 @@
# MediaPipe graph to detect/predict hand landmarks on GPU.
#
# The procedure is done in two steps:
# - locate palms/hands
# - detect landmarks for each palm/hand.
# This graph tries to skip palm detection as much as possible by reusing
# previously detected/predicted landmarks for new images.
type: "HandLandmarkTrackingGpuImage"
# Input image. (Image)
input_stream: "IMAGE:image"
# Max number of hands to detect/track. (int)
input_side_packet: "NUM_HANDS:num_hands"
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
# well as inference latency generally go up with the model complexity. If
# unspecified, behaves as if set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Collection of detected/predicted hands, each represented as a list of
# landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no hands are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_hand_landmarks"
# Collection of detected/predicted hand world landmarks.
# (std::vector<LandmarkList>)
#
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the hand bounding box calculated from the landmarks.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
# Collection of handedness of the detected hands (i.e. is hand left or right),
# each represented as a ClassificationList proto with a single Classification
# entry. (std::vector<ClassificationList>)
# Note that handedness is determined assuming the input image is mirrored,
# i.e., taken with a front-facing/selfie camera with images flipped
# horizontally.
output_stream: "HANDEDNESS:multi_handedness"
# The throttled input image. (Image)
output_stream: "IMAGE:throttled_image"
# Extra outputs (for debugging, for instance).
# Detected palms. (std::vector<Detection>)
output_stream: "PALM_DETECTIONS:palm_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
# Regions of interest calculated based on palm detections.
# (std::vector<NormalizedRect>)
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
node {
calculator: "FlowLimiterCalculator"
input_stream: "image"
input_stream: "FINISHED:multi_hand_landmarks"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_image"
options: {
[mediapipe.FlowLimiterCalculatorOptions.ext] {
max_in_flight: 1
max_in_queue: 1
}
}
}
# Converts Image to GpuBuffer for HandLandmarkTrackingGpu to consume.
node {
calculator: "FromImageCalculator"
input_stream: "IMAGE:throttled_image"
output_stream: "IMAGE_GPU:raw_gpu_buffer"
output_stream: "SOURCE_ON_GPU:is_gpu_image"
}
# TODO: Remove the extra flipping once adopting MlImage.
# If the source images are on gpu, flip the data vertically before sending them
# into HandLandmarkTrackingGpu. This may be needed because OpenGL represents
# images assuming the image origin is at the bottom-left corner, whereas
# MediaPipe in general assumes the image origin is at the top-left corner.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:raw_gpu_buffer"
input_stream: "FLIP_VERTICALLY:is_gpu_image"
output_stream: "IMAGE_GPU:gpu_buffer"
}
node {
calculator: "HandLandmarkTrackingGpu"
input_stream: "IMAGE:gpu_buffer"
input_side_packet: "NUM_HANDS:num_hands"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
output_stream: "LANDMARKS:multi_hand_landmarks"
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
output_stream: "HANDEDNESS:multi_handedness"
output_stream: "PALM_DETECTIONS:palm_detections"
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
}
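
The `FlowLimiterCalculator` node above throttles the stream so that at most one frame is inside the graph at a time. Here is one plausible reading of the `max_in_flight: 1, max_in_queue: 1` policy as a standalone C++ sketch (an illustrative class, not the MediaPipe implementation, which operates on timestamped packets):

```cpp
#include <optional>

// One plausible reading of the max_in_flight: 1, max_in_queue: 1 policy.
// Frames are identified by an integer id here; the real calculator works on
// timestamped packets.
class FlowLimiter {
 public:
  // Returns true if the frame may enter the graph immediately.
  bool TryAdmit(int frame_id) {
    if (!in_flight_) { in_flight_ = frame_id; return true; }
    queued_ = frame_id;  // Assumed: a newer frame replaces any queued one.
    return false;
  }
  // Called when the FINISHED back edge fires; returns the next admitted frame.
  std::optional<int> OnFinished() {
    in_flight_.reset();
    if (queued_) { in_flight_ = *queued_; queued_.reset(); }
    return in_flight_;
  }
 private:
  std::optional<int> in_flight_;
  std::optional<int> queued_;
};
```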

View File

@ -1,2 +0,0 @@
Left
Right
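
This two-line label map pairs with the handedness `ClassificationList` output. A hedged C++ sketch of the lookup, assuming class indices follow file order (an assumption for illustration) and honoring the mirrored-input convention noted in the graphs above:

```cpp
#include <array>
#include <string>

// Label map from the file above. The graphs note that handedness assumes a
// mirrored (selfie) input, so the labels swap for an unmirrored feed. The
// index-to-label pairing by file order is an assumption for illustration.
constexpr std::array<const char*, 2> kHandednessLabels = {"Left", "Right"};

std::string HandednessLabel(int class_index, bool input_is_mirrored) {
  if (!input_is_mirrored) class_index = 1 - class_index;  // Swap for raw feeds.
  return kHandednessLabels[class_index];
}
```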

View File

@ -1,47 +0,0 @@
# MediaPipe subgraph that calculates hand ROI from palm detection.
type: "PalmDetectionDetectionToRoi"
# Palm detection. (Detection)
input_stream: "DETECTION:detection"
# Frame size. (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI (region of interest) according to landmarks, represented as normalized
# rect. (NormalizedRect)
output_stream: "ROI:roi"
# Converts results of palm detection into a rectangle (normalized by image size)
# that encloses the palm and is rotated such that the line connecting center of
# the wrist and MCP of the middle finger is aligned with the Y-axis of the
# rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:raw_roi"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
rotation_vector_start_keypoint_index: 0 # Center of wrist.
rotation_vector_end_keypoint_index: 2 # MCP of middle finger.
rotation_vector_target_angle_degrees: 90
}
}
}
# Expands and shifts the rectangle that contains the palm so that it's likely
# to cover the entire hand.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:raw_roi"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 2.6
scale_y: 2.6
shift_y: -0.5
square_long: true
}
}
}
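
For reference, the `scale_x/scale_y/shift_y/square_long` options above amount to simple rect arithmetic: shift the palm rect by half its height along its own rotated Y axis, scale by 2.6, and square it on the long side. A standalone C++ sketch under those assumptions (sign conventions are hedged; this is not the `RectTransformationCalculator` source):

```cpp
#include <algorithm>
#include <cmath>

struct NormalizedRect { float x_center, y_center, width, height, rotation; };

// Expands a palm rect into a hand rect: shift by -0.5 * height along the
// rect's own rotated Y axis, scale by 2.6, square on the long side.
NormalizedRect PalmRectToHandRect(NormalizedRect r, int image_w, int image_h) {
  const float w_px = r.width * image_w;
  const float h_px = r.height * image_h;
  // shift_y: -0.5, applied in the rect's local frame and rotated into
  // image coordinates (sign conventions are assumed here).
  const float sy = -0.5f * h_px;
  r.x_center += (-sy * std::sin(r.rotation)) / image_w;
  r.y_center += (sy * std::cos(r.rotation)) / image_h;
  // scale_x / scale_y: 2.6 with square_long: true takes the longer side.
  const float side = 2.6f * std::max(w_px, h_px);
  r.width = side / image_w;
  r.height = side / image_h;
  return r;
}
```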

View File

@ -1,267 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph")
# TODO: revert to private.
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
exports_files([
"hand_recrop.tflite",
])
mediapipe_simple_subgraph(
name = "face_landmarks_from_pose_gpu",
graph = "face_landmarks_from_pose_gpu.pbtxt",
register_as = "FaceLandmarksFromPoseGpu",
deps = [
":face_detection_front_detections_to_roi",
":face_landmarks_from_pose_to_recrop_roi",
":face_tracking",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_by_roi_gpu",
"//mediapipe/modules/face_landmark:face_landmark_gpu",
],
)
mediapipe_simple_subgraph(
name = "face_landmarks_from_pose_cpu",
graph = "face_landmarks_from_pose_cpu.pbtxt",
register_as = "FaceLandmarksFromPoseCpu",
deps = [
":face_detection_front_detections_to_roi",
":face_landmarks_from_pose_to_recrop_roi",
":face_tracking",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/modules/face_detection:face_detection_short_range_by_roi_cpu",
"//mediapipe/modules/face_landmark:face_landmark_cpu",
],
)
mediapipe_simple_subgraph(
name = "face_landmarks_to_roi",
graph = "face_landmarks_to_roi.pbtxt",
register_as = "FaceLandmarksToRoi",
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_detection_front_detections_to_roi",
graph = "face_detection_front_detections_to_roi.pbtxt",
register_as = "FaceDetectionFrontDetectionsToRoi",
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_tracking",
graph = "face_tracking.pbtxt",
register_as = "FaceTracking",
deps = [
":face_landmarks_to_roi",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator",
],
)
mediapipe_simple_subgraph(
name = "face_landmarks_from_pose_to_recrop_roi",
graph = "face_landmarks_from_pose_to_recrop_roi.pbtxt",
register_as = "FaceLandmarksFromPoseToRecropRoi",
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmarks_from_pose_gpu",
graph = "hand_landmarks_from_pose_gpu.pbtxt",
register_as = "HandLandmarksFromPoseGpu",
deps = [
":hand_landmarks_from_pose_to_recrop_roi",
":hand_recrop_by_roi_gpu",
":hand_tracking",
":hand_visibility_from_hand_landmarks_from_pose",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/modules/hand_landmark:hand_landmark_gpu",
],
)
mediapipe_simple_subgraph(
name = "hand_landmarks_from_pose_cpu",
graph = "hand_landmarks_from_pose_cpu.pbtxt",
register_as = "HandLandmarksFromPoseCpu",
deps = [
":hand_landmarks_from_pose_to_recrop_roi",
":hand_recrop_by_roi_cpu",
":hand_tracking",
":hand_visibility_from_hand_landmarks_from_pose",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/modules/hand_landmark:hand_landmark_cpu",
],
)
mediapipe_simple_subgraph(
name = "hand_landmarks_to_roi",
graph = "hand_landmarks_to_roi.pbtxt",
register_as = "HandLandmarksToRoi",
deps = [
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_recrop_by_roi_gpu",
graph = "hand_recrop_by_roi_gpu.pbtxt",
register_as = "HandRecropByRoiGpu",
deps = [
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/util:alignment_points_to_rects_calculator",
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_recrop_by_roi_cpu",
graph = "hand_recrop_by_roi_cpu.pbtxt",
register_as = "HandRecropByRoiCpu",
deps = [
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator",
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
"//mediapipe/calculators/util:alignment_points_to_rects_calculator",
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
"//mediapipe/calculators/util:landmark_projection_calculator",
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_tracking",
graph = "hand_tracking.pbtxt",
register_as = "HandTracking",
deps = [
":hand_landmarks_to_roi",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator",
],
)
# TODO: parametrize holistic_landmark graph with visibility and make private.
mediapipe_simple_subgraph(
name = "hand_wrist_for_pose",
graph = "hand_wrist_for_pose.pbtxt",
register_as = "HandWristForPose",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:side_packet_to_stream_calculator",
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:set_landmark_visibility_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmarks_left_and_right_gpu",
graph = "hand_landmarks_left_and_right_gpu.pbtxt",
register_as = "HandLandmarksLeftAndRightGpu",
deps = [
":hand_landmarks_from_pose_gpu",
"//mediapipe/calculators/core:split_landmarks_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmarks_left_and_right_cpu",
graph = "hand_landmarks_left_and_right_cpu.pbtxt",
register_as = "HandLandmarksLeftAndRightCpu",
deps = [
":hand_landmarks_from_pose_cpu",
"//mediapipe/calculators/core:split_landmarks_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_landmarks_from_pose_to_recrop_roi",
graph = "hand_landmarks_from_pose_to_recrop_roi.pbtxt",
register_as = "HandLandmarksFromPoseToRecropRoi",
deps = [
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
"//mediapipe/calculators/util:rect_transformation_calculator",
"//mediapipe/modules/holistic_landmark/calculators:hand_detections_from_pose_to_rects_calculator",
],
)
mediapipe_simple_subgraph(
name = "hand_visibility_from_hand_landmarks_from_pose",
graph = "hand_visibility_from_hand_landmarks_from_pose.pbtxt",
register_as = "HandVisibilityFromHandLandmarksFromPose",
deps = [
"//mediapipe/calculators/core:split_landmarks_calculator",
"//mediapipe/calculators/util:landmark_visibility_calculator",
"//mediapipe/calculators/util:thresholding_calculator",
],
)
mediapipe_simple_subgraph(
name = "holistic_landmark_gpu",
graph = "holistic_landmark_gpu.pbtxt",
register_as = "HolisticLandmarkGpu",
visibility = ["//visibility:public"],
deps = [
":face_landmarks_from_pose_gpu",
":hand_landmarks_left_and_right_gpu",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/modules/pose_landmark:pose_landmark_gpu",
],
)
mediapipe_simple_subgraph(
name = "holistic_landmark_cpu",
graph = "holistic_landmark_cpu.pbtxt",
register_as = "HolisticLandmarkCpu",
visibility = ["//visibility:public"],
deps = [
":face_landmarks_from_pose_cpu",
":hand_landmarks_left_and_right_cpu",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/modules/pose_landmark:pose_landmark_cpu",
],
)

View File

@ -1,6 +0,0 @@
# holistic_landmark
Subgraphs|Details
:--- | :---
[`HolisticLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_cpu.pbtxt)| Predicts pose + left/right hand + face landmarks. (CPU input)
[`HolisticLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt)| Predicts pose + left/right hand + face landmarks. (GPU input)

View File

@ -1,63 +0,0 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
licenses(["notice"])
package(default_visibility = ["//visibility:public"])
cc_library(
name = "hand_detections_from_pose_to_rects_calculator",
srcs = ["hand_detections_from_pose_to_rects_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/util:detections_to_rects_calculator",
"//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)
mediapipe_proto_library(
name = "roi_tracking_calculator_proto",
srcs = ["roi_tracking_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)
cc_library(
name = "roi_tracking_calculator",
srcs = ["roi_tracking_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":roi_tracking_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:landmark_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:rectangle",
"@com_google_absl//absl/strings:str_format",
],
alwayslink = 1,
)

View File

@ -1,156 +0,0 @@
#include <cmath>
#include "mediapipe/calculators/util/detections_to_rects_calculator.h"
#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {} // namespace
// Generates a hand ROI based on a hand detection derived from hand-related pose
// landmarks.
//
// Inputs:
// DETECTION - Detection.
// Detection to convert to ROI. Must contain 3 keypoints: the wrist, the
// pinky finger, and the index finger.
//
// IMAGE_SIZE - std::pair<int, int>
// Image width and height.
//
// Outputs:
// NORM_RECT - NormalizedRect.
// ROI based on passed input.
//
// Examples
// node {
// calculator: "HandDetectionsFromPoseToRectsCalculator"
// input_stream: "DETECTION:hand_detection_from_pose"
// input_stream: "IMAGE_SIZE:image_size"
// output_stream: "NORM_RECT:hand_roi_from_pose"
// }
class HandDetectionsFromPoseToRectsCalculator
: public DetectionsToRectsCalculator {
public:
absl::Status Open(CalculatorContext* cc) override;
private:
::absl::Status DetectionToNormalizedRect(const Detection& detection,
const DetectionSpec& detection_spec,
NormalizedRect* rect) override;
absl::Status ComputeRotation(const Detection& detection,
const DetectionSpec& detection_spec,
float* rotation) override;
};
REGISTER_CALCULATOR(HandDetectionsFromPoseToRectsCalculator);
namespace {
constexpr int kWrist = 0;
constexpr int kPinky = 1;
constexpr int kIndex = 2;
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
} // namespace
::absl::Status HandDetectionsFromPoseToRectsCalculator::Open(
CalculatorContext* cc) {
RET_CHECK(cc->Inputs().HasTag(kImageSizeTag))
<< "Image size is required to calculate rotated rect.";
cc->SetOffset(TimestampDiff(0));
target_angle_ = M_PI * 0.5f;
rotate_ = true;
options_ = cc->Options<DetectionsToRectsCalculatorOptions>();
output_zero_rect_for_empty_detections_ =
options_.output_zero_rect_for_empty_detections();
return ::absl::OkStatus();
}
::absl::Status
HandDetectionsFromPoseToRectsCalculator::DetectionToNormalizedRect(
const Detection& detection, const DetectionSpec& detection_spec,
NormalizedRect* rect) {
const auto& location_data = detection.location_data();
const auto& image_size = detection_spec.image_size;
RET_CHECK(image_size) << "Image size is required to calculate rotation";
const float x_wrist =
location_data.relative_keypoints(kWrist).x() * image_size->first;
const float y_wrist =
location_data.relative_keypoints(kWrist).y() * image_size->second;
const float x_index =
location_data.relative_keypoints(kIndex).x() * image_size->first;
const float y_index =
location_data.relative_keypoints(kIndex).y() * image_size->second;
const float x_pinky =
location_data.relative_keypoints(kPinky).x() * image_size->first;
const float y_pinky =
location_data.relative_keypoints(kPinky).y() * image_size->second;
// Estimate middle finger.
const float x_middle = (2.f * x_index + x_pinky) / 3.f;
const float y_middle = (2.f * y_index + y_pinky) / 3.f;
// Crop center as middle finger.
const float center_x = x_middle;
const float center_y = y_middle;
// Bounding box size as double distance from middle finger to wrist.
const float box_size =
std::sqrt((x_middle - x_wrist) * (x_middle - x_wrist) +
(y_middle - y_wrist) * (y_middle - y_wrist)) *
2.0;
// Set resulting bounding box.
rect->set_x_center(center_x / image_size->first);
rect->set_y_center(center_y / image_size->second);
rect->set_width(box_size / image_size->first);
rect->set_height(box_size / image_size->second);
return ::absl::OkStatus();
}
absl::Status HandDetectionsFromPoseToRectsCalculator::ComputeRotation(
const Detection& detection, const DetectionSpec& detection_spec,
float* rotation) {
const auto& location_data = detection.location_data();
const auto& image_size = detection_spec.image_size;
RET_CHECK(image_size) << "Image size is required to calculate rotation";
const float x_wrist =
location_data.relative_keypoints(kWrist).x() * image_size->first;
const float y_wrist =
location_data.relative_keypoints(kWrist).y() * image_size->second;
const float x_index =
location_data.relative_keypoints(kIndex).x() * image_size->first;
const float y_index =
location_data.relative_keypoints(kIndex).y() * image_size->second;
const float x_pinky =
location_data.relative_keypoints(kPinky).x() * image_size->first;
const float y_pinky =
location_data.relative_keypoints(kPinky).y() * image_size->second;
// Estimate middle finger.
const float x_middle = (2.f * x_index + x_pinky) / 3.f;
const float y_middle = (2.f * y_index + y_pinky) / 3.f;
*rotation = NormalizeRadians(
target_angle_ - std::atan2(-(y_middle - y_wrist), x_middle - x_wrist));
return ::absl::OkStatus();
}
} // namespace mediapipe
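
As a quick sanity check of the geometry above, a small self-contained C++ program with made-up pixel keypoints (`NormalizeRadians` is omitted for brevity):

```cpp
#include <cmath>
#include <cstdio>

// Reproduces the middle-finger estimate, box size, and rotation from the
// calculator above for made-up pixel keypoints (NormalizeRadians omitted).
int main() {
  const float x_wrist = 100.f, y_wrist = 200.f;
  const float x_index = 140.f, y_index = 120.f;
  const float x_pinky = 80.f, y_pinky = 130.f;
  // Middle finger is estimated as a 2:1 blend of index and pinky.
  const float x_middle = (2.f * x_index + x_pinky) / 3.f;  // 120.0
  const float y_middle = (2.f * y_index + y_pinky) / 3.f;  // ~123.3
  // Box side is twice the middle-finger-to-wrist distance.
  const float box_size =
      2.f * std::hypot(x_middle - x_wrist, y_middle - y_wrist);
  // Rotation aligns the wrist->middle direction with the 90-degree target.
  const float target_angle = 3.14159265f * 0.5f;
  const float rotation =
      target_angle - std::atan2(-(y_middle - y_wrist), x_middle - x_wrist);
  std::printf("center=(%.1f, %.1f) box=%.1f rotation=%.2f rad\n",
              x_middle, y_middle, box_size, rotation);
  return 0;
}
```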

View File

@ -1,358 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <cstdlib>
#include "absl/strings/str_format.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/rectangle.h"
#include "mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.pb.h"
namespace mediapipe {
namespace {
constexpr char kPrevLandmarksTag[] = "PREV_LANDMARKS";
constexpr char kPrevLandmarksRectTag[] = "PREV_LANDMARKS_RECT";
constexpr char kRecropRectTag[] = "RECROP_RECT";
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
constexpr char kTrackingRectTag[] = "TRACKING_RECT";
// TODO: Use rect rotation.
// Verifies that the Intersection over Union of the previous frame rect and
// the current frame re-crop rect is at least the given threshold.
bool IouRequirementsSatisfied(const NormalizedRect& prev_rect,
const NormalizedRect& recrop_rect,
const std::pair<int, int>& image_size,
const float min_iou) {
auto r1 = Rectangle_f(prev_rect.x_center() * image_size.first,
prev_rect.y_center() * image_size.second,
prev_rect.width() * image_size.first,
prev_rect.height() * image_size.second);
auto r2 = Rectangle_f(recrop_rect.x_center() * image_size.first,
recrop_rect.y_center() * image_size.second,
recrop_rect.width() * image_size.first,
recrop_rect.height() * image_size.second);
const float intersection_area = r1.Intersect(r2).Area();
const float union_area = r1.Area() + r2.Area() - intersection_area;
const float intersection_threshold = union_area * min_iou;
if (intersection_area < intersection_threshold) {
VLOG(1) << absl::StrFormat("Lost tracking: IoU intersection %f < %f",
intersection_area, intersection_threshold);
return false;
}
return true;
}
// Verifies that the current frame re-crop rect rotation/translation/scale
// didn't change much compared to the previous frame rect. Translation and
// scale are normalized by the current frame re-crop rect.
bool RectRequirementsSatisfied(const NormalizedRect& prev_rect,
const NormalizedRect& recrop_rect,
const std::pair<int, int> image_size,
const float rotation_degrees,
const float translation, const float scale) {
// Rotate both rects so that the re-crop rect edges are parallel to the XY
// axes. This allows computing the x/y translation of the previous frame rect
// along the axes of the current frame re-crop rect.
const float rotation = -recrop_rect.rotation();
const float cosa = cos(rotation);
const float sina = sin(rotation);
// Rotate previous frame rect and get its parameters.
const float prev_rect_x = prev_rect.x_center() * image_size.first * cosa -
prev_rect.y_center() * image_size.second * sina;
const float prev_rect_y = prev_rect.x_center() * image_size.first * sina +
prev_rect.y_center() * image_size.second * cosa;
const float prev_rect_width = prev_rect.width() * image_size.first;
const float prev_rect_height = prev_rect.height() * image_size.second;
const float prev_rect_rotation = prev_rect.rotation() / M_PI * 180.f;
// Rotate current frame re-crop rect and get its parameters.
const float recrop_rect_x = recrop_rect.x_center() * image_size.first * cosa -
recrop_rect.y_center() * image_size.second * sina;
const float recrop_rect_y = recrop_rect.x_center() * image_size.first * sina +
recrop_rect.y_center() * image_size.second * cosa;
const float recrop_rect_width = recrop_rect.width() * image_size.first;
const float recrop_rect_height = recrop_rect.height() * image_size.second;
const float recrop_rect_rotation = recrop_rect.rotation() / M_PI * 180.f;
// Rect requirements are satisfied unless one of the checks below fails.
bool satisfied = true;
// Ensure that rotation diff is in [0, 180] range.
float rotation_diff = prev_rect_rotation - recrop_rect_rotation;
if (rotation_diff > 180.f) {
rotation_diff -= 360.f;
}
if (rotation_diff < -180.f) {
rotation_diff += 360.f;
}
rotation_diff = abs(rotation_diff);
if (rotation_diff > rotation_degrees) {
satisfied = false;
VLOG(1) << absl::StrFormat("Lost tracking: rect rotation %f > %f",
rotation_diff, rotation_degrees);
}
const float x_diff = abs(prev_rect_x - recrop_rect_x);
const float x_threshold = recrop_rect_width * translation;
if (x_diff > x_threshold) {
satisfied = false;
VLOG(1) << absl::StrFormat("Lost tracking: rect x translation %f > %f",
x_diff, x_threshold);
}
const float y_diff = abs(prev_rect_y - recrop_rect_y);
const float y_threshold = recrop_rect_height * translation;
if (y_diff > y_threshold) {
satisfied = false;
VLOG(1) << absl::StrFormat("Lost tracking: rect y translation %f > %f",
y_diff, y_threshold);
}
const float width_diff = abs(prev_rect_width - recrop_rect_width);
const float width_threshold = recrop_rect_width * scale;
if (width_diff > width_threshold) {
satisfied = false;
VLOG(1) << absl::StrFormat("Lost tracking: rect width %f > %f", width_diff,
width_threshold);
}
const float height_diff = abs(prev_rect_height - recrop_rect_height);
const float height_threshold = recrop_rect_height * scale;
if (height_diff > height_threshold) {
satisfied = false;
VLOG(1) << absl::StrFormat("Lost tracking: rect height %f > %f",
height_diff, height_threshold);
}
return satisfied;
}
// Verifies that landmarks from the previous frame are within re-crop rectangle
// bounds on the current frame.
bool LandmarksRequirementsSatisfied(const NormalizedLandmarkList& landmarks,
const NormalizedRect& recrop_rect,
const std::pair<int, int> image_size,
const float recrop_rect_margin) {
// Rotate both the re-crop rectangle and the landmarks so that the re-crop
// rectangle edges are parallel to the XY axes. This makes it easy to check
// whether landmarks are within the re-crop rect bounds along its axes.
//
// Rect rotation is specified clockwise. To apply cos/sin functions we
// convert it to counterclockwise.
const float rotation = -recrop_rect.rotation();
const float cosa = cos(rotation);
const float sina = sin(rotation);
// Rotate rect.
const float rect_x = recrop_rect.x_center() * image_size.first * cosa -
recrop_rect.y_center() * image_size.second * sina;
const float rect_y = recrop_rect.x_center() * image_size.first * sina +
recrop_rect.y_center() * image_size.second * cosa;
const float rect_width =
recrop_rect.width() * image_size.first * (1.f + recrop_rect_margin);
const float rect_height =
recrop_rect.height() * image_size.second * (1.f + recrop_rect_margin);
// Get rect bounds.
const float rect_left = rect_x - rect_width * 0.5f;
const float rect_right = rect_x + rect_width * 0.5f;
const float rect_top = rect_y - rect_height * 0.5f;
const float rect_bottom = rect_y + rect_height * 0.5f;
for (int i = 0; i < landmarks.landmark_size(); ++i) {
const auto& landmark = landmarks.landmark(i);
const float x = landmark.x() * image_size.first * cosa -
landmark.y() * image_size.second * sina;
const float y = landmark.x() * image_size.first * sina +
landmark.y() * image_size.second * cosa;
if (!(rect_left < x && x < rect_right && rect_top < y && y < rect_bottom)) {
VLOG(1) << "Lost tracking: landmarks out of re-crop rect";
return false;
}
}
return true;
}
} // namespace
// A calculator to track object rectangle between frames.
//
// The calculator checks that all requirements for tracking are satisfied and,
// if so, uses the rectangle from the previous frame; otherwise it uses the
// current frame re-crop rectangle.
//
// There are several types of tracking requirements that can be configured via
// options:
//   IoU: Verifies that the IoU of the previous frame rectangle and the
//     current frame re-crop rectangle is at least a given threshold.
// Rect parameters: Verifies that rotation/translation/scale of the re-crop
// rectangle on the current frame is close to the rectangle from the
// previous frame within given thresholds.
// Landmarks: Verifies that landmarks from the previous frame are within
// the re-crop rectangle on the current frame.
//
// Inputs:
// PREV_LANDMARKS: Object landmarks from the previous frame.
// PREV_LANDMARKS_RECT: Object rectangle based on the landmarks from the
// previous frame.
// RECROP_RECT: Object re-crop rectangle from the current frame.
// IMAGE_SIZE: Image size to transform normalized coordinates to absolute.
//
// Outputs:
// TRACKING_RECT: Rectangle to use for object prediction on the current frame.
// It will be either object rectangle from the previous frame (if all
// tracking requirements are satisfied) or re-crop rectangle from the
// current frame (if tracking lost the object).
//
// Example config:
// node {
// calculator: "RoiTrackingCalculator"
// input_stream: "PREV_LANDMARKS:prev_hand_landmarks"
// input_stream: "PREV_LANDMARKS_RECT:prev_hand_landmarks_rect"
// input_stream: "RECROP_RECT:hand_recrop_rect"
// input_stream: "IMAGE_SIZE:image_size"
// output_stream: "TRACKING_RECT:hand_tracking_rect"
// options: {
// [mediapipe.RoiTrackingCalculatorOptions.ext] {
// rect_requirements: {
// rotation_degrees: 40.0
// translation: 0.2
// scale: 0.4
// }
// landmarks_requirements: {
// recrop_rect_margin: -0.1
// }
// }
// }
// }
class RoiTrackingCalculator : public CalculatorBase {
public:
static absl::Status GetContract(CalculatorContract* cc);
absl::Status Open(CalculatorContext* cc) override;
absl::Status Process(CalculatorContext* cc) override;
private:
RoiTrackingCalculatorOptions options_;
};
REGISTER_CALCULATOR(RoiTrackingCalculator);
absl::Status RoiTrackingCalculator::GetContract(CalculatorContract* cc) {
cc->Inputs().Tag(kPrevLandmarksTag).Set<NormalizedLandmarkList>();
cc->Inputs().Tag(kPrevLandmarksRectTag).Set<NormalizedRect>();
cc->Inputs().Tag(kRecropRectTag).Set<NormalizedRect>();
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
cc->Outputs().Tag(kTrackingRectTag).Set<NormalizedRect>();
return absl::OkStatus();
}
absl::Status RoiTrackingCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<RoiTrackingCalculatorOptions>();
return absl::OkStatus();
}
absl::Status RoiTrackingCalculator::Process(CalculatorContext* cc) {
// If there is no current frame re-crop rect (i.e. object is not present on
// the current frame) - return empty packet.
if (cc->Inputs().Tag(kRecropRectTag).IsEmpty()) {
return absl::OkStatus();
}
// If there is no previous rect, but there is current re-crop rect - return
// current re-crop rect as is.
if (cc->Inputs().Tag(kPrevLandmarksRectTag).IsEmpty()) {
cc->Outputs()
.Tag(kTrackingRectTag)
.AddPacket(cc->Inputs().Tag(kRecropRectTag).Value());
return absl::OkStatus();
}
// At this point we have both previous rect (which also means we have previous
// landmarks) and current re-crop rect.
const auto& prev_landmarks =
cc->Inputs().Tag(kPrevLandmarksTag).Get<NormalizedLandmarkList>();
const auto& prev_rect =
cc->Inputs().Tag(kPrevLandmarksRectTag).Get<NormalizedRect>();
const auto& recrop_rect =
cc->Inputs().Tag(kRecropRectTag).Get<NormalizedRect>();
const auto& image_size =
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
// Keep tracking unless one of the requirements below is not satisfied.
bool keep_tracking = true;
// If IoU of the previous rect and current re-crop rect is lower than allowed
// threshold - use current re-crop rect.
if (options_.has_iou_requirements() &&
!IouRequirementsSatisfied(prev_rect, recrop_rect, image_size,
options_.iou_requirements().min_iou())) {
keep_tracking = false;
}
// If previous rect and current re-crop rect differ more than it is allowed by
// the augmentations (used during the model training) - use current re-crop
// rect.
if (options_.has_rect_requirements() &&
!RectRequirementsSatisfied(
prev_rect, recrop_rect, image_size,
options_.rect_requirements().rotation_degrees(),
options_.rect_requirements().translation(),
options_.rect_requirements().scale())) {
keep_tracking = false;
}
// If landmarks from the previous frame are not in the current re-crop rect
// (i.e. object moved too fast and using previous frame rect won't cover
// landmarks on the current frame) - use current re-crop rect.
if (options_.has_landmarks_requirements() &&
!LandmarksRequirementsSatisfied(
prev_landmarks, recrop_rect, image_size,
options_.landmarks_requirements().recrop_rect_margin())) {
keep_tracking = false;
}
// If the object didn't move much compared to the previous frame, keep
// tracking it and return the rect from the previous frame; otherwise return
// the re-crop rect from the current frame.
if (keep_tracking) {
cc->Outputs()
.Tag(kTrackingRectTag)
.AddPacket(cc->Inputs().Tag(kPrevLandmarksRectTag).Value());
} else {
cc->Outputs()
.Tag(kTrackingRectTag)
.AddPacket(cc->Inputs().Tag(kRecropRectTag).Value());
VLOG(1) << "Lost tracking: check messages above for details";
}
return absl::OkStatus();
}
} // namespace mediapipe
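
To make the IoU requirement concrete, a self-contained numeric sketch with axis-aligned boxes (rotation ignored, as the TODO in the source notes; the box values are made up):

```cpp
#include <algorithm>
#include <cstdio>

struct Box { float x, y, w, h; };  // Center-based, in pixels.

// Axis-aligned intersection area of two center-based boxes.
float IntersectionArea(const Box& a, const Box& b) {
  const float ix = std::max(0.f, std::min(a.x + a.w / 2, b.x + b.w / 2) -
                                     std::max(a.x - a.w / 2, b.x - b.w / 2));
  const float iy = std::max(0.f, std::min(a.y + a.h / 2, b.y + b.h / 2) -
                                     std::max(a.y - a.h / 2, b.y - b.h / 2));
  return ix * iy;
}

int main() {
  const Box prev{100.f, 100.f, 80.f, 80.f};
  const Box recrop{120.f, 110.f, 80.f, 80.f};
  const float inter = IntersectionArea(prev, recrop);  // 60 * 70 = 4200
  const float uni = prev.w * prev.h + recrop.w * recrop.h - inter;  // 8600
  // Same decision rule as IouRequirementsSatisfied with min_iou = 0.5:
  const bool keep_tracking = inter >= uni * 0.5f;  // 4200 < 4300 -> lost
  std::printf("IoU = %.3f, keep_tracking = %d\n", inter / uni, keep_tracking);
  return 0;
}
```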

View File

@ -1,59 +0,0 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message RoiTrackingCalculatorOptions {
extend CalculatorOptions {
optional RoiTrackingCalculatorOptions ext = 329994630;
}
// Verifies that the Intersection over Union of the previous frame rect and
// the current frame re-crop rect is at least the given threshold.
message IouRequirements {
optional float min_iou = 1 [default = 0.5];
}
// Verifies that the current frame re-crop rect rotation/translation/scale
// didn't change much compared to the previous frame rect.
message RectRequirements {
// Allowed rotation change defined in degrees.
optional float rotation_degrees = 1 [default = 10.0];
// Allowed translation change defined as absolute translation normalized by
// re-crop rectangle size.
optional float translation = 2 [default = 0.1];
// Allowed scale change defined as an absolute size change normalized by
// re-crop rectangle size.
optional float scale = 3 [default = 0.1];
}
// Verifies that landmarks from the previous frame are within re-crop
// rectangle bounds on the current frame.
message LandmarksRequirements {
// Margin to apply to the re-crop rectangle before verifying landmarks.
optional float recrop_rect_margin = 1 [default = 0.0];
}
optional IouRequirements iou_requirements = 1;
optional RectRequirements rect_requirements = 2;
optional LandmarksRequirements landmarks_requirements = 3;
}

View File

@ -1,48 +0,0 @@
# Calculates ROI from detections provided by `face_detection_short_range.tflite`
# model.
type: "FaceDetectionFrontDetectionsToRoi"
# Detected faces. (std::vector<Detection>)
input_stream: "DETECTIONS:detections"
# Image size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Refined (more accurate) ROI to use for face landmarks prediction.
# (NormalizedRect)
output_stream: "ROI:roi"
# Converts the face detection into a rectangle (normalized by image size)
# that encloses the face and is rotated such that the line connecting right side
# of the right eye and left side of the left eye is aligned with the X-axis of
# the rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTIONS:detections"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:raw_roi"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
rotation_vector_start_keypoint_index: 0 # Right eye.
rotation_vector_end_keypoint_index: 1 # Left eye.
rotation_vector_target_angle_degrees: 0
conversion_mode: USE_KEYPOINTS
}
}
}
# Expands and shifts the rectangle that contains the face so that it's likely
# to cover the entire face.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:raw_roi"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 2.0
scale_y: 2.0
shift_y: -0.1
square_long: true
}
}
}
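
The rotation produced by the keypoint-based conversion above reduces to a single `atan2` with a target angle of 0 (eyes horizontal). A hedged standalone C++ sketch of that computation (`NormalizeRadians` omitted; this is not the `DetectionsToRectsCalculator` source):

```cpp
#include <cmath>

// Roll angle that makes the inter-eye line horizontal (target angle 0),
// following the keypoint convention above. Keypoints are normalized and
// scaled by image size; NormalizeRadians is omitted.
float FaceRollRadians(float right_eye_x, float right_eye_y,
                      float left_eye_x, float left_eye_y,
                      int image_w, int image_h) {
  const float dx = (left_eye_x - right_eye_x) * image_w;
  const float dy = (left_eye_y - right_eye_y) * image_h;
  // Image Y grows downward, hence the negation inside atan2.
  return 0.f - std::atan2(-dy, dx);
}
```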

View File

@ -1,82 +0,0 @@
# Predicts face landmarks within an ROI derived from face-related pose
# landmarks.
type: "FaceLandmarksFromPoseCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# behaves as if set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
# Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"
# Debug outputs.
# Face ROI derived from face-related pose landmarks, which defines the search
# region for the face detection model. (NormalizedRect)
output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
# Refined face crop rectangle predicted by face detection model.
# (NormalizedRect)
output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
# Rectangle used to predict face landmarks. (NormalizedRect)
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
# TODO: do not predict face when most of the face landmarks from
# pose are invisible.
# Extracts image size from the input images.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_video"
output_stream: "SIZE:image_size"
}
# Gets ROI for re-crop model from face-related pose landmarks.
node {
calculator: "FaceLandmarksFromPoseToRecropRoi"
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:face_roi_from_pose"
}
# Detects faces within the face ROI calculated from pose landmarks. This is done
# to refine face ROI for further landmark detection as ROI calculated from
# pose landmarks may be inaccurate.
node {
calculator: "FaceDetectionShortRangeByRoiCpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:face_roi_from_pose"
output_stream: "DETECTIONS:face_detections"
}
# Calculates refined face ROI.
node {
calculator: "FaceDetectionFrontDetectionsToRoi"
input_stream: "DETECTIONS:face_detections"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:face_roi_from_detection"
}
# Gets face tracking rectangle (either face rectangle from the previous
# frame or face re-crop rectangle from the current frame) for face prediction.
node {
calculator: "FaceTracking"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
}
# Predicts face landmarks from the tracking rectangle.
node {
calculator: "FaceLandmarkCpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:face_tracking_roi"
input_side_packet: "WITH_ATTENTION:refine_landmarks"
output_stream: "LANDMARKS:face_landmarks"
}
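
The graph above is a four-stage cascade with a one-frame feedback edge. A hedged C++ outline of the per-frame control flow, with placeholder types and trivial stubs standing in for the actual nodes:

```cpp
#include <optional>

// Placeholder types and trivial stubs standing in for the graph's packets
// and nodes; only the control flow below mirrors the graph.
struct Image {};
struct NormalizedRect {};
struct Landmarks {};

NormalizedRect RoiFromPoseLandmarks(const Landmarks&) { return {}; }
std::optional<NormalizedRect> RefineRoiWithFaceDetector(
    const Image&, const NormalizedRect& r) {
  return r;  // Stub: pretend the detector confirmed the pose-derived ROI.
}
NormalizedRect TrackOrRecrop(const std::optional<Landmarks>&,
                             const NormalizedRect& recrop) { return recrop; }
Landmarks PredictFaceLandmarks(const Image&, const NormalizedRect&) {
  return {};
}

// One frame of the cascade: pose ROI -> detector refinement -> tracking
// decision -> landmark prediction, with landmarks looped back for tracking.
Landmarks FaceLandmarksFromPose(const Image& frame,
                                const Landmarks& pose_face_landmarks,
                                std::optional<Landmarks>& prev_landmarks) {
  const NormalizedRect roi_from_pose =
      RoiFromPoseLandmarks(pose_face_landmarks);
  const NormalizedRect refined =
      RefineRoiWithFaceDetector(frame, roi_from_pose).value_or(roi_from_pose);
  const NormalizedRect tracking_roi = TrackOrRecrop(prev_landmarks, refined);
  const Landmarks landmarks = PredictFaceLandmarks(frame, tracking_roi);
  prev_landmarks = landmarks;  // Back edge for the next frame's tracking.
  return landmarks;
}
```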

View File

@ -1,82 +0,0 @@
# Predicts face landmarks within an ROI derived from face-related pose
# landmarks.
type: "FaceLandmarksFromPoseGpu"
# GPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# behaves as if set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
# Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"
# Debug outputs.
# Face ROI derived from face-related pose landmarks, which defines the search
# region for the face detection model. (NormalizedRect)
output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
# Refined face crop rectangle predicted by face detection model.
# (NormalizedRect)
output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
# Rectangle used to predict face landmarks. (NormalizedRect)
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
# TODO: do not predict face when most of the face landmarks from
# pose are invisible.
# Extracts image size from the input images.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "SIZE:image_size"
}
# Gets ROI for re-crop model from face-related pose landmarks.
node {
calculator: "FaceLandmarksFromPoseToRecropRoi"
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:face_roi_from_pose"
}
# Detects faces within the face ROI calculated from pose landmarks. This is done
# to refine face ROI for further landmark detection as ROI calculated from
# pose landmarks may be inaccurate.
node {
calculator: "FaceDetectionShortRangeByRoiGpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:face_roi_from_pose"
output_stream: "DETECTIONS:face_detections"
}
# Calculates refined face ROI.
node {
calculator: "FaceDetectionFrontDetectionsToRoi"
input_stream: "DETECTIONS:face_detections"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:face_roi_from_detection"
}
# Gets face tracking rectangle (either face rectangle from the previous
# frame or face re-crop rectangle from the current frame) for face prediction.
node {
calculator: "FaceTracking"
input_stream: "LANDMARKS:face_landmarks"
input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
}
# Predicts face landmarks from the tracking rectangle.
node {
calculator: "FaceLandmarkGpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:face_tracking_roi"
input_side_packet: "WITH_ATTENTION:refine_landmarks"
output_stream: "LANDMARKS:face_landmarks"
}

View File

@ -1,51 +0,0 @@
# Converts face-related pose landmarks to re-crop ROI.
type: "FaceLandmarksFromPoseToRecropRoi"
# Face-related pose landmarks (there should be 11 of them).
# (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
# Image size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI to be used for face detection. (NormalizedRect)
output_stream: "ROI:roi"
# Converts face-related pose landmarks to a detection that tightly encloses all
# landmarks.
node {
calculator: "LandmarksToDetectionCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks_from_pose"
output_stream: "DETECTION:pose_face_detection"
}
# Converts face detection to a normalized face rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:pose_face_detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:pose_face_rect"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
rotation_vector_start_keypoint_index: 5 # Right eye.
rotation_vector_end_keypoint_index: 2 # Left eye.
rotation_vector_target_angle_degrees: 0
}
}
}
# Expands face rectangle so that it becomes big enough for face detector to
# localize it accurately.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:pose_face_rect"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 3.0
scale_y: 3.0
square_long: true
}
}
}

View File

@ -1,53 +0,0 @@
# Converts face landmarks to ROI.
type: "FaceLandmarksToRoi"
# Face landmarks. (NormalizedLandmarkList)
input_stream: "LANDMARKS:face_landmarks"
# Image size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI according to landmarks. (NormalizedRect)
output_stream: "ROI:roi"
# Converts face landmarks to a detection that tightly encloses all landmarks.
node {
calculator: "LandmarksToDetectionCalculator"
input_stream: "NORM_LANDMARKS:face_landmarks"
output_stream: "DETECTION:face_detection"
}
# Converts the face detection into a rectangle (normalized by image size)
# that encloses the face and is rotated such that the line connecting the
# right side of the left eye and the left side of the right eye is aligned
# with the X-axis of the rectangle.
node {
calculator: "DetectionsToRectsCalculator"
input_stream: "DETECTION:face_detection"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:face_landmarks_rect_tight"
options: {
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
rotation_vector_start_keypoint_index: 33 # Right side of left eye.
rotation_vector_end_keypoint_index: 263 # Left side of right eye.
rotation_vector_target_angle_degrees: 0
}
}
}
# Expands the face rectangle so that it's likely to contain the face even with
# some motion.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:face_landmarks_rect_tight"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 1.5
scale_y: 1.5
      # TODO: remove `square_long` where appropriate
square_long: true
}
}
}

View File

@ -1,61 +0,0 @@
# Decides what ROI to use for face landmarks prediction: either previous frame
# landmarks ROI or the current frame face re-crop ROI.
type: "FaceTracking"
# Face landmarks from the current frame. They will be memorized for tracking on
# the next frame. (NormalizedLandmarkList)
input_stream: "LANDMARKS:face_landmarks"
# Face re-crop ROI from the current frame. (NormalizedRect)
input_stream: "FACE_RECROP_ROI:face_recrop_roi"
# Image size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# Face tracking ROI: either the face landmarks ROI from the previous frame,
# if the face is still tracked, or the face re-crop ROI from the current frame
# otherwise. (NormalizedRect)
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
# Keeps track of face landmarks from the previous frame.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image_size"
input_stream: "LOOP:face_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_face_landmarks"
}
# Gets face landmarks rect.
node {
calculator: "FaceLandmarksToRoi"
input_stream: "LANDMARKS:prev_face_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:prev_face_landmarks_rect"
}
# Checks that all requirements for tracking are satisfied and, if so, uses the
# face rectangle from the previous frame. Otherwise uses the face re-crop
# rectangle from the current frame.
node {
calculator: "RoiTrackingCalculator"
input_stream: "PREV_LANDMARKS:prev_face_landmarks"
input_stream: "PREV_LANDMARKS_RECT:prev_face_landmarks_rect"
input_stream: "RECROP_RECT:face_recrop_roi"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "TRACKING_RECT:face_tracking_roi"
options: {
[mediapipe.RoiTrackingCalculatorOptions.ext] {
rect_requirements: {
rotation_degrees: 15.0
translation: 0.1
scale: 0.3
}
landmarks_requirements: {
recrop_rect_margin: -0.2
}
}
}
}
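
The `PreviousLoopbackCalculator` node above is effectively a one-frame delay line. A minimal C++ sketch of that semantics (an illustrative template, not the MediaPipe API):

```cpp
#include <optional>

// One-frame delay with an empty result on the very first frame, loosely
// mirroring PreviousLoopbackCalculator (which also emits a timestamp bound
// update to jump start the loop).
template <typename T>
class PreviousLoopback {
 public:
  // Per frame: returns the value cached on the previous frame, then caches
  // the current LOOP input for the next frame.
  std::optional<T> Step(const T& loop_input) {
    std::optional<T> prev = cached_;
    cached_ = loop_input;
    return prev;
  }
 private:
  std::optional<T> cached_;
};
```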

View File

@ -1,78 +0,0 @@
# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.
type: "HandLandmarksFromPoseCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Hand-related pose landmarks in [wrist, pinky, index] order.
# (NormalizedLandmarkList)
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
# Hand landmarks. (NormalizedLandmarkList)
output_stream: "HAND_LANDMARKS:hand_landmarks"
# Debug outputs.
# Hand ROI derived from hand-related landmarks, which defines the search region
# for the hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
# Rectangle used to predict hand landmarks. (NormalizedRect)
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
# Gets hand visibility.
node {
calculator: "HandVisibilityFromHandLandmarksFromPose"
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
output_stream: "VISIBILITY:hand_visibility"
}
# Drops hand-related pose landmarks if the pose wrist is not visible. This
# prevents hand landmarks from being predicted on the current frame.
node {
calculator: "GateCalculator"
input_stream: "hand_landmarks_from_pose"
input_stream: "ALLOW:hand_visibility"
output_stream: "ensured_hand_landmarks_from_pose"
}
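
Together, the two nodes above amount to a visibility gate: the landmarks pass through only when the wrist score clears a threshold. A minimal sketch, assuming visibility scores in [0, 1] and an illustrative threshold (the actual value lives inside the HandVisibility* subgraph, not in this file):

```rust
// Visibility gate: forward the hand-related pose landmarks only when the
// wrist landmark is confident enough.

#[derive(Clone, Copy)]
struct Landmark { x: f32, y: f32, z: f32, visibility: f32 }

const WRIST: usize = 0; // [wrist, pinky, index] ordering from this graph
const MIN_VISIBILITY: f32 = 0.1; // assumed threshold, not from the graph

fn gate_hand_landmarks(landmarks: &[Landmark]) -> Option<&[Landmark]> {
    let allow = landmarks.get(WRIST)?.visibility > MIN_VISIBILITY;
    allow.then_some(landmarks)
}
```
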
# Extracts image size from the input images.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE:input_video"
output_stream: "SIZE:image_size"
}
# Gets ROI for re-crop model from hand-related pose landmarks.
node {
calculator: "HandLandmarksFromPoseToRecropRoi"
input_stream: "HAND_LANDMARKS_FROM_POSE:ensured_hand_landmarks_from_pose"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:hand_roi_from_pose"
}
# Predicts hand re-crop rectangle on the current frame.
node {
calculator: "HandRecropByRoiCpu",
input_stream: "IMAGE:input_video"
input_stream: "ROI:hand_roi_from_pose"
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
}
# Gets hand tracking rectangle (either hand rectangle from the previous
# frame or hand re-crop rectangle from the current frame) for hand prediction.
node {
calculator: "HandTracking"
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
}
# Predicts hand landmarks from the tracking rectangle.
node {
calculator: "HandLandmarkCpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:hand_tracking_roi"
output_stream: "LANDMARKS:hand_landmarks"
}

View File

@ -1,78 +0,0 @@
# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.
type: "HandLandmarksFromPoseGpu"
# GPU image. (GpuBuffer)
input_stream: "IMAGE:input_video"
# Hand-related pose landmarks in [wrist, pinky, index] order.
# (NormalizedLandmarkList)
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
# Hand landmarks. (NormalizedLandmarkList)
output_stream: "HAND_LANDMARKS:hand_landmarks"
# Debug outputs.
# Hand ROI derived from hand-related landmarks, which defines the search region
# for the hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
# Rectangle used to predict hand landmarks. (NormalizedRect)
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
# Gets hand visibility.
node {
calculator: "HandVisibilityFromHandLandmarksFromPose"
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
output_stream: "VISIBILITY:hand_visibility"
}
# Drops hand-related pose landmarks if the pose wrist is not visible. This
# prevents hand landmarks from being predicted on the current frame.
node {
calculator: "GateCalculator"
input_stream: "hand_landmarks_from_pose"
input_stream: "ALLOW:hand_visibility"
output_stream: "ensured_hand_landmarks_from_pose"
}
# Extracts image size from the input images.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "SIZE:image_size"
}
# Gets ROI for re-crop model from hand-related pose landmarks.
node {
calculator: "HandLandmarksFromPoseToRecropRoi"
input_stream: "HAND_LANDMARKS_FROM_POSE:ensured_hand_landmarks_from_pose"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:hand_roi_from_pose"
}
# Predicts hand re-crop rectangle on the current frame.
node {
calculator: "HandRecropByRoiGpu",
input_stream: "IMAGE:input_video"
input_stream: "ROI:hand_roi_from_pose"
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
}
# Gets hand tracking rectangle (either hand rectangle from the previous
# frame or hand re-crop rectangle from the current frame) for hand prediction.
node {
calculator: "HandTracking"
input_stream: "LANDMARKS:hand_landmarks"
input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
}
# Predicts hand landmarks from the tracking rectangle.
node {
calculator: "HandLandmarkGpu"
input_stream: "IMAGE:input_video"
input_stream: "ROI:hand_tracking_roi"
output_stream: "LANDMARKS:hand_landmarks"
}

View File

@ -1,45 +0,0 @@
# Converts hand-related pose landmarks to hand re-crop ROI.
type: "HandLandmarksFromPoseToRecropRoi"
# Hand-related pose landmarks in [wrist, pinky, index] order.
# (NormalizedLandmarkList)
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
# Image size (width & height). (std::pair<int, int>)
input_stream: "IMAGE_SIZE:image_size"
# ROI to be used for re-crop prediction. (NormalizedRect)
output_stream: "ROI:roi"
# Converts hand-related pose landmarks to a detection that tightly encloses all
# of them.
node {
calculator: "LandmarksToDetectionCalculator"
input_stream: "NORM_LANDMARKS:hand_landmarks_from_pose"
output_stream: "DETECTION:hand_detection_from_pose"
}
# Converts hand detection to a normalized hand rectangle.
node {
calculator: "HandDetectionsFromPoseToRectsCalculator"
input_stream: "DETECTION:hand_detection_from_pose"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "NORM_RECT:hand_roi_from_pose"
}
# Expands the palm rectangle so that it becomes big enough for the hand
# re-crop model to localize the hand accurately.
node {
calculator: "RectTransformationCalculator"
input_stream: "NORM_RECT:hand_roi_from_pose"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "roi"
options: {
[mediapipe.RectTransformationCalculatorOptions.ext] {
scale_x: 2.7
scale_y: 2.7
shift_y: -0.1
square_long: true
}
}
}
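
Unlike the earlier face ROI expansion, this one also shifts the crop: `shift_y: -0.1` moves the rect center by 10% of the rect height along the rect's own (rotated) Y-axis before the 2.7x expansion. A hedged sketch of that shift step, assuming the shift is expressed in the rect's local frame:

```rust
// Assumed semantics of the shift option: offset the center in the rect's
// local frame, then rotate the offset into image coordinates.

#[derive(Clone, Copy, Debug)]
struct NormalizedRect { x_center: f32, y_center: f32, width: f32, height: f32, rotation: f32 }

fn shift_rect(rect: NormalizedRect, shift_x: f32, shift_y: f32) -> NormalizedRect {
    let (sin, cos) = rect.rotation.sin_cos();
    // Shift relative to the rect's own size.
    let dx = shift_x * rect.width;
    let dy = shift_y * rect.height;
    NormalizedRect {
        x_center: rect.x_center + dx * cos - dy * sin,
        y_center: rect.y_center + dx * sin + dy * cos,
        ..rect
    }
}
```
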

View File

@ -1,76 +0,0 @@
# Predicts left and right hand landmarks within corresponding ROIs derived from
# hand-related pose landmarks.
type: "HandLandmarksLeftAndRightCpu"
# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList)
input_stream: "POSE_LANDMARKS:pose_landmarks"
# Left hand landmarks. (NormalizedLandmarkList)
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# Right hand landmarks. (NormalizedLandmarkList)
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# Debug outputs.
output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi"
output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi"
# Extracts left-hand-related landmarks from the pose landmarks.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "pose_landmarks"
output_stream: "left_hand_landmarks_from_pose"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 15 end: 16 }
ranges: { begin: 17 end: 18 }
ranges: { begin: 19 end: 20 }
combine_outputs: true
}
}
}
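
The ranges above pick pose landmarks 15 (left wrist), 17 (left pinky) and 19 (left index) and concatenate them into one [wrist, pinky, index] list, which is exactly the ordering the hand subgraph expects. A small sketch of that range-based split:

```rust
// Gathers the given half-open ranges from a landmark list and concatenates
// them, mirroring the ranges/combine_outputs options. Panics if a range is
// out of bounds, which a real calculator would report as an error instead.

fn split_and_combine<T: Clone>(landmarks: &[T], ranges: &[(usize, usize)]) -> Vec<T> {
    ranges
        .iter()
        .flat_map(|&(begin, end)| landmarks[begin..end].iter().cloned())
        .collect()
}

// Usage, assuming 33 pose landmarks in BlazePose ordering:
// let left_hand = split_and_combine(&pose_landmarks, &[(15, 16), (17, 18), (19, 20)]);
```
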
# Predicts left hand landmarks.
node {
calculator: "HandLandmarksFromPoseCpu"
input_stream: "IMAGE:input_video"
input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose"
output_stream: "HAND_LANDMARKS:left_hand_landmarks"
# Debug outputs.
output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi"
}
# Extracts right-hand-related landmarks from the pose landmarks.
node {
calculator: "SplitNormalizedLandmarkListCalculator"
input_stream: "pose_landmarks"
output_stream: "right_hand_landmarks_from_pose"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 16 end: 17 }
ranges: { begin: 18 end: 19 }
ranges: { begin: 20 end: 21 }
combine_outputs: true
}
}
}
# Predicts right hand landmarks.
node {
calculator: "HandLandmarksFromPoseCpu"
input_stream: "IMAGE:input_video"
input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose"
output_stream: "HAND_LANDMARKS:right_hand_landmarks"
# Debug outputs.
output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi"
}

Some files were not shown because too many files have changed in this diff