add mediapipe modules to fix examples
This commit is contained in:
parent
76b6f2bbdf
commit
5c809f341e
|
@ -62,6 +62,7 @@ mod examples {
|
|||
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
|
||||
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
|
||||
|
||||
println!("processing");
|
||||
let data = detector.process(&flip_frame);
|
||||
println!("received {} landmarks", data.len());
|
||||
|
||||
|
@ -109,6 +110,7 @@ mod examples {
|
|||
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
|
||||
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
|
||||
|
||||
println!("processing");
|
||||
let data = detector.process(&flip_frame);
|
||||
println!("received {} landmarks", data.len());
|
||||
|
||||
|
@ -156,6 +158,7 @@ mod examples {
|
|||
imgproc::cvt_color(&raw_frame, &mut rgb_frame, imgproc::COLOR_BGR2RGB, 0)?;
|
||||
opencv::core::flip(&rgb_frame, &mut flip_frame, 1)?; // horizontal
|
||||
|
||||
println!("processing");
|
||||
let data = detector.process(&rgb_frame);
|
||||
println!("received {} landmarks", data.len());
|
||||
|
||||
|
@ -179,6 +182,6 @@ mod examples {
|
|||
|
||||
fn main() {
|
||||
// examples::pose_estimation().unwrap()
|
||||
examples::hand_tracking().unwrap()
|
||||
// examples::face_mesh().unwrap()
|
||||
// examples::hand_tracking().unwrap()
|
||||
examples::face_mesh().unwrap()
|
||||
}
|
||||
|
|
18
mediapipe/modules/README.md
Normal file
18
mediapipe/modules/README.md
Normal file
|
@ -0,0 +1,18 @@
|
|||
# Modules
|
||||
|
||||
Each module (represented as a subfolder) provides subgraphs and corresponding resources (e.g. tflite models) to perform domain-specific tasks (e.g. detect faces, detect face landmarks).
|
||||
|
||||
*Modules listed below are already used in some of `mediapipe/graphs` and more graphs are being migrated to use existing and upcoming modules.*
|
||||
|
||||
| Module | Description |
|
||||
| :--- | :--- |
|
||||
| [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. |
|
||||
| [`face_geometry`](face_geometry/README.md) | Subgraphs to extract face geometry. |
|
||||
| [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. |
|
||||
| [`hand_landmark`](hand_landmark/README.md) | Subgraphs to detect and track hand landmarks. |
|
||||
| [`holistic_landmark`](holistic_landmark/README.md) | Subgraphs to detect and track holistic pose which consists of pose, face and hand landmarks. |
|
||||
| [`iris_landmark`](iris_landmark/README.md) | Subgraphs to detect iris landmarks. |
|
||||
| [`palm_detection`](palm_detection/README.md) | Subgraphs to detect palms/hands. |
|
||||
| [`pose_detection`](pose_detection/README.md) | Subgraphs to detect poses. |
|
||||
| [`pose_landmark`](pose_landmark/README.md) | Subgraphs to detect and track pose landmarks. |
|
||||
| [`objectron`](objectron/README.md) | Subgraphs to detect and track 3D objects. |
|
150
mediapipe/modules/face_detection/BUILD
Normal file
150
mediapipe/modules/face_detection/BUILD
Normal file
|
@ -0,0 +1,150 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_short_range_by_roi_cpu",
|
||||
graph = "face_detection_short_range_by_roi_cpu.pbtxt",
|
||||
register_as = "FaceDetectionShortRangeByRoiCpu",
|
||||
deps = [
|
||||
":face_detection_short_range_common",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_short_range_by_roi_gpu",
|
||||
graph = "face_detection_short_range_by_roi_gpu.pbtxt",
|
||||
register_as = "FaceDetectionShortRangeByRoiGpu",
|
||||
deps = [
|
||||
":face_detection_short_range_common",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_short_range_cpu",
|
||||
graph = "face_detection_short_range_cpu.pbtxt",
|
||||
register_as = "FaceDetectionShortRangeCpu",
|
||||
deps = [
|
||||
":face_detection_short_range_common",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_short_range_gpu",
|
||||
graph = "face_detection_short_range_gpu.pbtxt",
|
||||
register_as = "FaceDetectionShortRangeGpu",
|
||||
deps = [
|
||||
":face_detection_short_range_common",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_short_range_common",
|
||||
graph = "face_detection_short_range_common.pbtxt",
|
||||
register_as = "FaceDetectionShortRangeCommon",
|
||||
deps = [
|
||||
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||
"//mediapipe/calculators/util:detection_projection_calculator",
|
||||
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_full_range_cpu",
|
||||
graph = "face_detection_full_range_cpu.pbtxt",
|
||||
register_as = "FaceDetectionFullRangeCpu",
|
||||
deps = [
|
||||
":face_detection_full_range_common",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_full_range_gpu",
|
||||
graph = "face_detection_full_range_gpu.pbtxt",
|
||||
register_as = "FaceDetectionFullRangeGpu",
|
||||
deps = [
|
||||
":face_detection_full_range_common",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/util:to_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_full_range_common",
|
||||
graph = "face_detection_full_range_common.pbtxt",
|
||||
register_as = "FaceDetectionFullRangeCommon",
|
||||
deps = [
|
||||
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
|
||||
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
|
||||
"//mediapipe/calculators/util:detection_projection_calculator",
|
||||
"//mediapipe/calculators/util:non_max_suppression_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_short_range_image",
|
||||
graph = "face_detection_short_range_image.pbtxt",
|
||||
register_as = "FaceDetectionShortRangeImage",
|
||||
deps = [
|
||||
":face_detection_short_range_common",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_full_range_image",
|
||||
graph = "face_detection_full_range_image.pbtxt",
|
||||
register_as = "FaceDetectionFullRangeImage",
|
||||
deps = [
|
||||
":face_detection_full_range_common",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
exports_files(
|
||||
srcs = [
|
||||
"face_detection_full_range.tflite",
|
||||
"face_detection_full_range_sparse.tflite",
|
||||
"face_detection_short_range.tflite",
|
||||
],
|
||||
)
|
8
mediapipe/modules/face_detection/README.md
Normal file
8
mediapipe/modules/face_detection/README.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
# face_detection
|
||||
|
||||
Subgraphs|Details
|
||||
:--- | :---
|
||||
[`FaceDetectionFullRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_cpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (CPU input, and inference is executed on CPU.)
|
||||
[`FaceDetectionFullRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_full_range_gpu.pbtxt)| Detects faces. Works best for faces within 5 meters from the camera. (GPU input, and inference is executed on GPU.)
|
||||
[`FaceDetectionShortRangeCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_cpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (CPU input, and inference is executed on CPU.)
|
||||
[`FaceDetectionShortRangeGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_short_range_gpu.pbtxt)| Detects faces. Works best for faces within 2 meters from the camera. (GPU input, and inference is executed on GPU.)
|
BIN
mediapipe/modules/face_detection/face_detection_full_range.tflite
Executable file
BIN
mediapipe/modules/face_detection/face_detection_full_range.tflite
Executable file
Binary file not shown.
|
@ -0,0 +1,102 @@
|
|||
# MediaPipe graph performing common processing to detect faces using
|
||||
# face_detection_full_range_sparse.tflite model, currently consisting of tensor
|
||||
# post processing.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionFullRangeCommon"
|
||||
# input_stream: "TENSORS:detection_tensors"
|
||||
# input_stream: "MATRIX:transform_matrix"
|
||||
# output_stream: "DETECTIONS:detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeCommon"
|
||||
|
||||
# Detection tensors. (std::vector<Tensor>)
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
|
||||
# A 4x4 row-major-order matrix that maps a point represented in the detection
|
||||
# tensors to a desired coordinate system, e.g., in the original input image
|
||||
# before scaling/cropping. (std::array<float, 16>)
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
options: {
|
||||
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
|
||||
num_layers: 1
|
||||
min_scale: 0.1484375
|
||||
max_scale: 0.75
|
||||
input_size_height: 192
|
||||
input_size_width: 192
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 4
|
||||
aspect_ratios: 1.0
|
||||
fixed_anchor_size: true
|
||||
interpolated_scale_aspect_ratio: 0.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:unfiltered_detections"
|
||||
options: {
|
||||
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
|
||||
num_classes: 1
|
||||
num_boxes: 2304
|
||||
num_coords: 16
|
||||
box_coord_offset: 0
|
||||
keypoint_coord_offset: 4
|
||||
num_keypoints: 6
|
||||
num_values_per_keypoint: 2
|
||||
sigmoid_score: true
|
||||
score_clipping_thresh: 100.0
|
||||
reverse_output_order: true
|
||||
x_scale: 192.0
|
||||
y_scale: 192.0
|
||||
h_scale: 192.0
|
||||
w_scale: 192.0
|
||||
min_score_thresh: 0.6
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "unfiltered_detections"
|
||||
output_stream: "filtered_detections"
|
||||
options: {
|
||||
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
|
||||
min_suppression_threshold: 0.3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
algorithm: WEIGHTED
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the detections from input tensor to the corresponding locations on
|
||||
# the original image (input to the graph).
|
||||
node {
|
||||
calculator: "DetectionProjectionCalculator"
|
||||
input_stream: "DETECTIONS:filtered_detections"
|
||||
input_stream: "PROJECTION_MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
|
||||
# CPU.)
|
||||
#
|
||||
# It is required that "face_detection_full_range_sparse.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionFullRangeCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionFullRangeCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Converts the input CPU image (ImageFrame) to the multi-backend image type
|
||||
# (Image).
|
||||
node: {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_CPU:image"
|
||||
output_stream: "IMAGE:multi_backend_image"
|
||||
}
|
||||
|
||||
# Transforms the input image into a 192x192 tensor while keeping the aspect
|
||||
# ratio (what is expected by the corresponding face detection model), resulting
|
||||
# in potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 192
|
||||
output_tensor_height: 192
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
|
||||
delegate {
|
||||
xnnpack {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionFullRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
# MediaPipe graph to detect faces. (GPU input, and inference is executed on
|
||||
# GPU.)
|
||||
#
|
||||
# It is required that "face_detection_full_range_sparse.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionFullRangeGpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionFullRangeGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
|
||||
# (Image).
|
||||
node: {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "IMAGE:multi_backend_image"
|
||||
}
|
||||
|
||||
# Transforms the input image into a 128x128 tensor while keeping the aspect
|
||||
# ratio (what is expected by the corresponding face detection model), resulting
|
||||
# in potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 192
|
||||
output_tensor_height: 192
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
|
||||
#
|
||||
delegate: { gpu { use_advanced_gpu_api: true } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionFullRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
# MediaPipe graph to detect faces. (GPU/CPU input, and inference is executed on
|
||||
# GPU.)
|
||||
#
|
||||
# It is required that "face_detection_full_range_sparse.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
|
||||
# path during execution.
|
||||
|
||||
type: "FaceDetectionFullRangeImage"
|
||||
|
||||
# Image. (Image)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "FINISHED:detections"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_image"
|
||||
options: {
|
||||
[mediapipe.FlowLimiterCalculatorOptions.ext] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Transforms the input image into a 128x128 tensor while keeping the aspect
|
||||
# ratio (what is expected by the corresponding face detection model), resulting
|
||||
# in potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 192
|
||||
output_tensor_height: 192
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
gpu_origin: CONVENTIONAL
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
# TODO: Use GraphOptions to modify the delegate field to be
|
||||
# `delegate { xnnpack {} }` for the CPU only use cases.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite"
|
||||
#
|
||||
delegate: { gpu { use_advanced_gpu_api: true } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionFullRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
BIN
mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite
Executable file
BIN
mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite
Executable file
Binary file not shown.
BIN
mediapipe/modules/face_detection/face_detection_short_range.tflite
Executable file
BIN
mediapipe/modules/face_detection/face_detection_short_range.tflite
Executable file
Binary file not shown.
|
@ -0,0 +1,83 @@
|
|||
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
|
||||
# CPU.)
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionShortRangeByRoiCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_stream: "ROI:roi"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeByRoiCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# ROI (region of interest) within the given image where faces should be
|
||||
# detected. (NormalizedRect)
|
||||
input_stream: "ROI:roi"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Converts the input CPU image (ImageFrame) to the multi-backend image type
|
||||
# (Image).
|
||||
node: {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_CPU:image"
|
||||
output_stream: "IMAGE:multi_backend_image"
|
||||
}
|
||||
|
||||
# Transforms specified region of image into 128x128 tensor keeping aspect ratio
|
||||
# (padding tensor if needed).
|
||||
node {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 128
|
||||
output_tensor_height: 128
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
delegate { xnnpack {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
|
||||
# CPU.)
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionShortRangeByRoiGpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_stream: "ROI:roi"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeByRoiGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# ROI (region of interest) within the given image where faces should be
|
||||
# detected. (NormalizedRect)
|
||||
input_stream: "ROI:roi"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
|
||||
# (Image).
|
||||
node: {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "IMAGE:multi_backend_image"
|
||||
}
|
||||
|
||||
# Transforms specified region of image into 128x128 tensor keeping aspect ratio
|
||||
# (padding tensor if needed).
|
||||
node {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 128
|
||||
output_tensor_height: 128
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
# MediaPipe graph performing common processing to detect faces, currently
|
||||
# consisting of tensor post processing.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionShortRangeCommon"
|
||||
# input_stream: "TENSORS:detection_tensors"
|
||||
# input_stream: "MATRIX:transform_matrix"
|
||||
# output_stream: "DETECTIONS:detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeCommon"
|
||||
|
||||
# Detection tensors. (std::vector<Tensor>)
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
|
||||
# A 4x4 row-major-order matrix that maps a point represented in the detection
|
||||
# tensors to a desired coordinate system, e.g., in the original input image
|
||||
# before scaling/cropping. (std::array<float, 16>)
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Generates a single side packet containing a vector of SSD anchors based on
|
||||
# the specification in the options.
|
||||
node {
|
||||
calculator: "SsdAnchorsCalculator"
|
||||
output_side_packet: "anchors"
|
||||
options: {
|
||||
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
|
||||
num_layers: 4
|
||||
min_scale: 0.1484375
|
||||
max_scale: 0.75
|
||||
input_size_height: 128
|
||||
input_size_width: 128
|
||||
anchor_offset_x: 0.5
|
||||
anchor_offset_y: 0.5
|
||||
strides: 8
|
||||
strides: 16
|
||||
strides: 16
|
||||
strides: 16
|
||||
aspect_ratios: 1.0
|
||||
fixed_anchor_size: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
|
||||
# the SSD anchors and the specification in the options, into a vector of
|
||||
# detections. Each detection describes a detected object.
|
||||
node {
|
||||
calculator: "TensorsToDetectionsCalculator"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_side_packet: "ANCHORS:anchors"
|
||||
output_stream: "DETECTIONS:unfiltered_detections"
|
||||
options: {
|
||||
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
|
||||
num_classes: 1
|
||||
num_boxes: 896
|
||||
num_coords: 16
|
||||
box_coord_offset: 0
|
||||
keypoint_coord_offset: 4
|
||||
num_keypoints: 6
|
||||
num_values_per_keypoint: 2
|
||||
sigmoid_score: true
|
||||
score_clipping_thresh: 100.0
|
||||
reverse_output_order: true
|
||||
x_scale: 128.0
|
||||
y_scale: 128.0
|
||||
h_scale: 128.0
|
||||
w_scale: 128.0
|
||||
min_score_thresh: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs non-max suppression to remove excessive detections.
|
||||
node {
|
||||
calculator: "NonMaxSuppressionCalculator"
|
||||
input_stream: "unfiltered_detections"
|
||||
output_stream: "filtered_detections"
|
||||
options: {
|
||||
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
|
||||
min_suppression_threshold: 0.3
|
||||
overlap_type: INTERSECTION_OVER_UNION
|
||||
algorithm: WEIGHTED
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the detections from input tensor to the corresponding locations on
|
||||
# the original image (input to the graph).
|
||||
node {
|
||||
calculator: "DetectionProjectionCalculator"
|
||||
input_stream: "DETECTIONS:filtered_detections"
|
||||
input_stream: "PROJECTION_MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
|
||||
# CPU.)
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionShortRangeCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Converts the input CPU image (ImageFrame) to the multi-backend image type
|
||||
# (Image).
|
||||
node: {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_CPU:image"
|
||||
output_stream: "IMAGE:multi_backend_image"
|
||||
}
|
||||
|
||||
# Transforms the input image into a 128x128 tensor while keeping the aspect
|
||||
# ratio (what is expected by the corresponding face detection model), resulting
|
||||
# in potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 128
|
||||
output_tensor_height: 128
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
delegate { xnnpack {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
# MediaPipe graph to detect faces. (CPU input, and inference is executed on
|
||||
# CPU.)
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionShortRangeGpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
# Converts the input GPU image (GpuBuffer) to the multi-backend image type
|
||||
# (Image).
|
||||
node: {
|
||||
calculator: "ToImageCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "IMAGE:multi_backend_image"
|
||||
}
|
||||
|
||||
# Transforms the input image into a 128x128 tensor while keeping the aspect
|
||||
# ratio (what is expected by the corresponding face detection model), resulting
|
||||
# in potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:multi_backend_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 128
|
||||
output_tensor_height: 128
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
# MediaPipe graph to detect faces. (GPU/CPU input, and inference is executed on
|
||||
# GPU.)
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceDetectionShortRangeCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# output_stream: "DETECTIONS:face_detections"
|
||||
# }
|
||||
|
||||
type: "FaceDetectionShortRangeCpu"
|
||||
|
||||
# Image. (Image)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
# NOTE: there will not be an output packet in the DETECTIONS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "DETECTIONS:detections"
|
||||
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "FINISHED:detections"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_image"
|
||||
options: {
|
||||
[mediapipe.FlowLimiterCalculatorOptions.ext] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Transforms the input image into a 128x128 tensor while keeping the aspect
|
||||
# ratio (what is expected by the corresponding face detection model), resulting
|
||||
# in potential letterboxing in the transformed image.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
output_stream: "MATRIX:transform_matrix"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 128
|
||||
output_tensor_height: 128
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: -1.0
|
||||
max: 1.0
|
||||
}
|
||||
border_mode: BORDER_ZERO
|
||||
gpu_origin: CONVENTIONAL
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
# TODO: Use GraphOptions to modify the delegate field to be
|
||||
# `delegate { xnnpack {} }` for the CPU only use cases.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
output_stream: "TENSORS:detection_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
model_path: "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
|
||||
#
|
||||
delegate: { gpu { use_advanced_gpu_api: true } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Performs tensor post processing to generate face detections.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCommon"
|
||||
input_stream: "TENSORS:detection_tensors"
|
||||
input_stream: "MATRIX:transform_matrix"
|
||||
output_stream: "DETECTIONS:detections"
|
||||
}
|
137
mediapipe/modules/face_geometry/BUILD
Normal file
137
mediapipe/modules/face_geometry/BUILD
Normal file
|
@ -0,0 +1,137 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_geometry",
|
||||
graph = "face_geometry.pbtxt",
|
||||
register_as = "FaceGeometry",
|
||||
deps = [
|
||||
":geometry_pipeline_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_geometry_from_detection",
|
||||
graph = "face_geometry_from_detection.pbtxt",
|
||||
register_as = "FaceGeometryFromDetection",
|
||||
deps = [
|
||||
":geometry_pipeline_calculator",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/util:detection_to_landmarks_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_geometry_from_landmarks",
|
||||
graph = "face_geometry_from_landmarks.pbtxt",
|
||||
register_as = "FaceGeometryFromLandmarks",
|
||||
deps = [
|
||||
":geometry_pipeline_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "effect_renderer_calculator_proto",
|
||||
srcs = ["effect_renderer_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "effect_renderer_calculator",
|
||||
srcs = ["effect_renderer_calculator.cc"],
|
||||
deps = [
|
||||
":effect_renderer_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/framework/formats:image_frame_opencv",
|
||||
"//mediapipe/framework/port:opencv_core",
|
||||
"//mediapipe/framework/port:opencv_imgcodecs",
|
||||
"//mediapipe/framework/port:opencv_imgproc",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/gpu:gl_calculator_helper",
|
||||
"//mediapipe/gpu:gpu_buffer",
|
||||
"//mediapipe/modules/face_geometry/libs:effect_renderer",
|
||||
"//mediapipe/modules/face_geometry/libs:validation_utils",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
|
||||
"//mediapipe/util:resource_util",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "env_generator_calculator_proto",
|
||||
srcs = ["env_generator_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "env_generator_calculator",
|
||||
srcs = ["env_generator_calculator.cc"],
|
||||
deps = [
|
||||
":env_generator_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/modules/face_geometry/libs:validation_utils",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "geometry_pipeline_calculator_proto",
|
||||
srcs = ["geometry_pipeline_calculator.proto"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "geometry_pipeline_calculator",
|
||||
srcs = ["geometry_pipeline_calculator.cc"],
|
||||
deps = [
|
||||
":geometry_pipeline_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/modules/face_geometry/libs:geometry_pipeline",
|
||||
"//mediapipe/modules/face_geometry/libs:validation_utils",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto",
|
||||
"//mediapipe/util:resource_util",
|
||||
"@com_google_absl//absl/memory",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
20
mediapipe/modules/face_geometry/README.md
Normal file
20
mediapipe/modules/face_geometry/README.md
Normal file
|
@ -0,0 +1,20 @@
|
|||
# face_geometry
|
||||
|
||||
Protos|Details
|
||||
:--- | :---
|
||||
[`face_geometry.Environment`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/environment.proto)| Describes an environment; includes the camera frame origin point location as well as virtual camera parameters.
|
||||
[`face_geometry.GeometryPipelineMetadata`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.proto)| Describes metadata needed to estimate face geometry based on the face landmark module result.
|
||||
[`face_geometry.FaceGeometry`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/face_geometry.proto)| Describes geometry data for a single face; includes a face mesh surface and a face pose in a given environment.
|
||||
[`face_geometry.Mesh3d`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/protos/mesh_3d.proto)| Describes a 3D mesh surface.
|
||||
|
||||
Calculators|Details
|
||||
:--- | :---
|
||||
[`FaceGeometryEnvGeneratorCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/env_generator_calculator.cc)| Generates an environment that describes a virtual scene.
|
||||
[`FaceGeometryPipelineCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc)| Extracts face geometry for multiple faces from a vector of landmark lists.
|
||||
[`FaceGeometryEffectRendererCalculator`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/effect_renderer_calculator.cc)| Renders a face effect.
|
||||
|
||||
Subgraphs|Details
|
||||
:--- | :---
|
||||
[`FaceGeometryFromDetection`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_detection.pbtxt)| Extracts geometry from face detection for multiple faces.
|
||||
[`FaceGeometryFromLandmarks`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry_from_landmarks.pbtxt)| Extracts geometry from face landmarks for multiple faces.
|
||||
[`FaceGeometry`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_geometry/face_geometry.pbtxt)| Extracts geometry from face landmarks for multiple faces. Deprecated, please use `FaceGeometryFromLandmarks` in the new code.
|
59
mediapipe/modules/face_geometry/data/BUILD
Normal file
59
mediapipe/modules/face_geometry/data/BUILD
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework:encode_binary_proto.bzl", "encode_binary_proto")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
encode_binary_proto(
|
||||
name = "geometry_pipeline_metadata_detection",
|
||||
input = "geometry_pipeline_metadata_detection.pbtxt",
|
||||
message_type = "mediapipe.face_geometry.GeometryPipelineMetadata",
|
||||
output = "geometry_pipeline_metadata_detection.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto",
|
||||
],
|
||||
)
|
||||
|
||||
encode_binary_proto(
|
||||
name = "geometry_pipeline_metadata_landmarks",
|
||||
input = "geometry_pipeline_metadata_landmarks.pbtxt",
|
||||
message_type = "mediapipe.face_geometry.GeometryPipelineMetadata",
|
||||
output = "geometry_pipeline_metadata_landmarks.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto",
|
||||
],
|
||||
)
|
||||
|
||||
# For backward-compatibility reasons, generate `geometry_pipeline_metadata.binarypb` from
|
||||
# the `geometry_pipeline_metadata_landmarks.pbtxt` definition.
|
||||
encode_binary_proto(
|
||||
name = "geometry_pipeline_metadata",
|
||||
input = "geometry_pipeline_metadata_landmarks.pbtxt",
|
||||
message_type = "mediapipe.face_geometry.GeometryPipelineMetadata",
|
||||
output = "geometry_pipeline_metadata.binarypb",
|
||||
deps = [
|
||||
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_proto",
|
||||
],
|
||||
)
|
||||
|
||||
# These canonical face model files are not meant to be used in runtime, but rather for asset
|
||||
# creation and/or reference.
|
||||
exports_files([
|
||||
"canonical_face_model.fbx",
|
||||
"canonical_face_model.obj",
|
||||
"canonical_face_model_uv_visualization.png",
|
||||
])
|
BIN
mediapipe/modules/face_geometry/data/canonical_face_model.fbx
Normal file
BIN
mediapipe/modules/face_geometry/data/canonical_face_model.fbx
Normal file
Binary file not shown.
1834
mediapipe/modules/face_geometry/data/canonical_face_model.obj
Normal file
1834
mediapipe/modules/face_geometry/data/canonical_face_model.obj
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
After Width: | Height: | Size: 731 KiB |
|
@ -0,0 +1,78 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
input_source: FACE_DETECTION_PIPELINE
|
||||
procrustes_landmark_basis { landmark_id: 0 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 1 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 2 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 3 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 4 weight: 1.0 }
|
||||
procrustes_landmark_basis { landmark_id: 5 weight: 1.0 }
|
||||
# NOTE: the triangular topology of the face meshes is only useful when derived
|
||||
# from the 468 face landmarks, not from the 6 face detection landmarks
|
||||
# (keypoints). The former don't cover the entire face and this mesh is
|
||||
# defined here only to comply with the API. It should be considered as
|
||||
# a placeholder and/or for debugging purposes.
|
||||
#
|
||||
# Use the face geometry derived from the face detection landmarks
|
||||
# (keypoints) for the face pose transformation matrix, not the mesh.
|
||||
canonical_mesh: {
|
||||
vertex_type: VERTEX_PT
|
||||
primitive_type: TRIANGLE
|
||||
vertex_buffer: -3.1511454582214355
|
||||
vertex_buffer: 2.6246179342269897
|
||||
vertex_buffer: 3.4656630754470825
|
||||
vertex_buffer: 0.349575996398926
|
||||
vertex_buffer: 0.38137748837470997
|
||||
vertex_buffer: 3.1511454582214355
|
||||
vertex_buffer: 2.6246179342269897
|
||||
vertex_buffer: 3.4656630754470825
|
||||
vertex_buffer: 0.650443494319916
|
||||
vertex_buffer: 0.38137999176979054
|
||||
vertex_buffer: 0.0
|
||||
vertex_buffer: -1.126865029335022
|
||||
vertex_buffer: 7.475604057312012
|
||||
vertex_buffer: 0.500025987625122
|
||||
vertex_buffer: 0.547487020492554
|
||||
vertex_buffer: 0.0
|
||||
vertex_buffer: -4.304508209228516
|
||||
vertex_buffer: 4.162498950958252
|
||||
vertex_buffer: 0.499989986419678
|
||||
vertex_buffer: 0.694203019142151
|
||||
vertex_buffer: -7.664182186126709
|
||||
vertex_buffer: 0.673132002353668
|
||||
vertex_buffer: -2.435867071151733
|
||||
vertex_buffer: 0.007561000064015
|
||||
vertex_buffer: 0.480777025222778
|
||||
vertex_buffer: 7.664182186126709
|
||||
vertex_buffer: 0.673132002353668
|
||||
vertex_buffer: -2.435867071151733
|
||||
vertex_buffer: 0.992439985275269
|
||||
vertex_buffer: 0.480777025222778
|
||||
index_buffer: 0
|
||||
index_buffer: 1
|
||||
index_buffer: 2
|
||||
index_buffer: 1
|
||||
index_buffer: 5
|
||||
index_buffer: 2
|
||||
index_buffer: 4
|
||||
index_buffer: 0
|
||||
index_buffer: 2
|
||||
index_buffer: 4
|
||||
index_buffer: 2
|
||||
index_buffer: 3
|
||||
index_buffer: 2
|
||||
index_buffer: 5
|
||||
index_buffer: 3
|
||||
}
|
File diff suppressed because it is too large
Load Diff
284
mediapipe/modules/face_geometry/effect_renderer_calculator.cc
Normal file
284
mediapipe/modules/face_geometry/effect_renderer_calculator.cc
Normal file
|
@ -0,0 +1,284 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/formats/image_frame_opencv.h"
|
||||
#include "mediapipe/framework/port/opencv_core_inc.h" // NOTYPO
|
||||
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h" // NOTYPO
|
||||
#include "mediapipe/framework/port/opencv_imgproc_inc.h" // NOTYPO
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/gpu/gl_calculator_helper.h"
|
||||
#include "mediapipe/gpu/gpu_buffer.h"
|
||||
#include "mediapipe/modules/face_geometry/effect_renderer_calculator.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/effect_renderer.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
|
||||
#include "mediapipe/util/resource_util.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
|
||||
static constexpr char kImageGpuTag[] = "IMAGE_GPU";
|
||||
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
|
||||
|
||||
// A calculator that renders a visual effect for multiple faces.
|
||||
//
|
||||
// Inputs:
|
||||
// IMAGE_GPU (`GpuBuffer`, required):
|
||||
// A buffer containing input image.
|
||||
//
|
||||
// MULTI_FACE_GEOMETRY (`std::vector<face_geometry::FaceGeometry>`, optional):
|
||||
// A vector of face geometry data.
|
||||
//
|
||||
// If absent, the input GPU buffer is copied over into the output GPU buffer
|
||||
// without any effect being rendered.
|
||||
//
|
||||
// Input side packets:
|
||||
// ENVIRONMENT (`face_geometry::Environment`, required)
|
||||
// Describes an environment; includes the camera frame origin point location
|
||||
// as well as virtual camera parameters.
|
||||
//
|
||||
// Output:
|
||||
// IMAGE_GPU (`GpuBuffer`, required):
|
||||
// A buffer with a visual effect being rendered for multiple faces.
|
||||
//
|
||||
// Options:
|
||||
// effect_texture_path (`string`, required):
|
||||
// Defines a path for the visual effect texture file. The effect texture is
|
||||
// later rendered on top of the effect mesh.
|
||||
//
|
||||
// The texture file format must be supported by the OpenCV image decoder. It
|
||||
// must also define either an RGB or an RGBA texture.
|
||||
//
|
||||
// effect_mesh_3d_path (`string`, optional):
|
||||
// Defines a path for the visual effect mesh 3D file. The effect mesh is
|
||||
// later "attached" to the face and is driven by the face pose
|
||||
// transformation matrix.
|
||||
//
|
||||
// The mesh 3D file format must be the binary `face_geometry.Mesh3d` proto.
|
||||
//
|
||||
// If is not present, the runtime face mesh will be used as the effect mesh
|
||||
// - this mode is handy for facepaint effects.
|
||||
//
|
||||
class EffectRendererCalculator : public CalculatorBase {
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc) {
|
||||
MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc))
|
||||
<< "Failed to update contract for the GPU helper!";
|
||||
|
||||
cc->InputSidePackets()
|
||||
.Tag(kEnvironmentTag)
|
||||
.Set<face_geometry::Environment>();
|
||||
cc->Inputs().Tag(kImageGpuTag).Set<GpuBuffer>();
|
||||
cc->Inputs()
|
||||
.Tag(kMultiFaceGeometryTag)
|
||||
.Set<std::vector<face_geometry::FaceGeometry>>();
|
||||
cc->Outputs().Tag(kImageGpuTag).Set<GpuBuffer>();
|
||||
|
||||
return mediapipe::GlCalculatorHelper::UpdateContract(cc);
|
||||
}
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(mediapipe::TimestampDiff(0));
|
||||
|
||||
MP_RETURN_IF_ERROR(gpu_helper_.Open(cc))
|
||||
<< "Failed to open the GPU helper!";
|
||||
return gpu_helper_.RunInGlContext([&]() -> absl::Status {
|
||||
const auto& options =
|
||||
cc->Options<FaceGeometryEffectRendererCalculatorOptions>();
|
||||
|
||||
const auto& environment = cc->InputSidePackets()
|
||||
.Tag(kEnvironmentTag)
|
||||
.Get<face_geometry::Environment>();
|
||||
|
||||
MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment))
|
||||
<< "Invalid environment!";
|
||||
|
||||
absl::optional<face_geometry::Mesh3d> effect_mesh_3d;
|
||||
if (options.has_effect_mesh_3d_path()) {
|
||||
ASSIGN_OR_RETURN(effect_mesh_3d,
|
||||
ReadMesh3dFromFile(options.effect_mesh_3d_path()),
|
||||
_ << "Failed to read the effect 3D mesh from file!");
|
||||
|
||||
MP_RETURN_IF_ERROR(face_geometry::ValidateMesh3d(*effect_mesh_3d))
|
||||
<< "Invalid effect 3D mesh!";
|
||||
}
|
||||
|
||||
ASSIGN_OR_RETURN(ImageFrame effect_texture,
|
||||
ReadTextureFromFile(options.effect_texture_path()),
|
||||
_ << "Failed to read the effect texture from file!");
|
||||
|
||||
ASSIGN_OR_RETURN(effect_renderer_,
|
||||
CreateEffectRenderer(environment, effect_mesh_3d,
|
||||
std::move(effect_texture)),
|
||||
_ << "Failed to create the effect renderer!");
|
||||
|
||||
return absl::OkStatus();
|
||||
});
|
||||
}
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override {
|
||||
// The `IMAGE_GPU` stream is required to have a non-empty packet. In case
|
||||
// this requirement is not met, there's nothing to be processed at the
|
||||
// current timestamp.
|
||||
if (cc->Inputs().Tag(kImageGpuTag).IsEmpty()) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
return gpu_helper_.RunInGlContext([this, cc]() -> absl::Status {
|
||||
const auto& input_gpu_buffer =
|
||||
cc->Inputs().Tag(kImageGpuTag).Get<GpuBuffer>();
|
||||
|
||||
GlTexture input_gl_texture =
|
||||
gpu_helper_.CreateSourceTexture(input_gpu_buffer);
|
||||
|
||||
GlTexture output_gl_texture = gpu_helper_.CreateDestinationTexture(
|
||||
input_gl_texture.width(), input_gl_texture.height());
|
||||
|
||||
std::vector<face_geometry::FaceGeometry> empty_multi_face_geometry;
|
||||
const auto& multi_face_geometry =
|
||||
cc->Inputs().Tag(kMultiFaceGeometryTag).IsEmpty()
|
||||
? empty_multi_face_geometry
|
||||
: cc->Inputs()
|
||||
.Tag(kMultiFaceGeometryTag)
|
||||
.Get<std::vector<face_geometry::FaceGeometry>>();
|
||||
|
||||
// Validate input multi face geometry data.
|
||||
for (const face_geometry::FaceGeometry& face_geometry :
|
||||
multi_face_geometry) {
|
||||
MP_RETURN_IF_ERROR(face_geometry::ValidateFaceGeometry(face_geometry))
|
||||
<< "Invalid face geometry!";
|
||||
}
|
||||
|
||||
MP_RETURN_IF_ERROR(effect_renderer_->RenderEffect(
|
||||
multi_face_geometry, input_gl_texture.width(),
|
||||
input_gl_texture.height(), input_gl_texture.target(),
|
||||
input_gl_texture.name(), output_gl_texture.target(),
|
||||
output_gl_texture.name()))
|
||||
<< "Failed to render the effect!";
|
||||
|
||||
std::unique_ptr<GpuBuffer> output_gpu_buffer =
|
||||
output_gl_texture.GetFrame<GpuBuffer>();
|
||||
|
||||
cc->Outputs()
|
||||
.Tag(kImageGpuTag)
|
||||
.AddPacket(mediapipe::Adopt<GpuBuffer>(output_gpu_buffer.release())
|
||||
.At(cc->InputTimestamp()));
|
||||
|
||||
output_gl_texture.Release();
|
||||
input_gl_texture.Release();
|
||||
|
||||
return absl::OkStatus();
|
||||
});
|
||||
}
|
||||
|
||||
~EffectRendererCalculator() {
|
||||
gpu_helper_.RunInGlContext([this]() { effect_renderer_.reset(); });
|
||||
}
|
||||
|
||||
private:
|
||||
// Loads the effect texture from `texture_path` and returns it as an
// RGB/RGBA `ImageFrame`.
//
// The raw blob is decoded with OpenCV (so any OpenCV-supported image
// format works), then the channel order is swapped from OpenCV's BGR(A)
// layout into the RGB(A) layout expected downstream.
static absl::StatusOr<ImageFrame> ReadTextureFromFile(
    const std::string& texture_path) {
  ASSIGN_OR_RETURN(std::string texture_blob,
                   ReadContentBlobFromFile(texture_path),
                   _ << "Failed to read texture blob from file!");

  // Hand the raw bytes to the OpenCV decoder to finish reading the texture.
  std::vector<char> encoded_bytes(texture_blob.begin(), texture_blob.end());
  cv::Mat decoded_mat = cv::imdecode(encoded_bytes, cv::IMREAD_UNCHANGED);

  RET_CHECK(decoded_mat.type() == CV_8UC3 || decoded_mat.type() == CV_8UC4)
      << "Texture must have `char` as the underlying type and "
         "must have either 3 or 4 channels!";

  // Map the decoded channel count onto an image format, converting from
  // OpenCV's BGR(A) channel order to RGB(A) along the way. The earlier
  // RET_CHECK guarantees 3 or 4 channels; the final branch is defensive.
  ImageFormat::Format image_format = ImageFormat::UNKNOWN;
  cv::Mat rgb_mat;
  const int num_channels = decoded_mat.channels();
  if (num_channels == 3) {
    image_format = ImageFormat::SRGB;
    cv::cvtColor(decoded_mat, rgb_mat, cv::COLOR_BGR2RGB);
  } else if (num_channels == 4) {
    image_format = ImageFormat::SRGBA;
    cv::cvtColor(decoded_mat, rgb_mat, cv::COLOR_BGRA2RGBA);
  } else {
    RET_CHECK_FAIL() << "Unexpected number of channels; expected 3 or 4, got "
                     << num_channels << "!";
  }

  // Copy the converted pixels into an ImageFrame aligned for GL upload.
  ImageFrame output_image_frame(image_format, rgb_mat.size().width,
                                rgb_mat.size().height,
                                ImageFrame::kGlDefaultAlignmentBoundary);
  rgb_mat.copyTo(formats::MatView(&output_image_frame));

  return output_image_frame;
}
|
||||
|
||||
// Loads a `face_geometry::Mesh3d` proto from `mesh_3d_path`.
//
// The file must contain the binary (wire-format) serialization of the
// `Mesh3d` message; read and parse failures are surfaced as status errors.
static absl::StatusOr<face_geometry::Mesh3d> ReadMesh3dFromFile(
    const std::string& mesh_3d_path) {
  ASSIGN_OR_RETURN(std::string mesh_3d_blob,
                   ReadContentBlobFromFile(mesh_3d_path),
                   _ << "Failed to read mesh 3D blob from file!");

  face_geometry::Mesh3d mesh_3d;
  RET_CHECK(mesh_3d.ParseFromString(mesh_3d_blob))
      << "Failed to parse a mesh 3D proto from a binary blob!";

  return mesh_3d;
}
|
||||
|
||||
// Reads the entire contents of the resource identified by `unresolved_path`
// into a string.
//
// The path is first resolved through MediaPipe's resource system
// (`PathToResourceAsFile`), which maps logical asset paths onto
// platform-specific locations, and the resolved file is then slurped with
// `GetResourceContents`.
static absl::StatusOr<std::string> ReadContentBlobFromFile(
    const std::string& unresolved_path) {
  ASSIGN_OR_RETURN(std::string resolved_path,
                   mediapipe::PathToResourceAsFile(unresolved_path),
                   _ << "Failed to resolve path! Path = " << unresolved_path);

  std::string blob;
  MP_RETURN_IF_ERROR(mediapipe::GetResourceContents(resolved_path, &blob))
      << "Failed to read content blob! Resolved path = " << resolved_path;

  return blob;
}
|
||||
|
||||
mediapipe::GlCalculatorHelper gpu_helper_;
|
||||
std::unique_ptr<face_geometry::EffectRenderer> effect_renderer_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
using FaceGeometryEffectRendererCalculator = EffectRendererCalculator;
|
||||
|
||||
REGISTER_CALCULATOR(FaceGeometryEffectRendererCalculator);
|
||||
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator_options.proto";
|
||||
|
||||
// Options for the `FaceGeometryEffectRendererCalculator`.
message FaceGeometryEffectRendererCalculatorOptions {
  extend CalculatorOptions {
    optional FaceGeometryEffectRendererCalculatorOptions ext = 323693808;
  }

  // Defines a path for the visual effect texture file. The effect texture is
  // later rendered on top of the effect mesh.
  //
  // Please be aware about the difference between the CPU texture memory layout
  // and the GPU texture sampler coordinate space. This renderer follows
  // conventions discussed here: https://open.gl/textures
  //
  // The texture file format must be supported by the OpenCV image decoder. It
  // must also define either an RGB or an RGBA texture.
  optional string effect_texture_path = 1;

  // Defines a path for the visual effect mesh 3D file. The effect mesh is later
  // "attached" to the face and is driven by the face pose transformation
  // matrix.
  //
  // The mesh 3D file format must be the binary `face_system.Mesh3d` proto.
  // NOTE(review): elsewhere in this module the mesh type lives in the
  // `face_geometry` package — confirm whether `face_system` here is a typo.
  //
  // If not present, the runtime face mesh will be used as the effect mesh
  // - this mode is handy for facepaint effects.
  optional string effect_mesh_3d_path = 2;
}
|
81
mediapipe/modules/face_geometry/env_generator_calculator.cc
Normal file
81
mediapipe/modules/face_geometry/env_generator_calculator.cc
Normal file
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/modules/face_geometry/env_generator_calculator.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
|
||||
|
||||
// A calculator that generates an environment, which describes a virtual scene.
|
||||
//
|
||||
// Output side packets:
|
||||
// ENVIRONMENT (`face_geometry::Environment`, required)
|
||||
// Describes an environment; includes the camera frame origin point location
|
||||
// as well as virtual camera parameters.
|
||||
//
|
||||
// Options:
|
||||
// environment (`face_geometry.Environment`, required):
|
||||
// Defines an environment to be packed as the output side packet.
|
||||
//
|
||||
// Must be valid (for details, please refer to the proto message definition
|
||||
// comments and/or `modules/face_geometry/libs/validation_utils.h/cc`)
|
||||
//
|
||||
class EnvGeneratorCalculator : public CalculatorBase {
 public:
  // Declares the calculator's single output: the ENVIRONMENT side packet.
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->OutputSidePackets()
        .Tag(kEnvironmentTag)
        .Set<face_geometry::Environment>();
    return absl::OkStatus();
  }

  // Reads the environment from the calculator options, validates it once,
  // and publishes it as the output side packet. All real work happens here;
  // Process() is a no-op.
  absl::Status Open(CalculatorContext* cc) override {
    // Propagate input timestamps to outputs unchanged.
    cc->SetOffset(mediapipe::TimestampDiff(0));

    const face_geometry::Environment& environment =
        cc->Options<FaceGeometryEnvGeneratorCalculatorOptions>().environment();

    // Validate up front so downstream consumers can rely on a well-formed
    // environment.
    MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment))
        << "Invalid environment!";

    cc->OutputSidePackets()
        .Tag(kEnvironmentTag)
        .Set(mediapipe::MakePacket<face_geometry::Environment>(environment));

    return absl::OkStatus();
  }

  // No per-packet work: the environment was emitted once in Open().
  absl::Status Process(CalculatorContext* cc) override {
    return absl::OkStatus();
  }

  absl::Status Close(CalculatorContext* cc) override {
    return absl::OkStatus();
  }
};
|
||||
|
||||
} // namespace
|
||||
|
||||
using FaceGeometryEnvGeneratorCalculator = EnvGeneratorCalculator;
|
||||
|
||||
REGISTER_CALCULATOR(FaceGeometryEnvGeneratorCalculator);
|
||||
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator_options.proto";
|
||||
import "mediapipe/modules/face_geometry/protos/environment.proto";
|
||||
|
||||
// Options for the `FaceGeometryEnvGeneratorCalculator`.
message FaceGeometryEnvGeneratorCalculatorOptions {
  extend CalculatorOptions {
    optional FaceGeometryEnvGeneratorCalculatorOptions ext = 323693810;
  }

  // Defines an environment to be packed as the output side packet.
  //
  // Must be valid (for details, please refer to the proto message definition
  // comments and/or `modules/face_geometry/libs/validation_utils.h/cc`)
  optional face_geometry.Environment environment = 1;
}
|
48
mediapipe/modules/face_geometry/face_geometry.pbtxt
Normal file
48
mediapipe/modules/face_geometry/face_geometry.pbtxt
Normal file
|
@ -0,0 +1,48 @@
|
|||
# MediaPipe graph to extract geometry from face landmarks for multiple faces.
|
||||
#
|
||||
# It is required that "geometry_pipeline_metadata.binarypb" is available at
|
||||
# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata.binarypb"
|
||||
# path during execution.
|
||||
#
|
||||
# This is a deprecated subgraph kept for backward-compatibility reasons. Please,
|
||||
# be explicit and use the `FaceGeometryFromLandmarks` subgraph in the new code
|
||||
# to enable the same runtime behaviour.
|
||||
|
||||
type: "FaceGeometry"
|
||||
|
||||
# The size of the input frame. The first element of the pair is the frame width;
|
||||
# the other one is the frame height.
|
||||
#
|
||||
# The face landmarks should have been detected on a frame with the same
|
||||
# ratio. If used as-is, the resulting face geometry visualization should be
|
||||
# happening on a frame with the same ratio as well.
|
||||
#
|
||||
# (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a list of face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Environment that describes the current virtual scene.
|
||||
# (face_geometry::Environment)
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
|
||||
# A list of geometry data for each detected face.
|
||||
# (std::vector<face_geometry::FaceGeometry>)
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
|
||||
# Extracts face geometry for multiple faces from a vector of face landmark
|
||||
# lists.
|
||||
node {
|
||||
calculator: "FaceGeometryPipelineCalculator"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
options: {
|
||||
[mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
|
||||
metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata.binarypb"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
# MediaPipe graph to extract geometry from face detection for multiple faces.
|
||||
#
|
||||
# It is required that "geometry_pipeline_metadata_detection.binarypb" is
|
||||
# available at
|
||||
# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.binarypb"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceGeometryFromDetection"
|
||||
# input_stream: "IMAGE_SIZE:image_size"
|
||||
# input_stream: "MULTI_FACE_DETECTION:multi_face_detection"
|
||||
# input_side_packet: "ENVIRONMENT:environment"
|
||||
# output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
# }
|
||||
|
||||
type: "FaceGeometryFromDetection"
|
||||
|
||||
# The size of the input frame. The first element of the pair is the frame width;
|
||||
# the other one is the frame height.
|
||||
#
|
||||
# The face landmarks should have been detected on a frame with the same
|
||||
# ratio. If used as-is, the resulting face geometry visualization should be
|
||||
# happening on a frame with the same ratio as well.
|
||||
#
|
||||
# (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a detection.
|
||||
# (std::vector<DETECTION>)
|
||||
input_stream: "MULTI_FACE_DETECTION:multi_face_detection"
|
||||
|
||||
# Environment that describes the current virtual scene.
|
||||
# (face_geometry::Environment)
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
|
||||
# A list of geometry data for each detected face.
|
||||
# (std::vector<face_geometry::FaceGeometry>)
|
||||
#
|
||||
# NOTE: the triangular topology of the face meshes is only useful when derived
|
||||
# from the 468 face landmarks, not from the 6 face detection landmarks
|
||||
# (keypoints). The former don't cover the entire face and this mesh is
|
||||
# defined here only to comply with the API. It should be considered as
|
||||
# a placeholder and/or for debugging purposes.
|
||||
#
|
||||
# Use the face geometry derived from the face detection landmarks
|
||||
# (keypoints) for the face pose transformation matrix, not the mesh.
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
|
||||
# Begin iterating over a vector of the face detections.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:multi_face_detection"
|
||||
output_stream: "ITEM:face_detection"
|
||||
output_stream: "BATCH_END:detection_timestamp"
|
||||
}
|
||||
|
||||
# Extracts face detection keypoints as a normalized landmarks.
|
||||
node {
|
||||
calculator: "DetectionToLandmarksCalculator"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
||||
# End iterating over a vector of the face detections and receive a vector of
|
||||
# face landmark lists as a result.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:face_landmarks"
|
||||
input_stream: "BATCH_END:detection_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks"
|
||||
}
|
||||
|
||||
# Extracts face geometry for multiple faces from a vector of face detection
|
||||
# landmark lists.
|
||||
node {
|
||||
calculator: "FaceGeometryPipelineCalculator"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
options: {
|
||||
[mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
|
||||
metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_detection.binarypb"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
# MediaPipe graph to extract geometry from face landmarks for multiple faces.
|
||||
#
|
||||
# It is required that "geometry_pipeline_metadata_from_landmark.binarypb" is
|
||||
# available at
|
||||
# "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_from_landmarks.binarypb"
|
||||
# path during execution.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceGeometryFromLandmarks"
|
||||
# input_stream: "IMAGE_SIZE:image_size"
|
||||
# input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
|
||||
# input_side_packet: "ENVIRONMENT:environment"
|
||||
# output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
# }
|
||||
|
||||
type: "FaceGeometryFromLandmarks"
|
||||
|
||||
# The size of the input frame. The first element of the pair is the frame width;
|
||||
# the other one is the frame height.
|
||||
#
|
||||
# The face landmarks should have been detected on a frame with the same
|
||||
# ratio. If used as-is, the resulting face geometry visualization should be
|
||||
# happening on a frame with the same ratio as well.
|
||||
#
|
||||
# (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a list of face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Environment that describes the current virtual scene.
|
||||
# (face_geometry::Environment)
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
|
||||
# A list of geometry data for each detected face.
|
||||
# (std::vector<face_geometry::FaceGeometry>)
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
|
||||
# Extracts face geometry for multiple faces from a vector of face landmark
|
||||
# lists.
|
||||
node {
|
||||
calculator: "FaceGeometryPipelineCalculator"
|
||||
input_side_packet: "ENVIRONMENT:environment"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
input_stream: "MULTI_FACE_LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "MULTI_FACE_GEOMETRY:multi_face_geometry"
|
||||
options: {
|
||||
[mediapipe.FaceGeometryPipelineCalculatorOptions.ext] {
|
||||
metadata_path: "mediapipe/modules/face_geometry/data/geometry_pipeline_metadata_landmarks.binarypb"
|
||||
}
|
||||
}
|
||||
}
|
197
mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc
Normal file
197
mediapipe/modules/face_geometry/geometry_pipeline_calculator.cc
Normal file
|
@ -0,0 +1,197 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/modules/face_geometry/geometry_pipeline_calculator.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
|
||||
#include "mediapipe/util/resource_util.h"
|
||||
|
||||
namespace mediapipe {
|
||||
namespace {
|
||||
|
||||
static constexpr char kEnvironmentTag[] = "ENVIRONMENT";
|
||||
static constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||
static constexpr char kMultiFaceGeometryTag[] = "MULTI_FACE_GEOMETRY";
|
||||
static constexpr char kMultiFaceLandmarksTag[] = "MULTI_FACE_LANDMARKS";
|
||||
|
||||
// A calculator that renders a visual effect for multiple faces.
|
||||
//
|
||||
// Inputs:
|
||||
// IMAGE_SIZE (`std::pair<int, int>`, required):
|
||||
// The size of the current frame. The first element of the pair is the frame
|
||||
// width; the other one is the frame height.
|
||||
//
|
||||
// The face landmarks should have been detected on a frame with the same
|
||||
// ratio. If used as-is, the resulting face geometry visualization should be
|
||||
// happening on a frame with the same ratio as well.
|
||||
//
|
||||
// MULTI_FACE_LANDMARKS (`std::vector<NormalizedLandmarkList>`, required):
|
||||
// A vector of face landmark lists.
|
||||
//
|
||||
// Input side packets:
|
||||
// ENVIRONMENT (`face_geometry::Environment`, required)
|
||||
// Describes an environment; includes the camera frame origin point location
|
||||
// as well as virtual camera parameters.
|
||||
//
|
||||
// Output:
|
||||
// MULTI_FACE_GEOMETRY (`std::vector<face_geometry::FaceGeometry>`, required):
|
||||
// A vector of face geometry data.
|
||||
//
|
||||
// Options:
|
||||
// metadata_path (`string`, optional):
|
||||
// Defines a path for the geometry pipeline metadata file.
|
||||
//
|
||||
// The geometry pipeline metadata file format must be the binary
|
||||
// `face_geometry.GeometryPipelineMetadata` proto.
|
||||
//
|
||||
class GeometryPipelineCalculator : public CalculatorBase {
 public:
  // Declares the ENVIRONMENT side packet, the IMAGE_SIZE and
  // MULTI_FACE_LANDMARKS input streams, and the MULTI_FACE_GEOMETRY output
  // stream.
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->InputSidePackets()
        .Tag(kEnvironmentTag)
        .Set<face_geometry::Environment>();
    cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
    cc->Inputs()
        .Tag(kMultiFaceLandmarksTag)
        .Set<std::vector<NormalizedLandmarkList>>();
    cc->Outputs()
        .Tag(kMultiFaceGeometryTag)
        .Set<std::vector<face_geometry::FaceGeometry>>();

    return absl::OkStatus();
  }

  // Loads and validates the pipeline metadata from the path given in the
  // options, validates the environment side packet, and constructs the
  // geometry pipeline used by Process().
  absl::Status Open(CalculatorContext* cc) override {
    // Propagate input timestamps to outputs unchanged.
    cc->SetOffset(mediapipe::TimestampDiff(0));

    const auto& options = cc->Options<FaceGeometryPipelineCalculatorOptions>();

    ASSIGN_OR_RETURN(
        face_geometry::GeometryPipelineMetadata metadata,
        ReadMetadataFromFile(options.metadata_path()),
        _ << "Failed to read the geometry pipeline metadata from file!");

    MP_RETURN_IF_ERROR(
        face_geometry::ValidateGeometryPipelineMetadata(metadata))
        << "Invalid geometry pipeline metadata!";

    const face_geometry::Environment& environment =
        cc->InputSidePackets()
            .Tag(kEnvironmentTag)
            .Get<face_geometry::Environment>();

    MP_RETURN_IF_ERROR(face_geometry::ValidateEnvironment(environment))
        << "Invalid environment!";

    ASSIGN_OR_RETURN(
        geometry_pipeline_,
        face_geometry::CreateGeometryPipeline(environment, metadata),
        _ << "Failed to create a geometry pipeline!");

    return absl::OkStatus();
  }

  // Estimates face geometry for every face landmark list in the current
  // packet and emits the result as a vector on MULTI_FACE_GEOMETRY.
  absl::Status Process(CalculatorContext* cc) override {
    // Both the `IMAGE_SIZE` and the `MULTI_FACE_LANDMARKS` streams are required
    // to have a non-empty packet. In case this requirement is not met, there's
    // nothing to be processed at the current timestamp.
    if (cc->Inputs().Tag(kImageSizeTag).IsEmpty() ||
        cc->Inputs().Tag(kMultiFaceLandmarksTag).IsEmpty()) {
      return absl::OkStatus();
    }

    const auto& image_size =
        cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
    const auto& multi_face_landmarks =
        cc->Inputs()
            .Tag(kMultiFaceLandmarksTag)
            .Get<std::vector<NormalizedLandmarkList>>();

    auto multi_face_geometry =
        absl::make_unique<std::vector<face_geometry::FaceGeometry>>();

    ASSIGN_OR_RETURN(
        *multi_face_geometry,
        geometry_pipeline_->EstimateFaceGeometry(
            multi_face_landmarks,  //
            /*frame_width*/ image_size.first,
            /*frame_height*/ image_size.second),
        _ << "Failed to estimate face geometry for multiple faces!");

    // Ownership of the vector is transferred into the output packet via
    // release() + Adopt, stamped with the input timestamp.
    cc->Outputs()
        .Tag(kMultiFaceGeometryTag)
        .AddPacket(mediapipe::Adopt<std::vector<face_geometry::FaceGeometry>>(
                       multi_face_geometry.release())
                       .At(cc->InputTimestamp()));

    return absl::OkStatus();
  }

  absl::Status Close(CalculatorContext* cc) override {
    return absl::OkStatus();
  }

 private:
  // Loads a binary `GeometryPipelineMetadata` proto from `metadata_path`;
  // read and parse failures are surfaced as status errors.
  static absl::StatusOr<face_geometry::GeometryPipelineMetadata>
  ReadMetadataFromFile(const std::string& metadata_path) {
    ASSIGN_OR_RETURN(std::string metadata_blob,
                     ReadContentBlobFromFile(metadata_path),
                     _ << "Failed to read a metadata blob from file!");

    face_geometry::GeometryPipelineMetadata metadata;
    RET_CHECK(metadata.ParseFromString(metadata_blob))
        << "Failed to parse a metadata proto from a binary blob!";

    return metadata;
  }

  // Resolves `unresolved_path` through MediaPipe's resource system and reads
  // the whole resolved file into a string.
  static absl::StatusOr<std::string> ReadContentBlobFromFile(
      const std::string& unresolved_path) {
    ASSIGN_OR_RETURN(std::string resolved_path,
                     mediapipe::PathToResourceAsFile(unresolved_path),
                     _ << "Failed to resolve path! Path = " << unresolved_path);

    std::string content_blob;
    MP_RETURN_IF_ERROR(
        mediapipe::GetResourceContents(resolved_path, &content_blob))
        << "Failed to read content blob! Resolved path = " << resolved_path;

    return content_blob;
  }

  // Constructed in Open(); used for every Process() call.
  std::unique_ptr<face_geometry::GeometryPipeline> geometry_pipeline_;
};
|
||||
|
||||
} // namespace
|
||||
|
||||
using FaceGeometryPipelineCalculator = GeometryPipelineCalculator;
|
||||
|
||||
REGISTER_CALCULATOR(FaceGeometryPipelineCalculator);
|
||||
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator_options.proto";
|
||||
|
||||
// Options for the `FaceGeometryPipelineCalculator`.
message FaceGeometryPipelineCalculatorOptions {
  extend CalculatorOptions {
    optional FaceGeometryPipelineCalculatorOptions ext = 323693812;
  }

  // Defines a path for the geometry pipeline metadata file.
  //
  // The metadata file format must be the binary
  // `face_geometry.GeometryPipelineMetadata` proto.
  optional string metadata_path = 1;
}
|
103
mediapipe/modules/face_geometry/libs/BUILD
Normal file
103
mediapipe/modules/face_geometry/libs/BUILD
Normal file
|
@ -0,0 +1,103 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "effect_renderer",
|
||||
srcs = ["effect_renderer.cc"],
|
||||
hdrs = ["effect_renderer.h"],
|
||||
deps = [
|
||||
":mesh_3d_utils",
|
||||
":validation_utils",
|
||||
"//mediapipe/framework/formats:image_format_cc_proto",
|
||||
"//mediapipe/framework/formats:image_frame",
|
||||
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/gpu:gl_base",
|
||||
"//mediapipe/gpu:shader_util",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/types:optional",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "geometry_pipeline",
|
||||
srcs = ["geometry_pipeline.cc"],
|
||||
hdrs = ["geometry_pipeline.h"],
|
||||
deps = [
|
||||
":mesh_3d_utils",
|
||||
":procrustes_solver",
|
||||
":validation_utils",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:matrix",
|
||||
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "mesh_3d_utils",
|
||||
srcs = ["mesh_3d_utils.cc"],
|
||||
hdrs = ["mesh_3d_utils.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "procrustes_solver",
|
||||
srcs = ["procrustes_solver.cc"],
|
||||
hdrs = ["procrustes_solver.h"],
|
||||
deps = [
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/framework/port:statusor",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@eigen_archive//:eigen3",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "validation_utils",
|
||||
srcs = ["validation_utils.cc"],
|
||||
hdrs = ["validation_utils.h"],
|
||||
deps = [
|
||||
":mesh_3d_utils",
|
||||
"//mediapipe/framework/formats:matrix_data_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
"//mediapipe/modules/face_geometry/protos:environment_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:face_geometry_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:geometry_pipeline_metadata_cc_proto",
|
||||
"//mediapipe/modules/face_geometry/protos:mesh_3d_cc_proto",
|
||||
],
|
||||
)
|
733
mediapipe/modules/face_geometry/libs/effect_renderer.cc
Normal file
733
mediapipe/modules/face_geometry/libs/effect_renderer.cc
Normal file
|
@ -0,0 +1,733 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/modules/face_geometry/libs/effect_renderer.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/types/optional.h"
|
||||
#include "mediapipe/framework/formats/image_format.pb.h"
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/gpu/gl_base.h"
|
||||
#include "mediapipe/gpu/shader_util.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::face_geometry {
|
||||
namespace {
|
||||
|
||||
// CPU-side, GL-renderable flattening of a `Mesh3d` proto: an interleaved
// float vertex buffer, a `uint16` index buffer, and the layout metadata
// (sizes/offsets) needed to configure vertex attribute pointers.
struct RenderableMesh3d {
  // Converts a `Mesh3d` proto into a renderable mesh. Fails if any vertex
  // layout component cannot be resolved for the proto's vertex type, if the
  // primitive type is not TRIANGLE, or if any index value does not fit into
  // `uint16` (the type used for `glDrawElements`).
  static absl::StatusOr<RenderableMesh3d> CreateFromProtoMesh3d(
      const Mesh3d& proto_mesh_3d) {
    Mesh3d::VertexType vertex_type = proto_mesh_3d.vertex_type();

    RenderableMesh3d renderable_mesh_3d;
    renderable_mesh_3d.vertex_size = GetVertexSize(vertex_type);
    ASSIGN_OR_RETURN(
        renderable_mesh_3d.vertex_position_size,
        GetVertexComponentSize(vertex_type, VertexComponent::POSITION),
        _ << "Failed to get the position vertex size!");
    ASSIGN_OR_RETURN(
        renderable_mesh_3d.tex_coord_position_size,
        GetVertexComponentSize(vertex_type, VertexComponent::TEX_COORD),
        _ << "Failed to get the tex coord vertex size!");
    ASSIGN_OR_RETURN(
        renderable_mesh_3d.vertex_position_offset,
        GetVertexComponentOffset(vertex_type, VertexComponent::POSITION),
        _ << "Failed to get the position vertex offset!");
    ASSIGN_OR_RETURN(
        renderable_mesh_3d.tex_coord_position_offset,
        GetVertexComponentOffset(vertex_type, VertexComponent::TEX_COORD),
        _ << "Failed to get the tex coord vertex offset!");

    switch (proto_mesh_3d.primitive_type()) {
      case Mesh3d::TRIANGLE:
        renderable_mesh_3d.primitive_type = GL_TRIANGLES;
        break;

      default:
        RET_CHECK_FAIL() << "Only triangle primitive types are supported!";
    }

    // Copy the vertex buffer as-is; copy the index buffer narrowing each
    // element to `uint16` after a range check.
    renderable_mesh_3d.vertex_buffer.reserve(
        proto_mesh_3d.vertex_buffer_size());
    for (float vertex_element : proto_mesh_3d.vertex_buffer()) {
      renderable_mesh_3d.vertex_buffer.push_back(vertex_element);
    }

    renderable_mesh_3d.index_buffer.reserve(proto_mesh_3d.index_buffer_size());
    for (uint32_t index_element : proto_mesh_3d.index_buffer()) {
      RET_CHECK_LE(index_element, std::numeric_limits<uint16_t>::max())
          << "Index buffer elements must fit into the `uint16` type in order "
             "to be renderable!";

      renderable_mesh_3d.index_buffer.push_back(
          static_cast<uint16_t>(index_element));
    }

    return renderable_mesh_3d;
  }

  // Number of floats per interleaved vertex.
  uint32_t vertex_size;
  // Number of floats in the position / tex-coord components.
  uint32_t vertex_position_size;
  uint32_t tex_coord_position_size;
  // Float offsets of the position / tex-coord components within a vertex.
  uint32_t vertex_position_offset;
  uint32_t tex_coord_position_offset;
  // GL primitive type (currently always GL_TRIANGLES).
  uint32_t primitive_type;

  std::vector<float> vertex_buffer;
  std::vector<uint16_t> index_buffer;
};
||||
class Texture {
|
||||
public:
|
||||
static absl::StatusOr<std::unique_ptr<Texture>> WrapExternalTexture(
|
||||
GLuint handle, GLenum target, int width, int height) {
|
||||
RET_CHECK(handle) << "External texture must have a non-null handle!";
|
||||
return absl::WrapUnique(new Texture(handle, target, width, height,
|
||||
/*is_owned*/ false));
|
||||
}
|
||||
|
||||
static absl::StatusOr<std::unique_ptr<Texture>> CreateFromImageFrame(
|
||||
const ImageFrame& image_frame) {
|
||||
RET_CHECK(image_frame.IsAligned(ImageFrame::kGlDefaultAlignmentBoundary))
|
||||
<< "Image frame memory must be aligned for GL usage!";
|
||||
|
||||
RET_CHECK(image_frame.Width() > 0 && image_frame.Height() > 0)
|
||||
<< "Image frame must have positive dimensions!";
|
||||
|
||||
RET_CHECK(image_frame.Format() == ImageFormat::SRGB ||
|
||||
image_frame.Format() == ImageFormat::SRGBA)
|
||||
<< "Image frame format must be either SRGB or SRGBA!";
|
||||
|
||||
GLint image_format;
|
||||
switch (image_frame.NumberOfChannels()) {
|
||||
case 3:
|
||||
image_format = GL_RGB;
|
||||
break;
|
||||
case 4:
|
||||
image_format = GL_RGBA;
|
||||
break;
|
||||
default:
|
||||
RET_CHECK_FAIL()
|
||||
<< "Unexpected number of channels; expected 3 or 4, got "
|
||||
<< image_frame.NumberOfChannels() << "!";
|
||||
}
|
||||
|
||||
GLuint handle;
|
||||
glGenTextures(1, &handle);
|
||||
RET_CHECK(handle) << "Failed to initialize an OpenGL texture!";
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, handle);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_NEAREST_MIPMAP_LINEAR, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, image_format, image_frame.Width(),
|
||||
image_frame.Height(), 0, image_format, GL_UNSIGNED_BYTE,
|
||||
image_frame.PixelData());
|
||||
glGenerateMipmap(GL_TEXTURE_2D);
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
|
||||
return absl::WrapUnique(new Texture(
|
||||
handle, GL_TEXTURE_2D, image_frame.Width(), image_frame.Height(),
|
||||
/*is_owned*/ true));
|
||||
}
|
||||
|
||||
~Texture() {
|
||||
if (is_owned_) {
|
||||
glDeleteProgram(handle_);
|
||||
}
|
||||
}
|
||||
|
||||
GLuint handle() const { return handle_; }
|
||||
GLenum target() const { return target_; }
|
||||
int width() const { return width_; }
|
||||
int height() const { return height_; }
|
||||
|
||||
private:
|
||||
Texture(GLuint handle, GLenum target, int width, int height, bool is_owned)
|
||||
: handle_(handle),
|
||||
target_(target),
|
||||
width_(width),
|
||||
height_(height),
|
||||
is_owned_(is_owned) {}
|
||||
|
||||
GLuint handle_;
|
||||
GLenum target_;
|
||||
int width_;
|
||||
int height_;
|
||||
bool is_owned_;
|
||||
};
|
||||
|
||||
class RenderTarget {
|
||||
public:
|
||||
static absl::StatusOr<std::unique_ptr<RenderTarget>> Create() {
|
||||
GLuint framebuffer_handle;
|
||||
glGenFramebuffers(1, &framebuffer_handle);
|
||||
RET_CHECK(framebuffer_handle)
|
||||
<< "Failed to initialize an OpenGL framebuffer!";
|
||||
|
||||
return absl::WrapUnique(new RenderTarget(framebuffer_handle));
|
||||
}
|
||||
|
||||
~RenderTarget() {
|
||||
glDeleteFramebuffers(1, &framebuffer_handle_);
|
||||
// Renderbuffer handle might have never been created if this render target
|
||||
// is destroyed before `SetColorbuffer()` is called for the first time.
|
||||
if (renderbuffer_handle_) {
|
||||
glDeleteFramebuffers(1, &renderbuffer_handle_);
|
||||
}
|
||||
}
|
||||
|
||||
absl::Status SetColorbuffer(const Texture& colorbuffer_texture) {
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_handle_);
|
||||
glViewport(0, 0, colorbuffer_texture.width(), colorbuffer_texture.height());
|
||||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(colorbuffer_texture.target(), colorbuffer_texture.handle());
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
|
||||
colorbuffer_texture.target(),
|
||||
colorbuffer_texture.handle(),
|
||||
/*level*/ 0);
|
||||
glBindTexture(colorbuffer_texture.target(), 0);
|
||||
|
||||
// If the existing depth buffer has different dimensions, delete it.
|
||||
if (renderbuffer_handle_ &&
|
||||
(viewport_width_ != colorbuffer_texture.width() ||
|
||||
viewport_height_ != colorbuffer_texture.height())) {
|
||||
glDeleteRenderbuffers(1, &renderbuffer_handle_);
|
||||
renderbuffer_handle_ = 0;
|
||||
}
|
||||
|
||||
// If there is no depth buffer, create one.
|
||||
if (!renderbuffer_handle_) {
|
||||
glGenRenderbuffers(1, &renderbuffer_handle_);
|
||||
RET_CHECK(renderbuffer_handle_)
|
||||
<< "Failed to initialize an OpenGL renderbuffer!";
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_handle_);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
|
||||
colorbuffer_texture.width(),
|
||||
colorbuffer_texture.height());
|
||||
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
|
||||
GL_RENDERBUFFER, renderbuffer_handle_);
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, 0);
|
||||
}
|
||||
|
||||
viewport_width_ = colorbuffer_texture.width();
|
||||
viewport_height_ = colorbuffer_texture.height();
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
glFlush();
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
void Bind() const {
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_handle_);
|
||||
glViewport(0, 0, viewport_width_, viewport_height_);
|
||||
}
|
||||
|
||||
void Unbind() const { glBindFramebuffer(GL_FRAMEBUFFER, 0); }
|
||||
|
||||
void Clear() const {
|
||||
Bind();
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
glDepthMask(GL_TRUE);
|
||||
|
||||
glClearColor(0.f, 0.f, 0.f, 0.f);
|
||||
glClearDepthf(1.f);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
||||
glDepthMask(GL_FALSE);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
|
||||
Unbind();
|
||||
glFlush();
|
||||
}
|
||||
|
||||
private:
|
||||
explicit RenderTarget(GLuint framebuffer_handle)
|
||||
: framebuffer_handle_(framebuffer_handle),
|
||||
renderbuffer_handle_(0),
|
||||
viewport_width_(-1),
|
||||
viewport_height_(-1) {}
|
||||
|
||||
GLuint framebuffer_handle_;
|
||||
GLuint renderbuffer_handle_;
|
||||
int viewport_width_;
|
||||
int viewport_height_;
|
||||
};
|
||||
|
||||
// Draws a `RenderableMesh3d` with a single texture through a minimal
// `projection_mat * model_mat` vertex shader. Owns the GL program object.
class Renderer {
 public:
  // OPAQUE: alpha-blended, depth-tested draw (main effect pass).
  // OVERDRAW: unconditional overwrite, depth ignored (full-frame copy pass).
  // OCCLUSION: writes only the depth buffer, keeps existing color
  //            (face-mesh occluder pass).
  enum class RenderMode { OPAQUE, OVERDRAW, OCCLUSION };

  // Compiles/links the shader program and resolves its uniforms. Must be
  // called in a valid GL context.
  static absl::StatusOr<std::unique_ptr<Renderer>> Create() {
    static const GLint kAttrLocation[NUM_ATTRIBUTES] = {
        ATTRIB_VERTEX,
        ATTRIB_TEXTURE_POSITION,
    };
    static const GLchar* kAttrName[NUM_ATTRIBUTES] = {
        "position",
        "tex_coord",
    };

    static const GLchar* kVertSrc = R"(
      uniform mat4 projection_mat;
      uniform mat4 model_mat;

      attribute vec4 position;
      attribute vec4 tex_coord;

      varying vec2 v_tex_coord;

      void main() {
        v_tex_coord = tex_coord.xy;
        gl_Position = projection_mat * model_mat * position;
      }
    )";

    static const GLchar* kFragSrc = R"(
      precision mediump float;

      varying vec2 v_tex_coord;
      uniform sampler2D texture;

      void main() {
        gl_FragColor = texture2D(texture, v_tex_coord);
      }
    )";

    GLuint program_handle = 0;
    GlhCreateProgram(kVertSrc, kFragSrc, NUM_ATTRIBUTES,
                     (const GLchar**)&kAttrName[0], kAttrLocation,
                     &program_handle);
    RET_CHECK(program_handle) << "Problem initializing the texture program!";
    GLint projection_mat_uniform =
        glGetUniformLocation(program_handle, "projection_mat");
    GLint model_mat_uniform = glGetUniformLocation(program_handle, "model_mat");
    GLint texture_uniform = glGetUniformLocation(program_handle, "texture");

    RET_CHECK_NE(projection_mat_uniform, -1)
        << "Failed to find `projection_mat` uniform!";
    RET_CHECK_NE(model_mat_uniform, -1)
        << "Failed to find `model_mat` uniform!";
    RET_CHECK_NE(texture_uniform, -1) << "Failed to find `texture` uniform!";

    return absl::WrapUnique(new Renderer(program_handle, projection_mat_uniform,
                                         model_mat_uniform, texture_uniform));
  }

  ~Renderer() { glDeleteProgram(program_handle_); }

  // Renders `mesh_3d` textured with `texture` into `render_target` using the
  // given projection/model matrices (column-major, OpenGL convention) and the
  // blend/depth configuration selected by `render_mode`. Restores the touched
  // GL state before returning.
  absl::Status Render(const RenderTarget& render_target, const Texture& texture,
                      const RenderableMesh3d& mesh_3d,
                      const std::array<float, 16>& projection_mat,
                      const std::array<float, 16>& model_mat,
                      RenderMode render_mode) const {
    glUseProgram(program_handle_);
    // Set up the GL state.
    glEnable(GL_BLEND);
    glFrontFace(GL_CCW);
    switch (render_mode) {
      case RenderMode::OPAQUE:
        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
        glEnable(GL_DEPTH_TEST);
        glDepthMask(GL_TRUE);
        break;

      case RenderMode::OVERDRAW:
        glBlendFunc(GL_ONE, GL_ZERO);
        glDisable(GL_DEPTH_TEST);
        glDepthMask(GL_FALSE);
        break;

      case RenderMode::OCCLUSION:
        // Source contributes nothing to color; only depth is written.
        glBlendFunc(GL_ZERO, GL_ONE);
        glEnable(GL_DEPTH_TEST);
        glDepthMask(GL_TRUE);
        break;
    }

    render_target.Bind();
    // Set up vertex attributes (client-side arrays; stride/offsets come from
    // the mesh layout metadata, in floats).
    glVertexAttribPointer(
        ATTRIB_VERTEX, mesh_3d.vertex_position_size, GL_FLOAT, 0,
        mesh_3d.vertex_size * sizeof(float),
        mesh_3d.vertex_buffer.data() + mesh_3d.vertex_position_offset);
    glEnableVertexAttribArray(ATTRIB_VERTEX);
    glVertexAttribPointer(
        ATTRIB_TEXTURE_POSITION, mesh_3d.tex_coord_position_size, GL_FLOAT, 0,
        mesh_3d.vertex_size * sizeof(float),
        mesh_3d.vertex_buffer.data() + mesh_3d.tex_coord_position_offset);
    glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
    // Set up textures and uniforms (texture unit 1; unit 0 is used elsewhere
    // for framebuffer attachment bookkeeping).
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(texture.target(), texture.handle());
    glUniform1i(texture_uniform_, 1);
    glUniformMatrix4fv(projection_mat_uniform_, 1, GL_FALSE,
                       projection_mat.data());
    glUniformMatrix4fv(model_mat_uniform_, 1, GL_FALSE, model_mat.data());
    // Draw the mesh.
    glDrawElements(mesh_3d.primitive_type, mesh_3d.index_buffer.size(),
                   GL_UNSIGNED_SHORT, mesh_3d.index_buffer.data());
    // Unbind textures and uniforms.
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(texture.target(), 0);
    render_target.Unbind();
    // Unbind vertex attributes.
    glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION);
    glDisableVertexAttribArray(ATTRIB_VERTEX);
    // Restore the GL state.
    glDepthMask(GL_FALSE);
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_BLEND);

    glUseProgram(0);
    glFlush();

    return absl::OkStatus();
  }

 private:
  enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };

  Renderer(GLuint program_handle, GLint projection_mat_uniform,
           GLint model_mat_uniform, GLint texture_uniform)
      : program_handle_(program_handle),
        projection_mat_uniform_(projection_mat_uniform),
        model_mat_uniform_(model_mat_uniform),
        texture_uniform_(texture_uniform) {}

  GLuint program_handle_;
  GLint projection_mat_uniform_;
  GLint model_mat_uniform_;
  GLint texture_uniform_;
};
|
||||
// Stateful `EffectRenderer` implementation. Per frame, it (1) copies the
// source texture into the destination, (2) renders each face mesh as a
// depth-only occluder, then (3) renders the effect mesh (or the face mesh
// itself) with the effect texture on top.
class EffectRendererImpl : public EffectRenderer {
 public:
  EffectRendererImpl(
      const Environment& environment,
      std::unique_ptr<RenderTarget> render_target,
      std::unique_ptr<Renderer> renderer,
      RenderableMesh3d&& renderable_quad_mesh_3d,
      absl::optional<RenderableMesh3d>&& renderable_effect_mesh_3d,
      std::unique_ptr<Texture> empty_color_texture,
      std::unique_ptr<Texture> effect_texture)
      : environment_(environment),
        render_target_(std::move(render_target)),
        renderer_(std::move(renderer)),
        renderable_quad_mesh_3d_(std::move(renderable_quad_mesh_3d)),
        renderable_effect_mesh_3d_(std::move(renderable_effect_mesh_3d)),
        empty_color_texture_(std::move(empty_color_texture)),
        effect_texture_(std::move(effect_texture)),
        identity_matrix_(Create4x4IdentityMatrix()) {}

  // Renders the effect for all faces from `src_texture_name` into
  // `dst_texture_name`. Both textures must be distinct, non-null, and live in
  // the current GL context. See `EffectRenderer::RenderEffect()` for the full
  // contract.
  absl::Status RenderEffect(
      const std::vector<FaceGeometry>& multi_face_geometry,
      int frame_width,          //
      int frame_height,         //
      GLenum src_texture_target,  //
      GLuint src_texture_name,    //
      GLenum dst_texture_target,  //
      GLuint dst_texture_name) {
    // Validate input arguments.
    MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
        << "Invalid frame dimensions!";
    RET_CHECK(src_texture_name > 0 && dst_texture_name > 0)
        << "Both source and destination texture names must be non-null!";
    RET_CHECK_NE(src_texture_name, dst_texture_name)
        << "Source and destination texture names must be different!";

    // Validate all input face geometries.
    for (const FaceGeometry& face_geometry : multi_face_geometry) {
      MP_RETURN_IF_ERROR(ValidateFaceGeometry(face_geometry))
          << "Invalid face geometry!";
    }

    // Wrap both source and destination textures (non-owning wrappers; the
    // caller keeps ownership of the GL handles).
    ASSIGN_OR_RETURN(
        std::unique_ptr<Texture> src_texture,
        Texture::WrapExternalTexture(src_texture_name, src_texture_target,
                                     frame_width, frame_height),
        _ << "Failed to wrap the external source texture");
    ASSIGN_OR_RETURN(
        std::unique_ptr<Texture> dst_texture,
        Texture::WrapExternalTexture(dst_texture_name, dst_texture_target,
                                     frame_width, frame_height),
        _ << "Failed to wrap the external destination texture");

    // Set the destination texture as the color buffer. Then, clear both the
    // color and the depth buffers for the render target.
    MP_RETURN_IF_ERROR(render_target_->SetColorbuffer(*dst_texture))
        << "Failed to set the destination texture as the colorbuffer!";
    render_target_->Clear();

    // Render the source texture on top of the quad mesh (i.e. make a copy)
    // into the render target.
    MP_RETURN_IF_ERROR(renderer_->Render(
        *render_target_, *src_texture, renderable_quad_mesh_3d_,
        identity_matrix_, identity_matrix_, Renderer::RenderMode::OVERDRAW))
        << "Failed to render the source texture on top of the quad mesh!";

    // Extract pose transform matrices and meshes from the face geometry data;
    const int num_faces = multi_face_geometry.size();

    std::vector<std::array<float, 16>> face_pose_transform_matrices(num_faces);
    std::vector<RenderableMesh3d> renderable_face_meshes(num_faces);
    for (int i = 0; i < num_faces; ++i) {
      const FaceGeometry& face_geometry = multi_face_geometry[i];

      // Extract the face pose transformation matrix.
      ASSIGN_OR_RETURN(
          face_pose_transform_matrices[i],
          Convert4x4MatrixDataToArrayFormat(
              face_geometry.pose_transform_matrix()),
          _ << "Failed to extract the face pose transformation matrix!");

      // Extract the face mesh as a renderable.
      ASSIGN_OR_RETURN(
          renderable_face_meshes[i],
          RenderableMesh3d::CreateFromProtoMesh3d(face_geometry.mesh()),
          _ << "Failed to extract a renderable face mesh!");
    }

    // Create a perspective matrix using the frame aspect ratio.
    std::array<float, 16> perspective_matrix = CreatePerspectiveMatrix(
        /*aspect_ratio*/ static_cast<float>(frame_width) / frame_height);

    // Render a face mesh occluder for each face.
    for (int i = 0; i < num_faces; ++i) {
      const std::array<float, 16>& face_pose_transform_matrix =
          face_pose_transform_matrices[i];
      const RenderableMesh3d& renderable_face_mesh = renderable_face_meshes[i];

      // Render the face mesh using the empty color texture, i.e. the face
      // mesh occluder.
      //
      // For occlusion, the pose transformation is moved ~1mm away from camera
      // in order to allow the face mesh texture to be rendered without
      // failing the depth test.
      std::array<float, 16> occlusion_face_pose_transform_matrix =
          face_pose_transform_matrix;
      // Index 14 is the Z translation component of a column-major 4x4 matrix.
      occlusion_face_pose_transform_matrix[14] -= 0.1f;  // ~ 1mm
      MP_RETURN_IF_ERROR(renderer_->Render(
          *render_target_, *empty_color_texture_, renderable_face_mesh,
          perspective_matrix, occlusion_face_pose_transform_matrix,
          Renderer::RenderMode::OCCLUSION))
          << "Failed to render the face mesh occluder!";
    }

    // Render the main face mesh effect component for each face.
    for (int i = 0; i < num_faces; ++i) {
      const std::array<float, 16>& face_pose_transform_matrix =
          face_pose_transform_matrices[i];

      // If there is no effect 3D mesh provided, then the face mesh itself is
      // used as a topology for rendering (for example, this can be used for
      // facepaint effects or AR makeup).
      const RenderableMesh3d& main_effect_mesh_3d =
          renderable_effect_mesh_3d_ ? *renderable_effect_mesh_3d_
                                     : renderable_face_meshes[i];

      MP_RETURN_IF_ERROR(renderer_->Render(
          *render_target_, *effect_texture_, main_effect_mesh_3d,
          perspective_matrix, face_pose_transform_matrix,
          Renderer::RenderMode::OPAQUE))
          << "Failed to render the main effect pass!";
    }

    // At this point in the code, the destination texture must contain the
    // correctly rendered effect, so we should just return.
    return absl::OkStatus();
  }

 private:
  // Builds a standard column-major perspective projection matrix from the
  // environment's camera parameters and the given aspect ratio.
  std::array<float, 16> CreatePerspectiveMatrix(float aspect_ratio) const {
    static constexpr float kDegreesToRadians = M_PI / 180.f;

    std::array<float, 16> perspective_matrix;
    perspective_matrix.fill(0.f);

    const auto& env_camera = environment_.perspective_camera();
    // Standard perspective projection matrix calculations.
    const float f = 1.0f / std::tan(kDegreesToRadians *
                                    env_camera.vertical_fov_degrees() / 2.f);

    const float denom = 1.0f / (env_camera.near() - env_camera.far());
    perspective_matrix[0] = f / aspect_ratio;
    perspective_matrix[5] = f;
    perspective_matrix[10] = (env_camera.near() + env_camera.far()) * denom;
    perspective_matrix[11] = -1.f;
    perspective_matrix[14] = 2.f * env_camera.far() * env_camera.near() * denom;

    // If the environment's origin point location is in the top-left corner,
    // an additional flip along the Y-axis is applied to render correctly.
    if (environment_.origin_point_location() ==
        OriginPointLocation::TOP_LEFT_CORNER) {
      perspective_matrix[5] *= -1.f;
    }

    return perspective_matrix;
  }

  static std::array<float, 16> Create4x4IdentityMatrix() {
    return {1.f, 0.f, 0.f, 0.f,  //
            0.f, 1.f, 0.f, 0.f,  //
            0.f, 0.f, 1.f, 0.f,  //
            0.f, 0.f, 0.f, 1.f};
  }

  // Flattens a 4x4 `MatrixData` proto into a column-major float array,
  // transposing if the proto stores the matrix in row-major order.
  static absl::StatusOr<std::array<float, 16>>
  Convert4x4MatrixDataToArrayFormat(const MatrixData& matrix_data) {
    RET_CHECK(matrix_data.rows() == 4 &&  //
              matrix_data.cols() == 4 &&  //
              matrix_data.packed_data_size() == 16)
        << "The matrix data must define a 4x4 matrix!";

    std::array<float, 16> matrix_array;
    for (int i = 0; i < 16; i++) {
      matrix_array[i] = matrix_data.packed_data(i);
    }

    // Matrix array must be in the OpenGL-friendly column-major order. If
    // `matrix_data` is in the row-major order, then transpose.
    if (matrix_data.layout() == MatrixData::ROW_MAJOR) {
      std::swap(matrix_array[1], matrix_array[4]);
      std::swap(matrix_array[2], matrix_array[8]);
      std::swap(matrix_array[3], matrix_array[12]);
      std::swap(matrix_array[6], matrix_array[9]);
      std::swap(matrix_array[7], matrix_array[13]);
      std::swap(matrix_array[11], matrix_array[14]);
    }

    return matrix_array;
  }

  Environment environment_;

  std::unique_ptr<RenderTarget> render_target_;
  std::unique_ptr<Renderer> renderer_;

  // Full-frame quad used for the source-to-destination copy pass.
  RenderableMesh3d renderable_quad_mesh_3d_;
  // Static effect mesh; if absent, the per-frame face mesh is used instead.
  absl::optional<RenderableMesh3d> renderable_effect_mesh_3d_;

  // 1x1 transparent texture used for the depth-only occluder pass.
  std::unique_ptr<Texture> empty_color_texture_;
  std::unique_ptr<Texture> effect_texture_;

  std::array<float, 16> identity_matrix_;
};
|
||||
// Builds a full-screen quad mesh in the VERTEX_PT layout (XYZ position
// followed by UV), made of two triangles. Used to copy the source texture
// into the render target.
Mesh3d CreateQuadMesh3d() {
  static constexpr float kVertices[] = {
      -1.f, -1.f, 0.f, 0.f, 0.f,  //
      1.f,  -1.f, 0.f, 1.f, 0.f,  //
      -1.f, 1.f,  0.f, 0.f, 1.f,  //
      1.f,  1.f,  0.f, 1.f, 1.f,  //
  };
  static constexpr uint16_t kIndices[] = {0, 1, 2, 1, 3, 2};

  Mesh3d quad;
  quad.set_vertex_type(Mesh3d::VERTEX_PT);
  quad.set_primitive_type(Mesh3d::TRIANGLE);
  for (float vertex_element : kVertices) {
    quad.add_vertex_buffer(vertex_element);
  }
  for (uint16_t index_element : kIndices) {
    quad.add_index_buffer(index_element);
  }

  return quad;
}
|
||||
// Creates a 1x1 fully transparent SRGBA image frame. It serves as the color
// texture for the face-occluder pass, where only depth output matters.
ImageFrame CreateEmptyColorTexture() {
  ImageFrame empty_color_texture(ImageFormat::SRGBA, /*width*/ 1,
                                 /*height*/ 1,
                                 ImageFrame::kGlDefaultAlignmentBoundary);
  empty_color_texture.SetToZero();
  return empty_color_texture;
}
|
||||
} // namespace
|
||||
|
||||
// Validates the inputs and assembles an `EffectRendererImpl` with all of its
// GL resources (render target, shader program, quad mesh, occluder texture,
// effect texture). Must be called in the GL context used for rendering; see
// the header for the full contract.
absl::StatusOr<std::unique_ptr<EffectRenderer>> CreateEffectRenderer(
    const Environment& environment,               //
    const absl::optional<Mesh3d>& effect_mesh_3d,  //
    ImageFrame&& effect_texture) {
  MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
      << "Invalid environment!";
  if (effect_mesh_3d) {
    MP_RETURN_IF_ERROR(ValidateMesh3d(*effect_mesh_3d))
        << "Invalid effect 3D mesh!";
  }

  ASSIGN_OR_RETURN(std::unique_ptr<RenderTarget> render_target,
                   RenderTarget::Create(),
                   _ << "Failed to create a render target!");
  ASSIGN_OR_RETURN(std::unique_ptr<Renderer> renderer, Renderer::Create(),
                   _ << "Failed to create a renderer!");
  ASSIGN_OR_RETURN(RenderableMesh3d renderable_quad_mesh_3d,
                   RenderableMesh3d::CreateFromProtoMesh3d(CreateQuadMesh3d()),
                   _ << "Failed to create a renderable quad mesh!");
  // Only materialize a renderable effect mesh when one was provided;
  // otherwise the per-frame face mesh is used at render time.
  absl::optional<RenderableMesh3d> renderable_effect_mesh_3d;
  if (effect_mesh_3d) {
    ASSIGN_OR_RETURN(renderable_effect_mesh_3d,
                     RenderableMesh3d::CreateFromProtoMesh3d(*effect_mesh_3d),
                     _ << "Failed to create a renderable effect mesh!");
  }
  ASSIGN_OR_RETURN(std::unique_ptr<Texture> empty_color_gl_texture,
                   Texture::CreateFromImageFrame(CreateEmptyColorTexture()),
                   _ << "Failed to create an empty color texture!");
  ASSIGN_OR_RETURN(std::unique_ptr<Texture> effect_gl_texture,
                   Texture::CreateFromImageFrame(effect_texture),
                   _ << "Failed to create an effect texture!");

  std::unique_ptr<EffectRenderer> result =
      absl::make_unique<EffectRendererImpl>(
          environment, std::move(render_target), std::move(renderer),
          std::move(renderable_quad_mesh_3d),
          std::move(renderable_effect_mesh_3d),
          std::move(empty_color_gl_texture), std::move(effect_gl_texture));

  return result;
}
||||
|
||||
} // namespace mediapipe::face_geometry
|
92
mediapipe/modules/face_geometry/libs/effect_renderer.h
Normal file
92
mediapipe/modules/face_geometry/libs/effect_renderer.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_
|
||||
#define MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "mediapipe/framework/formats/image_frame.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/gpu/gl_base.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::face_geometry {
|
||||
|
||||
// Encapsulates a stateful face effect renderer.
class EffectRenderer {
 public:
  virtual ~EffectRenderer() = default;

  // Renders a face effect based on the multiple facial geometries.
  //
  // Must be called in the same GL context as was used upon initialization.
  //
  // Each of the `multi_face_geometry` must be valid (for details, please refer
  // to the proto message definition comments and/or `validation_utils.h/cc`).
  // Additionally, all face mesh index buffer elements must fit into the
  // `uint16` type in order to be renderable.
  //
  // Both `frame_width` and `frame_height` must be positive.
  //
  // Both `src_texture_name` and `dst_texture_name` must be positive and
  // reference existing OpenGL textures in the current context. They should also
  // reference different textures as the in-place effect rendering is not yet
  // supported.
  virtual absl::Status RenderEffect(
      const std::vector<FaceGeometry>& multi_face_geometry,
      int frame_width,          //
      int frame_height,         //
      GLenum src_texture_target,  //
      GLuint src_texture_name,    //
      GLenum dst_texture_target,  //
      GLuint dst_texture_name) = 0;
};

// Creates an instance of `EffectRenderer`.
//
// `effect_mesh_3d` defines a rigid 3d mesh which is "attached" to the face and
// is driven by the face pose transformation matrix. If it is not present, the
// runtime face mesh will be used as the effect mesh - this mode is handy for
// facepaint effects. In both rendering modes, the face mesh is first rendered
// as an occluder straight into the depth buffer. This step helps to create a
// more believable effect via hiding invisible elements behind the face surface.
//
// `effect_texture` defines the color texture to be rendered on top of the
// effect mesh. Please be aware about the difference between the CPU texture
// memory layout and the GPU texture sampler coordinate space. This renderer
// follows conventions discussed here: https://open.gl/textures
//
// Must be called in the same GL context as will be used for rendering.
//
// Both `environment` and `effect_mesh_3d` (if present) must be valid (for
// details, please refer to the proto message definition comments and/or
// `validation_utils.h/cc`). Additionally, `effect_mesh_3d`s index buffer
// elements must fit into the `uint16` type in order to be renderable.
//
// `effect_texture` must have positive dimensions. Its format must be either
// `SRGB` or `SRGBA`. Its memory must be aligned for GL usage.
absl::StatusOr<std::unique_ptr<EffectRenderer>> CreateEffectRenderer(
    const Environment& environment,               //
    const absl::optional<Mesh3d>& effect_mesh_3d,  //
    ImageFrame&& effect_texture);
||||
|
||||
} // namespace mediapipe::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_MODULES_FACE_GEOMETRY_LIBS_EFFECT_RENDERER_H_
|
466
mediapipe/modules/face_geometry/libs/geometry_pipeline.cc
Normal file
466
mediapipe/modules/face_geometry/libs/geometry_pipeline.cc
Normal file
|
@ -0,0 +1,466 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/modules/face_geometry/libs/geometry_pipeline.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "Eigen/Core"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/matrix.h"
|
||||
#include "mediapipe/framework/formats/matrix_data.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
#include "mediapipe/framework/port/status_macros.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h"
|
||||
#include "mediapipe/modules/face_geometry/libs/validation_utils.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::face_geometry {
|
||||
namespace {
|
||||
|
||||
struct PerspectiveCameraFrustum {
|
||||
// NOTE: all arguments must be validated prior to calling this constructor.
|
||||
PerspectiveCameraFrustum(const PerspectiveCamera& perspective_camera,
|
||||
int frame_width, int frame_height) {
|
||||
static constexpr float kDegreesToRadians = 3.14159265358979323846f / 180.f;
|
||||
|
||||
const float height_at_near =
|
||||
2.f * perspective_camera.near() *
|
||||
std::tan(0.5f * kDegreesToRadians *
|
||||
perspective_camera.vertical_fov_degrees());
|
||||
|
||||
const float width_at_near = frame_width * height_at_near / frame_height;
|
||||
|
||||
left = -0.5f * width_at_near;
|
||||
right = 0.5f * width_at_near;
|
||||
bottom = -0.5f * height_at_near;
|
||||
top = 0.5f * height_at_near;
|
||||
near = perspective_camera.near();
|
||||
far = perspective_camera.far();
|
||||
}
|
||||
|
||||
float left;
|
||||
float right;
|
||||
float bottom;
|
||||
float top;
|
||||
float near;
|
||||
float far;
|
||||
};
|
||||
|
||||
// Converts normalized screen-space face landmarks into metric-space landmarks
// and estimates the face pose transformation matrix, based on the canonical
// metric landmark set and the Procrustes solver supplied at construction time.
class ScreenToMetricSpaceConverter {
 public:
  ScreenToMetricSpaceConverter(
      OriginPointLocation origin_point_location,      //
      InputSource input_source,                       //
      Eigen::Matrix3Xf&& canonical_metric_landmarks,  //
      Eigen::VectorXf&& landmark_weights,             //
      std::unique_ptr<ProcrustesSolver> procrustes_solver)
      : origin_point_location_(origin_point_location),
        input_source_(input_source),
        canonical_metric_landmarks_(std::move(canonical_metric_landmarks)),
        landmark_weights_(std::move(landmark_weights)),
        procrustes_solver_(std::move(procrustes_solver)) {}

  // Converts `screen_landmark_list` into `metric_landmark_list` and estimates
  // the `pose_transform_mat`.
  //
  // Here's the algorithm summary:
  //
  // (1) Project X- and Y- screen landmark coordinates at the Z near plane.
  //
  // (2) Estimate a canonical-to-runtime landmark set scale by running the
  //     Procrustes solver using the screen runtime landmarks.
  //
  //     On this iteration, screen landmarks are used instead of unprojected
  //     metric landmarks as it is not safe to unproject due to the relative
  //     nature of the input screen landmark Z coordinate.
  //
  // (3) Use the canonical-to-runtime scale from (2) to unproject the screen
  //     landmarks. The result is referenced as "intermediate landmarks" because
  //     they are the first estimation of the resulting metric landmarks, but
  //     are not quite there yet.
  //
  // (4) Estimate a canonical-to-runtime landmark set scale by running the
  //     Procrustes solver using the intermediate runtime landmarks.
  //
  // (5) Use the product of the scale factors from (2) and (4) to unproject
  //     the screen landmarks the second time. This is the second and the final
  //     estimation of the metric landmarks.
  //
  // (6) Multiply each of the metric landmarks by the inverse pose
  //     transformation matrix to align the runtime metric face landmarks with
  //     the canonical metric face landmarks.
  //
  // Note: the input screen landmarks are in the left-handed coordinate system,
  //       however any metric landmarks - including the canonical metric
  //       landmarks, the final runtime metric landmarks and any intermediate
  //       runtime metric landmarks - are in the right-handed coordinate system.
  //
  //       To keep the logic correct, the landmark set handedness is changed any
  //       time the screen-to-metric semantic barrier is passed.
  absl::Status Convert(const NormalizedLandmarkList& screen_landmark_list,  //
                       const PerspectiveCameraFrustum& pcf,                 //
                       LandmarkList& metric_landmark_list,                  //
                       Eigen::Matrix4f& pose_transform_mat) const {
    RET_CHECK_EQ(screen_landmark_list.landmark_size(),
                 canonical_metric_landmarks_.cols())
        << "The number of landmarks doesn't match the number passed upon "
           "initialization!";

    Eigen::Matrix3Xf screen_landmarks;
    ConvertLandmarkListToEigenMatrix(screen_landmark_list, screen_landmarks);

    ProjectXY(pcf, screen_landmarks);
    // Mean Z across all landmarks; used to re-anchor relative depths at the
    // near plane in `MoveAndRescaleZ()`.
    const float depth_offset = screen_landmarks.row(2).mean();

    // 1st iteration: don't unproject XY because it's unsafe to do so due to
    //                the relative nature of the Z coordinate. Instead, run the
    //                first estimation on the projected XY and use that scale to
    //                unproject for the 2nd iteration.
    Eigen::Matrix3Xf intermediate_landmarks(screen_landmarks);
    ChangeHandedness(intermediate_landmarks);

    ASSIGN_OR_RETURN(const float first_iteration_scale,
                     EstimateScale(intermediate_landmarks),
                     _ << "Failed to estimate first iteration scale!");

    // 2nd iteration: unproject XY using the scale from the 1st iteration.
    intermediate_landmarks = screen_landmarks;
    MoveAndRescaleZ(pcf, depth_offset, first_iteration_scale,
                    intermediate_landmarks);
    UnprojectXY(pcf, intermediate_landmarks);
    ChangeHandedness(intermediate_landmarks);

    // For face detection input landmarks, re-write Z-coord from the canonical
    // landmarks.
    if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {
      Eigen::Matrix4f intermediate_pose_transform_mat;
      MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
          canonical_metric_landmarks_, intermediate_landmarks,
          landmark_weights_, intermediate_pose_transform_mat))
          << "Failed to estimate pose transform matrix!";

      intermediate_landmarks.row(2) =
          (intermediate_pose_transform_mat *
           canonical_metric_landmarks_.colwise().homogeneous())
              .row(2);
    }
    ASSIGN_OR_RETURN(const float second_iteration_scale,
                     EstimateScale(intermediate_landmarks),
                     _ << "Failed to estimate second iteration scale!");

    // Use the total scale to unproject the screen landmarks.
    const float total_scale = first_iteration_scale * second_iteration_scale;
    MoveAndRescaleZ(pcf, depth_offset, total_scale, screen_landmarks);
    UnprojectXY(pcf, screen_landmarks);
    ChangeHandedness(screen_landmarks);

    // At this point, screen landmarks are converted into metric landmarks.
    Eigen::Matrix3Xf& metric_landmarks = screen_landmarks;

    MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
        canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
        pose_transform_mat))
        << "Failed to estimate pose transform matrix!";

    // For face detection input landmarks, re-write Z-coord from the canonical
    // landmarks and run the pose transform estimation again.
    if (input_source_ == InputSource::FACE_DETECTION_PIPELINE) {
      metric_landmarks.row(2) =
          (pose_transform_mat *
           canonical_metric_landmarks_.colwise().homogeneous())
              .row(2);

      MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
          canonical_metric_landmarks_, metric_landmarks, landmark_weights_,
          pose_transform_mat))
          << "Failed to estimate pose transform matrix!";
    }

    // Multiply each of the metric landmarks by the inverse pose
    // transformation matrix to align the runtime metric face landmarks with
    // the canonical metric face landmarks.
    metric_landmarks = (pose_transform_mat.inverse() *
                        metric_landmarks.colwise().homogeneous())
                           .topRows(3);

    ConvertEigenMatrixToLandmarkList(metric_landmarks, metric_landmark_list);

    return absl::OkStatus();
  }

 private:
  // Maps normalized [0, 1] X/Y landmark coordinates onto the near clipping
  // plane of the frustum, flipping Y first if the origin is top-left.
  void ProjectXY(const PerspectiveCameraFrustum& pcf,
                 Eigen::Matrix3Xf& landmarks) const {
    float x_scale = pcf.right - pcf.left;
    float y_scale = pcf.top - pcf.bottom;
    float x_translation = pcf.left;
    float y_translation = pcf.bottom;

    if (origin_point_location_ == OriginPointLocation::TOP_LEFT_CORNER) {
      landmarks.row(1) = 1.f - landmarks.row(1).array();
    }

    // NOTE(review): Z is scaled by the horizontal extent (`x_scale`), not a
    // dedicated Z scale — this matches the upstream MediaPipe implementation;
    // confirm it is intentional before changing.
    landmarks =
        landmarks.array().colwise() * Eigen::Array3f(x_scale, y_scale, x_scale);
    landmarks.colwise() += Eigen::Vector3f(x_translation, y_translation, 0.f);
  }

  // Returns the scale component of the canonical-to-runtime transform
  // (the norm of the first column of the estimated 4x4 matrix).
  absl::StatusOr<float> EstimateScale(Eigen::Matrix3Xf& landmarks) const {
    Eigen::Matrix4f transform_mat;
    MP_RETURN_IF_ERROR(procrustes_solver_->SolveWeightedOrthogonalProblem(
        canonical_metric_landmarks_, landmarks, landmark_weights_,
        transform_mat))
        << "Failed to estimate canonical-to-runtime landmark set transform!";

    return transform_mat.col(0).norm();
  }

  // Shifts Z so that the mean depth sits at the near plane, then divides by
  // the canonical-to-runtime scale.
  static void MoveAndRescaleZ(const PerspectiveCameraFrustum& pcf,
                              float depth_offset, float scale,
                              Eigen::Matrix3Xf& landmarks) {
    landmarks.row(2) =
        (landmarks.array().row(2) - depth_offset + pcf.near) / scale;
  }

  // Inverse perspective projection: scales X/Y by depth relative to the near
  // plane.
  static void UnprojectXY(const PerspectiveCameraFrustum& pcf,
                          Eigen::Matrix3Xf& landmarks) {
    landmarks.row(0) =
        landmarks.row(0).cwiseProduct(landmarks.row(2)) / pcf.near;
    landmarks.row(1) =
        landmarks.row(1).cwiseProduct(landmarks.row(2)) / pcf.near;
  }

  // Flips the Z axis, switching between the left- and right-handed coordinate
  // systems (see the `Convert()` note above).
  static void ChangeHandedness(Eigen::Matrix3Xf& landmarks) {
    landmarks.row(2) *= -1.f;
  }

  // Copies a landmark proto list into a 3xN Eigen matrix (rows: X, Y, Z).
  static void ConvertLandmarkListToEigenMatrix(
      const NormalizedLandmarkList& landmark_list,
      Eigen::Matrix3Xf& eigen_matrix) {
    eigen_matrix = Eigen::Matrix3Xf(3, landmark_list.landmark_size());
    for (int i = 0; i < landmark_list.landmark_size(); ++i) {
      const auto& landmark = landmark_list.landmark(i);
      eigen_matrix(0, i) = landmark.x();
      eigen_matrix(1, i) = landmark.y();
      eigen_matrix(2, i) = landmark.z();
    }
  }

  // Copies a 3xN Eigen matrix (rows: X, Y, Z) into a landmark proto list,
  // replacing any previous contents.
  static void ConvertEigenMatrixToLandmarkList(
      const Eigen::Matrix3Xf& eigen_matrix, LandmarkList& landmark_list) {
    landmark_list.Clear();

    for (int i = 0; i < eigen_matrix.cols(); ++i) {
      auto& landmark = *landmark_list.add_landmark();
      landmark.set_x(eigen_matrix(0, i));
      landmark.set_y(eigen_matrix(1, i));
      landmark.set_z(eigen_matrix(2, i));
    }
  }

  const OriginPointLocation origin_point_location_;
  const InputSource input_source_;
  Eigen::Matrix3Xf canonical_metric_landmarks_;
  Eigen::VectorXf landmark_weights_;

  std::unique_ptr<ProcrustesSolver> procrustes_solver_;
};
|
||||
|
||||
// Stateless `GeometryPipeline` implementation: for each input face landmark
// list, estimates metric landmarks + pose via `ScreenToMetricSpaceConverter`
// and packs them into a `FaceGeometry` proto built on the canonical mesh.
class GeometryPipelineImpl : public GeometryPipeline {
 public:
  GeometryPipelineImpl(
      const PerspectiveCamera& perspective_camera,  //
      const Mesh3d& canonical_mesh,                 //
      uint32_t canonical_mesh_vertex_size,          //
      uint32_t canonical_mesh_num_vertices,
      uint32_t canonical_mesh_vertex_position_offset,
      std::unique_ptr<ScreenToMetricSpaceConverter> space_converter)
      : perspective_camera_(perspective_camera),
        canonical_mesh_(canonical_mesh),
        canonical_mesh_vertex_size_(canonical_mesh_vertex_size),
        canonical_mesh_num_vertices_(canonical_mesh_num_vertices),
        canonical_mesh_vertex_position_offset_(
            canonical_mesh_vertex_position_offset),
        space_converter_(std::move(space_converter)) {}

  absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(
      const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
      int frame_width, int frame_height) const override {
    MP_RETURN_IF_ERROR(ValidateFrameDimensions(frame_width, frame_height))
        << "Invalid frame dimensions!";

    // Create a perspective camera frustum to be shared for geometry estimation
    // per each face.
    PerspectiveCameraFrustum pcf(perspective_camera_, frame_width,
                                 frame_height);

    std::vector<FaceGeometry> multi_face_geometry;

    // From this point, the meaning of "face landmarks" is clarified further as
    // "screen face landmarks". This is done to distinguish from "metric face
    // landmarks" that are derived during the face geometry estimation process.
    for (const NormalizedLandmarkList& screen_face_landmarks :
         multi_face_landmarks) {
      // Having a too compact screen landmark list will result in numerical
      // instabilities, therefore such faces are filtered.
      if (IsScreenLandmarkListTooCompact(screen_face_landmarks)) {
        continue;
      }

      // Convert the screen landmarks into the metric landmarks and get the pose
      // transformation matrix.
      LandmarkList metric_face_landmarks;
      Eigen::Matrix4f pose_transform_mat;
      MP_RETURN_IF_ERROR(space_converter_->Convert(screen_face_landmarks, pcf,
                                                   metric_face_landmarks,
                                                   pose_transform_mat))
          << "Failed to convert landmarks from the screen to the metric space!";

      // Pack geometry data for this face.
      FaceGeometry face_geometry;
      Mesh3d* mutable_mesh = face_geometry.mutable_mesh();
      // Copy the canonical face mesh as the face geometry mesh.
      mutable_mesh->CopyFrom(canonical_mesh_);
      // Replace XYZ vertex mesh coordinates with the metric landmark positions.
      // NOTE(review): `i` is signed while the bound is `uint32_t`; fine for
      // realistic mesh sizes, but a signed/unsigned comparison nonetheless.
      for (int i = 0; i < canonical_mesh_num_vertices_; ++i) {
        uint32_t vertex_buffer_offset = canonical_mesh_vertex_size_ * i +
                                        canonical_mesh_vertex_position_offset_;

        mutable_mesh->set_vertex_buffer(vertex_buffer_offset,
                                        metric_face_landmarks.landmark(i).x());
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 1,
                                        metric_face_landmarks.landmark(i).y());
        mutable_mesh->set_vertex_buffer(vertex_buffer_offset + 2,
                                        metric_face_landmarks.landmark(i).z());
      }
      // Populate the face pose transformation matrix.
      mediapipe::MatrixDataProtoFromMatrix(
          pose_transform_mat, face_geometry.mutable_pose_transform_matrix());

      multi_face_geometry.push_back(face_geometry);
    }

    return multi_face_geometry;
  }

 private:
  // Returns true when the landmark set is so tightly clustered (max distance
  // from the 2D centroid below a fixed threshold) that downstream estimation
  // would be numerically unstable.
  static bool IsScreenLandmarkListTooCompact(
      const NormalizedLandmarkList& screen_landmarks) {
    // Incremental (running) mean of the X/Y coordinates.
    float mean_x = 0.f;
    float mean_y = 0.f;
    for (int i = 0; i < screen_landmarks.landmark_size(); ++i) {
      const auto& landmark = screen_landmarks.landmark(i);
      mean_x += (landmark.x() - mean_x) / static_cast<float>(i + 1);
      mean_y += (landmark.y() - mean_y) / static_cast<float>(i + 1);
    }

    // Largest squared distance of any landmark from the centroid.
    float max_sq_dist = 0.f;
    for (const auto& landmark : screen_landmarks.landmark()) {
      const float d_x = landmark.x() - mean_x;
      const float d_y = landmark.y() - mean_y;
      max_sq_dist = std::max(max_sq_dist, d_x * d_x + d_y * d_y);
    }

    static constexpr float kIsScreenLandmarkListTooCompactThreshold = 1e-3f;
    return std::sqrt(max_sq_dist) <= kIsScreenLandmarkListTooCompactThreshold;
  }

  const PerspectiveCamera perspective_camera_;
  const Mesh3d canonical_mesh_;
  const uint32_t canonical_mesh_vertex_size_;
  const uint32_t canonical_mesh_num_vertices_;
  const uint32_t canonical_mesh_vertex_position_offset_;

  std::unique_ptr<ScreenToMetricSpaceConverter> space_converter_;
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Builds a `GeometryPipelineImpl` from a validated environment + metadata:
// validates inputs, derives the canonical mesh layout, extracts the canonical
// landmark positions and the Procrustes basis weights, then wires everything
// into a `ScreenToMetricSpaceConverter`.
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
    const Environment& environment, const GeometryPipelineMetadata& metadata) {
  MP_RETURN_IF_ERROR(ValidateEnvironment(environment))
      << "Invalid environment!";
  MP_RETURN_IF_ERROR(ValidateGeometryPipelineMetadata(metadata))
      << "Invalid geometry pipeline metadata!";

  const auto& canonical_mesh = metadata.canonical_mesh();
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION))
      << "Canonical face mesh must have the `POSITION` vertex component!";
  RET_CHECK(HasVertexComponent(canonical_mesh.vertex_type(),
                               VertexComponent::TEX_COORD))
      << "Canonical face mesh must have the `TEX_COORD` vertex component!";

  // Vertex layout facts derived from the mesh vertex type.
  uint32_t canonical_mesh_vertex_size =
      GetVertexSize(canonical_mesh.vertex_type());
  uint32_t canonical_mesh_num_vertices =
      canonical_mesh.vertex_buffer_size() / canonical_mesh_vertex_size;
  uint32_t canonical_mesh_vertex_position_offset =
      GetVertexComponentOffset(canonical_mesh.vertex_type(),
                               VertexComponent::POSITION)
          .value();

  // Put the Procrustes landmark basis into Eigen matrices for an easier access.
  Eigen::Matrix3Xf canonical_metric_landmarks =
      Eigen::Matrix3Xf::Zero(3, canonical_mesh_num_vertices);
  Eigen::VectorXf landmark_weights =
      Eigen::VectorXf::Zero(canonical_mesh_num_vertices);

  for (int i = 0; i < canonical_mesh_num_vertices; ++i) {
    uint32_t vertex_buffer_offset =
        canonical_mesh_vertex_size * i + canonical_mesh_vertex_position_offset;

    canonical_metric_landmarks(0, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset);
    canonical_metric_landmarks(1, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 1);
    canonical_metric_landmarks(2, i) =
        canonical_mesh.vertex_buffer(vertex_buffer_offset + 2);
  }

  // NOTE(review): `landmark_weights(landmark_id)` is unchecked here — it
  // assumes every `landmark_id` in the basis is < canonical_mesh_num_vertices,
  // presumably guaranteed by `ValidateGeometryPipelineMetadata()`; confirm in
  // `validation_utils.cc`.
  for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {
    uint32_t landmark_id = wlr.landmark_id();
    landmark_weights(landmark_id) = wlr.weight();
  }

  // `DEFAULT` input source falls back to the face landmark pipeline.
  std::unique_ptr<GeometryPipeline> result =
      absl::make_unique<GeometryPipelineImpl>(
          environment.perspective_camera(), canonical_mesh,
          canonical_mesh_vertex_size, canonical_mesh_num_vertices,
          canonical_mesh_vertex_position_offset,
          absl::make_unique<ScreenToMetricSpaceConverter>(
              environment.origin_point_location(),
              metadata.input_source() == InputSource::DEFAULT
                  ? InputSource::FACE_LANDMARK_PIPELINE
                  : metadata.input_source(),
              std::move(canonical_metric_landmarks),
              std::move(landmark_weights),
              CreateFloatPrecisionProcrustesSolver()));

  return result;
}
|
||||
|
||||
} // namespace mediapipe::face_geometry
|
67
mediapipe/modules/face_geometry/libs/geometry_pipeline.h
Normal file
67
mediapipe/modules/face_geometry/libs/geometry_pipeline.h
Normal file
|
@ -0,0 +1,67 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
||||
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
|
||||
|
||||
namespace mediapipe::face_geometry {
|
||||
|
||||
// Encapsulates a stateless estimator of facial geometry in a Metric space based
// on the normalized face landmarks in the Screen space.
//
// Abstract interface; concrete instances are obtained via
// `CreateGeometryPipeline()` declared below.
class GeometryPipeline {
 public:
  virtual ~GeometryPipeline() = default;

  // Estimates geometry data for multiple faces.
  //
  // Returns an error status if any of the passed arguments is invalid.
  //
  // The result includes face geometry data for a subset of the input faces,
  // however geometry data for some faces might be missing. This may happen if
  // it'd be unstable to estimate the facial geometry based on a corresponding
  // face landmark list for any reason (for example, if the landmark list is too
  // compact).
  //
  // Each face landmark list must have the same number of landmarks as was
  // passed upon initialization via the canonical face mesh (as a part of the
  // geometry pipeline metadata).
  //
  // Both `frame_width` and `frame_height` must be positive.
  virtual absl::StatusOr<std::vector<FaceGeometry>> EstimateFaceGeometry(
      const std::vector<NormalizedLandmarkList>& multi_face_landmarks,
      int frame_width, int frame_height) const = 0;
};

// Creates an instance of `GeometryPipeline`.
//
// Both `environment` and `metadata` must be valid (for details, please refer to
// the proto message definition comments and/or `validation_utils.h/cc`).
//
// Canonical face mesh (defined as a part of `metadata`) must have the
// `POSITION` and the `TEX_COORD` vertex components.
absl::StatusOr<std::unique_ptr<GeometryPipeline>> CreateGeometryPipeline(
    const Environment& environment, const GeometryPipelineMetadata& metadata);
|
||||
|
||||
} // namespace mediapipe::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_GEOMETRY_PIPELINE_H_
|
103
mediapipe/modules/face_geometry/libs/mesh_3d_utils.cc
Normal file
103
mediapipe/modules/face_geometry/libs/mesh_3d_utils.cc
Normal file
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::face_geometry {
|
||||
namespace {
|
||||
|
||||
// Tells whether the `VERTEX_PT` layout carries the given component. The PT
// layout consists of a position plus a texture coordinate, so both known
// components are present.
bool HasVertexComponentVertexPT(VertexComponent vertex_component) {
  return vertex_component == VertexComponent::POSITION ||
         vertex_component == VertexComponent::TEX_COORD;
}
|
||||
|
||||
// Returns the number of buffer elements the given component occupies in the
// `VERTEX_PT` layout: XYZ for positions, UV for texture coordinates.
uint32_t GetVertexComponentSizeVertexPT(VertexComponent vertex_component) {
  switch (vertex_component) {
    case VertexComponent::POSITION:
      return 3;  // XYZ.
    case VertexComponent::TEX_COORD:
      return 2;  // UV.
  }
  // Unreachable for valid enum values; prevents falling off the end of a
  // non-void function (undefined behavior) if an out-of-range value is passed.
  return 0;
}
|
||||
|
||||
// Returns the element offset of the given component within one `VERTEX_PT`
// vertex: position first, texture coordinate right after it.
uint32_t GetVertexComponentOffsetVertexPT(VertexComponent vertex_component) {
  switch (vertex_component) {
    case VertexComponent::POSITION:
      return 0;
    case VertexComponent::TEX_COORD:
      return GetVertexComponentSizeVertexPT(VertexComponent::POSITION);
  }
  // Unreachable for valid enum values; prevents falling off the end of a
  // non-void function (undefined behavior) if an out-of-range value is passed.
  return 0;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Returns the vertex stride — the number of buffer elements per vertex — for
// the given vertex layout.
std::size_t GetVertexSize(Mesh3d::VertexType vertex_type) {
  switch (vertex_type) {
    case Mesh3d::VERTEX_PT:
      return GetVertexComponentSizeVertexPT(VertexComponent::POSITION) +
             GetVertexComponentSizeVertexPT(VertexComponent::TEX_COORD);
  }
  // Proto enums may hold values outside the listed enumerators; returning here
  // prevents falling off the end of a non-void function (undefined behavior).
  return 0;
}
|
||||
|
||||
// Returns the number of indices per primitive (e.g. 3 for `TRIANGLE`).
std::size_t GetPrimitiveSize(Mesh3d::PrimitiveType primitive_type) {
  switch (primitive_type) {
    case Mesh3d::TRIANGLE:
      return 3;
  }
  // Proto enums may hold values outside the listed enumerators; returning here
  // prevents falling off the end of a non-void function (undefined behavior).
  return 0;
}
|
||||
|
||||
// Tells whether the given vertex layout contains the given component.
bool HasVertexComponent(Mesh3d::VertexType vertex_type,
                        VertexComponent vertex_component) {
  switch (vertex_type) {
    case Mesh3d::VERTEX_PT:
      return HasVertexComponentVertexPT(vertex_component);
  }
  // Proto enums may hold values outside the listed enumerators; an unknown
  // layout carries no known components. Also prevents falling off the end of
  // a non-void function (undefined behavior).
  return false;
}
|
||||
|
||||
// Computes the element offset of `vertex_component` within one vertex of the
// given layout.
//
// Returns an error status if the layout doesn't have the requested component.
absl::StatusOr<uint32_t> GetVertexComponentOffset(
    Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
  // Consult the layout-aware predicate (rather than the VERTEX_PT-specific
  // helper directly) so the check stays correct if new layouts are added.
  RET_CHECK(HasVertexComponent(vertex_type, vertex_component))
      << "A given vertex type doesn't have the requested component!";

  switch (vertex_type) {
    case Mesh3d::VERTEX_PT:
      return GetVertexComponentOffsetVertexPT(vertex_component);
  }
  // Unreachable for valid enum values; prevents falling off the end of a
  // non-void function (undefined behavior) for out-of-range proto enum values.
  RET_CHECK_FAIL() << "Unsupported vertex type!";
}
|
||||
|
||||
// Computes the number of buffer elements `vertex_component` occupies within
// one vertex of the given layout.
//
// Returns an error status if the layout doesn't have the requested component.
absl::StatusOr<uint32_t> GetVertexComponentSize(
    Mesh3d::VertexType vertex_type, VertexComponent vertex_component) {
  // Consult the layout-aware predicate (rather than the VERTEX_PT-specific
  // helper directly) so the check stays correct if new layouts are added.
  RET_CHECK(HasVertexComponent(vertex_type, vertex_component))
      << "A given vertex type doesn't have the requested component!";

  switch (vertex_type) {
    case Mesh3d::VERTEX_PT:
      return GetVertexComponentSizeVertexPT(vertex_component);
  }
  // Unreachable for valid enum values; prevents falling off the end of a
  // non-void function (undefined behavior) for out-of-range proto enum values.
  RET_CHECK_FAIL() << "Unsupported vertex type!";
}
|
||||
|
||||
} // namespace mediapipe::face_geometry
|
51
mediapipe/modules/face_geometry/libs/mesh_3d_utils.h
Normal file
51
mediapipe/modules/face_geometry/libs/mesh_3d_utils.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
||||
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "mediapipe/framework/port/statusor.h"
|
||||
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"
|
||||
|
||||
namespace mediapipe::face_geometry {
|
||||
|
||||
// Vertex attribute components a mesh vertex layout may contain.
enum class VertexComponent { POSITION, TEX_COORD };

// Returns the vertex stride — the number of buffer elements per vertex — for
// the given vertex layout.
std::size_t GetVertexSize(Mesh3d::VertexType vertex_type);

// Returns the number of indices per primitive (e.g. 3 for `TRIANGLE`).
std::size_t GetPrimitiveSize(Mesh3d::PrimitiveType primitive_type);

// Tells whether the given vertex layout contains the given component.
bool HasVertexComponent(Mesh3d::VertexType vertex_type,
                        VertexComponent vertex_component);

// Computes the vertex component offset.
//
// Returns an error status if a given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentOffset(
    Mesh3d::VertexType vertex_type, VertexComponent vertex_component);

// Computes the vertex component size.
//
// Returns an error status if a given vertex type doesn't have the requested
// component.
absl::StatusOr<uint32_t> GetVertexComponentSize(
    Mesh3d::VertexType vertex_type, VertexComponent vertex_component);
|
||||
|
||||
} // namespace mediapipe::face_geometry
|
||||
|
||||
#endif // MEDIAPIPE_FACE_GEOMETRY_LIBS_MESH_3D_UTILS_H_
|
266
mediapipe/modules/face_geometry/libs/procrustes_solver.cc
Normal file
266
mediapipe/modules/face_geometry/libs/procrustes_solver.cc
Normal file
|
@ -0,0 +1,266 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/modules/face_geometry/libs/procrustes_solver.h"

#include <cmath>
#include <memory>

#include "Eigen/Dense"
#include "absl/memory/memory.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/framework/port/statusor.h"

namespace mediapipe {
namespace face_geometry {
namespace {

// Single-precision (float) implementation of the WEOP solver.
class FloatPrecisionProcrustesSolver : public ProcrustesSolver {
 public:
  FloatPrecisionProcrustesSolver() = default;

  absl::Status SolveWeightedOrthogonalProblem(
      const Eigen::Matrix3Xf& source_points,  //
      const Eigen::Matrix3Xf& target_points,  //
      const Eigen::VectorXf& point_weights,
      Eigen::Matrix4f& transform_mat) const override {
    // Validate inputs.
    MP_RETURN_IF_ERROR(ValidateInputPoints(source_points, target_points))
        << "Failed to validate weighted orthogonal problem input points!";
    MP_RETURN_IF_ERROR(
        ValidatePointWeights(source_points.cols(), point_weights))
        << "Failed to validate weighted orthogonal problem point weights!";

    // Extract square root from the point weights.
    Eigen::VectorXf sqrt_weights = ExtractSquareRoot(point_weights);

    // Try to solve the WEOP problem.
    MP_RETURN_IF_ERROR(InternalSolveWeightedOrthogonalProblem(
        source_points, target_points, sqrt_weights, transform_mat))
        << "Failed to solve the WEOP problem!";

    return absl::OkStatus();
  }

 private:
  static constexpr float kAbsoluteErrorEps = 1e-9f;

  static absl::Status ValidateInputPoints(
      const Eigen::Matrix3Xf& source_points,
      const Eigen::Matrix3Xf& target_points) {
    RET_CHECK_GT(source_points.cols(), 0)
        << "The number of source points must be positive!";

    RET_CHECK_EQ(source_points.cols(), target_points.cols())
        << "The number of source and target points must be equal!";

    return absl::OkStatus();
  }

  static absl::Status ValidatePointWeights(
      int num_points, const Eigen::VectorXf& point_weights) {
    RET_CHECK_GT(point_weights.size(), 0)
        << "The number of point weights must be positive!";

    RET_CHECK_EQ(point_weights.size(), num_points)
        << "The number of points and point weights must be equal!";

    float total_weight = 0.f;
    for (int i = 0; i < num_points; ++i) {
      RET_CHECK_GE(point_weights(i), 0.f)
          << "Each point weight must be non-negative!";

      total_weight += point_weights(i);
    }

    RET_CHECK_GT(total_weight, kAbsoluteErrorEps)
        << "The total point weight is too small!";

    return absl::OkStatus();
  }

  static Eigen::VectorXf ExtractSquareRoot(
      const Eigen::VectorXf& point_weights) {
    Eigen::VectorXf sqrt_weights(point_weights);
    for (int i = 0; i < sqrt_weights.size(); ++i) {
      sqrt_weights(i) = std::sqrt(sqrt_weights(i));
    }

    return sqrt_weights;
  }

  // Combines a 3x3 rotation-and-scale matrix and a 3x1 translation vector into
  // a single 4x4 transformation matrix.
  static Eigen::Matrix4f CombineTransformMatrix(const Eigen::Matrix3f& r_and_s,
                                                const Eigen::Vector3f& t) {
    Eigen::Matrix4f result = Eigen::Matrix4f::Identity();
    result.leftCols(3).topRows(3) = r_and_s;
    result.col(3).topRows(3) = t;

    return result;
  }

  // The weighted problem is thoroughly addressed in Section 2.4 of:
  // D. Akca, Generalized Procrustes analysis and its applications
  // in photogrammetry, 2003, https://doi.org/10.3929/ethz-a-004656648
  //
  // Notable differences in the code presented here are:
  //
  // * In the paper, the weights matrix W_p is Cholesky-decomposed as Q^T Q.
  //   Our W_p is diagonal (equal to diag(sqrt_weights^2)),
  //   so we can just set Q = diag(sqrt_weights) instead.
  //
  // * In the paper, the problem is presented as
  //   (for W_k = I and W_p = tranposed(Q) Q):
  //     || Q (c A T + j tranposed(t) - B) || -> min.
  //
  //   We reformulate it as an equivalent minimization of the transpose's
  //   norm:
  //     || (c tranposed(T) tranposed(A) - tranposed(B)) tranposed(Q) || -> min,
  //   where tranposed(A) and tranposed(B) are the source and the target point
  //   clouds, respectively, c tranposed(T) is the rotation+scaling R sought
  //   for, and Q is diag(sqrt_weights).
  //
  //   Most of the derivations are therefore transposed.
  //
  // Note: the output `transform_mat` argument is used instead of `StatusOr<>`
  // return type in order to avoid Eigen memory alignment issues. Details:
  // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
  static absl::Status InternalSolveWeightedOrthogonalProblem(
      const Eigen::Matrix3Xf& sources, const Eigen::Matrix3Xf& targets,
      const Eigen::VectorXf& sqrt_weights, Eigen::Matrix4f& transform_mat) {
    // tranposed(A_w).
    Eigen::Matrix3Xf weighted_sources =
        sources.array().rowwise() * sqrt_weights.array().transpose();
    // tranposed(B_w).
    Eigen::Matrix3Xf weighted_targets =
        targets.array().rowwise() * sqrt_weights.array().transpose();

    // w = tranposed(j_w) j_w.
    float total_weight = sqrt_weights.cwiseProduct(sqrt_weights).sum();

    // Let C = (j_w tranposed(j_w)) / (tranposed(j_w) j_w).
    // Note that C = tranposed(C), hence (I - C) = tranposed(I - C).
    //
    // tranposed(A_w) C = tranposed(A_w) j_w tranposed(j_w) / w =
    // (tranposed(A_w) j_w) tranposed(j_w) / w = c_w tranposed(j_w),
    //
    // where c_w = tranposed(A_w) j_w / w is a k x 1 vector calculated here:
    Eigen::Matrix3Xf twice_weighted_sources =
        weighted_sources.array().rowwise() * sqrt_weights.array().transpose();
    Eigen::Vector3f source_center_of_mass =
        twice_weighted_sources.rowwise().sum() / total_weight;
    // tranposed((I - C) A_w) = tranposed(A_w) (I - C) =
    // tranposed(A_w) - tranposed(A_w) C = tranposed(A_w) - c_w tranposed(j_w).
    Eigen::Matrix3Xf centered_weighted_sources =
        weighted_sources - source_center_of_mass * sqrt_weights.transpose();

    Eigen::Matrix3f rotation;
    MP_RETURN_IF_ERROR(ComputeOptimalRotation(
        weighted_targets * centered_weighted_sources.transpose(), rotation))
        << "Failed to compute the optimal rotation!";
    ASSIGN_OR_RETURN(
        float scale,
        ComputeOptimalScale(centered_weighted_sources, weighted_sources,
                            weighted_targets, rotation),
        _ << "Failed to compute the optimal scale!");

    // R = c tranposed(T).
    Eigen::Matrix3f rotation_and_scale = scale * rotation;

    // Compute optimal translation for the weighted problem.

    // tranposed(B_w - c A_w T) = tranposed(B_w) - R tranposed(A_w) in (54).
    const auto pointwise_diffs =
        weighted_targets - rotation_and_scale * weighted_sources;
    // Multiplication by j_w is a respectively weighted column sum.
    // (54) from the paper.
    const auto weighted_pointwise_diffs =
        pointwise_diffs.array().rowwise() * sqrt_weights.array().transpose();
    Eigen::Vector3f translation =
        weighted_pointwise_diffs.rowwise().sum() / total_weight;

    transform_mat = CombineTransformMatrix(rotation_and_scale, translation);

    return absl::OkStatus();
  }

  // `design_matrix` is a transposed LHS of (51) in the paper.
  //
  // Note: the output `rotation` argument is used instead of `StatusOr<>`
  // return type in order to avoid Eigen memory alignment issues. Details:
  // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
  static absl::Status ComputeOptimalRotation(
      const Eigen::Matrix3f& design_matrix, Eigen::Matrix3f& rotation) {
    RET_CHECK_GT(design_matrix.norm(), kAbsoluteErrorEps)
        << "Design matrix norm is too small!";

    Eigen::JacobiSVD<Eigen::Matrix3f> svd(
        design_matrix, Eigen::ComputeFullU | Eigen::ComputeFullV);

    Eigen::Matrix3f postrotation = svd.matrixU();
    Eigen::Matrix3f prerotation = svd.matrixV().transpose();

    // Disallow reflection by ensuring that det(`rotation`) = +1 (and not -1),
    // see "4.6 Constrained orthogonal Procrustes problems"
    // in the Gower & Dijksterhuis's book "Procrustes Analysis".
    // We flip the sign of the least singular value along with a column in W.
    //
    // Note that now the sum of singular values doesn't work for scale
    // estimation due to this sign flip.
    if (postrotation.determinant() * prerotation.determinant() <
        static_cast<float>(0)) {
      postrotation.col(2) *= static_cast<float>(-1);
    }

    // Transposed (52) from the paper.
    rotation = postrotation * prerotation;
    return absl::OkStatus();
  }

  static absl::StatusOr<float> ComputeOptimalScale(
      const Eigen::Matrix3Xf& centered_weighted_sources,
      const Eigen::Matrix3Xf& weighted_sources,
      const Eigen::Matrix3Xf& weighted_targets,
      const Eigen::Matrix3f& rotation) {
    // tranposed(T) tranposed(A_w) (I - C).
    const auto rotated_centered_weighted_sources =
        rotation * centered_weighted_sources;
    // Use the identity trace(A B) = sum(A * B^T)
    // to avoid building large intermediate matrices (* is Hadamard product).
    // (53) from the paper.
    float numerator =
        rotated_centered_weighted_sources.cwiseProduct(weighted_targets).sum();
    float denominator =
        centered_weighted_sources.cwiseProduct(weighted_sources).sum();

    RET_CHECK_GT(denominator, kAbsoluteErrorEps)
        << "Scale expression denominator is too small!";
    RET_CHECK_GT(numerator / denominator, kAbsoluteErrorEps)
        << "Scale is too small!";

    return numerator / denominator;
  }
};

}  // namespace

std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver() {
  return absl::make_unique<FloatPrecisionProcrustesSolver>();
}

}  // namespace face_geometry
}  // namespace mediapipe
|
70
mediapipe/modules/face_geometry/libs/procrustes_solver.h
Normal file
70
mediapipe/modules/face_geometry/libs/procrustes_solver.h
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_

#include <memory>

#include "Eigen/Dense"
#include "mediapipe/framework/port/status.h"

namespace mediapipe::face_geometry {

// Encapsulates a stateless solver for the Weighted Extended Orthogonal
// Procrustes (WEOP) Problem, as defined in Section 2.4 of
// https://doi.org/10.3929/ethz-a-004656648.
//
// Given the source and the target point clouds, the algorithm estimates
// a 4x4 transformation matrix featuring the following semantic components:
//
//   * Uniform scale
//   * Rotation
//   * Translation
//
// The matrix maps the source point cloud into the target point cloud minimizing
// the Mean Squared Error.
class ProcrustesSolver {
 public:
  virtual ~ProcrustesSolver() = default;

  // Solves the Weighted Extended Orthogonal Procrustes (WEOP) Problem.
  //
  // All `source_points`, `target_points` and `point_weights` must define the
  // same number of points. Elements of `point_weights` must be non-negative.
  //
  // A too small diameter of either of the point clouds will likely lead to
  // numerical instabilities and failure to estimate the transformation.
  //
  // A too small point cloud total weight will likely lead to numerical
  // instabilities and failure to estimate the transformation too.
  //
  // Small point coordinate deviation for either of the point cloud will likely
  // result in a failure as it will make the solution very unstable if possible.
  //
  // Note: the output `transform_mat` argument is used instead of `StatusOr<>`
  // return type in order to avoid Eigen memory alignment issues. Details:
  // https://eigen.tuxfamily.org/dox/group__TopicStructHavingEigenMembers.html
  virtual absl::Status SolveWeightedOrthogonalProblem(
      const Eigen::Matrix3Xf& source_points,  //
      const Eigen::Matrix3Xf& target_points,  //
      const Eigen::VectorXf& point_weights,   //
      Eigen::Matrix4f& transform_mat) const = 0;
};

// Creates the single-precision (float) implementation of `ProcrustesSolver`.
std::unique_ptr<ProcrustesSolver> CreateFloatPrecisionProcrustesSolver();

}  // namespace mediapipe::face_geometry

#endif  // MEDIAPIPE_FACE_GEOMETRY_LIBS_PROCRUSTES_SOLVER_H_
|
126
mediapipe/modules/face_geometry/libs/validation_utils.cc
Normal file
126
mediapipe/modules/face_geometry/libs/validation_utils.cc
Normal file
|
@ -0,0 +1,126 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mediapipe/modules/face_geometry/libs/validation_utils.h"

#include <cstdint>
#include <cstdlib>

#include "mediapipe/framework/formats/matrix_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/modules/face_geometry/libs/mesh_3d_utils.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"

namespace mediapipe::face_geometry {

// Validates the perspective camera clipping planes and vertical FOV with a
// 10^{-9} numerical margin.
absl::Status ValidatePerspectiveCamera(
    const PerspectiveCamera& perspective_camera) {
  static constexpr float kAbsoluteErrorEps = 1e-9f;

  RET_CHECK_GT(perspective_camera.near(), kAbsoluteErrorEps)
      << "Near Z must be greater than 0 with a margin of 10^{-9}!";

  RET_CHECK_GT(perspective_camera.far(),
               perspective_camera.near() + kAbsoluteErrorEps)
      << "Far Z must be greater than Near Z with a margin of 10^{-9}!";

  RET_CHECK_GT(perspective_camera.vertical_fov_degrees(), kAbsoluteErrorEps)
      << "Vertical FOV must be positive with a margin of 10^{-9}!";

  RET_CHECK_LT(perspective_camera.vertical_fov_degrees() + kAbsoluteErrorEps,
               180.f)
      << "Vertical FOV must be less than 180 degrees with a margin of 10^{-9}";

  return absl::OkStatus();
}

// An environment is valid iff its perspective camera is valid.
absl::Status ValidateEnvironment(const Environment& environment) {
  MP_RETURN_IF_ERROR(
      ValidatePerspectiveCamera(environment.perspective_camera()))
      << "Invalid perspective camera!";

  return absl::OkStatus();
}

// Checks buffer-size divisibility and that every index references an existing
// vertex.
absl::Status ValidateMesh3d(const Mesh3d& mesh_3d) {
  const std::size_t vertex_size = GetVertexSize(mesh_3d.vertex_type());
  // NOTE: this value is the primitive *size*; it was previously misnamed
  // `primitive_type`, which obscured the modulo check below.
  const std::size_t primitive_size = GetPrimitiveSize(mesh_3d.primitive_type());

  RET_CHECK_EQ(mesh_3d.vertex_buffer_size() % vertex_size, 0)
      << "Vertex buffer size must a multiple of the vertex size!";

  RET_CHECK_EQ(mesh_3d.index_buffer_size() % primitive_size, 0)
      << "Index buffer size must a multiple of the primitive size!";

  const int num_vertices = mesh_3d.vertex_buffer_size() / vertex_size;
  for (uint32_t idx : mesh_3d.index_buffer()) {
    RET_CHECK_LT(idx, num_vertices)
        << "All mesh indices must refer to an existing vertex!";
  }

  return absl::OkStatus();
}

// Validates the face mesh and that the pose transformation matrix is a genuine
// 4x4 matrix.
absl::Status ValidateFaceGeometry(const FaceGeometry& face_geometry) {
  MP_RETURN_IF_ERROR(ValidateMesh3d(face_geometry.mesh())) << "Invalid mesh!";

  static constexpr char kInvalid4x4MatrixMessage[] =
      "Pose transformation matrix must be a 4x4 matrix!";

  const MatrixData& pose_transform_matrix =
      face_geometry.pose_transform_matrix();
  RET_CHECK_EQ(pose_transform_matrix.rows(), 4) << kInvalid4x4MatrixMessage;
  // BUGFIX: this check previously duplicated the `rows()` check, so the column
  // count was never validated and a non-square matrix could slip through.
  RET_CHECK_EQ(pose_transform_matrix.cols(), 4) << kInvalid4x4MatrixMessage;
  RET_CHECK_EQ(pose_transform_matrix.packed_data_size(), 16)
      << kInvalid4x4MatrixMessage;

  return absl::OkStatus();
}

// Validates the canonical mesh and the Procrustes landmark basis: the basis
// must be non-empty, reference existing canonical vertices, and carry
// non-negative weights.
absl::Status ValidateGeometryPipelineMetadata(
    const GeometryPipelineMetadata& metadata) {
  MP_RETURN_IF_ERROR(ValidateMesh3d(metadata.canonical_mesh()))
      << "Invalid canonical mesh!";

  RET_CHECK_GT(metadata.procrustes_landmark_basis_size(), 0)
      << "Procrustes landmark basis must be non-empty!";

  const int num_vertices =
      metadata.canonical_mesh().vertex_buffer_size() /
      GetVertexSize(metadata.canonical_mesh().vertex_type());
  for (const WeightedLandmarkRef& wlr : metadata.procrustes_landmark_basis()) {
    RET_CHECK_LT(wlr.landmark_id(), num_vertices)
        << "All Procrustes basis indices must refer to an existing canonical "
           "mesh vertex!";

    RET_CHECK_GE(wlr.weight(), 0.f)
        << "All Procrustes basis landmarks must have a non-negative weight!";
  }

  return absl::OkStatus();
}

// Both frame dimensions must be strictly positive.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height) {
  RET_CHECK_GT(frame_width, 0) << "Frame width must be positive!";
  RET_CHECK_GT(frame_height, 0) << "Frame height must be positive!";

  return absl::OkStatus();
}

}  // namespace mediapipe::face_geometry
|
70
mediapipe/modules/face_geometry/libs/validation_utils.h
Normal file
70
mediapipe/modules/face_geometry/libs/validation_utils.h
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
#define MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_

#include "mediapipe/framework/port/status.h"
#include "mediapipe/modules/face_geometry/protos/environment.pb.h"
#include "mediapipe/modules/face_geometry/protos/face_geometry.pb.h"
#include "mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata.pb.h"
#include "mediapipe/modules/face_geometry/protos/mesh_3d.pb.h"

namespace mediapipe::face_geometry {

// Validates `perspective_camera`.
//
// Near Z must be greater than 0 with a margin of `1e-9`.
// Far Z must be greater than Near Z with a margin of `1e-9`.
// Vertical FOV must be in range (0, 180) with a margin of `1e-9` on the range
// edges.
absl::Status ValidatePerspectiveCamera(
    const PerspectiveCamera& perspective_camera);

// Validates `environment`.
//
// Environment's perspective camera must be valid.
absl::Status ValidateEnvironment(const Environment& environment);

// Validates `mesh_3d`.
//
// Mesh vertex buffer size must a multiple of the vertex size.
// Mesh index buffer size must a multiple of the primitive size.
// All mesh indices must reference an existing mesh vertex.
absl::Status ValidateMesh3d(const Mesh3d& mesh_3d);

// Validates `face_geometry`.
//
// Face mesh must be valid.
// Face pose transformation matrix must be a 4x4 matrix.
absl::Status ValidateFaceGeometry(const FaceGeometry& face_geometry);

// Validates `metadata`.
//
// Canonical face mesh must be valid.
// Procrustes landmark basis must be non-empty.
// All Procrustes basis indices must reference an existing canonical mesh
// vertex.
// All Procrustes basis landmarks must have a non-negative weight.
absl::Status ValidateGeometryPipelineMetadata(
    const GeometryPipelineMetadata& metadata);

// Validates frame dimensions.
//
// Both frame width and frame height must be positive.
absl::Status ValidateFrameDimensions(int frame_width, int frame_height);

}  // namespace mediapipe::face_geometry

#endif  // MEDIAPIPE_FACE_GEOMETRY_LIBS_VALIDATION_UTILS_H_
|
46
mediapipe/modules/face_geometry/protos/BUILD
Normal file
46
mediapipe/modules/face_geometry/protos/BUILD
Normal file
|
@ -0,0 +1,46 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")

licenses(["notice"])

package(default_visibility = ["//visibility:public"])

mediapipe_proto_library(
    name = "environment_proto",
    srcs = ["environment.proto"],
)

mediapipe_proto_library(
    name = "face_geometry_proto",
    srcs = ["face_geometry.proto"],
    deps = [
        ":mesh_3d_proto",
        "//mediapipe/framework/formats:matrix_data_proto",
    ],
)

mediapipe_proto_library(
    name = "geometry_pipeline_metadata_proto",
    srcs = ["geometry_pipeline_metadata.proto"],
    deps = [
        ":mesh_3d_proto",
    ],
)

mediapipe_proto_library(
    name = "mesh_3d_proto",
    srcs = ["mesh_3d.proto"],
)
|
84
mediapipe/modules/face_geometry/protos/environment.proto
Normal file
84
mediapipe/modules/face_geometry/protos/environment.proto
Normal file
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.face_geometry;

option java_package = "com.google.mediapipe.modules.facegeometry";
option java_outer_classname = "EnvironmentProto";

// Defines the (0, 0) origin point location of the environment.
//
// The variation in the origin point location can be traced back to the memory
// layout of the camera video frame buffers.
//
// Usually, the memory layout for most CPU (and also some GPU) camera video
// frame buffers results in having the (0, 0) origin point located in the
// Top Left corner.
//
// On the contrary, the memory layout for most GPU camera video frame buffers
// results in having the (0, 0) origin point located in the Bottom Left corner.
//
// Let's consider the following example:
//
// (A) ---------------+
//           ___      |
//          | (1)|    |
//          |/   \|   |
//          |---|===|-|
//          |---|   | |
//          |/   \  | |
//          | |  |  | |
//          | |(2)|=| |
//          | |  |  | |
//          | |__|  |_|
//          | |@| |@| |
//          |___________|_|_
//                    |
// (B) ---------------+
//
// On this example, (1) and (2) have the same X coordinate regardless of the
// origin point location. However, having the origin point located at (A)
// (Top Left corner) results in (1) having a smaller Y coordinate if compared to
// (2). Similarly, having the origin point located at (B) (Bottom Left corner)
// results in (1) having a greater Y coordinate if compared to (2).
//
// Providing the correct origin point location for your environment and making
// sure all the input landmarks are in-sync with this location is crucial
// for receiving the correct output face geometry and visual renders.
enum OriginPointLocation {
  BOTTOM_LEFT_CORNER = 1;
  TOP_LEFT_CORNER = 2;
}

// The perspective camera is defined through its vertical FOV angle and the
// Z-clipping planes. The aspect ratio is a runtime variable for the face
// geometry module and should be provided alongside the face landmarks in order
// to estimate the face geometry on a given frame.
//
// More info on Perspective Cameras:
// http://www.songho.ca/opengl/gl_projectionmatrix.html#perspective
message PerspectiveCamera {
  // `0 < vertical_fov_degrees < 180`.
  optional float vertical_fov_degrees = 1;
  // `0 < near < far`.
  optional float near = 2;
  optional float far = 3;
}

message Environment {
  optional OriginPointLocation origin_point_location = 1;
  optional PerspectiveCamera perspective_camera = 2;
}
|
60
mediapipe/modules/face_geometry/protos/face_geometry.proto
Normal file
60
mediapipe/modules/face_geometry/protos/face_geometry.proto
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe.face_geometry;

import "mediapipe/framework/formats/matrix_data.proto";
import "mediapipe/modules/face_geometry/protos/mesh_3d.proto";

option java_package = "com.google.mediapipe.modules.facegeometry";
option java_outer_classname = "FaceGeometryProto";

// Defines the face geometry pipeline estimation result format.
message FaceGeometry {
  // Defines a mesh surface for a face. The face mesh vertex IDs are the same as
  // the face landmark IDs.
  //
  // XYZ coordinates exist in the right-handed Metric 3D space configured by an
  // environment. UV coodinates are taken from the canonical face mesh model.
  //
  // XY coordinates are guaranteed to match the screen positions of
  // the input face landmarks after (1) being multiplied by the face pose
  // transformation matrix and then (2) being projected with a perspective
  // camera matrix of the same environment.
  //
  // NOTE: the triangular topology of the face mesh is only useful when derived
  // from the 468 face landmarks, not from the 6 face detection landmarks
  // (keypoints). The former don't cover the entire face and this mesh is
  // defined here only to comply with the API. It should be considered as
  // a placeholder and/or for debugging purposes.
  //
  // Use the face geometry derived from the face detection landmarks
  // (keypoints) for the face pose transformation matrix, not the mesh.
  optional Mesh3d mesh = 1;

  // Defines a face pose transformation matrix, which provides mapping from
  // the static canonical face model to the runtime face. Tries to distinguish
  // a head pose change from a facial expression change and to only reflect the
  // former.
  //
  // Is a 4x4 matrix and contains only the following components:
  //   * Uniform scale
  //   * Rotation
  //   * Translation
  //
  // The last row is guaranteed to be `[0 0 0 1]`.
  optional MatrixData pose_transform_matrix = 2;
}
|
|
@ -0,0 +1,63 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.face_geometry;
|
||||
|
||||
import "mediapipe/modules/face_geometry/protos/mesh_3d.proto";
|
||||
|
||||
option java_package = "com.google.mediapipe.modules.facegeometry";
|
||||
option java_outer_classname = "GeometryPipelineMetadataProto";
|
||||
|
||||
enum InputSource {
|
||||
DEFAULT = 0; // FACE_LANDMARK_PIPELINE
|
||||
FACE_LANDMARK_PIPELINE = 1;
|
||||
FACE_DETECTION_PIPELINE = 2;
|
||||
}
|
||||
|
||||
message WeightedLandmarkRef {
|
||||
// Defines the landmark ID. References an existing face landmark ID.
|
||||
optional uint32 landmark_id = 1;
|
||||
// Defines the landmark weight. The larger the weight the more influence this
|
||||
// landmark has in the basis.
|
||||
//
|
||||
// Is positive.
|
||||
optional float weight = 2;
|
||||
}
|
||||
|
||||
// Next field ID: 4
|
||||
message GeometryPipelineMetadata {
|
||||
// Defines the source of the input landmarks to let the underlying geometry
|
||||
// pipeline to adjust in order to produce the best results.
|
||||
//
|
||||
// Face landmark pipeline is expected to produce 3D landmarks with relative Z
|
||||
// coordinate, which is scaled as the X coordinate assuming the weak
|
||||
// perspective projection camera model.
|
||||
//
|
||||
// Face landmark pipeline is expected to produce 2D landmarks with Z
|
||||
// coordinate being equal to 0.
|
||||
optional InputSource input_source = 3;
|
||||
// Defines a mesh surface for a canonical face. The canonical face mesh vertex
|
||||
// IDs are the same as the face landmark IDs.
|
||||
//
|
||||
// XYZ coordinates are defined in centimeter units.
|
||||
optional Mesh3d canonical_mesh = 1;
|
||||
// Defines a weighted landmark basis for running the Procrustes solver
|
||||
// algorithm inside the geometry pipeline.
|
||||
//
|
||||
// A good basis sets face landmark weights in way to distinguish a head pose
|
||||
// change from a facial expression change and to only respond to the former.
|
||||
repeated WeightedLandmarkRef procrustes_landmark_basis = 2;
|
||||
}
|
41
mediapipe/modules/face_geometry/protos/mesh_3d.proto
Normal file
41
mediapipe/modules/face_geometry/protos/mesh_3d.proto
Normal file
|
@ -0,0 +1,41 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe.face_geometry;
|
||||
|
||||
option java_package = "com.google.mediapipe.modules.facegeometry";
|
||||
option java_outer_classname = "Mesh3dProto";
|
||||
|
||||
message Mesh3d {
|
||||
enum VertexType {
|
||||
// Is defined by 5 coordinates: Position (XYZ) + Texture coordinate (UV).
|
||||
VERTEX_PT = 0;
|
||||
}
|
||||
|
||||
enum PrimitiveType {
|
||||
// Is defined by 3 indices: triangle vertex IDs.
|
||||
TRIANGLE = 0;
|
||||
}
|
||||
|
||||
optional VertexType vertex_type = 1;
|
||||
optional PrimitiveType primitive_type = 2;
|
||||
// Vertex buffer size is a multiple of the vertex size (e.g., 5 for
|
||||
// VERTEX_PT).
|
||||
repeated float vertex_buffer = 3;
|
||||
// Index buffer size is a multiple of the primitive size (e.g., 3 for
|
||||
// TRIANGLE).
|
||||
repeated uint32 index_buffer = 4;
|
||||
}
|
190
mediapipe/modules/face_landmark/BUILD
Normal file
190
mediapipe/modules/face_landmark/BUILD
Normal file
|
@ -0,0 +1,190 @@
|
|||
# Copyright 2019 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_cpu",
|
||||
graph = "face_landmark_cpu.pbtxt",
|
||||
register_as = "FaceLandmarkCpu",
|
||||
deps = [
|
||||
":face_landmarks_model_loader",
|
||||
":tensors_to_face_landmarks",
|
||||
":tensors_to_face_landmarks_with_attention",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/framework/tool:switch_container",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_gpu",
|
||||
graph = "face_landmark_gpu.pbtxt",
|
||||
register_as = "FaceLandmarkGpu",
|
||||
deps = [
|
||||
":face_landmarks_model_loader",
|
||||
":tensors_to_face_landmarks",
|
||||
":tensors_to_face_landmarks_with_attention",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/framework/tool:switch_container",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_front_cpu",
|
||||
graph = "face_landmark_front_cpu.pbtxt",
|
||||
register_as = "FaceLandmarkFrontCpu",
|
||||
deps = [
|
||||
":face_detection_front_detection_to_roi",
|
||||
":face_landmark_cpu",
|
||||
":face_landmark_landmarks_to_roi",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:clip_vector_size_calculator",
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:association_norm_rect_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_front_gpu",
|
||||
graph = "face_landmark_front_gpu.pbtxt",
|
||||
register_as = "FaceLandmarkFrontGpu",
|
||||
deps = [
|
||||
":face_detection_front_detection_to_roi",
|
||||
":face_landmark_gpu",
|
||||
":face_landmark_landmarks_to_roi",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:clip_vector_size_calculator",
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:association_norm_rect_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_front_cpu_image",
|
||||
graph = "face_landmark_front_cpu_image.pbtxt",
|
||||
register_as = "FaceLandmarkFrontCpuImage",
|
||||
deps = [
|
||||
":face_landmark_front_cpu",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/util:from_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_front_gpu_image",
|
||||
graph = "face_landmark_front_gpu_image.pbtxt",
|
||||
register_as = "FaceLandmarkFrontGpuImage",
|
||||
deps = [
|
||||
":face_landmark_front_gpu",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/util:from_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
exports_files(
|
||||
srcs = [
|
||||
"face_landmark.tflite",
|
||||
"face_landmark_with_attention.tflite",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_front_detection_to_roi",
|
||||
graph = "face_detection_front_detection_to_roi.pbtxt",
|
||||
register_as = "FaceDetectionFrontDetectionToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmark_landmarks_to_roi",
|
||||
graph = "face_landmark_landmarks_to_roi.pbtxt",
|
||||
register_as = "FaceLandmarkLandmarksToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmarks_model_loader",
|
||||
graph = "face_landmarks_model_loader.pbtxt",
|
||||
register_as = "FaceLandmarksModelLoader",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_model_calculator",
|
||||
"//mediapipe/calculators/util:local_file_contents_calculator",
|
||||
"//mediapipe/framework/tool:switch_container",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "tensors_to_face_landmarks",
|
||||
graph = "tensors_to_face_landmarks.pbtxt",
|
||||
register_as = "TensorsToFaceLandmarks",
|
||||
deps = [
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "tensors_to_face_landmarks_with_attention",
|
||||
graph = "tensors_to_face_landmarks_with_attention.pbtxt",
|
||||
register_as = "TensorsToFaceLandmarksWithAttention",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_refinement_calculator",
|
||||
],
|
||||
)
|
9
mediapipe/modules/face_landmark/README.md
Normal file
9
mediapipe/modules/face_landmark/README.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
# face_landmark
|
||||
|
||||
Subgraphs|Details
|
||||
:--- | :---
|
||||
[`FaceLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt)| Detects landmarks on a single face. (CPU input, and inference is executed on CPU.)
|
||||
[`FaceLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt)| Detects landmarks on a single face. (GPU input, and inference is executed on GPU)
|
||||
[`FaceLandmarkFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt)| Detects and tracks landmarks on multiple faces. (CPU input, and inference is executed on CPU)
|
||||
[`FaceLandmarkFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)| Detects and tracks landmarks on multiple faces. (GPU input, and inference is executed on GPU.)
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
# MediaPipe graph to calculate face region of interest (ROI) from the very
|
||||
# first face detection in the vector of detections provided by
|
||||
# "FaceDetectionShortRangeCpu" or "FaceDetectionShortRangeGpu"
|
||||
#
|
||||
# NOTE: this graph is subject to change and should not be used directly.
|
||||
|
||||
type: "FaceDetectionFrontDetectionToRoi"
|
||||
|
||||
# Face detection. (Detection)
|
||||
input_stream: "DETECTION:detection"
|
||||
# Frame size (width and height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
# ROI according to the first detection of input detections. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts results of face detection into a rectangle (normalized by image size)
|
||||
# that encloses the face and is rotated such that the line connecting left eye
|
||||
# and right eye is aligned with the X-axis of the rectangle.
|
||||
node {
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:initial_roi"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
rotation_vector_start_keypoint_index: 0 # Left eye.
|
||||
rotation_vector_end_keypoint_index: 1 # Right eye.
|
||||
rotation_vector_target_angle_degrees: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Expands and shifts the rectangle that contains the face so that it's likely
|
||||
# to cover the entire face.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:initial_roi"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 1.5
|
||||
scale_y: 1.5
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
BIN
mediapipe/modules/face_landmark/face_landmark.tflite
Executable file
BIN
mediapipe/modules/face_landmark/face_landmark.tflite
Executable file
Binary file not shown.
184
mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt
Normal file
184
mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt
Normal file
|
@ -0,0 +1,184 @@
|
|||
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
|
||||
# executed on CPU.)
|
||||
#
|
||||
# It is required that "face_landmark.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark.tflite"
|
||||
# path during execution if `with_attention` is not set or set to `false`.
|
||||
#
|
||||
# It is required that "face_landmark_with_attention.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
|
||||
# path during execution if `with_attention` is set to `true`.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceLandmarkCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_stream: "ROI:face_roi"
|
||||
# input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
# output_stream: "LANDMARKS:face_landmarks"
|
||||
# }
|
||||
|
||||
type: "FaceLandmarkCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
# ROI (region of interest) within the given image where a face is located.
|
||||
# (NormalizedRect)
|
||||
input_stream: "ROI:roi"
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
|
||||
#
|
||||
# Number of landmarks depends on the WITH_ATTENTION flag. If it's `true` - then
|
||||
# there will be 478 landmarks with refined lips, eyes and irises (10 extra
|
||||
# landmarks are for irises), otherwise 468 non-refined landmarks are returned.
|
||||
#
|
||||
# NOTE: if a face is not present within the given ROI, for this particular
|
||||
# timestamp there will not be an output packet in the LANDMARKS stream. However,
|
||||
# the MediaPipe framework will internally inform the downstream calculators of
|
||||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
|
||||
# Transforms the input image into a 192x192 tensor.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:image"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 192
|
||||
output_tensor_height: 192
|
||||
output_tensor_float_range {
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Loads the face landmarks TF Lite model.
|
||||
node {
|
||||
calculator: "FaceLandmarksModelLoader"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_side_packet: "MODEL:model"
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a TensorFlow Lite op resolver that
|
||||
# supports custom ops needed by the model used in this graph.
|
||||
node {
|
||||
calculator: "TfLiteCustomOpResolverCalculator"
|
||||
output_side_packet: "op_resolver"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
input_side_packet: "MODEL:model"
|
||||
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
|
||||
output_stream: "TENSORS:output_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
delegate { xnnpack {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Splits a vector of tensors into landmark tensors and face flag tensor.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_side_packet: "ENABLE:with_attention"
|
||||
input_stream: "output_tensors"
|
||||
output_stream: "landmark_tensors"
|
||||
output_stream: "face_flag_tensor"
|
||||
options: {
|
||||
[mediapipe.SwitchContainerOptions.ext] {
|
||||
contained_node: {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 6 }
|
||||
ranges: { begin: 6 end: 7 }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the face-flag tensor into a float that represents the confidence
|
||||
# score of face presence.
|
||||
node {
|
||||
calculator: "TensorsToFloatsCalculator"
|
||||
input_stream: "TENSORS:face_flag_tensor"
|
||||
output_stream: "FLOAT:face_presence_score"
|
||||
options {
|
||||
[mediapipe.TensorsToFloatsCalculatorOptions.ext] {
|
||||
activation: SIGMOID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Applies a threshold to the confidence score to determine whether a face is
|
||||
# present.
|
||||
node {
|
||||
calculator: "ThresholdingCalculator"
|
||||
input_stream: "FLOAT:face_presence_score"
|
||||
output_stream: "FLAG:face_presence"
|
||||
options: {
|
||||
[mediapipe.ThresholdingCalculatorOptions.ext] {
|
||||
threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drop landmarks tensors if face is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "landmark_tensors"
|
||||
input_stream: "ALLOW:face_presence"
|
||||
output_stream: "ensured_landmark_tensors"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a vector of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_side_packet: "ENABLE:with_attention"
|
||||
input_stream: "TENSORS:ensured_landmark_tensors"
|
||||
output_stream: "LANDMARKS:landmarks"
|
||||
options: {
|
||||
[mediapipe.SwitchContainerOptions.ext] {
|
||||
contained_node: {
|
||||
calculator: "TensorsToFaceLandmarks"
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "TensorsToFaceLandmarksWithAttention"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the landmarks from the cropped face image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "LandmarkProjectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
output_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
}
|
247
mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt
Normal file
247
mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt
Normal file
|
@ -0,0 +1,247 @@
|
|||
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
|
||||
# executed on CPU.) This graph tries to skip face detection as much as possible
|
||||
# by using previously detected/predicted landmarks for new images.
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# It is required that "face_landmark.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark.tflite"
|
||||
# path during execution if `with_attention` is not set or set to `false`.
|
||||
#
|
||||
# It is required that "face_landmark_with_attention.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
|
||||
# path during execution if `with_attention` is set to `true`.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceLandmarkFrontCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_side_packet: "NUM_FACES:num_faces"
|
||||
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
# input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
# output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# }
|
||||
|
||||
type: "FaceLandmarkFrontCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of faces to detect/track. (int)
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
|
||||
# When the optional input side packet "use_prev_landmarks" is either absent or
|
||||
# set to true, uses the landmarks on the previous image to help localize
|
||||
# landmarks on the current image.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_side_packet: "ALLOW:use_prev_landmarks"
|
||||
input_stream: "prev_face_rects_from_landmarks"
|
||||
output_stream: "gated_prev_face_rects_from_landmarks"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||
# equal to the provided num_faces.
|
||||
node {
|
||||
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
|
||||
input_side_packet: "num_faces"
|
||||
output_stream: "prev_has_enough_faces"
|
||||
}
|
||||
|
||||
# Drops the incoming image if enough faces have already been identified from the
|
||||
# previous image. Otherwise, passes the incoming image through to trigger a new
|
||||
# round of face detection.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "DISALLOW:prev_has_enough_faces"
|
||||
output_stream: "gated_image"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
empty_packets_as_allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeCpu"
|
||||
input_stream: "IMAGE:gated_image"
|
||||
output_stream: "DETECTIONS:all_face_detections"
|
||||
}
|
||||
|
||||
# Makes sure there are no more detections than the provided num_faces.
|
||||
node {
|
||||
calculator: "ClipDetectionVectorSizeCalculator"
|
||||
input_stream: "all_face_detections"
|
||||
output_stream: "face_detections"
|
||||
input_side_packet: "num_faces"
|
||||
}
|
||||
|
||||
# Calculate size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:gated_image"
|
||||
output_stream: "SIZE:gated_image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_detections at a fake timestamp for the rest of
|
||||
# the graph to process. Clones the image size packet for each face_detection at
|
||||
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||
# for downstream calculators to inform them that all elements in the vector have
|
||||
# been processed.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:face_detections"
|
||||
input_stream: "CLONE:gated_image_size"
|
||||
output_stream: "ITEM:face_detection"
|
||||
output_stream: "CLONE:detections_loop_image_size"
|
||||
output_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face detections, so that can be used
|
||||
# to detect landmarks.
|
||||
node {
|
||||
calculator: "FaceDetectionFrontDetectionToRoi"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
input_stream: "IMAGE_SIZE:detections_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_detection"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_detection"
|
||||
input_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Performs association between NormalizedRect vector elements from previous
|
||||
# image and rects based on face detections from the current image. This
|
||||
# calculator ensures that the output face_rects vector doesn't contain
|
||||
# overlapping regions based on the specified min_similarity_threshold.
|
||||
node {
|
||||
calculator: "AssociationNormRectCalculator"
|
||||
input_stream: "face_rects_from_detections"
|
||||
input_stream: "gated_prev_face_rects_from_landmarks"
|
||||
output_stream: "face_rects"
|
||||
options: {
|
||||
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||
min_similarity_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Calculate size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_rects at a fake timestamp for the rest of the
|
||||
# graph to process. Clones image and image size packets for each
|
||||
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
|
||||
# BATCH_END timestamp for downstream calculators to inform them that all
|
||||
# elements in the vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedRectCalculator"
|
||||
input_stream: "ITERABLE:face_rects"
|
||||
input_stream: "CLONE:0:image"
|
||||
input_stream: "CLONE:1:image_size"
|
||||
output_stream: "ITEM:face_rect"
|
||||
output_stream: "CLONE:0:landmarks_loop_image"
|
||||
output_stream: "CLONE:1:landmarks_loop_image_size"
|
||||
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Detects face landmarks within specified region of interest of the image.
|
||||
node {
|
||||
calculator: "FaceLandmarkCpu"
|
||||
input_stream: "IMAGE:landmarks_loop_image"
|
||||
input_stream: "ROI:face_rect"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face landmarks, so that can be reused
|
||||
# for subsequent image.
|
||||
node {
|
||||
calculator: "FaceLandmarkLandmarksToRoi"
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of landmarks for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:face_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_landmarks"
|
||||
}
|
||||
|
||||
# Caches face rects calculated from landmarks, and upon the arrival of the next
|
||||
# input image, sends out the cached rects with timestamps replaced by that of
|
||||
# the input image, essentially generating a packet that carries the previous
|
||||
# face rects. Note that upon the arrival of the very first input image, a
|
||||
# timestamp bound update occurs to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image"
|
||||
input_stream: "LOOP:face_rects_from_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
# MediaPipe graph to detect/predict face landmarks on CPU.
|
||||
|
||||
type: "FaceLandmarkFrontCpuImage"
|
||||
|
||||
# Input image. (Image)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of faces to detect/track. (int)
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if no faces are detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "FINISHED:multi_face_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_image"
|
||||
options: {
|
||||
[mediapipe.FlowLimiterCalculatorOptions.ext] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts Image to ImageFrame for FaceLandmarkFrontCpu to consume.
|
||||
node {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
output_stream: "IMAGE_CPU:raw_image_frame"
|
||||
output_stream: "SOURCE_ON_GPU:is_gpu_image"
|
||||
}
|
||||
|
||||
# TODO: Remove the extra flipping once adopting MlImage.
|
||||
# If the source images are on gpu, flip the data vertically before sending them
|
||||
# into FaceLandmarkFrontCpu. This may be needed because OpenGL represents images
|
||||
# assuming the image origin is at the bottom-left corner, whereas MediaPipe in
|
||||
# general assumes the image origin is at the top-left corner.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:raw_image_frame"
|
||||
input_stream: "FLIP_VERTICALLY:is_gpu_image"
|
||||
output_stream: "IMAGE:image_frame"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontCpu"
|
||||
input_stream: "IMAGE:image_frame"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
247
mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt
Normal file
247
mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt
Normal file
|
@ -0,0 +1,247 @@
|
|||
# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is
|
||||
# executed on GPU.) This graph tries to skip face detection as much as possible
|
||||
# by using previously detected/predicted landmarks for new images.
|
||||
#
|
||||
# It is required that "face_detection_short_range.tflite" is available at
|
||||
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
|
||||
# path during execution.
|
||||
#
|
||||
# It is required that "face_landmark.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark.tflite"
|
||||
# path during execution if `with_attention` is not set or set to `false`.
|
||||
#
|
||||
# It is required that "face_landmark_with_attention.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
|
||||
# path during execution if `with_attention` is set to `true`.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceLandmarkFrontGpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_side_packet: "NUM_FACES:num_faces"
|
||||
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
# input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
# output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# }
|
||||
|
||||
type: "FaceLandmarkFrontGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of faces to detect/track. (int)
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if no faces are detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
|
||||
# When the optional input side packet "use_prev_landmarks" is either absent or
|
||||
# set to true, uses the landmarks on the previous image to help localize
|
||||
# landmarks on the current image.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_side_packet: "ALLOW:use_prev_landmarks"
|
||||
input_stream: "prev_face_rects_from_landmarks"
|
||||
output_stream: "gated_prev_face_rects_from_landmarks"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||
# equal to the provided num_faces.
|
||||
node {
|
||||
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
|
||||
input_side_packet: "num_faces"
|
||||
output_stream: "prev_has_enough_faces"
|
||||
}
|
||||
|
||||
# Drops the incoming image if enough faces have already been identified from the
|
||||
# previous image. Otherwise, passes the incoming image through to trigger a new
|
||||
# round of face detection.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "DISALLOW:prev_has_enough_faces"
|
||||
output_stream: "gated_image"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
empty_packets_as_allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeGpu"
|
||||
input_stream: "IMAGE:gated_image"
|
||||
output_stream: "DETECTIONS:all_face_detections"
|
||||
}
|
||||
|
||||
# Makes sure there are no more detections than the provided num_faces.
|
||||
node {
|
||||
calculator: "ClipDetectionVectorSizeCalculator"
|
||||
input_stream: "all_face_detections"
|
||||
output_stream: "face_detections"
|
||||
input_side_packet: "num_faces"
|
||||
}
|
||||
|
||||
# Calculates the size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:gated_image"
|
||||
output_stream: "SIZE:gated_image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_detections at a fake timestamp for the rest of
|
||||
# the graph to process. Clones the image size packet for each face_detection at
|
||||
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||
# for downstream calculators to inform them that all elements in the vector have
|
||||
# been processed.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:face_detections"
|
||||
input_stream: "CLONE:gated_image_size"
|
||||
output_stream: "ITEM:face_detection"
|
||||
output_stream: "CLONE:detections_loop_image_size"
|
||||
output_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face detections, so that it can be used
|
||||
# to detect landmarks.
|
||||
node {
|
||||
calculator: "FaceDetectionFrontDetectionToRoi"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
input_stream: "IMAGE_SIZE:detections_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_detection"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_detection"
|
||||
input_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Performs association between NormalizedRect vector elements from previous
|
||||
# image and rects based on face detections from the current image. This
|
||||
# calculator ensures that the output face_rects vector doesn't contain
|
||||
# overlapping regions based on the specified min_similarity_threshold.
|
||||
node {
|
||||
calculator: "AssociationNormRectCalculator"
|
||||
input_stream: "face_rects_from_detections"
|
||||
input_stream: "gated_prev_face_rects_from_landmarks"
|
||||
output_stream: "face_rects"
|
||||
options: {
|
||||
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||
min_similarity_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Calculates the size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_rects at a fake timestamp for the rest of the
|
||||
# graph to process. Clones image and image size packets for each
|
||||
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
|
||||
# BATCH_END timestamp for downstream calculators to inform them that all
|
||||
# elements in the vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedRectCalculator"
|
||||
input_stream: "ITERABLE:face_rects"
|
||||
input_stream: "CLONE:0:image"
|
||||
input_stream: "CLONE:1:image_size"
|
||||
output_stream: "ITEM:face_rect"
|
||||
output_stream: "CLONE:0:landmarks_loop_image"
|
||||
output_stream: "CLONE:1:landmarks_loop_image_size"
|
||||
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Detects face landmarks within specified region of interest of the image.
|
||||
node {
|
||||
calculator: "FaceLandmarkGpu"
|
||||
input_stream: "IMAGE:landmarks_loop_image"
|
||||
input_stream: "ROI:face_rect"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face landmarks, so that it can be reused
|
||||
# for subsequent image.
|
||||
node {
|
||||
calculator: "FaceLandmarkLandmarksToRoi"
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of landmarks for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:face_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_landmarks"
|
||||
}
|
||||
|
||||
# Caches face rects calculated from landmarks, and upon the arrival of the next
|
||||
# input image, sends out the cached rects with timestamps replaced by that of
|
||||
# the input image, essentially generating a packet that carries the previous
|
||||
# face rects. Note that upon the arrival of the very first input image, a
|
||||
# timestamp bound update occurs to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image"
|
||||
input_stream: "LOOP:face_rects_from_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
# MediaPipe graph to detect/predict face landmarks on GPU.
|
||||
|
||||
type: "FaceLandmarkFrontGpuImage"
|
||||
|
||||
# Input image. (Image)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of faces to detect/track. (int)
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if no faces are detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "FINISHED:multi_face_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_image"
|
||||
options: {
|
||||
[mediapipe.FlowLimiterCalculatorOptions.ext] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts Image to GpuBuffer for FaceLandmarkFrontGpu to consume.
|
||||
node {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
output_stream: "IMAGE_GPU:raw_gpu_buffer"
|
||||
output_stream: "SOURCE_ON_GPU:is_gpu_image"
|
||||
}
|
||||
|
||||
# TODO: Remove the extra flipping once adopting MlImage.
|
||||
# If the source images are on gpu, flip the data vertically before sending them
|
||||
# into FaceLandmarkFrontGpu. This may be needed because OpenGL represents images
|
||||
# assuming the image origin is at the bottom-left corner, whereas MediaPipe in
|
||||
# general assumes the image origin is at the top-left corner.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:raw_gpu_buffer"
|
||||
input_stream: "FLIP_VERTICALLY:is_gpu_image"
|
||||
output_stream: "IMAGE_GPU:gpu_buffer"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "FaceLandmarkFrontGpu"
|
||||
input_stream: "IMAGE:gpu_buffer"
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
|
||||
# executed on CPU.) This graph tries to skip face detection as much as possible
|
||||
# by using previously detected/predicted landmarks for new images.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceLandmarkFrontSideModelCpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_side_packet: "NUM_FACES:num_faces"
|
||||
# input_side_packet: "MODEL:0:face_detection_model"
|
||||
# input_side_packet: "MODEL:1:face_landmark_model"
|
||||
# output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# }
|
||||
|
||||
type: "FaceLandmarkFrontSideModelCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of faces to detect/track. (int)
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
# TfLite model to detect faces.
|
||||
# (std::unique_ptr<tflite::FlatBufferModel,
|
||||
# std::function<void(tflite::FlatBufferModel*)>>)
|
||||
# NOTE: mediapipe/modules/face_detection/face_detection_short_range.tflite
|
||||
# model only, can be passed here, otherwise - results are undefined.
|
||||
input_side_packet: "MODEL:0:face_detection_model"
|
||||
# TfLite model to detect face landmarks.
|
||||
# (std::unique_ptr<tflite::FlatBufferModel,
|
||||
# std::function<void(tflite::FlatBufferModel*)>>)
|
||||
# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model
|
||||
# only, can be passed here, otherwise - results are undefined.
|
||||
input_side_packet: "MODEL:1:face_landmark_model"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if no faces are detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
|
||||
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||
# equal to the provided num_faces.
|
||||
node {
|
||||
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:prev_face_rects_from_landmarks"
|
||||
input_side_packet: "num_faces"
|
||||
output_stream: "prev_has_enough_faces"
|
||||
}
|
||||
|
||||
# Drops the incoming image if FaceLandmarkCpu was able to identify face presence
|
||||
# in the previous image. Otherwise, passes the incoming image through to trigger
|
||||
# a new round of face detection in FaceDetectionShortRangeCpu.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "DISALLOW:prev_has_enough_faces"
|
||||
output_stream: "gated_image"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
empty_packets_as_allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeSideModelCpu"
|
||||
input_stream: "IMAGE:gated_image"
|
||||
input_side_packet: "MODEL:face_detection_model"
|
||||
output_stream: "DETECTIONS:all_face_detections"
|
||||
}
|
||||
|
||||
# Makes sure there are no more detections than the provided num_faces.
|
||||
node {
|
||||
calculator: "ClipDetectionVectorSizeCalculator"
|
||||
input_stream: "all_face_detections"
|
||||
output_stream: "face_detections"
|
||||
input_side_packet: "num_faces"
|
||||
}
|
||||
|
||||
# Calculates the size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:gated_image"
|
||||
output_stream: "SIZE:gated_image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_detections at a fake timestamp for the rest of
|
||||
# the graph to process. Clones the image size packet for each face_detection at
|
||||
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||
# for downstream calculators to inform them that all elements in the vector have
|
||||
# been processed.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:face_detections"
|
||||
input_stream: "CLONE:gated_image_size"
|
||||
output_stream: "ITEM:face_detection"
|
||||
output_stream: "CLONE:detections_loop_image_size"
|
||||
output_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face detections, so that it can be used
|
||||
# to detect landmarks.
|
||||
node {
|
||||
calculator: "FaceDetectionFrontDetectionToRoi"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
input_stream: "IMAGE_SIZE:detections_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_detection"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_detection"
|
||||
input_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Performs association between NormalizedRect vector elements from previous
|
||||
# image and rects based on face detections from the current image. This
|
||||
# calculator ensures that the output face_rects vector doesn't contain
|
||||
# overlapping regions based on the specified min_similarity_threshold.
|
||||
node {
|
||||
calculator: "AssociationNormRectCalculator"
|
||||
input_stream: "face_rects_from_detections"
|
||||
input_stream: "prev_face_rects_from_landmarks"
|
||||
output_stream: "face_rects"
|
||||
options: {
|
||||
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||
min_similarity_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Calculates the size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_rects at a fake timestamp for the rest of the
|
||||
# graph to process. Clones image and image size packets for each
|
||||
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
|
||||
# BATCH_END timestamp for downstream calculators to inform them that all
|
||||
# elements in the vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedRectCalculator"
|
||||
input_stream: "ITERABLE:face_rects"
|
||||
input_stream: "CLONE:0:image"
|
||||
input_stream: "CLONE:1:image_size"
|
||||
output_stream: "ITEM:face_rect"
|
||||
output_stream: "CLONE:0:landmarks_loop_image"
|
||||
output_stream: "CLONE:1:landmarks_loop_image_size"
|
||||
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Detects face landmarks within specified region of interest of the image.
|
||||
node {
|
||||
calculator: "FaceLandmarkSideModelCpu"
|
||||
input_stream: "IMAGE:landmarks_loop_image"
|
||||
input_stream: "ROI:face_rect"
|
||||
input_side_packet: "MODEL:face_landmark_model"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face landmarks, so that it can be reused
|
||||
# for subsequent image.
|
||||
node {
|
||||
calculator: "FaceLandmarkLandmarksToRoi"
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of landmarks for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:face_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_landmarks"
|
||||
}
|
||||
|
||||
# Caches face rects calculated from landmarks, and upon the arrival of the next
|
||||
# input image, sends out the cached rects with timestamps replaced by that of
|
||||
# the input image, essentially generating a packet that carries the previous
|
||||
# face rects. Note that upon the arrival of the very first input image, a
|
||||
# timestamp bound update occurs to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image"
|
||||
input_stream: "LOOP:face_rects_from_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is
|
||||
# executed on GPU.) This graph tries to skip face detection as much as possible
|
||||
# by using previously detected/predicted landmarks for new images.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceLandmarkFrontSideModelGpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_side_packet: "NUM_FACES:num_faces"
|
||||
# input_side_packet: "MODEL:0:face_detection_model"
|
||||
# input_side_packet: "MODEL:1:face_landmark_model"
|
||||
# output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
# }
|
||||
|
||||
type: "FaceLandmarkFrontSideModelGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of faces to detect/track. (int)
|
||||
input_side_packet: "NUM_FACES:num_faces"
|
||||
# TfLite model to detect faces.
|
||||
# (std::unique_ptr<tflite::FlatBufferModel,
|
||||
# std::function<void(tflite::FlatBufferModel*)>>)
|
||||
# NOTE: mediapipe/modules/face_detection/face_detection_short_range.tflite
|
||||
# model only, can be passed here, otherwise - results are undefined.
|
||||
input_side_packet: "MODEL:0:face_detection_model"
|
||||
# TfLite model to detect face landmarks.
|
||||
# (std::unique_ptr<tflite::FlatBufferModel,
|
||||
# std::function<void(tflite::FlatBufferModel*)>>)
|
||||
# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model
|
||||
# only, can be passed here, otherwise - results are undefined.
|
||||
input_side_packet: "MODEL:1:face_landmark_model"
|
||||
|
||||
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||
# Regions of interest calculated based on face detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||
|
||||
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||
# equal to the provided num_faces.
|
||||
node {
|
||||
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:prev_face_rects_from_landmarks"
|
||||
input_side_packet: "num_faces"
|
||||
output_stream: "prev_has_enough_faces"
|
||||
}
|
||||
|
||||
# Drops the incoming image if FaceLandmarkGpu was able to identify face presence
|
||||
# in the previous image. Otherwise, passes the incoming image through to trigger
|
||||
# a new round of face detection in FaceDetectionShortRangeGpu.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "DISALLOW:prev_has_enough_faces"
|
||||
output_stream: "gated_image"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
empty_packets_as_allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects faces.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeSideModelGpu"
|
||||
input_stream: "IMAGE:gated_image"
|
||||
input_side_packet: "MODEL:face_detection_model"
|
||||
output_stream: "DETECTIONS:all_face_detections"
|
||||
}
|
||||
|
||||
# Makes sure there are no more detections than the provided num_faces.
|
||||
node {
|
||||
calculator: "ClipDetectionVectorSizeCalculator"
|
||||
input_stream: "all_face_detections"
|
||||
output_stream: "face_detections"
|
||||
input_side_packet: "num_faces"
|
||||
}
|
||||
|
||||
# Calculate size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:gated_image"
|
||||
output_stream: "SIZE:gated_image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_detections at a fake timestamp for the rest of
|
||||
# the graph to process. Clones the image size packet for each face_detection at
|
||||
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||
# for downstream calculators to inform them that all elements in the vector have
|
||||
# been processed.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:face_detections"
|
||||
input_stream: "CLONE:gated_image_size"
|
||||
output_stream: "ITEM:face_detection"
|
||||
output_stream: "CLONE:detections_loop_image_size"
|
||||
output_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face detections, so that can be used
|
||||
# to detect landmarks.
|
||||
node {
|
||||
calculator: "FaceDetectionFrontDetectionToRoi"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
input_stream: "IMAGE_SIZE:detections_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_detection"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_detection"
|
||||
input_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_detections"
|
||||
}
|
||||
|
||||
# Performs association between NormalizedRect vector elements from previous
|
||||
# image and rects based on face detections from the current image. This
|
||||
# calculator ensures that the output face_rects vector doesn't contain
|
||||
# overlapping regions based on the specified min_similarity_threshold.
|
||||
node {
|
||||
calculator: "AssociationNormRectCalculator"
|
||||
input_stream: "face_rects_from_detections"
|
||||
input_stream: "prev_face_rects_from_landmarks"
|
||||
output_stream: "face_rects"
|
||||
options: {
|
||||
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||
min_similarity_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Calculate size of the image.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of face_rects at a fake timestamp for the rest of the
|
||||
# graph to process. Clones image and image size packets for each
|
||||
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
|
||||
# BATCH_END timestamp for downstream calculators to inform them that all
|
||||
# elements in the vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedRectCalculator"
|
||||
input_stream: "ITERABLE:face_rects"
|
||||
input_stream: "CLONE:0:image"
|
||||
input_stream: "CLONE:1:image_size"
|
||||
output_stream: "ITEM:face_rect"
|
||||
output_stream: "CLONE:0:landmarks_loop_image"
|
||||
output_stream: "CLONE:1:landmarks_loop_image_size"
|
||||
output_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
}
|
||||
|
||||
# Detects face landmarks within specified region of interest of the image.
|
||||
node {
|
||||
calculator: "FaceLandmarkSideModelGpu"
|
||||
input_stream: "IMAGE:landmarks_loop_image"
|
||||
input_stream: "ROI:face_rect"
|
||||
input_side_packet: "MODEL:face_landmark_model"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
||||
|
||||
# Calculates region of interest based on face landmarks, so that can be reused
|
||||
# for subsequent image.
|
||||
node {
|
||||
calculator: "FaceLandmarkLandmarksToRoi"
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
|
||||
output_stream: "ROI:face_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of landmarks for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:face_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:multi_face_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:face_rect_from_landmarks"
|
||||
input_stream: "BATCH_END:landmarks_loop_end_timestamp"
|
||||
output_stream: "ITERABLE:face_rects_from_landmarks"
|
||||
}
|
||||
|
||||
# Caches face rects calculated from landmarks, and upon the arrival of the next
|
||||
# input image, sends out the cached rects with timestamps replaced by that of
|
||||
# the input image, essentially generating a packet that carries the previous
|
||||
# face rects. Note that upon the arrival of the very first input image, a
|
||||
# timestamp bound update occurs to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image"
|
||||
input_stream: "LOOP:face_rects_from_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
|
||||
}
|
185
mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt
Normal file
185
mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt
Normal file
|
@ -0,0 +1,185 @@
|
|||
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
|
||||
# executed on CPU.)
|
||||
#
|
||||
# It is required that "face_landmark.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark.tflite"
|
||||
# path during execution if `with_attention` is not set or set to `false`.
|
||||
#
|
||||
# It is required that "face_landmark_with_attention.tflite" is available at
|
||||
# "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
|
||||
# path during execution if `with_attention` is set to `true`.
|
||||
#
|
||||
# EXAMPLE:
|
||||
# node {
|
||||
# calculator: "FaceLandmarkGpu"
|
||||
# input_stream: "IMAGE:image"
|
||||
# input_stream: "ROI:face_roi"
|
||||
# input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
# output_stream: "LANDMARKS:face_landmarks"
|
||||
# }
|
||||
|
||||
type: "FaceLandmarkGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
# ROI (region of interest) within the given image where a face is located.
|
||||
# (NormalizedRect)
|
||||
input_stream: "ROI:roi"
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
|
||||
#
|
||||
# Number of landmarks depends on the WITH_ATTENTION flag. If it's `true` - then
|
||||
# there will be 478 landmarks with refined lips, eyes and irises (10 extra
|
||||
# landmarks are for irises), otherwise 468 non-refined landmarks are returned.
|
||||
#
|
||||
# NOTE: if a face is not present within the given ROI, for this particular
|
||||
# timestamp there will not be an output packet in the LANDMARKS stream. However,
|
||||
# the MediaPipe framework will internally inform the downstream calculators of
|
||||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
|
||||
# Transforms the input image into a 192x192 tensor.
|
||||
node: {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
output_stream: "TENSORS:input_tensors"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 192
|
||||
output_tensor_height: 192
|
||||
output_tensor_float_range {
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
}
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Loads the face landmarks TF Lite model.
|
||||
node {
|
||||
calculator: "FaceLandmarksModelLoader"
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
output_side_packet: "MODEL:model"
|
||||
}
|
||||
|
||||
# Generates a single side packet containing a TensorFlow Lite op resolver that
|
||||
# supports custom ops needed by the model used in this graph.
|
||||
node {
|
||||
calculator: "TfLiteCustomOpResolverCalculator"
|
||||
output_side_packet: "op_resolver"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of GPU tensors representing, for instance, detection boxes/keypoints
|
||||
# and scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_stream: "TENSORS:input_tensors"
|
||||
input_side_packet: "MODEL:model"
|
||||
input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver"
|
||||
output_stream: "TENSORS:output_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
# Do not remove. Used for generation of XNNPACK/NNAPI graphs.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Splits a vector of tensors into landmark tensors and face flag tensor.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_side_packet: "ENABLE:with_attention"
|
||||
input_stream: "output_tensors"
|
||||
output_stream: "landmark_tensors"
|
||||
output_stream: "face_flag_tensor"
|
||||
options {
|
||||
[mediapipe.SwitchContainerOptions.ext] {
|
||||
contained_node: {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
}
|
||||
}
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 6 }
|
||||
ranges: { begin: 6 end: 7 }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the face-flag tensor into a float that represents the confidence
|
||||
# score of face presence.
|
||||
node {
|
||||
calculator: "TensorsToFloatsCalculator"
|
||||
input_stream: "TENSORS:face_flag_tensor"
|
||||
output_stream: "FLOAT:face_presence_score"
|
||||
options: {
|
||||
[mediapipe.TensorsToFloatsCalculatorOptions.ext] {
|
||||
activation: SIGMOID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Applies a threshold to the confidence score to determine whether a face is
|
||||
# present.
|
||||
node {
|
||||
calculator: "ThresholdingCalculator"
|
||||
input_stream: "FLOAT:face_presence_score"
|
||||
output_stream: "FLAG:face_presence"
|
||||
options: {
|
||||
[mediapipe.ThresholdingCalculatorOptions.ext] {
|
||||
threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drop landmarks tensors if face is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "landmark_tensors"
|
||||
input_stream: "ALLOW:face_presence"
|
||||
output_stream: "ensured_landmark_tensors"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a vector of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_side_packet: "ENABLE:with_attention"
|
||||
input_stream: "TENSORS:ensured_landmark_tensors"
|
||||
output_stream: "LANDMARKS:landmarks"
|
||||
options: {
|
||||
[mediapipe.SwitchContainerOptions.ext] {
|
||||
contained_node: {
|
||||
calculator: "TensorsToFaceLandmarks"
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "TensorsToFaceLandmarksWithAttention"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the landmarks from the cropped face image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "LandmarkProjectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks"
|
||||
input_stream: "NORM_RECT:roi"
|
||||
output_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
# MediaPipe graph to calculate face region of interest (ROI) from landmarks
|
||||
# detected by "FaceLandmarkCpu" or "FaceLandmarkGpu".
|
||||
#
|
||||
# NOTE: this graph is subject to change and should not be used directly.
|
||||
|
||||
type: "FaceLandmarkLandmarksToRoi"
|
||||
|
||||
# Normalized landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "LANDMARKS:landmarks"
|
||||
# Frame size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
# ROI according to landmarks. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts face landmarks to a detection that tightly encloses all landmarks.
|
||||
node {
|
||||
calculator: "LandmarksToDetectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:landmarks"
|
||||
output_stream: "DETECTION:face_detection"
|
||||
}
|
||||
|
||||
# Converts the face detection into a rectangle (normalized by image size)
|
||||
# that encloses the face and is rotated such that the line connecting left side
|
||||
# of the left eye and right side of the right eye is aligned with the X-axis of
|
||||
# the rectangle.
|
||||
node {
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:face_rect_from_landmarks"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
rotation_vector_start_keypoint_index: 33 # Left side of left eye.
|
||||
rotation_vector_end_keypoint_index: 263 # Right side of right eye.
|
||||
rotation_vector_target_angle_degrees: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Expands the face rectangle so that in the next video image it's likely to
|
||||
# still contain the face even with some motion.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:face_rect_from_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 1.5
|
||||
scale_y: 1.5
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
BIN
mediapipe/modules/face_landmark/face_landmark_with_attention.tflite
Executable file
BIN
mediapipe/modules/face_landmark/face_landmark_with_attention.tflite
Executable file
Binary file not shown.
|
@ -0,0 +1,58 @@
|
|||
# MediaPipe graph to load a selected face landmarks TF Lite model.
|
||||
|
||||
type: "FaceLandmarksModelLoader"
|
||||
|
||||
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||
# landmarks.
|
||||
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||
|
||||
# TF Lite model represented as a FlatBuffer.
|
||||
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
|
||||
output_side_packet: "MODEL:model"
|
||||
|
||||
# Determines path to the desired face landmark model file based on specification
|
||||
# in the input side packet.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_side_packet: "ENABLE:with_attention"
|
||||
output_side_packet: "PACKET:model_path"
|
||||
options: {
|
||||
[mediapipe.SwitchContainerOptions.ext] {
|
||||
contained_node: {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
options: {
|
||||
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
|
||||
packet {
|
||||
string_value: "mediapipe/modules/face_landmark/face_landmark.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
options: {
|
||||
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
|
||||
packet {
|
||||
string_value: "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Loads the file in the specified path into a blob.
|
||||
node {
|
||||
calculator: "LocalFileContentsCalculator"
|
||||
input_side_packet: "FILE_PATH:model_path"
|
||||
output_side_packet: "CONTENTS:model_blob"
|
||||
}
|
||||
|
||||
# Converts the input blob into a TF Lite model.
|
||||
node {
|
||||
calculator: "TfLiteModelCalculator"
|
||||
input_side_packet: "MODEL_BLOB:model_blob"
|
||||
output_side_packet: "MODEL:model"
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
# MediaPipe graph to transform single tensor into 468 facial landmarks.
|
||||
|
||||
type: "TensorsToFaceLandmarks"
|
||||
|
||||
# Vector with a single tensor that contains 468 landmarks. (std::vector<Tensor>)
|
||||
input_stream: "TENSORS:tensors"
|
||||
|
||||
# 468 facial landmarks (NormalizedLandmarkList)
|
||||
output_stream: "LANDMARKS:landmarks"
|
||||
|
||||
# Decodes the landmark tensors into a vector of lanmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:tensors"
|
||||
output_stream: "NORM_LANDMARKS:landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 468
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,299 @@
|
|||
# MediaPipe graph to transform model output tensors into 478 facial landmarks
|
||||
# with refined lips, eyes and irises.
|
||||
|
||||
type: "TensorsToFaceLandmarksWithAttention"
|
||||
|
||||
# Vector with a six tensors to parse landmarks from. (std::vector<Tensor>)
|
||||
# Landmark tensors order:
|
||||
# - mesh_tensor
|
||||
# - lips_tensor
|
||||
# - left_eye_tensor
|
||||
# - right_eye_tensor
|
||||
# - left_iris_tensor
|
||||
# - right_iris_tensor
|
||||
input_stream: "TENSORS:tensors"
|
||||
|
||||
# 478 facial landmarks (NormalizedLandmarkList)
|
||||
output_stream: "LANDMARKS:landmarks"
|
||||
|
||||
# Splits a vector of tensors into multiple vectors.
|
||||
node {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
input_stream: "tensors"
|
||||
output_stream: "mesh_tensor"
|
||||
output_stream: "lips_tensor"
|
||||
output_stream: "left_eye_tensor"
|
||||
output_stream: "right_eye_tensor"
|
||||
output_stream: "left_iris_tensor"
|
||||
output_stream: "right_iris_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
ranges: { begin: 4 end: 5 }
|
||||
ranges: { begin: 5 end: 6 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes mesh landmarks tensor into a vector of normalized lanmarks.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:mesh_tensor"
|
||||
output_stream: "NORM_LANDMARKS:mesh_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 468
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes lips landmarks tensor into a vector of normalized lanmarks.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:lips_tensor"
|
||||
output_stream: "NORM_LANDMARKS:lips_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 80
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes left eye landmarks tensor into a vector of normalized lanmarks.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:left_eye_tensor"
|
||||
output_stream: "NORM_LANDMARKS:left_eye_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 71
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes right eye landmarks tensor into a vector of normalized lanmarks.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:right_eye_tensor"
|
||||
output_stream: "NORM_LANDMARKS:right_eye_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 71
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes left iris landmarks tensor into a vector of normalized lanmarks.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:left_iris_tensor"
|
||||
output_stream: "NORM_LANDMARKS:left_iris_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 5
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Decodes right iris landmarks tensor into a vector of normalized lanmarks.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:right_iris_tensor"
|
||||
output_stream: "NORM_LANDMARKS:right_iris_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 5
|
||||
input_image_width: 192
|
||||
input_image_height: 192
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Refine mesh landmarks with lips, eyes and irises.
|
||||
node {
|
||||
calculator: "LandmarksRefinementCalculator"
|
||||
input_stream: "LANDMARKS:0:mesh_landmarks"
|
||||
input_stream: "LANDMARKS:1:lips_landmarks"
|
||||
input_stream: "LANDMARKS:2:left_eye_landmarks"
|
||||
input_stream: "LANDMARKS:3:right_eye_landmarks"
|
||||
input_stream: "LANDMARKS:4:left_iris_landmarks"
|
||||
input_stream: "LANDMARKS:5:right_iris_landmarks"
|
||||
output_stream: "REFINED_LANDMARKS:landmarks"
|
||||
options: {
|
||||
[mediapipe.LandmarksRefinementCalculatorOptions.ext] {
|
||||
# 0 - mesh
|
||||
refinement: {
|
||||
indexes_mapping: [
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
|
||||
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
|
||||
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
|
||||
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
|
||||
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
|
||||
118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
|
||||
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
|
||||
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
|
||||
174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187,
|
||||
188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
|
||||
202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215,
|
||||
216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
|
||||
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243,
|
||||
244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257,
|
||||
258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271,
|
||||
272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
|
||||
286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
|
||||
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313,
|
||||
314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327,
|
||||
328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341,
|
||||
342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355,
|
||||
356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369,
|
||||
370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383,
|
||||
384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397,
|
||||
398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411,
|
||||
412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425,
|
||||
426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439,
|
||||
440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453,
|
||||
454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467
|
||||
]
|
||||
z_refinement: { copy {} }
|
||||
}
|
||||
# 1 - lips
|
||||
refinement: {
|
||||
indexes_mapping: [
|
||||
# Lower outer.
|
||||
61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
|
||||
# Upper outer (excluding corners).
|
||||
185, 40, 39, 37, 0, 267, 269, 270, 409,
|
||||
# Lower inner.
|
||||
78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
|
||||
# Upper inner (excluding corners).
|
||||
191, 80, 81, 82, 13, 312, 311, 310, 415,
|
||||
# Lower semi-outer.
|
||||
76, 77, 90, 180, 85, 16, 315, 404, 320, 307, 306,
|
||||
# Upper semi-outer (excluding corners).
|
||||
184, 74, 73, 72, 11, 302, 303, 304, 408,
|
||||
# Lower semi-inner.
|
||||
62, 96, 89, 179, 86, 15, 316, 403, 319, 325, 292,
|
||||
# Upper semi-inner (excluding corners).
|
||||
183, 42, 41, 38, 12, 268, 271, 272, 407
|
||||
]
|
||||
z_refinement: { none {} }
|
||||
}
|
||||
# 2 - left eye
|
||||
refinement: {
|
||||
indexes_mapping: [
|
||||
# Lower contour.
|
||||
33, 7, 163, 144, 145, 153, 154, 155, 133,
|
||||
# upper contour (excluding corners).
|
||||
246, 161, 160, 159, 158, 157, 173,
|
||||
# Halo x2 lower contour.
|
||||
130, 25, 110, 24, 23, 22, 26, 112, 243,
|
||||
# Halo x2 upper contour (excluding corners).
|
||||
247, 30, 29, 27, 28, 56, 190,
|
||||
# Halo x3 lower contour.
|
||||
226, 31, 228, 229, 230, 231, 232, 233, 244,
|
||||
# Halo x3 upper contour (excluding corners).
|
||||
113, 225, 224, 223, 222, 221, 189,
|
||||
# Halo x4 upper contour (no lower because of mesh structure) or
|
||||
# eyebrow inner contour.
|
||||
35, 124, 46, 53, 52, 65,
|
||||
# Halo x5 lower contour.
|
||||
143, 111, 117, 118, 119, 120, 121, 128, 245,
|
||||
# Halo x5 upper contour (excluding corners) or eyebrow outer contour.
|
||||
156, 70, 63, 105, 66, 107, 55, 193
|
||||
]
|
||||
z_refinement: { none {} }
|
||||
}
|
||||
# 3 - right eye
|
||||
refinement: {
|
||||
indexes_mapping: [
|
||||
# Lower contour.
|
||||
263, 249, 390, 373, 374, 380, 381, 382, 362,
|
||||
# Upper contour (excluding corners).
|
||||
466, 388, 387, 386, 385, 384, 398,
|
||||
# Halo x2 lower contour.
|
||||
359, 255, 339, 254, 253, 252, 256, 341, 463,
|
||||
# Halo x2 upper contour (excluding corners).
|
||||
467, 260, 259, 257, 258, 286, 414,
|
||||
# Halo x3 lower contour.
|
||||
446, 261, 448, 449, 450, 451, 452, 453, 464,
|
||||
# Halo x3 upper contour (excluding corners).
|
||||
342, 445, 444, 443, 442, 441, 413,
|
||||
# Halo x4 upper contour (no lower because of mesh structure) or
|
||||
# eyebrow inner contour.
|
||||
265, 353, 276, 283, 282, 295,
|
||||
# Halo x5 lower contour.
|
||||
372, 340, 346, 347, 348, 349, 350, 357, 465,
|
||||
# Halo x5 upper contour (excluding corners) or eyebrow outer contour.
|
||||
383, 300, 293, 334, 296, 336, 285, 417
|
||||
]
|
||||
z_refinement: { none {} }
|
||||
}
|
||||
# 4 - left iris
|
||||
refinement: {
|
||||
indexes_mapping: [
|
||||
# Center.
|
||||
468,
|
||||
# Iris right edge.
|
||||
469,
|
||||
# Iris top edge.
|
||||
470,
|
||||
# Iris left edge.
|
||||
471,
|
||||
# Iris bottom edge.
|
||||
472
|
||||
]
|
||||
z_refinement: {
|
||||
assign_average: {
|
||||
indexes_for_average: [
|
||||
# Lower contour.
|
||||
33, 7, 163, 144, 145, 153, 154, 155, 133,
|
||||
# Upper contour (excluding corners).
|
||||
246, 161, 160, 159, 158, 157, 173
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
# 5 - right iris
|
||||
refinement: {
|
||||
indexes_mapping: [
|
||||
# Center.
|
||||
473,
|
||||
# Iris right edge.
|
||||
474,
|
||||
# Iris top edge.
|
||||
475,
|
||||
# Iris left edge.
|
||||
476,
|
||||
# Iris bottom edge.
|
||||
477
|
||||
]
|
||||
z_refinement: {
|
||||
assign_average: {
|
||||
indexes_for_average: [
|
||||
# Lower contour.
|
||||
263, 249, 390, 373, 374, 380, 381, 382, 362,
|
||||
# Upper contour (excluding corners).
|
||||
466, 388, 387, 386, 385, 384, 398
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
171
mediapipe/modules/hand_landmark/BUILD
Normal file
171
mediapipe/modules/hand_landmark/BUILD
Normal file
|
@ -0,0 +1,171 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load(
|
||||
"//mediapipe/framework/tool:mediapipe_graph.bzl",
|
||||
"mediapipe_simple_subgraph",
|
||||
)
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
exports_files([
|
||||
"hand_landmark_full.tflite",
|
||||
"hand_landmark_lite.tflite",
|
||||
"handedness.txt",
|
||||
])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_model_loader",
|
||||
graph = "hand_landmark_model_loader.pbtxt",
|
||||
register_as = "HandLandmarkModelLoader",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/tflite:tflite_model_calculator",
|
||||
"//mediapipe/calculators/util:local_file_contents_calculator",
|
||||
"//mediapipe/framework/tool:switch_container",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_cpu",
|
||||
graph = "hand_landmark_cpu.pbtxt",
|
||||
register_as = "HandLandmarkCpu",
|
||||
deps = [
|
||||
":hand_landmark_model_loader",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_classification_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/calculators/util:world_landmark_projection_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_gpu",
|
||||
graph = "hand_landmark_gpu.pbtxt",
|
||||
register_as = "HandLandmarkGpu",
|
||||
deps = [
|
||||
":hand_landmark_model_loader",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:split_vector_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_classification_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_floats_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
"//mediapipe/calculators/util:world_landmark_projection_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_tracking_gpu",
|
||||
graph = "hand_landmark_tracking_gpu.pbtxt",
|
||||
register_as = "HandLandmarkTrackingGpu",
|
||||
deps = [
|
||||
":hand_landmark_gpu",
|
||||
":hand_landmark_landmarks_to_roi",
|
||||
":palm_detection_detection_to_roi",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:clip_vector_size_calculator",
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:association_norm_rect_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/calculators/util:filter_collection_calculator",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_tracking_cpu_image",
|
||||
graph = "hand_landmark_tracking_cpu_image.pbtxt",
|
||||
register_as = "HandLandmarkTrackingCpuImage",
|
||||
deps = [
|
||||
":hand_landmark_tracking_cpu",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/util:from_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_tracking_gpu_image",
|
||||
graph = "hand_landmark_tracking_gpu_image.pbtxt",
|
||||
register_as = "HandLandmarkTrackingGpuImage",
|
||||
deps = [
|
||||
":hand_landmark_tracking_gpu",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/calculators/util:from_image_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_tracking_cpu",
|
||||
graph = "hand_landmark_tracking_cpu.pbtxt",
|
||||
register_as = "HandLandmarkTrackingCpu",
|
||||
deps = [
|
||||
":hand_landmark_cpu",
|
||||
":hand_landmark_landmarks_to_roi",
|
||||
":palm_detection_detection_to_roi",
|
||||
"//mediapipe/calculators/core:begin_loop_calculator",
|
||||
"//mediapipe/calculators/core:clip_vector_size_calculator",
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:end_loop_calculator",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/util:association_norm_rect_calculator",
|
||||
"//mediapipe/calculators/util:collection_has_min_size_calculator",
|
||||
"//mediapipe/calculators/util:filter_collection_calculator",
|
||||
"//mediapipe/modules/palm_detection:palm_detection_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "palm_detection_detection_to_roi",
|
||||
graph = "palm_detection_detection_to_roi.pbtxt",
|
||||
register_as = "PalmDetectionDetectionToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmark_landmarks_to_roi",
|
||||
graph = "hand_landmark_landmarks_to_roi.pbtxt",
|
||||
register_as = "HandLandmarkLandmarksToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
|
||||
],
|
||||
)
|
8
mediapipe/modules/hand_landmark/README.md
Normal file
8
mediapipe/modules/hand_landmark/README.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
# hand_landmark
|
||||
|
||||
Subgraphs|Details
|
||||
:--- | :---
|
||||
[`HandLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt)| Detects landmarks of a single hand. (CPU input.)
|
||||
[`HandLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt)| Detects landmarks of a single hand. (GPU input.)
|
||||
[`HandLandmarkTrackingCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt)| Detects and tracks landmarks of multiple hands. (CPU input.)
|
||||
[`HandLandmarkTrackingGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt)| Detects and tracks landmarks of multiple hands. (GPU input.)
|
33
mediapipe/modules/hand_landmark/calculators/BUILD
Normal file
33
mediapipe/modules/hand_landmark/calculators/BUILD
Normal file
|
@ -0,0 +1,33 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "hand_landmarks_to_rect_calculator",
|
||||
srcs = ["hand_landmarks_to_rect_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:location_data_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
|
@ -0,0 +1,167 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
#include <cmath>
|
||||
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kNormalizedLandmarksTag[] = "NORM_LANDMARKS";
|
||||
constexpr char kNormRectTag[] = "NORM_RECT";
|
||||
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||
constexpr int kWristJoint = 0;
|
||||
constexpr int kMiddleFingerPIPJoint = 6;
|
||||
constexpr int kIndexFingerPIPJoint = 4;
|
||||
constexpr int kRingFingerPIPJoint = 8;
|
||||
constexpr float kTargetAngle = M_PI * 0.5f;
|
||||
|
||||
inline float NormalizeRadians(float angle) {
|
||||
return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
|
||||
}
|
||||
|
||||
float ComputeRotation(const NormalizedLandmarkList& landmarks,
|
||||
const std::pair<int, int>& image_size) {
|
||||
const float x0 = landmarks.landmark(kWristJoint).x() * image_size.first;
|
||||
const float y0 = landmarks.landmark(kWristJoint).y() * image_size.second;
|
||||
|
||||
float x1 = (landmarks.landmark(kIndexFingerPIPJoint).x() +
|
||||
landmarks.landmark(kRingFingerPIPJoint).x()) /
|
||||
2.f;
|
||||
float y1 = (landmarks.landmark(kIndexFingerPIPJoint).y() +
|
||||
landmarks.landmark(kRingFingerPIPJoint).y()) /
|
||||
2.f;
|
||||
x1 = (x1 + landmarks.landmark(kMiddleFingerPIPJoint).x()) / 2.f *
|
||||
image_size.first;
|
||||
y1 = (y1 + landmarks.landmark(kMiddleFingerPIPJoint).y()) / 2.f *
|
||||
image_size.second;
|
||||
|
||||
const float rotation =
|
||||
NormalizeRadians(kTargetAngle - std::atan2(-(y1 - y0), x1 - x0));
|
||||
return rotation;
|
||||
}
|
||||
|
||||
absl::Status NormalizedLandmarkListToRect(
|
||||
const NormalizedLandmarkList& landmarks,
|
||||
const std::pair<int, int>& image_size, NormalizedRect* rect) {
|
||||
const float rotation = ComputeRotation(landmarks, image_size);
|
||||
const float reverse_angle = NormalizeRadians(-rotation);
|
||||
|
||||
// Find boundaries of landmarks.
|
||||
float max_x = std::numeric_limits<float>::min();
|
||||
float max_y = std::numeric_limits<float>::min();
|
||||
float min_x = std::numeric_limits<float>::max();
|
||||
float min_y = std::numeric_limits<float>::max();
|
||||
for (int i = 0; i < landmarks.landmark_size(); ++i) {
|
||||
max_x = std::max(max_x, landmarks.landmark(i).x());
|
||||
max_y = std::max(max_y, landmarks.landmark(i).y());
|
||||
min_x = std::min(min_x, landmarks.landmark(i).x());
|
||||
min_y = std::min(min_y, landmarks.landmark(i).y());
|
||||
}
|
||||
const float axis_aligned_center_x = (max_x + min_x) / 2.f;
|
||||
const float axis_aligned_center_y = (max_y + min_y) / 2.f;
|
||||
|
||||
// Find boundaries of rotated landmarks.
|
||||
max_x = std::numeric_limits<float>::min();
|
||||
max_y = std::numeric_limits<float>::min();
|
||||
min_x = std::numeric_limits<float>::max();
|
||||
min_y = std::numeric_limits<float>::max();
|
||||
for (int i = 0; i < landmarks.landmark_size(); ++i) {
|
||||
const float original_x =
|
||||
(landmarks.landmark(i).x() - axis_aligned_center_x) * image_size.first;
|
||||
const float original_y =
|
||||
(landmarks.landmark(i).y() - axis_aligned_center_y) * image_size.second;
|
||||
|
||||
const float projected_x = original_x * std::cos(reverse_angle) -
|
||||
original_y * std::sin(reverse_angle);
|
||||
const float projected_y = original_x * std::sin(reverse_angle) +
|
||||
original_y * std::cos(reverse_angle);
|
||||
|
||||
max_x = std::max(max_x, projected_x);
|
||||
max_y = std::max(max_y, projected_y);
|
||||
min_x = std::min(min_x, projected_x);
|
||||
min_y = std::min(min_y, projected_y);
|
||||
}
|
||||
const float projected_center_x = (max_x + min_x) / 2.f;
|
||||
const float projected_center_y = (max_y + min_y) / 2.f;
|
||||
|
||||
const float center_x = projected_center_x * std::cos(rotation) -
|
||||
projected_center_y * std::sin(rotation) +
|
||||
image_size.first * axis_aligned_center_x;
|
||||
const float center_y = projected_center_x * std::sin(rotation) +
|
||||
projected_center_y * std::cos(rotation) +
|
||||
image_size.second * axis_aligned_center_y;
|
||||
const float width = (max_x - min_x) / image_size.first;
|
||||
const float height = (max_y - min_y) / image_size.second;
|
||||
|
||||
rect->set_x_center(center_x / image_size.first);
|
||||
rect->set_y_center(center_y / image_size.second);
|
||||
rect->set_width(width);
|
||||
rect->set_height(height);
|
||||
rect->set_rotation(rotation);
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// A calculator that converts subset of hand landmarks to a bounding box
|
||||
// NormalizedRect. The rotation angle of the bounding box is computed based on
|
||||
// 1) the wrist joint and 2) the average of PIP joints of index finger, middle
|
||||
// finger and ring finger. After rotation, the vector from the wrist to the mean
|
||||
// of PIP joints is expected to be vertical with wrist at the bottom and the
|
||||
// mean of PIP joints at the top.
|
||||
class HandLandmarksToRectCalculator : public CalculatorBase {
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc) {
|
||||
cc->Inputs().Tag(kNormalizedLandmarksTag).Set<NormalizedLandmarkList>();
|
||||
cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
|
||||
cc->Outputs().Tag(kNormRectTag).Set<NormalizedRect>();
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Open(CalculatorContext* cc) override {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status Process(CalculatorContext* cc) override {
|
||||
if (cc->Inputs().Tag(kNormalizedLandmarksTag).IsEmpty()) {
|
||||
return absl::OkStatus();
|
||||
}
|
||||
RET_CHECK(!cc->Inputs().Tag(kImageSizeTag).IsEmpty());
|
||||
|
||||
std::pair<int, int> image_size =
|
||||
cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();
|
||||
const auto& landmarks =
|
||||
cc->Inputs().Tag(kNormalizedLandmarksTag).Get<NormalizedLandmarkList>();
|
||||
auto output_rect = absl::make_unique<NormalizedRect>();
|
||||
MP_RETURN_IF_ERROR(
|
||||
NormalizedLandmarkListToRect(landmarks, image_size, output_rect.get()));
|
||||
cc->Outputs()
|
||||
.Tag(kNormRectTag)
|
||||
.Add(output_rect.release(), cc->InputTimestamp());
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
};
|
||||
REGISTER_CALCULATOR(HandLandmarksToRectCalculator);
|
||||
|
||||
} // namespace mediapipe
|
219
mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt
Normal file
219
mediapipe/modules/hand_landmark/hand_landmark_cpu.pbtxt
Normal file
|
@ -0,0 +1,219 @@
|
|||
# MediaPipe graph to detect/predict hand landmarks on CPU.
|
||||
|
||||
type: "HandLandmarkCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
# ROI (region of interest) within the given image where a palm/hand is located.
|
||||
# (NormalizedRect)
|
||||
input_stream: "ROI:hand_rect"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
|
||||
# NOTE: if a hand is not present within the given ROI, for this particular
|
||||
# timestamp there will not be an output packet in the LANDMARKS stream. However,
|
||||
# the MediaPipe framework will internally inform the downstream calculators of
|
||||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
||||
# Hand world landmarks within the given ROI. (LandmarkList)
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the given ROI.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
|
||||
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (ClassificationList)
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
||||
# Transforms a region of image into a 224x224 tensor while keeping the aspect
|
||||
# ratio, and therefore may result in potential letterboxing.
|
||||
node {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE:image"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "TENSORS:input_tensor"
|
||||
output_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 224
|
||||
output_tensor_height: 224
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Loads the hand landmark TF Lite model.
|
||||
node {
|
||||
calculator: "HandLandmarkModelLoader"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
output_side_packet: "MODEL:model"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_side_packet: "MODEL:model"
|
||||
input_stream: "TENSORS:input_tensor"
|
||||
output_stream: "TENSORS:output_tensors"
|
||||
options: {
|
||||
[mediapipe.InferenceCalculatorOptions.ext] {
|
||||
delegate {
|
||||
xnnpack {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Splits a vector of tensors to multiple vectors according to the ranges
|
||||
# specified in option.
|
||||
node {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
input_stream: "output_tensors"
|
||||
output_stream: "landmark_tensors"
|
||||
output_stream: "hand_flag_tensor"
|
||||
output_stream: "handedness_tensor"
|
||||
output_stream: "world_landmark_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the hand-flag tensor into a float that represents the confidence
|
||||
# score of hand presence.
|
||||
node {
|
||||
calculator: "TensorsToFloatsCalculator"
|
||||
input_stream: "TENSORS:hand_flag_tensor"
|
||||
output_stream: "FLOAT:hand_presence_score"
|
||||
}
|
||||
|
||||
# Applies a threshold to the confidence score to determine whether a hand is
|
||||
# present.
|
||||
node {
|
||||
calculator: "ThresholdingCalculator"
|
||||
input_stream: "FLOAT:hand_presence_score"
|
||||
output_stream: "FLAG:hand_presence"
|
||||
options: {
|
||||
[mediapipe.ThresholdingCalculatorOptions.ext] {
|
||||
threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drops handedness tensor if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "handedness_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_handedness_tensor"
|
||||
}
|
||||
|
||||
# Converts the handedness tensor into a float that represents the classification
|
||||
# score of handedness.
|
||||
node {
|
||||
calculator: "TensorsToClassificationCalculator"
|
||||
input_stream: "TENSORS:ensured_handedness_tensor"
|
||||
output_stream: "CLASSIFICATIONS:handedness"
|
||||
options: {
|
||||
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
|
||||
top_k: 1
|
||||
label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
|
||||
binary_classification: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drops landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "landmark_tensors"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_landmark_tensors"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_landmark_tensors"
|
||||
output_stream: "NORM_LANDMARKS:landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
input_image_width: 224
|
||||
input_image_height: 224
|
||||
# The additional scaling factor is used to account for the Z coordinate
|
||||
# distribution in the training data.
|
||||
normalize_z: 0.4
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
|
||||
# image (after image transformation with the FIT scale mode) to the
|
||||
# corresponding locations on the same image with the letterbox removed (hand
|
||||
# image before image transformation).
|
||||
node {
|
||||
calculator: "LandmarkLetterboxRemovalCalculator"
|
||||
input_stream: "LANDMARKS:landmarks"
|
||||
input_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
output_stream: "LANDMARKS:scaled_landmarks"
|
||||
}
|
||||
|
||||
# Projects the landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "LandmarkProjectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:scaled_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "NORM_LANDMARKS:hand_landmarks"
|
||||
}
|
||||
|
||||
# Drops world landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "world_landmark_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_world_landmark_tensor"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_world_landmark_tensor"
|
||||
output_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the world landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "WorldLandmarkProjectionCalculator"
|
||||
input_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "LANDMARKS:hand_world_landmarks"
|
||||
}
|
BIN
mediapipe/modules/hand_landmark/hand_landmark_full.tflite
Executable file
BIN
mediapipe/modules/hand_landmark/hand_landmark_full.tflite
Executable file
Binary file not shown.
213
mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt
Normal file
213
mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt
Normal file
|
@ -0,0 +1,213 @@
|
|||
# MediaPipe graph to detect/predict hand landmarks on CPU.
|
||||
|
||||
type: "HandLandmarkGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
# ROI (region of interest) within the given image where a palm/hand is located.
|
||||
# (NormalizedRect)
|
||||
input_stream: "ROI:hand_rect"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
|
||||
# NOTE: if a hand is not present within the given ROI, for this particular
|
||||
# timestamp there will not be an output packet in the LANDMARKS stream. However,
|
||||
# the MediaPipe framework will internally inform the downstream calculators of
|
||||
# the absence of this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
|
||||
# Hand world landmarks within the given ROI. (LandmarkList)
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the given ROI.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
|
||||
|
||||
# Handedness of the detected hand (i.e. is hand left or right).
|
||||
# (ClassificationList)
|
||||
output_stream: "HANDEDNESS:handedness"
|
||||
|
||||
# Transforms a region of image into a 224x224 tensor while keeping the aspect
|
||||
# ratio, and therefore may result in potential letterboxing.
|
||||
node {
|
||||
calculator: "ImageToTensorCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "TENSORS:input_tensor"
|
||||
output_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
options: {
|
||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||
output_tensor_width: 224
|
||||
output_tensor_height: 224
|
||||
keep_aspect_ratio: true
|
||||
output_tensor_float_range {
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
}
|
||||
gpu_origin: TOP_LEFT
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Loads the hand landmark TF Lite model.
|
||||
node {
|
||||
calculator: "HandLandmarkModelLoader"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
output_side_packet: "MODEL:model"
|
||||
}
|
||||
|
||||
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
|
||||
# vector of tensors representing, for instance, detection boxes/keypoints and
|
||||
# scores.
|
||||
node {
|
||||
calculator: "InferenceCalculator"
|
||||
input_side_packet: "MODEL:model"
|
||||
input_stream: "TENSORS:input_tensor"
|
||||
output_stream: "TENSORS:output_tensors"
|
||||
}
|
||||
|
||||
# Splits a vector of tensors to multiple vectors according to the ranges
|
||||
# specified in option.
|
||||
node {
|
||||
calculator: "SplitTensorVectorCalculator"
|
||||
input_stream: "output_tensors"
|
||||
output_stream: "landmark_tensors"
|
||||
output_stream: "hand_flag_tensor"
|
||||
output_stream: "handedness_tensor"
|
||||
output_stream: "world_landmark_tensor"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 1 }
|
||||
ranges: { begin: 1 end: 2 }
|
||||
ranges: { begin: 2 end: 3 }
|
||||
ranges: { begin: 3 end: 4 }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the hand-flag tensor into a float that represents the confidence
|
||||
# score of hand presence.
|
||||
node {
|
||||
calculator: "TensorsToFloatsCalculator"
|
||||
input_stream: "TENSORS:hand_flag_tensor"
|
||||
output_stream: "FLOAT:hand_presence_score"
|
||||
}
|
||||
|
||||
# Applies a threshold to the confidence score to determine whether a hand is
|
||||
# present.
|
||||
node {
|
||||
calculator: "ThresholdingCalculator"
|
||||
input_stream: "FLOAT:hand_presence_score"
|
||||
output_stream: "FLAG:hand_presence"
|
||||
options: {
|
||||
[mediapipe.ThresholdingCalculatorOptions.ext] {
|
||||
threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drops handedness tensor if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "handedness_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_handedness_tensor"
|
||||
}
|
||||
|
||||
# Converts the handedness tensor into a float that represents the classification
|
||||
# score of handedness.
|
||||
node {
|
||||
calculator: "TensorsToClassificationCalculator"
|
||||
input_stream: "TENSORS:ensured_handedness_tensor"
|
||||
output_stream: "CLASSIFICATIONS:handedness"
|
||||
options: {
|
||||
[mediapipe.TensorsToClassificationCalculatorOptions.ext] {
|
||||
top_k: 1
|
||||
label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
|
||||
binary_classification: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Drops landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "landmark_tensors"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_landmark_tensors"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_landmark_tensors"
|
||||
output_stream: "NORM_LANDMARKS:landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
input_image_width: 224
|
||||
input_image_height: 224
|
||||
# The additional scaling factor is used to account for the Z coordinate
|
||||
# distribution in the training data.
|
||||
normalize_z: 0.4
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
|
||||
# image (after image transformation with the FIT scale mode) to the
|
||||
# corresponding locations on the same image with the letterbox removed (hand
|
||||
# image before image transformation).
|
||||
node {
|
||||
calculator: "LandmarkLetterboxRemovalCalculator"
|
||||
input_stream: "LANDMARKS:landmarks"
|
||||
input_stream: "LETTERBOX_PADDING:letterbox_padding"
|
||||
output_stream: "LANDMARKS:scaled_landmarks"
|
||||
}
|
||||
|
||||
# Projects the landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "LandmarkProjectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:scaled_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "NORM_LANDMARKS:hand_landmarks"
|
||||
}
|
||||
|
||||
# Drops world landmarks tensors if hand is not present.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "world_landmark_tensor"
|
||||
input_stream: "ALLOW:hand_presence"
|
||||
output_stream: "ensured_world_landmark_tensor"
|
||||
}
|
||||
|
||||
# Decodes the landmark tensors into a list of landmarks, where the landmark
|
||||
# coordinates are normalized by the size of the input image to the model.
|
||||
node {
|
||||
calculator: "TensorsToLandmarksCalculator"
|
||||
input_stream: "TENSORS:ensured_world_landmark_tensor"
|
||||
output_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
options: {
|
||||
[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
|
||||
num_landmarks: 21
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Projects the world landmarks from the cropped hand image to the corresponding
|
||||
# locations on the full image before cropping (input to the graph).
|
||||
node {
|
||||
calculator: "WorldLandmarkProjectionCalculator"
|
||||
input_stream: "LANDMARKS:unprojected_world_landmarks"
|
||||
input_stream: "NORM_RECT:hand_rect"
|
||||
output_stream: "LANDMARKS:hand_world_landmarks"
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
# MediaPipe graph to calculate hand region of interest (ROI) from landmarks
|
||||
# detected by "HandLandmarkCpu" or "HandLandmarkGpu".
|
||||
|
||||
type: "HandLandmarkLandmarksToRoi"
|
||||
|
||||
# Normalized landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "LANDMARKS:landmarks"
|
||||
# Image size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# ROI according to landmarks. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Extracts a subset of the hand landmarks that are relatively more stable across
|
||||
# frames (e.g. comparing to finger tips) for computing the bounding box. The box
|
||||
# will later be expanded to contain the entire hand. In this approach, it is
|
||||
# more robust to drastically changing hand size.
|
||||
# The landmarks extracted are: wrist, MCP/PIP of five fingers.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "landmarks"
|
||||
output_stream: "partial_landmarks"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 0 end: 4 }
|
||||
ranges: { begin: 5 end: 7 }
|
||||
ranges: { begin: 9 end: 11 }
|
||||
ranges: { begin: 13 end: 15 }
|
||||
ranges: { begin: 17 end: 19 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the hand landmarks into a rectangle (normalized by image size)
|
||||
# that encloses the hand. The calculator uses a subset of all hand landmarks
|
||||
# extracted from SplitNormalizedLandmarkListCalculator above to
|
||||
# calculate the bounding box and the rotation of the output rectangle. Please
|
||||
# see the comments in the calculator for more detail.
|
||||
node {
|
||||
calculator: "HandLandmarksToRectCalculator"
|
||||
input_stream: "NORM_LANDMARKS:partial_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:hand_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Expands the hand rectangle so that the box contains the entire hand and it's
|
||||
# big enough so that it's likely to still contain the hand even with some motion
|
||||
# in the next video frame .
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:hand_rect_from_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 2.0
|
||||
scale_y: 2.0
|
||||
shift_y: -0.1
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
BIN
mediapipe/modules/hand_landmark/hand_landmark_lite.tflite
Executable file
BIN
mediapipe/modules/hand_landmark/hand_landmark_lite.tflite
Executable file
Binary file not shown.
|
@ -0,0 +1,63 @@
|
|||
# MediaPipe graph to load a selected hand landmark TF Lite model.
|
||||
|
||||
type: "HandLandmarkModelLoader"
|
||||
|
||||
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
|
||||
# inference latency generally go up with the model complexity. If unspecified,
|
||||
# functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# TF Lite model represented as a FlatBuffer.
|
||||
# (std::unique_ptr<tflite::FlatBufferModel, std::function<void(tflite::FlatBufferModel*)>>)
|
||||
output_side_packet: "MODEL:model"
|
||||
|
||||
# Determines path to the desired pose landmark model file.
|
||||
node {
|
||||
calculator: "SwitchContainer"
|
||||
input_side_packet: "SELECT:model_complexity"
|
||||
output_side_packet: "PACKET:model_path"
|
||||
options: {
|
||||
[mediapipe.SwitchContainerOptions.ext] {
|
||||
select: 1
|
||||
contained_node: {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
options: {
|
||||
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
|
||||
packet {
|
||||
string_value: "mediapipe/modules/hand_landmark/hand_landmark_lite.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
contained_node: {
|
||||
calculator: "ConstantSidePacketCalculator"
|
||||
options: {
|
||||
[mediapipe.ConstantSidePacketCalculatorOptions.ext]: {
|
||||
packet {
|
||||
string_value: "mediapipe/modules/hand_landmark/hand_landmark_full.tflite"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Loads the file in the specified path into a blob.
|
||||
node {
|
||||
calculator: "LocalFileContentsCalculator"
|
||||
input_side_packet: "FILE_PATH:model_path"
|
||||
output_side_packet: "CONTENTS:model_blob"
|
||||
options: {
|
||||
[mediapipe.LocalFileContentsCalculatorOptions.ext]: {
|
||||
text_mode: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts the input blob into a TF Lite model.
|
||||
node {
|
||||
calculator: "TfLiteModelCalculator"
|
||||
input_side_packet: "MODEL_BLOB:model_blob"
|
||||
output_side_packet: "MODEL:model"
|
||||
}
|
271
mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt
Normal file
271
mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.pbtxt
Normal file
|
@ -0,0 +1,271 @@
|
|||
# MediaPipe graph to detect/predict hand landmarks on CPU.
|
||||
#
|
||||
# The procedure is done in two steps:
|
||||
# - locate palms/hands
|
||||
# - detect landmarks for each palm/hand.
|
||||
# This graph tries to skip palm detection as much as possible by reusing
|
||||
# previously detected/predicted landmarks for new images.
|
||||
|
||||
type: "HandLandmarkTrackingCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if none of hands detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
# Regions of interest calculated based on palm detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
|
||||
|
||||
# When the optional input side packet "use_prev_landmarks" is either absent or
|
||||
# set to true, uses the landmarks on the previous image to help localize
|
||||
# landmarks on the current image.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_side_packet: "ALLOW:use_prev_landmarks"
|
||||
input_stream: "prev_hand_rects_from_landmarks"
|
||||
output_stream: "gated_prev_hand_rects_from_landmarks"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||
# equal to the provided num_hands.
|
||||
node {
|
||||
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:gated_prev_hand_rects_from_landmarks"
|
||||
input_side_packet: "num_hands"
|
||||
output_stream: "prev_has_enough_hands"
|
||||
}
|
||||
|
||||
# Drops the incoming image if enough hands have already been identified from the
|
||||
# previous image. Otherwise, passes the incoming image through to trigger a new
|
||||
# round of palm detection.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "DISALLOW:prev_has_enough_hands"
|
||||
output_stream: "palm_detection_image"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
empty_packets_as_allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionCpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "DETECTIONS:all_palm_detections"
|
||||
}
|
||||
|
||||
# Makes sure there are no more detections than the provided num_hands.
|
||||
node {
|
||||
calculator: "ClipDetectionVectorSizeCalculator"
|
||||
input_stream: "all_palm_detections"
|
||||
output_stream: "palm_detections"
|
||||
input_side_packet: "num_hands"
|
||||
}
|
||||
|
||||
# Extracts image size.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "SIZE:palm_detection_image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of palm_detections at a fake timestamp for the rest of
|
||||
# the graph to process. Clones the image size packet for each palm_detection at
|
||||
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||
# for downstream calculators to inform them that all elements in the vector have
|
||||
# been processed.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:palm_detections"
|
||||
input_stream: "CLONE:palm_detection_image_size"
|
||||
output_stream: "ITEM:palm_detection"
|
||||
output_stream: "CLONE:image_size_for_palms"
|
||||
output_stream: "BATCH_END:palm_detections_timestamp"
|
||||
}
|
||||
|
||||
# Calculates region of interest (ROI) based on the specified palm.
|
||||
node {
|
||||
calculator: "PalmDetectionDetectionToRoi"
|
||||
input_stream: "DETECTION:palm_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size_for_palms"
|
||||
output_stream: "ROI:hand_rect_from_palm_detection"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:hand_rect_from_palm_detection"
|
||||
input_stream: "BATCH_END:palm_detections_timestamp"
|
||||
output_stream: "ITERABLE:hand_rects_from_palm_detections"
|
||||
}
|
||||
|
||||
# Performs association between NormalizedRect vector elements from previous
|
||||
# image and rects based on palm detections from the current image. This
|
||||
# calculator ensures that the output hand_rects vector doesn't contain
|
||||
# overlapping regions based on the specified min_similarity_threshold.
|
||||
node {
|
||||
calculator: "AssociationNormRectCalculator"
|
||||
input_stream: "hand_rects_from_palm_detections"
|
||||
input_stream: "gated_prev_hand_rects_from_landmarks"
|
||||
output_stream: "hand_rects"
|
||||
options: {
|
||||
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||
min_similarity_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Extracts image size.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_CPU:image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of hand_rects at a fake timestamp for the rest of the
|
||||
# graph to process. Clones image and image size packets for each
|
||||
# single_hand_rect at the fake timestamp. At the end of the loop, outputs the
|
||||
# BATCH_END timestamp for downstream calculators to inform them that all
|
||||
# elements in the vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedRectCalculator"
|
||||
input_stream: "ITERABLE:hand_rects"
|
||||
input_stream: "CLONE:0:image"
|
||||
input_stream: "CLONE:1:image_size"
|
||||
output_stream: "ITEM:single_hand_rect"
|
||||
output_stream: "CLONE:0:image_for_landmarks"
|
||||
output_stream: "CLONE:1:image_size_for_landmarks"
|
||||
output_stream: "BATCH_END:hand_rects_timestamp"
|
||||
}
|
||||
|
||||
# Detect hand landmarks for the specific hand rect.
|
||||
node {
|
||||
calculator: "HandLandmarkCpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:image_for_landmarks"
|
||||
input_stream: "ROI:single_hand_rect"
|
||||
output_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:single_handedness"
|
||||
}
|
||||
|
||||
# Collects the handedness for each single hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopClassificationListCalculator"
|
||||
input_stream: "ITEM:single_handedness"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_handedness"
|
||||
}
|
||||
|
||||
# Calculate region of interest (ROI) based on detected hand landmarks to reuse
|
||||
# on the subsequent runs of the graph.
|
||||
node {
|
||||
calculator: "HandLandmarkLandmarksToRoi"
|
||||
input_stream: "IMAGE_SIZE:image_size_for_landmarks"
|
||||
input_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "ROI:single_hand_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of landmarks for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of world landmarks for each hand into a vector. Upon receiving
|
||||
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_world_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_world_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:single_hand_rect_from_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:hand_rects_from_landmarks"
|
||||
}
|
||||
|
||||
# Caches hand rects calculated from landmarks, and upon the arrival of the next
|
||||
# input image, sends out the cached rects with timestamps replaced by that of
|
||||
# the input image, essentially generating a packet that carries the previous
|
||||
# hand rects. Note that upon the arrival of the very first input image, a
|
||||
# timestamp bound update occurs to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image"
|
||||
input_stream: "LOOP:hand_rects_from_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_hand_rects_from_landmarks"
|
||||
}
|
|
@ -0,0 +1,116 @@
|
|||
# MediaPipe graph to detect/predict hand landmarks on CPU.
|
||||
#
|
||||
# The procedure is done in two steps:
|
||||
# - locate palms/hands
|
||||
# - detect landmarks for each palm/hand.
|
||||
# This graph tries to skip palm detection as much as possible by reusing
|
||||
# previously detected/predicted landmarks for new images.
|
||||
|
||||
type: "HandLandmarkTrackingCpuImage"
|
||||
|
||||
# Input image. (Image)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if none of hands detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
# Regions of interest calculated based on palm detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
|
||||
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "FINISHED:multi_hand_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_image"
|
||||
options: {
|
||||
[mediapipe.FlowLimiterCalculatorOptions.ext] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts Image to ImageFrame for HandLandmarkTrackingCpu to consume.
|
||||
node {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
output_stream: "IMAGE_CPU:raw_image_frame"
|
||||
output_stream: "SOURCE_ON_GPU:is_gpu_image"
|
||||
}
|
||||
|
||||
# TODO: Remove the extra flipping once adopting MlImage.
|
||||
# If the source images are on gpu, flip the data vertically before sending them
|
||||
# into HandLandmarkTrackingCpu. This maybe needed because OpenGL represents
|
||||
# images assuming the image origin is at the bottom-left corner, whereas
|
||||
# MediaPipe in general assumes the image origin is at the top-left corner.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE:raw_image_frame"
|
||||
input_stream: "FLIP_VERTICALLY:is_gpu_image"
|
||||
output_stream: "IMAGE:image_frame"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "HandLandmarkTrackingCpu"
|
||||
input_stream: "IMAGE:image_frame"
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
|
||||
}
|
272
mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt
Normal file
272
mediapipe/modules/hand_landmark/hand_landmark_tracking_gpu.pbtxt
Normal file
|
@ -0,0 +1,272 @@
|
|||
# MediaPipe graph to detect/predict hand landmarks on GPU.
|
||||
#
|
||||
# The procedure is done in two steps:
|
||||
# - locate palms/hands
|
||||
# - detect landmarks for each palm/hand.
|
||||
# This graph tries to skip palm detection as much as possible by reusing
|
||||
# previously detected/predicted landmarks for new images.
|
||||
|
||||
type: "HandLandmarkTrackingGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if none of hands detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
# Regions of interest calculated based on palm detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
|
||||
|
||||
# When the optional input side packet "use_prev_landmarks" is either absent or
|
||||
# set to true, uses the landmarks on the previous image to help localize
|
||||
# landmarks on the current image.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_side_packet: "ALLOW:use_prev_landmarks"
|
||||
input_stream: "prev_hand_rects_from_landmarks"
|
||||
output_stream: "gated_prev_hand_rects_from_landmarks"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||
# equal to the provided num_hands.
|
||||
node {
|
||||
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||
input_stream: "ITERABLE:gated_prev_hand_rects_from_landmarks"
|
||||
input_side_packet: "num_hands"
|
||||
output_stream: "prev_has_enough_hands"
|
||||
}
|
||||
|
||||
# Drops the incoming image if enough hands have already been identified from the
|
||||
# previous image. Otherwise, passes the incoming image through to trigger a new
|
||||
# round of palm detection.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "DISALLOW:prev_has_enough_hands"
|
||||
output_stream: "palm_detection_image"
|
||||
options: {
|
||||
[mediapipe.GateCalculatorOptions.ext] {
|
||||
empty_packets_as_allow: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Detects palms.
|
||||
node {
|
||||
calculator: "PalmDetectionGpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:palm_detection_image"
|
||||
output_stream: "DETECTIONS:all_palm_detections"
|
||||
}
|
||||
|
||||
# Makes sure there are no more detections than provided num_hands.
|
||||
node {
|
||||
calculator: "ClipDetectionVectorSizeCalculator"
|
||||
input_stream: "all_palm_detections"
|
||||
output_stream: "palm_detections"
|
||||
input_side_packet: "num_hands"
|
||||
}
|
||||
|
||||
# Extracts image size.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:palm_detection_image"
|
||||
output_stream: "SIZE:palm_detection_image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of palm_detections at a fake timestamp for the rest of
|
||||
# the graph to process. Clones the image_size packet for each palm_detection at
|
||||
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||
# for downstream calculators to inform them that all elements in the vector have
|
||||
# been processed.
|
||||
node {
|
||||
calculator: "BeginLoopDetectionCalculator"
|
||||
input_stream: "ITERABLE:palm_detections"
|
||||
input_stream: "CLONE:palm_detection_image_size"
|
||||
output_stream: "ITEM:palm_detection"
|
||||
output_stream: "CLONE:image_size_for_palms"
|
||||
output_stream: "BATCH_END:palm_detections_timestamp"
|
||||
}
|
||||
|
||||
# Calculates region of interest (ROI) base on the specified palm.
|
||||
node {
|
||||
calculator: "PalmDetectionDetectionToRoi"
|
||||
input_stream: "DETECTION:palm_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size_for_palms"
|
||||
output_stream: "ROI:hand_rect_from_palm_detection"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
name: "EndLoopForPalmDetections"
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:hand_rect_from_palm_detection"
|
||||
input_stream: "BATCH_END:palm_detections_timestamp"
|
||||
output_stream: "ITERABLE:hand_rects_from_palm_detections"
|
||||
}
|
||||
|
||||
# Performs association between NormalizedRect vector elements from previous
|
||||
# image and rects based on palm detections from the current image. This
|
||||
# calculator ensures that the output hand_rects vector doesn't contain
|
||||
# overlapping regions based on the specified min_similarity_threshold.
|
||||
node {
|
||||
calculator: "AssociationNormRectCalculator"
|
||||
input_stream: "hand_rects_from_palm_detections"
|
||||
input_stream: "gated_prev_hand_rects_from_landmarks"
|
||||
output_stream: "hand_rects"
|
||||
options: {
|
||||
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||
min_similarity_threshold: 0.5
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Extracts image size.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:image"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Outputs each element of hand_rects at a fake timestamp for the rest of the
|
||||
# graph to process. Clones image and image size packets for each
|
||||
# single_hand_rect at the fake timestamp. At the end of the loop, outputs the
|
||||
# BATCH_END timestamp for downstream calculators to inform them that all
|
||||
# elements in the vector have been processed.
|
||||
node {
|
||||
calculator: "BeginLoopNormalizedRectCalculator"
|
||||
input_stream: "ITERABLE:hand_rects"
|
||||
input_stream: "CLONE:0:image"
|
||||
input_stream: "CLONE:1:image_size"
|
||||
output_stream: "ITEM:single_hand_rect"
|
||||
output_stream: "CLONE:0:image_for_landmarks"
|
||||
output_stream: "CLONE:1:image_size_for_landmarks"
|
||||
output_stream: "BATCH_END:hand_rects_timestamp"
|
||||
}
|
||||
|
||||
# Detect hand landmarks for the specific hand rect.
|
||||
node {
|
||||
calculator: "HandLandmarkGpu"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_stream: "IMAGE:image_for_landmarks"
|
||||
input_stream: "ROI:single_hand_rect"
|
||||
output_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:single_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:single_handedness"
|
||||
}
|
||||
|
||||
# Collects the handedness for each single hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs a vector of ClassificationList at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopClassificationListCalculator"
|
||||
input_stream: "ITEM:single_handedness"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_handedness"
|
||||
}
|
||||
|
||||
# Calculate region of interest (ROI) based on detected hand landmarks to reuse
|
||||
# on the subsequent runs of the graph.
|
||||
node {
|
||||
calculator: "HandLandmarkLandmarksToRoi"
|
||||
input_stream: "IMAGE_SIZE:image_size_for_landmarks"
|
||||
input_stream: "LANDMARKS:single_hand_landmarks"
|
||||
output_stream: "ROI:single_hand_rect_from_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of landmarks for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_landmarks"
|
||||
}
|
||||
|
||||
# Collects a set of world landmarks for each hand into a vector. Upon receiving
|
||||
# the BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopLandmarkListVectorCalculator"
|
||||
input_stream: "ITEM:single_hand_world_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:multi_hand_world_landmarks"
|
||||
}
|
||||
|
||||
# Collects a NormalizedRect for each hand into a vector. Upon receiving the
|
||||
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||
# timestamp.
|
||||
node {
|
||||
calculator: "EndLoopNormalizedRectCalculator"
|
||||
input_stream: "ITEM:single_hand_rect_from_landmarks"
|
||||
input_stream: "BATCH_END:hand_rects_timestamp"
|
||||
output_stream: "ITERABLE:hand_rects_from_landmarks"
|
||||
}
|
||||
|
||||
# Caches hand rects calculated from landmarks, and upon the arrival of the next
|
||||
# input image, sends out the cached rects with timestamps replaced by that of
|
||||
# the input image, essentially generating a packet that carries the previous
|
||||
# hand rects. Note that upon the arrival of the very first input image, a
|
||||
# timestamp bound update occurs to jump start the feedback loop.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image"
|
||||
input_stream: "LOOP:hand_rects_from_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_hand_rects_from_landmarks"
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
# MediaPipe graph to detect/predict hand landmarks on GPU.
|
||||
#
|
||||
# The procedure is done in two steps:
|
||||
# - locate palms/hands
|
||||
# - detect landmarks for each palm/hand.
|
||||
# This graph tries to skip palm detection as much as possible by reusing
|
||||
# previously detected/predicted landmarks for new images.
|
||||
|
||||
type: "HandLandmarkTrackingGpuImage"
|
||||
|
||||
# Input image. (Image)
|
||||
input_stream: "IMAGE:image"
|
||||
|
||||
# Max number of hands to detect/track. (int)
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
|
||||
# Complexity of hand landmark and palm detection models: 0 or 1. Accuracy as
|
||||
# well as inference latency generally go up with the model complexity. If
|
||||
# unspecified, functions as set to 1. (int)
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
|
||||
# Whether landmarks on the previous image should be used to help localize
|
||||
# landmarks on the current image. (bool)
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
|
||||
# Collection of detected/predicted hands, each represented as a list of
|
||||
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||
# particular timestamp if none of hands detected. However, the MediaPipe
|
||||
# framework will internally inform the downstream calculators of the absence of
|
||||
# this packet so that they don't wait for it unnecessarily.
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
|
||||
# Collection of detected/predicted hand world landmarks.
|
||||
# (std::vector<LandmarkList>)
|
||||
#
|
||||
# World landmarks are real-world 3D coordinates in meters with the origin in the
|
||||
# center of the hand bounding box calculated from the landmarks.
|
||||
#
|
||||
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
|
||||
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
|
||||
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
|
||||
# the 3D object itself.
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
|
||||
# Collection of handedness of the detected hands (i.e. is hand left or right),
|
||||
# each represented as a ClassificationList proto with a single Classification
|
||||
# entry. (std::vector<ClassificationList>)
|
||||
# Note that handedness is determined assuming the input image is mirrored,
|
||||
# i.e., taken with a front-facing/selfie camera with images flipped
|
||||
# horizontally.
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
|
||||
# The throttled input image. (Image)
|
||||
output_stream: "IMAGE:throttled_image"
|
||||
# Extra outputs (for debugging, for instance).
|
||||
# Detected palms. (std::vector<Detection>)
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
# Regions of interest calculated based on landmarks.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
# Regions of interest calculated based on palm detections.
|
||||
# (std::vector<NormalizedRect>)
|
||||
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
|
||||
|
||||
node {
|
||||
calculator: "FlowLimiterCalculator"
|
||||
input_stream: "image"
|
||||
input_stream: "FINISHED:multi_hand_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "FINISHED"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "throttled_image"
|
||||
options: {
|
||||
[mediapipe.FlowLimiterCalculatorOptions.ext] {
|
||||
max_in_flight: 1
|
||||
max_in_queue: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Converts Image to GpuBuffer for HandLandmarkTrackingGpu to consume.
|
||||
node {
|
||||
calculator: "FromImageCalculator"
|
||||
input_stream: "IMAGE:throttled_image"
|
||||
output_stream: "IMAGE_GPU:raw_gpu_buffer"
|
||||
output_stream: "SOURCE_ON_GPU:is_gpu_image"
|
||||
}
|
||||
|
||||
# TODO: Remove the extra flipping once adopting MlImage.
|
||||
# If the source images are on gpu, flip the data vertically before sending them
|
||||
# into HandLandmarkTrackingGpu. This maybe needed because OpenGL represents
|
||||
# images assuming the image origin is at the bottom-left corner, whereas
|
||||
# MediaPipe in general assumes the image origin is at the top-left corner.
|
||||
node: {
|
||||
calculator: "ImageTransformationCalculator"
|
||||
input_stream: "IMAGE_GPU:raw_gpu_buffer"
|
||||
input_stream: "FLIP_VERTICALLY:is_gpu_image"
|
||||
output_stream: "IMAGE_GPU:gpu_buffer"
|
||||
}
|
||||
|
||||
node {
|
||||
calculator: "HandLandmarkTrackingGpu"
|
||||
input_stream: "IMAGE:gpu_buffer"
|
||||
input_side_packet: "NUM_HANDS:num_hands"
|
||||
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||||
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||
output_stream: "LANDMARKS:multi_hand_landmarks"
|
||||
output_stream: "WORLD_LANDMARKS:multi_hand_world_landmarks"
|
||||
output_stream: "HANDEDNESS:multi_handedness"
|
||||
output_stream: "PALM_DETECTIONS:palm_detections"
|
||||
output_stream: "HAND_ROIS_FROM_LANDMARKS:hand_rects"
|
||||
output_stream: "HAND_ROIS_FROM_PALM_DETECTIONS:hand_rects_from_palm_detections"
|
||||
}
|
2
mediapipe/modules/hand_landmark/handedness.txt
Normal file
2
mediapipe/modules/hand_landmark/handedness.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
Left
|
||||
Right
|
|
@ -0,0 +1,47 @@
|
|||
# MediaPipe subgraph that calculates hand ROI from palm detection.
|
||||
|
||||
type: "PalmDetectionDetectionToRoi"
|
||||
|
||||
# Palm detection. (Detection)
|
||||
input_stream: "DETECTION:detection"
|
||||
# Frame size. (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# ROI (region of interest) according to landmarks, represented as normalized
|
||||
# rect. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts results of palm detection into a rectangle (normalized by image size)
|
||||
# that encloses the palm and is rotated such that the line connecting center of
|
||||
# the wrist and MCP of the middle finger is aligned with the Y-axis of the
|
||||
# rectangle.
|
||||
node {
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:raw_roi"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
rotation_vector_start_keypoint_index: 0 # Center of wrist.
|
||||
rotation_vector_end_keypoint_index: 2 # MCP of middle finger.
|
||||
rotation_vector_target_angle_degrees: 90
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Expands and shifts the rectangle that contains the palm so that it's likely
|
||||
# to cover the entire hand.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:raw_roi"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 2.6
|
||||
scale_y: 2.6
|
||||
shift_y: -0.5
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
267
mediapipe/modules/holistic_landmark/BUILD
Normal file
267
mediapipe/modules/holistic_landmark/BUILD
Normal file
|
@ -0,0 +1,267 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph")
|
||||
|
||||
# TODO: revert to private.
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
exports_files([
|
||||
"hand_recrop.tflite",
|
||||
])
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmarks_from_pose_gpu",
|
||||
graph = "face_landmarks_from_pose_gpu.pbtxt",
|
||||
register_as = "FaceLandmarksFromPoseGpu",
|
||||
deps = [
|
||||
":face_detection_front_detections_to_roi",
|
||||
":face_landmarks_from_pose_to_recrop_roi",
|
||||
":face_tracking",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_by_roi_gpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmarks_from_pose_cpu",
|
||||
graph = "face_landmarks_from_pose_cpu.pbtxt",
|
||||
register_as = "FaceLandmarksFromPoseCpu",
|
||||
deps = [
|
||||
":face_detection_front_detections_to_roi",
|
||||
":face_landmarks_from_pose_to_recrop_roi",
|
||||
":face_tracking",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/modules/face_detection:face_detection_short_range_by_roi_cpu",
|
||||
"//mediapipe/modules/face_landmark:face_landmark_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmarks_to_roi",
|
||||
graph = "face_landmarks_to_roi.pbtxt",
|
||||
register_as = "FaceLandmarksToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_detection_front_detections_to_roi",
|
||||
graph = "face_detection_front_detections_to_roi.pbtxt",
|
||||
register_as = "FaceDetectionFrontDetectionsToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_tracking",
|
||||
graph = "face_tracking.pbtxt",
|
||||
register_as = "FaceTracking",
|
||||
deps = [
|
||||
":face_landmarks_to_roi",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "face_landmarks_from_pose_to_recrop_roi",
|
||||
graph = "face_landmarks_from_pose_to_recrop_roi.pbtxt",
|
||||
register_as = "FaceLandmarksFromPoseToRecropRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmarks_from_pose_gpu",
|
||||
graph = "hand_landmarks_from_pose_gpu.pbtxt",
|
||||
register_as = "HandLandmarksFromPoseGpu",
|
||||
deps = [
|
||||
":hand_landmarks_from_pose_to_recrop_roi",
|
||||
":hand_recrop_by_roi_gpu",
|
||||
":hand_tracking",
|
||||
":hand_visibility_from_hand_landmarks_from_pose",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmarks_from_pose_cpu",
|
||||
graph = "hand_landmarks_from_pose_cpu.pbtxt",
|
||||
register_as = "HandLandmarksFromPoseCpu",
|
||||
deps = [
|
||||
":hand_landmarks_from_pose_to_recrop_roi",
|
||||
":hand_recrop_by_roi_cpu",
|
||||
":hand_tracking",
|
||||
":hand_visibility_from_hand_landmarks_from_pose",
|
||||
"//mediapipe/calculators/core:gate_calculator",
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/modules/hand_landmark:hand_landmark_cpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmarks_to_roi",
|
||||
graph = "hand_landmarks_to_roi.pbtxt",
|
||||
register_as = "HandLandmarksToRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
"//mediapipe/modules/hand_landmark/calculators:hand_landmarks_to_rect_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_recrop_by_roi_gpu",
|
||||
graph = "hand_recrop_by_roi_gpu.pbtxt",
|
||||
register_as = "HandRecropByRoiGpu",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:alignment_points_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_recrop_by_roi_cpu",
|
||||
graph = "hand_recrop_by_roi_cpu.pbtxt",
|
||||
register_as = "HandRecropByRoiCpu",
|
||||
deps = [
|
||||
"//mediapipe/calculators/image:image_properties_calculator",
|
||||
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
|
||||
"//mediapipe/calculators/tensor:inference_calculator",
|
||||
"//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:alignment_points_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
|
||||
"//mediapipe/calculators/util:landmark_projection_calculator",
|
||||
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_tracking",
|
||||
graph = "hand_tracking.pbtxt",
|
||||
register_as = "HandTracking",
|
||||
deps = [
|
||||
":hand_landmarks_to_roi",
|
||||
"//mediapipe/calculators/core:previous_loopback_calculator",
|
||||
"//mediapipe/modules/holistic_landmark/calculators:roi_tracking_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
# TODO: parametrize holistic_landmark graph with visibility and make private.
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_wrist_for_pose",
|
||||
graph = "hand_wrist_for_pose.pbtxt",
|
||||
register_as = "HandWristForPose",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:constant_side_packet_calculator",
|
||||
"//mediapipe/calculators/core:side_packet_to_stream_calculator",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:set_landmark_visibility_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmarks_left_and_right_gpu",
|
||||
graph = "hand_landmarks_left_and_right_gpu.pbtxt",
|
||||
register_as = "HandLandmarksLeftAndRightGpu",
|
||||
deps = [
|
||||
":hand_landmarks_from_pose_gpu",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmarks_left_and_right_cpu",
|
||||
graph = "hand_landmarks_left_and_right_cpu.pbtxt",
|
||||
register_as = "HandLandmarksLeftAndRightCpu",
|
||||
deps = [
|
||||
":hand_landmarks_from_pose_cpu",
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_landmarks_from_pose_to_recrop_roi",
|
||||
graph = "hand_landmarks_from_pose_to_recrop_roi.pbtxt",
|
||||
register_as = "HandLandmarksFromPoseToRecropRoi",
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:landmarks_to_detection_calculator",
|
||||
"//mediapipe/calculators/util:rect_transformation_calculator",
|
||||
"//mediapipe/modules/holistic_landmark/calculators:hand_detections_from_pose_to_rects_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "hand_visibility_from_hand_landmarks_from_pose",
|
||||
graph = "hand_visibility_from_hand_landmarks_from_pose.pbtxt",
|
||||
register_as = "HandVisibilityFromHandLandmarksFromPose",
|
||||
deps = [
|
||||
"//mediapipe/calculators/core:split_landmarks_calculator",
|
||||
"//mediapipe/calculators/util:landmark_visibility_calculator",
|
||||
"//mediapipe/calculators/util:thresholding_calculator",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "holistic_landmark_gpu",
|
||||
graph = "holistic_landmark_gpu.pbtxt",
|
||||
register_as = "HolisticLandmarkGpu",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":face_landmarks_from_pose_gpu",
|
||||
":hand_landmarks_left_and_right_gpu",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/modules/pose_landmark:pose_landmark_gpu",
|
||||
],
|
||||
)
|
||||
|
||||
mediapipe_simple_subgraph(
|
||||
name = "holistic_landmark_cpu",
|
||||
graph = "holistic_landmark_cpu.pbtxt",
|
||||
register_as = "HolisticLandmarkCpu",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":face_landmarks_from_pose_cpu",
|
||||
":hand_landmarks_left_and_right_cpu",
|
||||
"//mediapipe/calculators/core:flow_limiter_calculator",
|
||||
"//mediapipe/calculators/image:image_transformation_calculator",
|
||||
"//mediapipe/modules/pose_landmark:pose_landmark_cpu",
|
||||
],
|
||||
)
|
6
mediapipe/modules/holistic_landmark/README.md
Normal file
6
mediapipe/modules/holistic_landmark/README.md
Normal file
|
@ -0,0 +1,6 @@
|
|||
# holistic_landmark
|
||||
|
||||
Subgraphs|Details
|
||||
:--- | :---
|
||||
[`HolisticLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_cpu.pbtxt)| Predicts pose + left/right hand + face landmarks. (CPU input)
|
||||
[`HolisticLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt)| Predicts pose + left/right hand + face landmarks. (GPU input.)
|
63
mediapipe/modules/holistic_landmark/calculators/BUILD
Normal file
63
mediapipe/modules/holistic_landmark/calculators/BUILD
Normal file
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2020 The MediaPipe Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
cc_library(
|
||||
name = "hand_detections_from_pose_to_rects_calculator",
|
||||
srcs = ["hand_detections_from_pose_to_rects_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator",
|
||||
"//mediapipe/calculators/util:detections_to_rects_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework:calculator_options_cc_proto",
|
||||
"//mediapipe/framework/formats:detection_cc_proto",
|
||||
"//mediapipe/framework/formats:location_data_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/port:ret_check",
|
||||
"//mediapipe/framework/port:status",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
||||
|
||||
mediapipe_proto_library(
|
||||
name = "roi_tracking_calculator_proto",
|
||||
srcs = ["roi_tracking_calculator.proto"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//mediapipe/framework:calculator_options_proto",
|
||||
"//mediapipe/framework:calculator_proto",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "roi_tracking_calculator",
|
||||
srcs = ["roi_tracking_calculator.cc"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":roi_tracking_calculator_cc_proto",
|
||||
"//mediapipe/framework:calculator_framework",
|
||||
"//mediapipe/framework/formats:landmark_cc_proto",
|
||||
"//mediapipe/framework/formats:rect_cc_proto",
|
||||
"//mediapipe/framework/port:logging",
|
||||
"//mediapipe/framework/port:rectangle",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
],
|
||||
alwayslink = 1,
|
||||
)
|
|
@ -0,0 +1,156 @@
|
|||
#include <cmath>
|
||||
|
||||
#include "mediapipe/calculators/util/detections_to_rects_calculator.h"
|
||||
#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_options.pb.h"
|
||||
#include "mediapipe/framework/formats/detection.pb.h"
|
||||
#include "mediapipe/framework/formats/location_data.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/ret_check.h"
|
||||
#include "mediapipe/framework/port/status.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {} // namespace
|
||||
|
||||
// Generates a hand ROI based on a hand detection derived from hand-related pose
|
||||
// landmarks.
|
||||
//
|
||||
// Inputs:
|
||||
// DETECTION - Detection.
|
||||
// Detection to convert to ROI. Must contain 3 key points indicating: wrist,
|
||||
// pinky and index fingers.
|
||||
//
|
||||
// IMAGE_SIZE - std::pair<int, int>
|
||||
// Image width and height.
|
||||
//
|
||||
// Outputs:
|
||||
// NORM_RECT - NormalizedRect.
|
||||
// ROI based on passed input.
|
||||
//
|
||||
// Examples
|
||||
// node {
|
||||
// calculator: "HandDetectionsFromPoseToRectsCalculator"
|
||||
// input_stream: "DETECTION:hand_detection_from_pose"
|
||||
// input_stream: "IMAGE_SIZE:image_size"
|
||||
// output_stream: "NORM_RECT:hand_roi_from_pose"
|
||||
// }
|
||||
class HandDetectionsFromPoseToRectsCalculator
|
||||
: public DetectionsToRectsCalculator {
|
||||
public:
|
||||
absl::Status Open(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
::absl::Status DetectionToNormalizedRect(const Detection& detection,
|
||||
const DetectionSpec& detection_spec,
|
||||
NormalizedRect* rect) override;
|
||||
absl::Status ComputeRotation(const Detection& detection,
|
||||
const DetectionSpec& detection_spec,
|
||||
float* rotation) override;
|
||||
};
|
||||
REGISTER_CALCULATOR(HandDetectionsFromPoseToRectsCalculator);
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kWrist = 0;
|
||||
constexpr int kPinky = 1;
|
||||
constexpr int kIndex = 2;
|
||||
|
||||
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||
|
||||
} // namespace
|
||||
|
||||
::absl::Status HandDetectionsFromPoseToRectsCalculator::Open(
|
||||
CalculatorContext* cc) {
|
||||
RET_CHECK(cc->Inputs().HasTag(kImageSizeTag))
|
||||
<< "Image size is required to calculate rotated rect.";
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
target_angle_ = M_PI * 0.5f;
|
||||
rotate_ = true;
|
||||
options_ = cc->Options<DetectionsToRectsCalculatorOptions>();
|
||||
output_zero_rect_for_empty_detections_ =
|
||||
options_.output_zero_rect_for_empty_detections();
|
||||
|
||||
return ::absl::OkStatus();
|
||||
}
|
||||
|
||||
::absl::Status
|
||||
HandDetectionsFromPoseToRectsCalculator ::DetectionToNormalizedRect(
|
||||
const Detection& detection, const DetectionSpec& detection_spec,
|
||||
NormalizedRect* rect) {
|
||||
const auto& location_data = detection.location_data();
|
||||
const auto& image_size = detection_spec.image_size;
|
||||
RET_CHECK(image_size) << "Image size is required to calculate rotation";
|
||||
|
||||
const float x_wrist =
|
||||
location_data.relative_keypoints(kWrist).x() * image_size->first;
|
||||
const float y_wrist =
|
||||
location_data.relative_keypoints(kWrist).y() * image_size->second;
|
||||
|
||||
const float x_index =
|
||||
location_data.relative_keypoints(kIndex).x() * image_size->first;
|
||||
const float y_index =
|
||||
location_data.relative_keypoints(kIndex).y() * image_size->second;
|
||||
|
||||
const float x_pinky =
|
||||
location_data.relative_keypoints(kPinky).x() * image_size->first;
|
||||
const float y_pinky =
|
||||
location_data.relative_keypoints(kPinky).y() * image_size->second;
|
||||
|
||||
// Estimate middle finger.
|
||||
const float x_middle = (2.f * x_index + x_pinky) / 3.f;
|
||||
const float y_middle = (2.f * y_index + y_pinky) / 3.f;
|
||||
|
||||
// Crop center as middle finger.
|
||||
const float center_x = x_middle;
|
||||
const float center_y = y_middle;
|
||||
|
||||
// Bounding box size as double distance from middle finger to wrist.
|
||||
const float box_size =
|
||||
std::sqrt((x_middle - x_wrist) * (x_middle - x_wrist) +
|
||||
(y_middle - y_wrist) * (y_middle - y_wrist)) *
|
||||
2.0;
|
||||
|
||||
// Set resulting bounding box.
|
||||
rect->set_x_center(center_x / image_size->first);
|
||||
rect->set_y_center(center_y / image_size->second);
|
||||
rect->set_width(box_size / image_size->first);
|
||||
rect->set_height(box_size / image_size->second);
|
||||
|
||||
return ::absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status HandDetectionsFromPoseToRectsCalculator::ComputeRotation(
|
||||
const Detection& detection, const DetectionSpec& detection_spec,
|
||||
float* rotation) {
|
||||
const auto& location_data = detection.location_data();
|
||||
const auto& image_size = detection_spec.image_size;
|
||||
RET_CHECK(image_size) << "Image size is required to calculate rotation";
|
||||
|
||||
const float x_wrist =
|
||||
location_data.relative_keypoints(kWrist).x() * image_size->first;
|
||||
const float y_wrist =
|
||||
location_data.relative_keypoints(kWrist).y() * image_size->second;
|
||||
|
||||
const float x_index =
|
||||
location_data.relative_keypoints(kIndex).x() * image_size->first;
|
||||
const float y_index =
|
||||
location_data.relative_keypoints(kIndex).y() * image_size->second;
|
||||
|
||||
const float x_pinky =
|
||||
location_data.relative_keypoints(kPinky).x() * image_size->first;
|
||||
const float y_pinky =
|
||||
location_data.relative_keypoints(kPinky).y() * image_size->second;
|
||||
|
||||
// Estimate middle finger.
|
||||
const float x_middle = (2.f * x_index + x_pinky) / 3.f;
|
||||
const float y_middle = (2.f * y_index + y_pinky) / 3.f;
|
||||
|
||||
*rotation = NormalizeRadians(
|
||||
target_angle_ - std::atan2(-(y_middle - y_wrist), x_middle - x_wrist));
|
||||
|
||||
return ::absl::OkStatus();
|
||||
}
|
||||
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,358 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "absl/strings/str_format.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/formats/landmark.pb.h"
|
||||
#include "mediapipe/framework/formats/rect.pb.h"
|
||||
#include "mediapipe/framework/port/logging.h"
|
||||
#include "mediapipe/framework/port/rectangle.h"
|
||||
#include "mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator.pb.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr char kPrevLandmarksTag[] = "PREV_LANDMARKS";
|
||||
constexpr char kPrevLandmarksRectTag[] = "PREV_LANDMARKS_RECT";
|
||||
constexpr char kRecropRectTag[] = "RECROP_RECT";
|
||||
constexpr char kImageSizeTag[] = "IMAGE_SIZE";
|
||||
constexpr char kTrackingRectTag[] = "TRACKING_RECT";
|
||||
|
||||
// TODO: Use rect rotation.
|
||||
// Verifies that Intersection over Union of previous frame rect and current
|
||||
// frame re-crop rect is less than threshold.
|
||||
bool IouRequirementsSatisfied(const NormalizedRect& prev_rect,
|
||||
const NormalizedRect& recrop_rect,
|
||||
const std::pair<int, int>& image_size,
|
||||
const float min_iou) {
|
||||
auto r1 = Rectangle_f(prev_rect.x_center() * image_size.first,
|
||||
prev_rect.y_center() * image_size.second,
|
||||
prev_rect.width() * image_size.first,
|
||||
prev_rect.height() * image_size.second);
|
||||
auto r2 = Rectangle_f(recrop_rect.x_center() * image_size.first,
|
||||
recrop_rect.y_center() * image_size.second,
|
||||
recrop_rect.width() * image_size.first,
|
||||
recrop_rect.height() * image_size.second);
|
||||
|
||||
const float intersection_area = r1.Intersect(r2).Area();
|
||||
const float union_area = r1.Area() + r2.Area() - intersection_area;
|
||||
|
||||
const float intersection_threshold = union_area * min_iou;
|
||||
if (intersection_area < intersection_threshold) {
|
||||
VLOG(1) << absl::StrFormat("Lost tracking: IoU intersection %f < %f",
|
||||
intersection_area, intersection_threshold);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Verifies that current frame re-crop rect rotation/translation/scale didn't
|
||||
// change much comparing to the previous frame rect. Translation and scale are
|
||||
// normalized by current frame re-crop rect.
|
||||
bool RectRequirementsSatisfied(const NormalizedRect& prev_rect,
|
||||
const NormalizedRect& recrop_rect,
|
||||
const std::pair<int, int> image_size,
|
||||
const float rotation_degrees,
|
||||
const float translation, const float scale) {
|
||||
// Rotate both rects so that re-crop rect edges are parallel to XY axes. That
|
||||
// will allow to compute x/y translation of the previous frame rect along axes
|
||||
// of the current frame re-crop rect.
|
||||
const float rotation = -recrop_rect.rotation();
|
||||
|
||||
const float cosa = cos(rotation);
|
||||
const float sina = sin(rotation);
|
||||
|
||||
// Rotate previous frame rect and get its parameters.
|
||||
const float prev_rect_x = prev_rect.x_center() * image_size.first * cosa -
|
||||
prev_rect.y_center() * image_size.second * sina;
|
||||
const float prev_rect_y = prev_rect.x_center() * image_size.first * sina +
|
||||
prev_rect.y_center() * image_size.second * cosa;
|
||||
const float prev_rect_width = prev_rect.width() * image_size.first;
|
||||
const float prev_rect_height = prev_rect.height() * image_size.second;
|
||||
const float prev_rect_rotation = prev_rect.rotation() / M_PI * 180.f;
|
||||
|
||||
// Rotate current frame re-crop rect and get its parameters.
|
||||
const float recrop_rect_x = recrop_rect.x_center() * image_size.first * cosa -
|
||||
recrop_rect.y_center() * image_size.second * sina;
|
||||
const float recrop_rect_y = recrop_rect.x_center() * image_size.first * sina +
|
||||
recrop_rect.y_center() * image_size.second * cosa;
|
||||
const float recrop_rect_width = recrop_rect.width() * image_size.first;
|
||||
const float recrop_rect_height = recrop_rect.height() * image_size.second;
|
||||
const float recrop_rect_rotation = recrop_rect.rotation() / M_PI * 180.f;
|
||||
|
||||
// Rect requirements are satisfied unless one of the checks below fails.
|
||||
bool satisfied = true;
|
||||
|
||||
// Ensure that rotation diff is in [0, 180] range.
|
||||
float rotation_diff = prev_rect_rotation - recrop_rect_rotation;
|
||||
if (rotation_diff > 180.f) {
|
||||
rotation_diff -= 360.f;
|
||||
}
|
||||
if (rotation_diff < -180.f) {
|
||||
rotation_diff += 360.f;
|
||||
}
|
||||
rotation_diff = abs(rotation_diff);
|
||||
if (rotation_diff > rotation_degrees) {
|
||||
satisfied = false;
|
||||
VLOG(1) << absl::StrFormat("Lost tracking: rect rotation %f > %f",
|
||||
rotation_diff, rotation_degrees);
|
||||
}
|
||||
|
||||
const float x_diff = abs(prev_rect_x - recrop_rect_x);
|
||||
const float x_threshold = recrop_rect_width * translation;
|
||||
if (x_diff > x_threshold) {
|
||||
satisfied = false;
|
||||
VLOG(1) << absl::StrFormat("Lost tracking: rect x translation %f > %f",
|
||||
x_diff, x_threshold);
|
||||
}
|
||||
|
||||
const float y_diff = abs(prev_rect_y - recrop_rect_y);
|
||||
const float y_threshold = recrop_rect_height * translation;
|
||||
if (y_diff > y_threshold) {
|
||||
satisfied = false;
|
||||
VLOG(1) << absl::StrFormat("Lost tracking: rect y translation %f > %f",
|
||||
y_diff, y_threshold);
|
||||
}
|
||||
|
||||
const float width_diff = abs(prev_rect_width - recrop_rect_width);
|
||||
const float width_threshold = recrop_rect_width * scale;
|
||||
if (width_diff > width_threshold) {
|
||||
satisfied = false;
|
||||
VLOG(1) << absl::StrFormat("Lost tracking: rect width %f > %f", width_diff,
|
||||
width_threshold);
|
||||
}
|
||||
|
||||
const float height_diff = abs(prev_rect_height - recrop_rect_height);
|
||||
const float height_threshold = recrop_rect_height * scale;
|
||||
if (height_diff > height_threshold) {
|
||||
satisfied = false;
|
||||
VLOG(1) << absl::StrFormat("Lost tracking: rect height %f > %f",
|
||||
height_diff, height_threshold);
|
||||
}
|
||||
|
||||
return satisfied;
|
||||
}
|
||||
|
||||
// Verifies that landmarks from the previous frame are within re-crop rectangle
|
||||
// bounds on the current frame.
|
||||
bool LandmarksRequirementsSatisfied(const NormalizedLandmarkList& landmarks,
|
||||
const NormalizedRect& recrop_rect,
|
||||
const std::pair<int, int> image_size,
|
||||
const float recrop_rect_margin) {
|
||||
// Rotate both re-crop rectangle and landmarks so that re-crop rectangle edges
|
||||
// are parallel to XY axes. It will allow to easily check if landmarks are
|
||||
// within re-crop rect bounds along re-crop rect axes.
|
||||
//
|
||||
// Rect rotation is specified clockwise. To apply cos/sin functions we
|
||||
// transform it into counterclockwise.
|
||||
const float rotation = -recrop_rect.rotation();
|
||||
|
||||
const float cosa = cos(rotation);
|
||||
const float sina = sin(rotation);
|
||||
|
||||
// Rotate rect.
|
||||
const float rect_x = recrop_rect.x_center() * image_size.first * cosa -
|
||||
recrop_rect.y_center() * image_size.second * sina;
|
||||
const float rect_y = recrop_rect.x_center() * image_size.first * sina +
|
||||
recrop_rect.y_center() * image_size.second * cosa;
|
||||
const float rect_width =
|
||||
recrop_rect.width() * image_size.first * (1.f + recrop_rect_margin);
|
||||
const float rect_height =
|
||||
recrop_rect.height() * image_size.second * (1.f + recrop_rect_margin);
|
||||
|
||||
// Get rect bounds.
|
||||
const float rect_left = rect_x - rect_width * 0.5f;
|
||||
const float rect_right = rect_x + rect_width * 0.5f;
|
||||
const float rect_top = rect_y - rect_height * 0.5f;
|
||||
const float rect_bottom = rect_y + rect_height * 0.5f;
|
||||
|
||||
for (int i = 0; i < landmarks.landmark_size(); ++i) {
|
||||
const auto& landmark = landmarks.landmark(i);
|
||||
const float x = landmark.x() * image_size.first * cosa -
|
||||
landmark.y() * image_size.second * sina;
|
||||
const float y = landmark.x() * image_size.first * sina +
|
||||
landmark.y() * image_size.second * cosa;
|
||||
|
||||
if (!(rect_left < x && x < rect_right && rect_top < y && y < rect_bottom)) {
|
||||
VLOG(1) << "Lost tracking: landmarks out of re-crop rect";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// A calculator to track object rectangle between frames.
|
||||
//
|
||||
// Calculator checks that all requirements for tracking are satisfied and uses
|
||||
// rectangle from the previous frame in this case, otherwise - uses current
|
||||
// frame re-crop rectangle.
|
||||
//
|
||||
// There are several types of tracking requirements that can be configured via
|
||||
// options:
|
||||
// IoU: Verifies that IoU of the previous frame rectangle and current frame
|
||||
// re-crop rectangle is less than a given threshold.
|
||||
// Rect parameters: Verifies that rotation/translation/scale of the re-crop
|
||||
// rectangle on the current frame is close to the rectangle from the
|
||||
// previous frame within given thresholds.
|
||||
// Landmarks: Verifies that landmarks from the previous frame are within
|
||||
// the re-crop rectangle on the current frame.
|
||||
//
|
||||
// Inputs:
|
||||
// PREV_LANDMARKS: Object landmarks from the previous frame.
|
||||
// PREV_LANDMARKS_RECT: Object rectangle based on the landmarks from the
|
||||
// previous frame.
|
||||
// RECROP_RECT: Object re-crop rectangle from the current frame.
|
||||
// IMAGE_SIZE: Image size to transform normalized coordinates to absolute.
|
||||
//
|
||||
// Outputs:
|
||||
// TRACKING_RECT: Rectangle to use for object prediction on the current frame.
|
||||
// It will be either object rectangle from the previous frame (if all
|
||||
// tracking requirements are satisfied) or re-crop rectangle from the
|
||||
// current frame (if tracking lost the object).
|
||||
//
|
||||
// Example config:
|
||||
// node {
|
||||
// calculator: "RoiTrackingCalculator"
|
||||
// input_stream: "PREV_LANDMARKS:prev_hand_landmarks"
|
||||
// input_stream: "PREV_LANDMARKS_RECT:prev_hand_landmarks_rect"
|
||||
// input_stream: "RECROP_RECT:hand_recrop_rect"
|
||||
// input_stream: "IMAGE_SIZE:image_size"
|
||||
// output_stream: "TRACKING_RECT:hand_tracking_rect"
|
||||
// options: {
|
||||
// [mediapipe.RoiTrackingCalculatorOptions.ext] {
|
||||
// rect_requirements: {
|
||||
// rotation_degrees: 40.0
|
||||
// translation: 0.2
|
||||
// scale: 0.4
|
||||
// }
|
||||
// landmarks_requirements: {
|
||||
// recrop_rect_margin: -0.1
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
class RoiTrackingCalculator : public CalculatorBase {
|
||||
public:
|
||||
static absl::Status GetContract(CalculatorContract* cc);
|
||||
absl::Status Open(CalculatorContext* cc) override;
|
||||
absl::Status Process(CalculatorContext* cc) override;
|
||||
|
||||
private:
|
||||
RoiTrackingCalculatorOptions options_;
|
||||
};
|
||||
REGISTER_CALCULATOR(RoiTrackingCalculator);
|
||||
|
||||
absl::Status RoiTrackingCalculator::GetContract(CalculatorContract* cc) {
  // Declare the packet type carried by each input stream...
  cc->Inputs().Tag(kPrevLandmarksTag).Set<NormalizedLandmarkList>();
  cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>();
  cc->Inputs().Tag(kPrevLandmarksRectTag).Set<NormalizedRect>();
  cc->Inputs().Tag(kRecropRectTag).Set<NormalizedRect>();
  // ...and by the single output stream.
  cc->Outputs().Tag(kTrackingRectTag).Set<NormalizedRect>();
  return absl::OkStatus();
}
|
||||
|
||||
absl::Status RoiTrackingCalculator::Open(CalculatorContext* cc) {
  // Cache the options once; they are read on every Process() call.
  options_ = cc->Options<RoiTrackingCalculatorOptions>();
  // Output timestamps always match input timestamps.
  cc->SetOffset(TimestampDiff(0));
  return absl::OkStatus();
}
|
||||
|
||||
absl::Status RoiTrackingCalculator::Process(CalculatorContext* cc) {
  // No current frame re-crop rect means the object is not present on this
  // frame: emit nothing at this timestamp.
  if (cc->Inputs().Tag(kRecropRectTag).IsEmpty()) {
    return absl::OkStatus();
  }

  // Without a previous frame rect there is nothing to track against, so the
  // current re-crop rect is forwarded as is.
  if (cc->Inputs().Tag(kPrevLandmarksRectTag).IsEmpty()) {
    cc->Outputs()
        .Tag(kTrackingRectTag)
        .AddPacket(cc->Inputs().Tag(kRecropRectTag).Value());
    return absl::OkStatus();
  }

  // At this point both the previous rect (and hence previous landmarks) and
  // the current frame re-crop rect are available.
  const auto& prev_landmarks =
      cc->Inputs().Tag(kPrevLandmarksTag).Get<NormalizedLandmarkList>();
  const auto& prev_rect =
      cc->Inputs().Tag(kPrevLandmarksRectTag).Get<NormalizedRect>();
  const auto& recrop_rect =
      cc->Inputs().Tag(kRecropRectTag).Get<NormalizedRect>();
  const auto& image_size =
      cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>();

  // Assume tracking continues; any configured requirement below may veto it.
  // All requirements are evaluated (no short-circuit between the ifs) so that
  // every failed check gets its own VLOG message.
  bool keep_tracking = true;

  // IoU requirement: previous rect and current re-crop rect must overlap at
  // least min_iou, otherwise fall back to the re-crop rect.
  if (options_.has_iou_requirements() &&
      !IouRequirementsSatisfied(prev_rect, recrop_rect, image_size,
                                options_.iou_requirements().min_iou())) {
    keep_tracking = false;
  }

  // Rect requirement: rotation/translation/scale must stay within the ranges
  // the model was trained to tolerate (via training-time augmentations).
  if (options_.has_rect_requirements() &&
      !RectRequirementsSatisfied(
          prev_rect, recrop_rect, image_size,
          options_.rect_requirements().rotation_degrees(),
          options_.rect_requirements().translation(),
          options_.rect_requirements().scale())) {
    keep_tracking = false;
  }

  // Landmarks requirement: previous frame landmarks must still fall inside
  // the current re-crop rect (i.e. the object didn't move too fast for the
  // previous frame rect to cover it).
  if (options_.has_landmarks_requirements() &&
      !LandmarksRequirementsSatisfied(
          prev_landmarks, recrop_rect, image_size,
          options_.landmarks_requirements().recrop_rect_margin())) {
    keep_tracking = false;
  }

  // Keep the previous frame rect while tracking holds; otherwise fall back to
  // the current frame re-crop rect.
  if (keep_tracking) {
    cc->Outputs()
        .Tag(kTrackingRectTag)
        .AddPacket(cc->Inputs().Tag(kPrevLandmarksRectTag).Value());
  } else {
    VLOG(1) << "Lost tracking: check messages above for details";
    cc->Outputs()
        .Tag(kTrackingRectTag)
        .AddPacket(cc->Inputs().Tag(kRecropRectTag).Value());
  }

  return absl::OkStatus();
}
|
||||
|
||||
} // namespace mediapipe
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright 2020 The MediaPipe Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto2";
|
||||
|
||||
package mediapipe;
|
||||
|
||||
import "mediapipe/framework/calculator.proto";
|
||||
|
||||
message RoiTrackingCalculatorOptions {
  extend CalculatorOptions {
    optional RoiTrackingCalculatorOptions ext = 329994630;
  }

  // Verifies that Intersection over Union of previous frame rect and current
  // frame re-crop rect is not less than the threshold (tracking is lost when
  // the overlap drops below min_iou).
  message IouRequirements {
    optional float min_iou = 1 [default = 0.5];
  }

  // Verifies that current frame re-crop rect rotation/translation/scale didn't
  // change much comparing to the previous frame rect.
  message RectRequirements {
    // Allowed rotation change defined in degrees.
    optional float rotation_degrees = 1 [default = 10.0];

    // Allowed translation change defined as absolute translation normalized by
    // re-crop rectangle size.
    optional float translation = 2 [default = 0.1];

    // Allowed scale change defined as absolute width/height change normalized
    // by re-crop rectangle size.
    optional float scale = 3 [default = 0.1];
  }

  // Verifies that landmarks from the previous frame are within re-crop
  // rectangle bounds on the current frame.
  message LandmarksRequirements {
    // Margin to apply to the re-crop rectangle before verifying landmarks.
    // Negative values shrink the rectangle.
    optional float recrop_rect_margin = 1 [default = 0.0];
  }

  optional IouRequirements iou_requirements = 1;

  optional RectRequirements rect_requirements = 2;

  optional LandmarksRequirements landmarks_requirements = 3;
}
|
@ -0,0 +1,48 @@
|
|||
# Calculates ROI from detections provided by `face_detection_short_range.tflite`
|
||||
# model.
|
||||
type: "FaceDetectionFrontDetectionsToRoi"
|
||||
|
||||
# Detected faces. (std::vector<Detection>)
|
||||
input_stream: "DETECTIONS:detections"
|
||||
# Image size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# Refined (more accurate) ROI to use for face landmarks prediction.
|
||||
# (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts the face detection into a rectangle (normalized by image size)
|
||||
# that encloses the face and is rotated such that the line connecting right side
|
||||
# of the right eye and left side of the left eye is aligned with the X-axis of
|
||||
# the rectangle.
|
||||
node {
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTIONS:detections"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:raw_roi"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
rotation_vector_start_keypoint_index: 0 # Right eye.
|
||||
rotation_vector_end_keypoint_index: 1 # Left eye.
|
||||
rotation_vector_target_angle_degrees: 0
|
||||
conversion_mode: USE_KEYPOINTS
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Expands and shifts the rectangle that contains the face so that it's likely
|
||||
# to cover the entire face.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:raw_roi"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 2.0
|
||||
scale_y: 2.0
|
||||
shift_y: -0.1
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
# Predicts face landmarks within an ROI derived from face-related pose
|
||||
# landmarks.
|
||||
|
||||
type: "FaceLandmarksFromPoseCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_video"
|
||||
# Face-related pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
# Debug outputs.
|
||||
# Face ROI derived from face-related pose landmarks, which defines the search
|
||||
# region for the face detection model. (NormalizedRect)
|
||||
output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
|
||||
# Refined face crop rectangle predicted by face detection model.
|
||||
# (NormalizedRect)
|
||||
output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
|
||||
# Rectangle used to predict face landmarks. (NormalizedRect)
|
||||
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
|
||||
|
||||
# TODO: do not predict face when most of the face landmarks from
|
||||
# pose are invisible.
|
||||
|
||||
# Extracts image size from the input images.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Gets ROI for re-crop model from face-related pose landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarksFromPoseToRecropRoi"
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:face_roi_from_pose"
|
||||
}
|
||||
|
||||
# Detects faces within the face ROI calculated from pose landmarks. This is done
|
||||
# to refine face ROI for further landmark detection as ROI calculated from
|
||||
# pose landmarks may be inaccurate.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeByRoiCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_roi_from_pose"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
}
|
||||
|
||||
# Calculates refined face ROI.
|
||||
node {
|
||||
calculator: "FaceDetectionFrontDetectionsToRoi"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:face_roi_from_detection"
|
||||
}
|
||||
|
||||
# Gets face tracking rectangle (either face rectangle from the previous
|
||||
# frame or face re-crop rectangle from the current frame) for face prediction.
|
||||
node {
|
||||
calculator: "FaceTracking"
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
|
||||
}
|
||||
|
||||
# Predicts face landmarks from the tracking rectangle.
|
||||
node {
|
||||
calculator: "FaceLandmarkCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_tracking_roi"
|
||||
input_side_packet: "WITH_ATTENTION:refine_landmarks"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
# Predicts face landmarks within an ROI derived from face-related pose
|
||||
# landmarks.
|
||||
|
||||
type: "FaceLandmarksFromPoseGpu"
|
||||
|
||||
# GPU image. (GpuBuffer)
|
||||
input_stream: "IMAGE:input_video"
|
||||
# Face-related pose landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
|
||||
# Whether to run the face landmark model with attention on lips and eyes to
|
||||
# provide more accuracy, and additionally output iris landmarks. If unspecified,
|
||||
# functions as set to false. (bool)
|
||||
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "FACE_LANDMARKS:face_landmarks"
|
||||
|
||||
# Debug outputs.
|
||||
# Face ROI derived from face-related pose landmarks, which defines the search
|
||||
# region for the face detection model. (NormalizedRect)
|
||||
output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
|
||||
# Refined face crop rectangle predicted by face detection model.
|
||||
# (NormalizedRect)
|
||||
output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
|
||||
# Rectangle used to predict face landmarks. (NormalizedRect)
|
||||
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
|
||||
|
||||
# TODO: do not predict face when most of the face landmarks from
|
||||
# pose are invisible.
|
||||
|
||||
# Extracts image size from the input images.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:input_video"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Gets ROI for re-crop model from face-related pose landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarksFromPoseToRecropRoi"
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:face_roi_from_pose"
|
||||
}
|
||||
|
||||
# Detects faces within the face ROI calculated from pose landmarks. This is done
|
||||
# to refine face ROI for further landmark detection as ROI calculated from
|
||||
# pose landmarks may be inaccurate.
|
||||
node {
|
||||
calculator: "FaceDetectionShortRangeByRoiGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_roi_from_pose"
|
||||
output_stream: "DETECTIONS:face_detections"
|
||||
}
|
||||
|
||||
# Calculates refined face ROI.
|
||||
node {
|
||||
calculator: "FaceDetectionFrontDetectionsToRoi"
|
||||
input_stream: "DETECTIONS:face_detections"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:face_roi_from_detection"
|
||||
}
|
||||
|
||||
# Gets face tracking rectangle (either face rectangle from the previous
|
||||
# frame or face re-crop rectangle from the current frame) for face prediction.
|
||||
node {
|
||||
calculator: "FaceTracking"
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
|
||||
}
|
||||
|
||||
# Predicts face landmarks from the tracking rectangle.
|
||||
node {
|
||||
calculator: "FaceLandmarkGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:face_tracking_roi"
|
||||
input_side_packet: "WITH_ATTENTION:refine_landmarks"
|
||||
output_stream: "LANDMARKS:face_landmarks"
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
# Converts face-related pose landmarks to re-crop ROI.
|
||||
|
||||
type: "FaceLandmarksFromPoseToRecropRoi"
|
||||
|
||||
# Face-related pose landmarks (There should be 11 of them).
|
||||
# (NormalizedLandmarkList)
|
||||
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
|
||||
# Image size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# ROI to be used for face detection. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts face-related pose landmarks to a detection that tightly encloses all
|
||||
# landmarks.
|
||||
node {
|
||||
calculator: "LandmarksToDetectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks_from_pose"
|
||||
output_stream: "DETECTION:pose_face_detection"
|
||||
}
|
||||
|
||||
# Converts face detection to a normalized face rectangle.
|
||||
node {
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:pose_face_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:pose_face_rect"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
rotation_vector_start_keypoint_index: 5 # Right eye.
|
||||
rotation_vector_end_keypoint_index: 2 # Left eye.
|
||||
rotation_vector_target_angle_degrees: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Expands face rectangle so that it becomes big enough for face detector to
|
||||
# localize it accurately.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:pose_face_rect"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 3.0
|
||||
scale_y: 3.0
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
# Converts face landmarks to ROI.
|
||||
|
||||
type: "FaceLandmarksToRoi"
|
||||
|
||||
# Face landmarks. (NormalizedLandmarkList)
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
# Image size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# ROI according to landmarks. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts face landmarks to a detection that tightly encloses all landmarks.
|
||||
node {
|
||||
calculator: "LandmarksToDetectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:face_landmarks"
|
||||
output_stream: "DETECTION:face_detection"
|
||||
}
|
||||
|
||||
# Converts the face detection into a rectangle (normalized by image size)
# that encloses the face and is rotated such that the line connecting the
# right side of the left eye and the left side of the right eye is aligned
# with the X-axis of the rectangle.
|
||||
node {
|
||||
calculator: "DetectionsToRectsCalculator"
|
||||
input_stream: "DETECTION:face_detection"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:face_landmarks_rect_tight"
|
||||
options: {
|
||||
[mediapipe.DetectionsToRectsCalculatorOptions.ext] {
|
||||
rotation_vector_start_keypoint_index: 33 # Right side of left eye.
|
||||
rotation_vector_end_keypoint_index: 263 # Left side of right eye.
|
||||
rotation_vector_target_angle_degrees: 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Expands the face rectangle so that it's likely to contain the face even with
|
||||
# some motion.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:face_landmarks_rect_tight"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 1.5
|
||||
scale_y: 1.5
|
||||
      # TODO: remove `square_long` where appropriate.
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
61
mediapipe/modules/holistic_landmark/face_tracking.pbtxt
Normal file
61
mediapipe/modules/holistic_landmark/face_tracking.pbtxt
Normal file
|
@ -0,0 +1,61 @@
|
|||
# Decides what ROI to use for face landmarks prediction: either previous frame
|
||||
# landmarks ROI or the current frame face re-crop ROI.
|
||||
|
||||
type: "FaceTracking"
|
||||
|
||||
# Face landmarks from the current frame. They will be memorized for tracking on
|
||||
# the next frame. (NormalizedLandmarkList)
|
||||
input_stream: "LANDMARKS:face_landmarks"
|
||||
# Face re-crop ROI from the current frame. (NormalizedRect)
|
||||
input_stream: "FACE_RECROP_ROI:face_recrop_roi"
|
||||
# Image size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# Face tracking ROI. Which is either face landmarks ROI from the previous frame
|
||||
# if face is still tracked, or face re-crop ROI from the current frame
|
||||
# otherwise. (NormalizedRect)
|
||||
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
|
||||
|
||||
# Keeps track of face landmarks from the previous frame.
|
||||
node {
|
||||
calculator: "PreviousLoopbackCalculator"
|
||||
input_stream: "MAIN:image_size"
|
||||
input_stream: "LOOP:face_landmarks"
|
||||
input_stream_info: {
|
||||
tag_index: "LOOP"
|
||||
back_edge: true
|
||||
}
|
||||
output_stream: "PREV_LOOP:prev_face_landmarks"
|
||||
}
|
||||
|
||||
# Gets the rect enclosing the previous frame face landmarks.
|
||||
node {
|
||||
calculator: "FaceLandmarksToRoi"
|
||||
input_stream: "LANDMARKS:prev_face_landmarks"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:prev_face_landmarks_rect"
|
||||
}
|
||||
|
||||
# Checks that all requirements for tracking are satisfied and use face rectangle
|
||||
# from the previous frame in that case. Otherwise - use face re-crop rectangle
|
||||
# from the current frame.
|
||||
node {
|
||||
calculator: "RoiTrackingCalculator"
|
||||
input_stream: "PREV_LANDMARKS:prev_face_landmarks"
|
||||
input_stream: "PREV_LANDMARKS_RECT:prev_face_landmarks_rect"
|
||||
input_stream: "RECROP_RECT:face_recrop_roi"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "TRACKING_RECT:face_tracking_roi"
|
||||
options: {
|
||||
[mediapipe.RoiTrackingCalculatorOptions.ext] {
|
||||
rect_requirements: {
|
||||
rotation_degrees: 15.0
|
||||
translation: 0.1
|
||||
scale: 0.3
|
||||
}
|
||||
landmarks_requirements: {
|
||||
recrop_rect_margin: -0.2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.
|
||||
|
||||
type: "HandLandmarksFromPoseCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_video"
|
||||
# Hand-related pose landmarks in [wrist, pinky, index] order.
|
||||
# (NormalizedLandmarkList)
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
|
||||
|
||||
# Hand landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "HAND_LANDMARKS:hand_landmarks"
|
||||
|
||||
# Debug outputs.
|
||||
# Hand ROI derived from hand-related landmarks, which defines the search region
|
||||
# for the hand re-crop model. (NormalizedRect)
|
||||
output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
|
||||
# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
|
||||
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
|
||||
# Rectangle used to predict hand landmarks. (NormalizedRect)
|
||||
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
|
||||
|
||||
# Gets hand visibility.
|
||||
node {
|
||||
calculator: "HandVisibilityFromHandLandmarksFromPose"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
|
||||
output_stream: "VISIBILITY:hand_visibility"
|
||||
}
|
||||
|
||||
# Drops hand-related pose landmarks if pose wrist is not visible. It will
|
||||
# prevent from predicting hand landmarks on the current frame.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "hand_landmarks_from_pose"
|
||||
input_stream: "ALLOW:hand_visibility"
|
||||
output_stream: "ensured_hand_landmarks_from_pose"
|
||||
}
|
||||
|
||||
# Extracts image size from the input images.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE:input_video"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Gets ROI for re-crop model from hand-related pose landmarks.
|
||||
node {
|
||||
calculator: "HandLandmarksFromPoseToRecropRoi"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:ensured_hand_landmarks_from_pose"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:hand_roi_from_pose"
|
||||
}
|
||||
|
||||
# Predicts hand re-crop rectangle on the current frame.
|
||||
node {
|
||||
calculator: "HandRecropByRoiCpu",
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:hand_roi_from_pose"
|
||||
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
|
||||
}
|
||||
|
||||
# Gets hand tracking rectangle (either hand rectangle from the previous
|
||||
# frame or hand re-crop rectangle from the current frame) for hand prediction.
|
||||
node {
|
||||
calculator: "HandTracking"
|
||||
input_stream: "LANDMARKS:hand_landmarks"
|
||||
input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
|
||||
}
|
||||
|
||||
# Predicts hand landmarks from the tracking rectangle.
|
||||
node {
|
||||
calculator: "HandLandmarkCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:hand_tracking_roi"
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.
|
||||
|
||||
type: "HandLandmarksFromPoseGpu"
|
||||
|
||||
# GPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_video"
|
||||
# Hand-related pose landmarks in [wrist, pinky, index] order.
|
||||
# (NormalizedLandmarkList)
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
|
||||
|
||||
# Hand landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "HAND_LANDMARKS:hand_landmarks"
|
||||
|
||||
# Debug outputs.
|
||||
# Hand ROI derived from hand-related landmarks, which defines the search region
|
||||
# for the hand re-crop model. (NormalizedRect)
|
||||
output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
|
||||
# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
|
||||
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
|
||||
# Rectangle used to predict hand landmarks. (NormalizedRect)
|
||||
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
|
||||
|
||||
# Gets hand visibility.
|
||||
node {
|
||||
calculator: "HandVisibilityFromHandLandmarksFromPose"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
|
||||
output_stream: "VISIBILITY:hand_visibility"
|
||||
}
|
||||
|
||||
# Drops hand-related pose landmarks if pose wrist is not visible. It will
|
||||
# prevent predicting hand landmarks on the current frame.
|
||||
node {
|
||||
calculator: "GateCalculator"
|
||||
input_stream: "hand_landmarks_from_pose"
|
||||
input_stream: "ALLOW:hand_visibility"
|
||||
output_stream: "ensured_hand_landmarks_from_pose"
|
||||
}
|
||||
|
||||
# Extracts image size from the input images.
|
||||
node {
|
||||
calculator: "ImagePropertiesCalculator"
|
||||
input_stream: "IMAGE_GPU:input_video"
|
||||
output_stream: "SIZE:image_size"
|
||||
}
|
||||
|
||||
# Gets ROI for re-crop model from hand-related pose landmarks.
|
||||
node {
|
||||
calculator: "HandLandmarksFromPoseToRecropRoi"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:ensured_hand_landmarks_from_pose"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "ROI:hand_roi_from_pose"
|
||||
}
|
||||
|
||||
# Predicts hand re-crop rectangle on the current frame.
|
||||
node {
|
||||
calculator: "HandRecropByRoiGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:hand_roi_from_pose"
|
||||
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
|
||||
}
|
||||
|
||||
# Gets hand tracking rectangle (either hand rectangle from the previous
|
||||
# frame or hand re-crop rectangle from the current frame) for hand prediction.
|
||||
node {
|
||||
calculator: "HandTracking"
|
||||
input_stream: "LANDMARKS:hand_landmarks"
|
||||
input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
|
||||
}
|
||||
|
||||
# Predicts hand landmarks from the tracking rectangle.
|
||||
node {
|
||||
calculator: "HandLandmarkGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "ROI:hand_tracking_roi"
|
||||
output_stream: "LANDMARKS:hand_landmarks"
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
# Converts hand-related pose landmarks to hand re-crop ROI.
|
||||
|
||||
type: "HandLandmarksFromPoseToRecropRoi"
|
||||
|
||||
# Hand-related pose landmarks in [wrist, pinky, index] order.
|
||||
# (NormalizedLandmarkList)
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
|
||||
# Image size (width & height). (std::pair<int, int>)
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
|
||||
# ROI to be used for re-crop prediction. (NormalizedRect)
|
||||
output_stream: "ROI:roi"
|
||||
|
||||
# Converts hand-related pose landmarks to a detection that tightly encloses all
|
||||
# of them.
|
||||
node {
|
||||
calculator: "LandmarksToDetectionCalculator"
|
||||
input_stream: "NORM_LANDMARKS:hand_landmarks_from_pose"
|
||||
output_stream: "DETECTION:hand_detection_from_pose"
|
||||
}
|
||||
|
||||
# Converts hand detection to a normalized hand rectangle.
|
||||
node {
|
||||
calculator: "HandDetectionsFromPoseToRectsCalculator"
|
||||
input_stream: "DETECTION:hand_detection_from_pose"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "NORM_RECT:hand_roi_from_pose"
|
||||
}
|
||||
|
||||
# Expands the palm rectangle so that it becomes big enough for hand re-crop
|
||||
# model to localize it accurately.
|
||||
node {
|
||||
calculator: "RectTransformationCalculator"
|
||||
input_stream: "NORM_RECT:hand_roi_from_pose"
|
||||
input_stream: "IMAGE_SIZE:image_size"
|
||||
output_stream: "roi"
|
||||
options: {
|
||||
[mediapipe.RectTransformationCalculatorOptions.ext] {
|
||||
scale_x: 2.7
|
||||
scale_y: 2.7
|
||||
shift_y: -0.1
|
||||
square_long: true
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
# Predicts left and right hand landmarks within corresponding ROIs derived from
|
||||
# hand-related pose landmarks.
|
||||
|
||||
type: "HandLandmarksLeftAndRightCpu"
|
||||
|
||||
# CPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_video"
|
||||
# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList)
|
||||
input_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
|
||||
# Left hand landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
# Right hand landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
|
||||
# Debug outputs.
|
||||
output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
|
||||
output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
|
||||
output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi"
|
||||
output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
|
||||
output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
|
||||
output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi"
|
||||
|
||||
# Extracts left-hand-related landmarks from the pose landmarks.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "pose_landmarks"
|
||||
output_stream: "left_hand_landmarks_from_pose"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 15 end: 16 }
|
||||
ranges: { begin: 17 end: 18 }
|
||||
ranges: { begin: 19 end: 20 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Predicts left hand landmarks.
|
||||
node {
|
||||
calculator: "HandLandmarksFromPoseCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose"
|
||||
output_stream: "HAND_LANDMARKS:left_hand_landmarks"
|
||||
# Debug outputs.
|
||||
output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
|
||||
output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
|
||||
output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi"
|
||||
}
|
||||
|
||||
# Extracts right-hand-related landmarks from the pose landmarks.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "pose_landmarks"
|
||||
output_stream: "right_hand_landmarks_from_pose"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 16 end: 17 }
|
||||
ranges: { begin: 18 end: 19 }
|
||||
ranges: { begin: 20 end: 21 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Predicts right hand landmarks.
|
||||
node {
|
||||
calculator: "HandLandmarksFromPoseCpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose"
|
||||
output_stream: "HAND_LANDMARKS:right_hand_landmarks"
|
||||
# Debug outputs.
|
||||
output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
|
||||
output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
|
||||
output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi"
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
# Predicts left and right hand landmarks within corresponding ROIs derived from
|
||||
# hand-related pose landmarks.
|
||||
|
||||
type: "HandLandmarksLeftAndRightGpu"
|
||||
|
||||
# GPU image. (ImageFrame)
|
||||
input_stream: "IMAGE:input_video"
|
||||
# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList)
|
||||
input_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||
|
||||
# Left hand landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||
# Right hand landmarks. (NormalizedLandmarkList)
|
||||
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||
|
||||
# Debug outputs.
|
||||
output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
|
||||
output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
|
||||
output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi"
|
||||
output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
|
||||
output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
|
||||
output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi"
|
||||
|
||||
# Extracts left-hand-related landmarks from the pose landmarks.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "pose_landmarks"
|
||||
output_stream: "left_hand_landmarks_from_pose"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 15 end: 16 }
|
||||
ranges: { begin: 17 end: 18 }
|
||||
ranges: { begin: 19 end: 20 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Predicts left hand landmarks.
|
||||
node {
|
||||
calculator: "HandLandmarksFromPoseGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose"
|
||||
output_stream: "HAND_LANDMARKS:left_hand_landmarks"
|
||||
# Debug outputs.
|
||||
output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
|
||||
output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
|
||||
output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi"
|
||||
}
|
||||
|
||||
# Extracts right-hand-related landmarks from the pose landmarks.
|
||||
node {
|
||||
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||
input_stream: "pose_landmarks"
|
||||
output_stream: "right_hand_landmarks_from_pose"
|
||||
options: {
|
||||
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||
ranges: { begin: 16 end: 17 }
|
||||
ranges: { begin: 18 end: 19 }
|
||||
ranges: { begin: 20 end: 21 }
|
||||
combine_outputs: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Predicts right hand landmarks.
|
||||
node {
|
||||
calculator: "HandLandmarksFromPoseGpu"
|
||||
input_stream: "IMAGE:input_video"
|
||||
input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose"
|
||||
output_stream: "HAND_LANDMARKS:right_hand_landmarks"
|
||||
# Debug outputs.
|
||||
output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
|
||||
output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
|
||||
output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi"
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user