diff --git a/mediapipe/examples/desktop/face_detection/BUILD b/mediapipe/examples/desktop/face_detection/BUILD index 8cd75b44e..6d131ac68 100644 --- a/mediapipe/examples/desktop/face_detection/BUILD +++ b/mediapipe/examples/desktop/face_detection/BUILD @@ -24,6 +24,46 @@ cc_binary( ], ) +cc_binary( + name = "face_detection_full_range_cpu_fps", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_fps", + "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_deps", + ], +) + +cc_binary( + name = "face_detection_full_range_onnx_cuda", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_cuda_deps", + ], +) + +cc_binary( + name = "face_detection_full_range_onnx_cuda_fps", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_fps", + "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_cuda_deps", + ], +) + +cc_binary( + name = "face_detection_full_range_onnx_tensorrt", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_tensorrt_deps", + ], +) + +cc_binary( + name = "face_detection_full_range_onnx_tensorrt_fps", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_fps", + "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_tensorrt_deps", + ], +) + cc_binary( name = "face_detection_cpu", deps = [ @@ -32,6 +72,46 @@ cc_binary( ], ) +cc_binary( + name = "face_detection_cpu_fps", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_fps", + "//mediapipe/graphs/face_detection:desktop_live_calculators", + ], +) + +cc_binary( + name = "face_detection_onnx_cuda", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/face_detection:desktop_live_onnx_cuda_calculators", + ], +) + +cc_binary( + name = "face_detection_onnx_cuda_fps", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_fps", + "//mediapipe/graphs/face_detection:desktop_live_onnx_cuda_calculators", + ], +) + +cc_binary( + name = "face_detection_onnx_tensorrt", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/face_detection:desktop_live_onnx_tensorrt_calculators", + ], +) + +cc_binary( + name = "face_detection_onnx_tensorrt_fps", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_fps", + "//mediapipe/graphs/face_detection:desktop_live_onnx_tensorrt_calculators", + ], +) + # Linux only cc_binary( name = "face_detection_gpu", diff --git a/mediapipe/graphs/face_detection/BUILD b/mediapipe/graphs/face_detection/BUILD index 9e7cf2505..81eec6692 100644 --- a/mediapipe/graphs/face_detection/BUILD +++ b/mediapipe/graphs/face_detection/BUILD @@ -43,6 +43,26 @@ cc_library( ], ) +cc_library( + name = "desktop_live_onnx_cuda_calculators", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_onnx_cuda", + ], +) + +cc_library( + name = "desktop_live_onnx_tensorrt_calculators", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_onnx_tensorrt", + 
],
+)
+
 cc_library(
     name = "desktop_live_gpu_calculators",
     deps = [
@@ -93,3 +113,23 @@ cc_library(
         "//mediapipe/modules/face_detection:face_detection_full_range_cpu",
     ],
 )
+
+cc_library(
+    name = "face_detection_full_range_desktop_live_onnx_cuda_deps",
+    deps = [
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/calculators/util:detections_to_render_data_calculator",
+        "//mediapipe/modules/face_detection:face_detection_full_range_onnx_cuda",
+    ],
+)
+
+cc_library(
+    name = "face_detection_full_range_desktop_live_onnx_tensorrt_deps",
+    deps = [
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/calculators/util:detections_to_render_data_calculator",
+        "//mediapipe/modules/face_detection:face_detection_full_range_onnx_tensorrt",
+    ],
+)
diff --git a/mediapipe/graphs/face_detection/face_detection_desktop_live_onnx_cuda.pbtxt b/mediapipe/graphs/face_detection/face_detection_desktop_live_onnx_cuda.pbtxt
new file mode 100644
index 000000000..367327335
--- /dev/null
+++ b/mediapipe/graphs/face_detection/face_detection_desktop_live_onnx_cuda.pbtxt
@@ -0,0 +1,58 @@
+# MediaPipe graph that performs face detection with ONNX Runtime on CUDA.
+
+# CPU buffer. (ImageFrame)
+input_stream: "input_video"
+
+# Output image with rendered results. (ImageFrame)
+output_stream: "output_video"
+# Detected faces. (std::vector<Detection>)
+output_stream: "face_detections"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph
+# to 1. This prevents the downstream nodes from queuing up incoming images and
+# data excessively, which leads to increased latency and memory usage, unwanted
+# in real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Subgraph that detects faces.
+node {
+  calculator: "FaceDetectionShortRangeOnnxCUDA"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:face_detections"
+}
+
+# Converts the detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:face_detections"
+  output_stream: "RENDER_DATA:render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 255 g: 0 b: 0 }
+    }
+  }
+}
+
+# Draws annotations and overlays them on top of the input images.
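+# The render data is passed as an untagged input stream alongside the
+# throttled frame, and the annotated output_video also closes the
+# FlowLimiterCalculator's FINISHED back edge above.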
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "render_data"
+  output_stream: "IMAGE:output_video"
+}
diff --git a/mediapipe/graphs/face_detection/face_detection_desktop_live_onnx_tensorrt.pbtxt b/mediapipe/graphs/face_detection/face_detection_desktop_live_onnx_tensorrt.pbtxt
new file mode 100644
index 000000000..d3a7f097f
--- /dev/null
+++ b/mediapipe/graphs/face_detection/face_detection_desktop_live_onnx_tensorrt.pbtxt
@@ -0,0 +1,58 @@
+# MediaPipe graph that performs face detection with ONNX Runtime on TensorRT.
+
+# CPU buffer. (ImageFrame)
+input_stream: "input_video"
+
+# Output image with rendered results. (ImageFrame)
+output_stream: "output_video"
+# Detected faces. (std::vector<Detection>)
+output_stream: "face_detections"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph
+# to 1. This prevents the downstream nodes from queuing up incoming images and
+# data excessively, which leads to increased latency and memory usage, unwanted
+# in real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Subgraph that detects faces.
+node {
+  calculator: "FaceDetectionShortRangeOnnxTensorRT"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:face_detections"
+}
+
+# Converts the detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:face_detections"
+  output_stream: "RENDER_DATA:render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 255 g: 0 b: 0 }
+    }
+  }
+}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "render_data"
+  output_stream: "IMAGE:output_video"
+}
diff --git a/mediapipe/graphs/face_detection/face_detection_full_range_desktop_live_onnx_cuda.pbtxt b/mediapipe/graphs/face_detection/face_detection_full_range_desktop_live_onnx_cuda.pbtxt
new file mode 100644
index 000000000..d33a772a3
--- /dev/null
+++ b/mediapipe/graphs/face_detection/face_detection_full_range_desktop_live_onnx_cuda.pbtxt
@@ -0,0 +1,58 @@
+# MediaPipe graph that performs full-range face detection with ONNX Runtime
+# on CUDA.
+
+# CPU images coming into and out of the graph. (ImageFrame)
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for the face detection
+# subgraph downstream in the graph to finish generating the corresponding
+# detections before it passes through another image. All images that come in
+# while waiting are dropped, limiting the number of in-flight images between
+# this calculator and the face detection subgraph to 1. This prevents the
+# nodes in between from queuing up incoming images and data excessively, which
+# leads to increased latency and memory usage, unwanted in real-time mobile
+# applications. It also eliminates unnecessary computation, e.g., an image
+# transformed inside the subgraph may get dropped downstream if the subsequent
+# inference calculator is still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:detections"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionFullRangeOnnxCUDA"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:detections"
+}
+
+# Converts the detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:detections"
+  output_stream: "RENDER_DATA:render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 255 g: 0 b: 0 }
+    }
+  }
+}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "render_data"
+  output_stream: "IMAGE:output_video"
+}
+
diff --git a/mediapipe/graphs/face_detection/face_detection_full_range_desktop_live_onnx_tensorrt.pbtxt b/mediapipe/graphs/face_detection/face_detection_full_range_desktop_live_onnx_tensorrt.pbtxt
new file mode 100644
index 000000000..4db446757
--- /dev/null
+++ b/mediapipe/graphs/face_detection/face_detection_full_range_desktop_live_onnx_tensorrt.pbtxt
@@ -0,0 +1,58 @@
+# MediaPipe graph that performs full-range face detection with ONNX Runtime
+# on TensorRT.
+
+# CPU images coming into and out of the graph. (ImageFrame)
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for the face detection
+# subgraph downstream in the graph to finish generating the corresponding
+# detections before it passes through another image. All images that come in
+# while waiting are dropped, limiting the number of in-flight images between
+# this calculator and the face detection subgraph to 1. This prevents the
+# nodes in between from queuing up incoming images and data excessively, which
+# leads to increased latency and memory usage, unwanted in real-time mobile
+# applications. It also eliminates unnecessary computation, e.g., an image
+# transformed inside the subgraph may get dropped downstream if the subsequent
+# inference calculator is still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:detections"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Detects faces.
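+# Per the subgraph options, the full-range detector below runs
+# face_detection_full_range.onnx on a 192x192 input tensor and decodes 2304
+# candidate boxes before non-max suppression.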
+node { + calculator: "FaceDetectionFullRangeOnnxTensorRT" + input_stream: "IMAGE:throttled_input_video" + output_stream: "DETECTIONS:detections" +} + +# Converts the detections to drawing primitives for annotation overlay. +node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTIONS:detections" + output_stream: "RENDER_DATA:render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 255 g: 0 b: 0 } + } + } +} + +# Draws annotations and overlays them on top of the input images. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "IMAGE:throttled_input_video" + input_stream: "render_data" + output_stream: "IMAGE:output_video" +} + diff --git a/mediapipe/modules/face_detection/BUILD b/mediapipe/modules/face_detection/BUILD index 84c9388ea..d6815d5ac 100644 --- a/mediapipe/modules/face_detection/BUILD +++ b/mediapipe/modules/face_detection/BUILD @@ -17,7 +17,7 @@ load( "mediapipe_simple_subgraph", ) load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") -load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") +load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") #@unused licenses(["notice"]) @@ -35,6 +35,24 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_detection_short_range_by_roi_onnx_cuda", + graph = "face_detection_short_range_by_roi_onnx_cuda.pbtxt", + register_as = "FaceDetectionShortRangeByRoiOnnxCUDA", + deps = [ + ":face_detection_short_range_onnx_cuda", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_by_roi_onnx_tensorrt", + graph = "face_detection_short_range_by_roi_onnx_tensorrt.pbtxt", + register_as = "FaceDetectionShortRangeByRoiOnnxTensorRT", + deps = [ + ":face_detection_short_range_onnx_tensorrt", + ], +) + mediapipe_simple_subgraph( name = "face_detection_short_range_by_roi_gpu", graph = "face_detection_short_range_by_roi_gpu.pbtxt", @@ -74,6 +92,24 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_detection_short_range_onnx_cuda", + graph = "face_detection_short_range_onnx_cuda.pbtxt", + register_as = "FaceDetectionShortRangeOnnxCUDA", + deps = [ + ":face_detection_onnx_cuda", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_short_range_onnx_tensorrt", + graph = "face_detection_short_range_onnx_tensorrt.pbtxt", + register_as = "FaceDetectionShortRangeOnnxTensorRT", + deps = [ + ":face_detection_onnx_tensorrt", + ], +) + mediapipe_simple_subgraph( name = "face_detection_full_range", graph = "face_detection_full_range.pbtxt", @@ -83,6 +119,24 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_detection_full_range_onnx_cuda", + graph = "face_detection_full_range_onnx_cuda.pbtxt", + register_as = "FaceDetectionFullRangeOnnxCUDA", + deps = [ + ":face_detection_onnx_cuda", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_full_range_onnx_tensorrt", + graph = "face_detection_full_range_onnx_tensorrt.pbtxt", + register_as = "FaceDetectionFullRangeOnnxTensorRT", + deps = [ + ":face_detection_onnx_tensorrt", + ], +) + mediapipe_simple_subgraph( name = "face_detection_without_roi", graph = "face_detection_without_roi.pbtxt", @@ -110,6 +164,42 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_detection_onnx_cuda", + graph = "face_detection_onnx_cuda.pbtxt", + register_as = "FaceDetectionOnnxCUDA", + deps = [ + ":face_detection_cc_proto", + 
":face_detection_options_lib", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_projection_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_onnx_tensorrt", + graph = "face_detection_onnx_tensorrt.pbtxt", + register_as = "FaceDetectionOnnxTensorRT", + deps = [ + ":face_detection_cc_proto", + ":face_detection_options_lib", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt", + "//mediapipe/calculators/tensor:tensors_to_detections_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/util:detection_projection_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + mediapipe_proto_library( name = "face_detection_proto", srcs = ["face_detection.proto"], @@ -168,8 +258,11 @@ mediapipe_simple_subgraph( exports_files( srcs = [ + "face_detection_full_range.onnx", "face_detection_full_range.tflite", + "face_detection_full_range_sparse.onnx", "face_detection_full_range_sparse.tflite", + "face_detection_short_range.onnx", "face_detection_short_range.tflite", ], ) diff --git a/mediapipe/modules/face_detection/face_detection_full_range.onnx b/mediapipe/modules/face_detection/face_detection_full_range.onnx index ef2d8df17..27c3a29b9 100644 Binary files a/mediapipe/modules/face_detection/face_detection_full_range.onnx and b/mediapipe/modules/face_detection/face_detection_full_range.onnx differ diff --git a/mediapipe/modules/face_detection/face_detection_full_range_onnx_cuda.pbtxt b/mediapipe/modules/face_detection/face_detection_full_range_onnx_cuda.pbtxt new file mode 100644 index 000000000..44c13f661 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_full_range_onnx_cuda.pbtxt @@ -0,0 +1,37 @@ +type: "FaceDetectionFullRangeOnnxCUDA" + +input_stream: "IMAGE:image" + +input_stream: "ROI:roi" + +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node { + calculator: "FaceDetectionOnnxCUDA" + input_stream: "IMAGE:image" + input_stream: "ROI:roi" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + model_path: "mediapipe/modules/face_detection/face_detection_full_range.onnx" + tensor_width: 192 + tensor_height: 192 + + num_layers: 1 + strides: 4 + interpolated_scale_aspect_ratio: 0.0 + + num_boxes: 2304 + x_scale: 192.0 + y_scale: 192.0 + h_scale: 192.0 + w_scale: 192.0 + min_score_thresh: 0.6 + } + } + option_value: "OPTIONS:options" +} \ No newline at end of file diff --git a/mediapipe/modules/face_detection/face_detection_full_range_onnx_tensorrt.pbtxt b/mediapipe/modules/face_detection/face_detection_full_range_onnx_tensorrt.pbtxt new file mode 100644 index 000000000..24dd21772 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_full_range_onnx_tensorrt.pbtxt @@ -0,0 +1,37 @@ +type: "FaceDetectionFullRangeOnnxTensorRT" + +input_stream: 
"IMAGE:image" + +input_stream: "ROI:roi" + +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node { + calculator: "FaceDetectionOnnxTensorRT" + input_stream: "IMAGE:image" + input_stream: "ROI:roi" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + model_path: "mediapipe/modules/face_detection/face_detection_full_range.onnx" + tensor_width: 192 + tensor_height: 192 + + num_layers: 1 + strides: 4 + interpolated_scale_aspect_ratio: 0.0 + + num_boxes: 2304 + x_scale: 192.0 + y_scale: 192.0 + h_scale: 192.0 + w_scale: 192.0 + min_score_thresh: 0.6 + } + } + option_value: "OPTIONS:options" +} diff --git a/mediapipe/modules/face_detection/face_detection_onnx_cuda.pbtxt b/mediapipe/modules/face_detection/face_detection_onnx_cuda.pbtxt new file mode 100644 index 000000000..5d23d3f16 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_onnx_cuda.pbtxt @@ -0,0 +1,155 @@ +type: "FaceDetectionOnnxCUDA" + +# The input image, either ImageFrame, GpuBuffer, or (multi-backend) Image. +input_stream: "IMAGE:image" + +# ROI (region of interest) within the given image where faces should be +# detected. (NormalizedRect) +input_stream: "ROI:roi" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +# Converts the input CPU or GPU image to the multi-backend image type (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } + option_value: "gpu_origin:options/gpu_origin" + option_value: "output_tensor_width:options/tensor_width" + option_value: "output_tensor_height:options/tensor_height" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { cuda {} } + } + } + option_value: "model_path:options/model_path" +} + +# Detection tensors. (std::vector) +#input_stream: "TENSORS:detection_tensors" + +# A 4x4 row-major-order matrix that maps a point represented in the detection +# tensors to a desired coordinate system, e.g., in the original input image +# before scaling/cropping. (std::array) +#input_stream: "MATRIX:transform_matrix" + +# Detected faces. 
(std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +#output_stream: "DETECTIONS:detections" + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 1 + min_scale: 0.1484375 + max_scale: 0.75 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } + option_value: "input_size_width:tensor_width" + option_value: "input_size_height:tensor_height" + option_value: "num_layers:num_layers" + option_value: "strides:strides" + option_value: "interpolated_scale_aspect_ratio:interpolated_scale_aspect_ratio" +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_coords: 16 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 6 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + } + } + option_value: "num_boxes:num_boxes" + option_value: "x_scale:x_scale" + option_value: "y_scale:y_scale" + option_value: "h_scale:h_scale" + option_value: "w_scale:w_scale" + option_value: "min_score_thresh:min_score_thresh" +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Projects the detections from input tensor to the corresponding locations on +# the original image (input to the graph). +node { + calculator: "DetectionProjectionCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "PROJECTION_MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_onnx_tensorrt.pbtxt b/mediapipe/modules/face_detection/face_detection_onnx_tensorrt.pbtxt new file mode 100644 index 000000000..321736b5f --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_onnx_tensorrt.pbtxt @@ -0,0 +1,165 @@ +# MediaPipe graph to detect faces. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionFrontCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:roi" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionOnnxTensorRT" + +# The input image, either ImageFrame, GpuBuffer, or (multi-backend) Image. +input_stream: "IMAGE:image" + +# ROI (region of interest) within the given image where faces should be +# detected. (NormalizedRect) +input_stream: "ROI:roi" + +# Detected faces. 
(std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +# Converts the input CPU or GPU image to the multi-backend image type (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } + option_value: "gpu_origin:options/gpu_origin" + option_value: "output_tensor_width:options/tensor_width" + option_value: "output_tensor_height:options/tensor_height" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { tensorrt {} } + } + } + option_value: "model_path:options/model_path" +} + +# Detection tensors. (std::vector) +#input_stream: "TENSORS:detection_tensors" + +# A 4x4 row-major-order matrix that maps a point represented in the detection +# tensors to a desired coordinate system, e.g., in the original input image +# before scaling/cropping. (std::array) +#input_stream: "MATRIX:transform_matrix" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +#output_stream: "DETECTIONS:detections" + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 1 + min_scale: 0.1484375 + max_scale: 0.75 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } + option_value: "input_size_width:tensor_width" + option_value: "input_size_height:tensor_height" + option_value: "num_layers:num_layers" + option_value: "strides:strides" + option_value: "interpolated_scale_aspect_ratio:interpolated_scale_aspect_ratio" +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. 
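+# Per the options below, each of the num_boxes detections carries 16 values:
+# 4 box coordinates at box_coord_offset 0, then 6 keypoints of 2 values each
+# starting at keypoint_coord_offset 4.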
+node { + calculator: "TensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_coords: 16 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 6 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + } + } + option_value: "num_boxes:num_boxes" + option_value: "x_scale:x_scale" + option_value: "y_scale:y_scale" + option_value: "h_scale:h_scale" + option_value: "w_scale:w_scale" + option_value: "min_score_thresh:min_score_thresh" +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Projects the detections from input tensor to the corresponding locations on +# the original image (input to the graph). +node { + calculator: "DetectionProjectionCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "PROJECTION_MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range.onnx b/mediapipe/modules/face_detection/face_detection_short_range.onnx index 638600236..18edac978 100644 Binary files a/mediapipe/modules/face_detection/face_detection_short_range.onnx and b/mediapipe/modules/face_detection/face_detection_short_range.onnx differ diff --git a/mediapipe/modules/face_detection/face_detection_short_range_by_roi_onnx_cuda.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_onnx_cuda.pbtxt new file mode 100644 index 000000000..10dd4774f --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_onnx_cuda.pbtxt @@ -0,0 +1,40 @@ +type: "FaceDetectionShortRangeByRoiOnnxCUDA" + +input_stream: "IMAGE:image" + +input_stream: "ROI:roi" + +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node { + calculator: "FaceDetectionOnnxCUDA" + input_stream: "IMAGE:image" + input_stream: "ROI:roi" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx" + tensor_width: 128 + tensor_height: 128 + + num_layers: 4 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + interpolated_scale_aspect_ratio: 1.0 + + num_boxes: 896 + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } + option_value: "OPTIONS:options" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_by_roi_onnx_tensorrt.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_onnx_tensorrt.pbtxt new file mode 100644 index 000000000..9d431912e --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_by_roi_onnx_tensorrt.pbtxt @@ -0,0 +1,40 @@ +type: "FaceDetectionShortRangeByRoiOnnxTensorRT" + +input_stream: "IMAGE:image" + +input_stream: "ROI:roi" + +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node { + calculator: 
"FaceDetectionOnnxTensorRT" + input_stream: "IMAGE:image" + input_stream: "ROI:roi" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx" + tensor_width: 128 + tensor_height: 128 + + num_layers: 4 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + interpolated_scale_aspect_ratio: 1.0 + + num_boxes: 896 + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } + option_value: "OPTIONS:options" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_onnx_cuda.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_onnx_cuda.pbtxt new file mode 100644 index 000000000..9d79fb6ac --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_onnx_cuda.pbtxt @@ -0,0 +1,40 @@ +type: "FaceDetectionShortRangeOnnxCUDA" + +input_stream: "IMAGE:image" + +input_stream: "ROI:roi" + +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node { + calculator: "FaceDetectionOnnxCUDA" + input_stream: "IMAGE:image" + input_stream: "ROI:roi" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx" + tensor_width: 128 + tensor_height: 128 + + num_layers: 4 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + interpolated_scale_aspect_ratio: 1.0 + + num_boxes: 896 + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } + option_value: "OPTIONS:options" +} diff --git a/mediapipe/modules/face_detection/face_detection_short_range_onnx_tensorrt.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_onnx_tensorrt.pbtxt new file mode 100644 index 000000000..c54dff27d --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_onnx_tensorrt.pbtxt @@ -0,0 +1,40 @@ +type: "FaceDetectionShortRangeOnnxTensorRT" + +input_stream: "IMAGE:image" + +input_stream: "ROI:roi" + +output_stream: "DETECTIONS:detections" + +graph_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] {} +} + +node { + calculator: "FaceDetectionOnnxTensorRT" + input_stream: "IMAGE:image" + input_stream: "ROI:roi" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.FaceDetectionOptions] { + model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx" + tensor_width: 128 + tensor_height: 128 + + num_layers: 4 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + interpolated_scale_aspect_ratio: 1.0 + + num_boxes: 896 + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.5 + } + } + option_value: "OPTIONS:options" +} diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD index f155e46d5..331319fcf 100644 --- a/mediapipe/modules/face_landmark/BUILD +++ b/mediapipe/modules/face_landmark/BUILD @@ -42,6 +42,45 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_landmark_onnx_cuda", + graph = "face_landmark_onnx_cuda.pbtxt", + register_as = "FaceLandmarkOnnxCUDA", + deps = [ + ":tensors_to_face_landmarks", + ":tensors_to_face_landmarks_with_attention", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + 
"//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_onnx_tensorrt", + graph = "face_landmark_onnx_tensorrt.pbtxt", + register_as = "FaceLandmarkOnnxTensorRT", + deps = [ + ":tensors_to_face_landmarks", + ":tensors_to_face_landmarks_with_attention", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + "//mediapipe/framework/tool:switch_container", + ], +) + mediapipe_simple_subgraph( name = "face_landmark_gpu", graph = "face_landmark_gpu.pbtxt", @@ -84,6 +123,48 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_landmark_front_onnx_cuda", + graph = "face_landmark_front_onnx_cuda.pbtxt", + register_as = "FaceLandmarkFrontOnnxCUDA", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_landmarks_to_roi", + ":face_landmark_onnx_cuda", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_onnx_cuda", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_onnx_tensorrt", + graph = "face_landmark_front_onnx_tensorrt.pbtxt", + register_as = "FaceLandmarkFrontOnnxTensorRT", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_landmarks_to_roi", + ":face_landmark_onnx_tensorrt", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_onnx_tensorrt", + ], +) + mediapipe_simple_subgraph( name = "face_landmark_front_gpu", graph = "face_landmark_front_gpu.pbtxt", diff --git a/mediapipe/modules/face_landmark/face_landmark_front_onnx_cuda.pbtxt 
b/mediapipe/modules/face_landmark/face_landmark_front_onnx_cuda.pbtxt
new file mode 100644
index 000000000..fa1283b14
--- /dev/null
+++ b/mediapipe/modules/face_landmark/face_landmark_front_onnx_cuda.pbtxt
@@ -0,0 +1,247 @@
+# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference
+# is executed with ONNX Runtime on CUDA.) This graph tries to skip face
+# detection as much as possible by using previously detected/predicted
+# landmarks for new images.
+#
+# It is required that "face_detection_short_range.onnx" is available at
+# "mediapipe/modules/face_detection/face_detection_short_range.onnx"
+# path during execution.
+#
+# It is required that "face_landmark.onnx" is available at
+# "mediapipe/modules/face_landmark/face_landmark.onnx"
+# path during execution if `with_attention` is not set or set to `false`.
+#
+# It is required that "face_landmark_with_attention.onnx" is available at
+# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx"
+# path during execution if `with_attention` is set to `true`.
+#
+# EXAMPLE:
+#   node {
+#     calculator: "FaceLandmarkFrontOnnxCUDA"
+#     input_stream: "IMAGE:image"
+#     input_side_packet: "NUM_FACES:num_faces"
+#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
+#     input_side_packet: "WITH_ATTENTION:with_attention"
+#     output_stream: "LANDMARKS:multi_face_landmarks"
+#   }
+
+type: "FaceLandmarkFrontOnnxCUDA"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+
+# Max number of faces to detect/track. (int)
+input_side_packet: "NUM_FACES:num_faces"
+
+# Whether landmarks on the previous image should be used to help localize
+# landmarks on the current image. (bool)
+input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
+
+# Whether to run the face mesh model with attention on lips and eyes. (bool)
+# Attention provides more accuracy on lips and eye regions as well as iris
+# landmarks.
+input_side_packet: "WITH_ATTENTION:with_attention"
+
+# Collection of detected/predicted faces, each represented as a list of 468
+# face landmarks (478 if `with_attention` is set).
+# (std::vector<NormalizedLandmarkList>)
+# NOTE: there will not be an output packet in the LANDMARKS stream for this
+# particular timestamp if no faces are detected. However, the MediaPipe
+# framework will internally inform the downstream calculators of the absence of
+# this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:multi_face_landmarks"
+
+# Extra outputs (for debugging, for instance).
+# Detected faces. (std::vector<Detection>)
+output_stream: "DETECTIONS:face_detections"
+# Regions of interest calculated based on landmarks.
+# (std::vector<NormalizedRect>)
+output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
+# Regions of interest calculated based on face detections.
+# (std::vector<NormalizedRect>)
+output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
+
+# When the optional input side packet "use_prev_landmarks" is either absent or
+# set to true, uses the landmarks on the previous image to help localize
+# landmarks on the current image.
+node {
+  calculator: "GateCalculator"
+  input_side_packet: "ALLOW:use_prev_landmarks"
+  input_stream: "prev_face_rects_from_landmarks"
+  output_stream: "gated_prev_face_rects_from_landmarks"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      allow: true
+    }
+  }
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided num_faces.
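+# The resulting flag drives the DISALLOW gate below, so a fresh face detection
+# pass runs only when the rects tracked from the previous frame no longer
+# cover num_faces faces.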
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
+  input_side_packet: "num_faces"
+  output_stream: "prev_has_enough_faces"
+}
+
+# Drops the incoming image if enough faces have already been identified from
+# the previous image. Otherwise, passes the incoming image through to trigger a
+# new round of face detection.
+node {
+  calculator: "GateCalculator"
+  input_stream: "image"
+  input_stream: "DISALLOW:prev_has_enough_faces"
+  output_stream: "gated_image"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionShortRangeOnnxCUDA"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "DETECTIONS:all_face_detections"
+}
+
+# Makes sure there are no more detections than the provided num_faces.
+node {
+  calculator: "ClipDetectionVectorSizeCalculator"
+  input_stream: "all_face_detections"
+  output_stream: "face_detections"
+  input_side_packet: "num_faces"
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "SIZE:gated_image_size"
+}
+
+# Outputs each element of face_detections at a fake timestamp for the rest of
+# the graph to process. Clones the image size packet for each face_detection at
+# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
+# for downstream calculators to inform them that all elements in the vector
+# have been processed.
+node {
+  calculator: "BeginLoopDetectionCalculator"
+  input_stream: "ITERABLE:face_detections"
+  input_stream: "CLONE:gated_image_size"
+  output_stream: "ITEM:face_detection"
+  output_stream: "CLONE:detections_loop_image_size"
+  output_stream: "BATCH_END:detections_loop_end_timestamp"
+}
+
+# Calculates a region of interest based on each face detection, so that it can
+# be used to detect landmarks.
+node {
+  calculator: "FaceDetectionFrontDetectionToRoi"
+  input_stream: "DETECTION:face_detection"
+  input_stream: "IMAGE_SIZE:detections_loop_image_size"
+  output_stream: "ROI:face_rect_from_detection"
+}
+
+# Collects a NormalizedRect for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:face_rect_from_detection"
+  input_stream: "BATCH_END:detections_loop_end_timestamp"
+  output_stream: "ITERABLE:face_rects_from_detections"
+}
+
+# Performs association between NormalizedRect vector elements from the previous
+# image and rects based on face detections from the current image. This
+# calculator ensures that the output face_rects vector doesn't contain
+# overlapping regions based on the specified min_similarity_threshold.
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "face_rects_from_detections"
+  input_stream: "gated_prev_face_rects_from_landmarks"
+  output_stream: "face_rects"
+  options: {
+    [mediapipe.AssociationCalculatorOptions.ext] {
+      min_similarity_threshold: 0.5
+    }
+  }
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:image"
+  output_stream: "SIZE:image_size"
+}
+
+# Outputs each element of face_rects at a fake timestamp for the rest of the
+# graph to process. Clones image and image size packets for each
+# single_face_rect at the fake timestamp.
+# At the end of the loop, outputs the BATCH_END timestamp for downstream
+# calculators to inform them that all elements in the vector have been
+# processed.
+node {
+  calculator: "BeginLoopNormalizedRectCalculator"
+  input_stream: "ITERABLE:face_rects"
+  input_stream: "CLONE:0:image"
+  input_stream: "CLONE:1:image_size"
+  output_stream: "ITEM:face_rect"
+  output_stream: "CLONE:0:landmarks_loop_image"
+  output_stream: "CLONE:1:landmarks_loop_image_size"
+  output_stream: "BATCH_END:landmarks_loop_end_timestamp"
+}
+
+# Detects face landmarks within the specified region of interest of the image.
+node {
+  calculator: "FaceLandmarkOnnxCUDA"
+  input_stream: "IMAGE:landmarks_loop_image"
+  input_stream: "ROI:face_rect"
+  input_side_packet: "WITH_ATTENTION:with_attention"
+  output_stream: "LANDMARKS:face_landmarks"
+}
+
+# Calculates a region of interest based on face landmarks, so that it can be
+# reused for the subsequent image.
+node {
+  calculator: "FaceLandmarkLandmarksToRoi"
+  input_stream: "LANDMARKS:face_landmarks"
+  input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
+  output_stream: "ROI:face_rect_from_landmarks"
+}
+
+# Collects a set of landmarks for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
+  input_stream: "ITEM:face_landmarks"
+  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
+  output_stream: "ITERABLE:multi_face_landmarks"
+}
+
+# Collects a NormalizedRect for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:face_rect_from_landmarks"
+  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
+  output_stream: "ITERABLE:face_rects_from_landmarks"
+}
+
+# Caches face rects calculated from landmarks, and upon the arrival of the next
+# input image, sends out the cached rects with timestamps replaced by that of
+# the input image, essentially generating a packet that carries the previous
+# face rects. Note that upon the arrival of the very first input image, a
+# timestamp bound update occurs to jump-start the feedback loop.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:image"
+  input_stream: "LOOP:face_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
+}
diff --git a/mediapipe/modules/face_landmark/face_landmark_front_onnx_tensorrt.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_onnx_tensorrt.pbtxt
new file mode 100644
index 000000000..fca5f7105
--- /dev/null
+++ b/mediapipe/modules/face_landmark/face_landmark_front_onnx_tensorrt.pbtxt
@@ -0,0 +1,247 @@
+# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference
+# is executed with ONNX Runtime on TensorRT.) This graph tries to skip face
+# detection as much as possible by using previously detected/predicted
+# landmarks for new images.
+#
+# It is required that "face_detection_short_range.onnx" is available at
+# "mediapipe/modules/face_detection/face_detection_short_range.onnx"
+# path during execution.
+#
+# It is required that "face_landmark.onnx" is available at
+# "mediapipe/modules/face_landmark/face_landmark.onnx"
+# path during execution if `with_attention` is not set or set to `false`.
+#
+# It is required that "face_landmark_with_attention.onnx" is available at
+# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx"
+# path during execution if `with_attention` is set to `true`.
+#
+# EXAMPLE:
+#   node {
+#     calculator: "FaceLandmarkFrontOnnxTensorRT"
+#     input_stream: "IMAGE:image"
+#     input_side_packet: "NUM_FACES:num_faces"
+#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
+#     input_side_packet: "WITH_ATTENTION:with_attention"
+#     output_stream: "LANDMARKS:multi_face_landmarks"
+#   }
+
+type: "FaceLandmarkFrontOnnxTensorRT"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+
+# Max number of faces to detect/track. (int)
+input_side_packet: "NUM_FACES:num_faces"
+
+# Whether landmarks on the previous image should be used to help localize
+# landmarks on the current image. (bool)
+input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
+
+# Whether to run the face mesh model with attention on lips and eyes. (bool)
+# Attention provides more accuracy on lips and eye regions as well as iris
+# landmarks.
+input_side_packet: "WITH_ATTENTION:with_attention"
+
+# Collection of detected/predicted faces, each represented as a list of 468
+# face landmarks (478 if `with_attention` is set).
+# (std::vector<NormalizedLandmarkList>)
+# NOTE: there will not be an output packet in the LANDMARKS stream for this
+# particular timestamp if no faces are detected. However, the MediaPipe
+# framework will internally inform the downstream calculators of the absence of
+# this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:multi_face_landmarks"
+
+# Extra outputs (for debugging, for instance).
+# Detected faces. (std::vector<Detection>)
+output_stream: "DETECTIONS:face_detections"
+# Regions of interest calculated based on landmarks.
+# (std::vector<NormalizedRect>)
+output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
+# Regions of interest calculated based on face detections.
+# (std::vector<NormalizedRect>)
+output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
+
+# When the optional input side packet "use_prev_landmarks" is either absent or
+# set to true, uses the landmarks on the previous image to help localize
+# landmarks on the current image.
+node {
+  calculator: "GateCalculator"
+  input_side_packet: "ALLOW:use_prev_landmarks"
+  input_stream: "prev_face_rects_from_landmarks"
+  output_stream: "gated_prev_face_rects_from_landmarks"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      allow: true
+    }
+  }
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided num_faces.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
+  input_side_packet: "num_faces"
+  output_stream: "prev_has_enough_faces"
+}
+
+# Drops the incoming image if enough faces have already been identified from
+# the previous image. Otherwise, passes the incoming image through to trigger a
+# new round of face detection.
+node {
+  calculator: "GateCalculator"
+  input_stream: "image"
+  input_stream: "DISALLOW:prev_has_enough_faces"
+  output_stream: "gated_image"
+  options: {
+    [mediapipe.GateCalculatorOptions.ext] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Detects faces.
+node {
+  calculator: "FaceDetectionShortRangeOnnxTensorRT"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "DETECTIONS:all_face_detections"
+}
+
+# Makes sure there are no more detections than the provided num_faces.
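+# Clipping the vector here bounds the per-frame work downstream: the landmark
+# loop below runs at most num_faces times per frame.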
+node {
+  calculator: "ClipDetectionVectorSizeCalculator"
+  input_stream: "all_face_detections"
+  output_stream: "face_detections"
+  input_side_packet: "num_faces"
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:gated_image"
+  output_stream: "SIZE:gated_image_size"
+}
+
+# Outputs each element of face_detections at a fake timestamp for the rest of
+# the graph to process. Clones the image size packet for each face_detection at
+# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
+# for downstream calculators to inform them that all elements in the vector
+# have been processed.
+node {
+  calculator: "BeginLoopDetectionCalculator"
+  input_stream: "ITERABLE:face_detections"
+  input_stream: "CLONE:gated_image_size"
+  output_stream: "ITEM:face_detection"
+  output_stream: "CLONE:detections_loop_image_size"
+  output_stream: "BATCH_END:detections_loop_end_timestamp"
+}
+
+# Calculates a region of interest based on each face detection, so that it can
+# be used to detect landmarks.
+node {
+  calculator: "FaceDetectionFrontDetectionToRoi"
+  input_stream: "DETECTION:face_detection"
+  input_stream: "IMAGE_SIZE:detections_loop_image_size"
+  output_stream: "ROI:face_rect_from_detection"
+}
+
+# Collects a NormalizedRect for each face into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:face_rect_from_detection"
+  input_stream: "BATCH_END:detections_loop_end_timestamp"
+  output_stream: "ITERABLE:face_rects_from_detections"
+}
+
+# Performs association between NormalizedRect vector elements from the previous
+# image and rects based on face detections from the current image. This
+# calculator ensures that the output face_rects vector doesn't contain
+# overlapping regions based on the specified min_similarity_threshold.
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "face_rects_from_detections"
+  input_stream: "gated_prev_face_rects_from_landmarks"
+  output_stream: "face_rects"
+  options: {
+    [mediapipe.AssociationCalculatorOptions.ext] {
+      min_similarity_threshold: 0.5
+    }
+  }
+}
+
+# Calculates the size of the image.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:image"
+  output_stream: "SIZE:image_size"
+}
+
+# Outputs each element of face_rects at a fake timestamp for the rest of the
+# graph to process. Clones image and image size packets for each
+# single_face_rect at the fake timestamp. At the end of the loop, outputs the
+# BATCH_END timestamp for downstream calculators to inform them that all
+# elements in the vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedRectCalculator"
+  input_stream: "ITERABLE:face_rects"
+  input_stream: "CLONE:0:image"
+  input_stream: "CLONE:1:image_size"
+  output_stream: "ITEM:face_rect"
+  output_stream: "CLONE:0:landmarks_loop_image"
+  output_stream: "CLONE:1:landmarks_loop_image_size"
+  output_stream: "BATCH_END:landmarks_loop_end_timestamp"
+}
+
+# Detects face landmarks within the specified region of interest of the image.
+node {
+  calculator: "FaceLandmarkOnnxTensorRT"
+  input_stream: "IMAGE:landmarks_loop_image"
+  input_stream: "ROI:face_rect"
+  input_side_packet: "WITH_ATTENTION:with_attention"
+  output_stream: "LANDMARKS:face_landmarks"
+}
+
+# Calculates a region of interest based on face landmarks, so that it can be
+# reused for the subsequent image.
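+# This ROI is collected into face_rects_from_landmarks below and fed back
+# through PreviousLoopbackCalculator, which is what lets the next frame skip
+# the face detector while tracking still covers enough faces.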
+node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt b/mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt new file mode 100644 index 000000000..a006ea15f --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt @@ -0,0 +1,166 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed with onnxruntime on cuda.) +# +# It is required that "face_landmark.onnx" is available at +# "mediapipe/modules/face_landmark/face_landmark.onnx" +# path during execution if `with_attention` is not set or set to `false`. +# +# It is required that "face_landmark_with_attention.onnx" is available at +# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx" +# path during execution if `with_attention` is set to `true`. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkOnnxCUDA" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# input_side_packet: "WITH_ATTENTION:with_attention" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkOnnxCUDA" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList) +# +# Number of landmarks depends on the WITH_ATTENTION flag. If it's `true` - then +# there will be 478 landmarks with refined lips, eyes and irises (10 extra +# landmarks are for irises), otherwise 468 non-refined landmarks are returned. +# +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. 
diff --git a/mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt b/mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt
new file mode 100644
index 000000000..a006ea15f
--- /dev/null
+++ b/mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt
@@ -0,0 +1,166 @@
+# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
+# executed with ONNX Runtime on CUDA.)
+#
+# It is required that "face_landmark.onnx" is available at
+# "mediapipe/modules/face_landmark/face_landmark.onnx"
+# path during execution if `with_attention` is not set or set to `false`.
+#
+# It is required that "face_landmark_with_attention.onnx" is available at
+# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx"
+# path during execution if `with_attention` is set to `true`.
+#
+# EXAMPLE:
+#   node {
+#     calculator: "FaceLandmarkOnnxCUDA"
+#     input_stream: "IMAGE:image"
+#     input_stream: "ROI:face_roi"
+#     input_side_packet: "WITH_ATTENTION:with_attention"
+#     output_stream: "LANDMARKS:face_landmarks"
+#   }

+type: "FaceLandmarkOnnxCUDA"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+# ROI (region of interest) within the given image where a face is located.
+# (NormalizedRect)
+input_stream: "ROI:roi"
+# Whether to run face mesh model with attention on lips and eyes. (bool)
+# Attention provides more accuracy on lips and eye regions as well as iris
+# landmarks.
+input_side_packet: "WITH_ATTENTION:with_attention"
+
+# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
+#
+# Number of landmarks depends on the WITH_ATTENTION flag. If it's `true`, there
+# will be 478 landmarks with refined lips, eyes and irises (10 extra landmarks
+# are for irises); otherwise, 468 non-refined landmarks are returned.
+#
+# NOTE: if a face is not present within the given ROI, for this particular
+# timestamp there will not be an output packet in the LANDMARKS stream. However,
+# the MediaPipe framework will internally inform the downstream calculators of
+# the absence of this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:face_landmarks"
+
+# Transforms the input image into a 192x192 tensor.
+node: {
+  calculator: "ImageToTensorCalculator"
+  input_stream: "IMAGE:image"
+  input_stream: "NORM_RECT:roi"
+  output_stream: "TENSORS:input_tensors"
+  options: {
+    [mediapipe.ImageToTensorCalculatorOptions.ext] {
+      output_tensor_width: 192
+      output_tensor_height: 192
+      output_tensor_float_range {
+        min: 0.0
+        max: 1.0
+      }
+    }
+  }
+}
+
+# Runs model inference with ONNX Runtime using the CUDA delegate.
+node {
+  calculator: "InferenceCalculator"
+  input_stream: "TENSORS:input_tensors"
+  output_stream: "TENSORS:output_tensors"
+  options: {
+    [mediapipe.InferenceCalculatorOptions.ext] {
+      delegate { cuda {} }
+      model_path: "mediapipe/modules/face_landmark/face_landmark.onnx"
+    }
+  }
+}
+
+# Splits a vector of tensors into landmark tensors and face flag tensor.
+node {
+  calculator: "SwitchContainer"
+  input_side_packet: "ENABLE:with_attention"
+  input_stream: "output_tensors"
+  output_stream: "landmark_tensors"
+  output_stream: "face_flag_tensor"
+  options: {
+    [mediapipe.SwitchContainerOptions.ext] {
+      contained_node: {
+        calculator: "SplitTensorVectorCalculator"
+        options: {
+          [mediapipe.SplitVectorCalculatorOptions.ext] {
+            ranges: { begin: 0 end: 1 }
+            ranges: { begin: 1 end: 2 }
+          }
+        }
+      }
+      contained_node: {
+        calculator: "SplitTensorVectorCalculator"
+        options: {
+          [mediapipe.SplitVectorCalculatorOptions.ext] {
+            ranges: { begin: 0 end: 6 }
+            ranges: { begin: 6 end: 7 }
+          }
+        }
+      }
+    }
+  }
+}
+
+# Converts the face-flag tensor into a float that represents the confidence
+# score of face presence.
+node {
+  calculator: "TensorsToFloatsCalculator"
+  input_stream: "TENSORS:face_flag_tensor"
+  output_stream: "FLOAT:face_presence_score"
+  options {
+    [mediapipe.TensorsToFloatsCalculatorOptions.ext] {
+      activation: SIGMOID
+    }
+  }
+}
+
+# Applies a threshold to the confidence score to determine whether a face is
+# present.
+node {
+  calculator: "ThresholdingCalculator"
+  input_stream: "FLOAT:face_presence_score"
+  output_stream: "FLAG:face_presence"
+  options: {
+    [mediapipe.ThresholdingCalculatorOptions.ext] {
+      threshold: 0.5
+    }
+  }
+}
+
+# Drops landmark tensors if face is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "landmark_tensors"
+  input_stream: "ALLOW:face_presence"
+  output_stream: "ensured_landmark_tensors"
+}
+
+# Decodes the landmark tensors into a vector of landmarks, where the landmark
+# coordinates are normalized by the size of the input image to the model.
+node {
+  calculator: "SwitchContainer"
+  input_side_packet: "ENABLE:with_attention"
+  input_stream: "TENSORS:ensured_landmark_tensors"
+  output_stream: "LANDMARKS:landmarks"
+  options: {
+    [mediapipe.SwitchContainerOptions.ext] {
+      contained_node: {
+        calculator: "TensorsToFaceLandmarks"
+      }
+      contained_node: {
+        calculator: "TensorsToFaceLandmarksWithAttention"
+      }
+    }
+  }
+}
+
+# Projects the landmarks from the cropped face image to the corresponding
+# locations on the full image before cropping (input to the graph).
+node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_onnx_tensorrt.pbtxt b/mediapipe/modules/face_landmark/face_landmark_onnx_tensorrt.pbtxt new file mode 100644 index 000000000..37af474c8 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_onnx_tensorrt.pbtxt @@ -0,0 +1,166 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed with onnxruntime on TensorRT.) +# +# It is required that "face_landmark.onnx" is available at +# "mediapipe/modules/face_landmark/face_landmark.onnx" +# path during execution if `with_attention` is not set or set to `false`. +# +# It is required that "face_landmark_with_attention.onnx" is available at +# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx" +# path during execution if `with_attention` is set to `true`. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkOnnxTensorrt" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# input_side_packet: "WITH_ATTENTION:with_attention" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkOnnxTensorrt" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" +# Whether to run face mesh model with attention on lips and eyes. (bool) +# Attention provides more accuracy on lips and eye regions as well as iris +# landmarks. +input_side_packet: "WITH_ATTENTION:with_attention" + +# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList) +# +# Number of landmarks depends on the WITH_ATTENTION flag. If it's `true` - then +# there will be 478 landmarks with refined lips, eyes and irises (10 extra +# landmarks are for irises), otherwise 468 non-refined landmarks are returned. +# +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:face_landmarks" + +# Transforms the input image into a 192x192 tensor. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + } + } +} + +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { tensorrt {} } + model_path: "mediapipe/modules/face_landmark/face_landmark.onnx" + } + } +} + +# Splits a vector of tensors into landmark tensors and face flag tensor. 
+node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "face_flag_tensor" + options: { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } + } + contained_node: { + calculator: "SplitTensorVectorCalculator" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 6 } + ranges: { begin: 6 end: 7 } + } + } + } + } + } +} + +# Converts the face-flag tensor into a float that represents the confidence +# score of face presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:face_flag_tensor" + output_stream: "FLOAT:face_presence_score" + options { + [mediapipe.TensorsToFloatsCalculatorOptions.ext] { + activation: SIGMOID + } + } +} + +# Applies a threshold to the confidence score to determine whether a face is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:face_presence_score" + output_stream: "FLAG:face_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drop landmarks tensors if face is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:face_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "SwitchContainer" + input_side_packet: "ENABLE:with_attention" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "LANDMARKS:landmarks" + options: { + [mediapipe.SwitchContainerOptions.ext] { + contained_node: { + calculator: "TensorsToFaceLandmarks" + } + contained_node: { + calculator: "TensorsToFaceLandmarksWithAttention" + } + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +}