face detection and landmark: support onnxruntime CUDA and TensorRT
This commit is contained in:
parent a440427bb2
commit 12046fcf89
@@ -24,6 +24,46 @@ cc_binary(
    ],
)

cc_binary(
    name = "face_detection_full_range_cpu_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_deps",
    ],
)

cc_binary(
    name = "face_detection_full_range_onnx_cuda",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_cuda_deps",
    ],
)

cc_binary(
    name = "face_detection_full_range_onnx_cuda_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_cuda_deps",
    ],
)

cc_binary(
    name = "face_detection_full_range_onnx_tensorrt",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_tensorrt_deps",
    ],
)

cc_binary(
    name = "face_detection_full_range_onnx_tensorrt_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/face_detection:face_detection_full_range_desktop_live_onnx_tensorrt_deps",
    ],
)

cc_binary(
    name = "face_detection_cpu",
    deps = [
@@ -32,6 +72,46 @@ cc_binary(
    ],
)

cc_binary(
    name = "face_detection_cpu_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/face_detection:desktop_live_calculators",
    ],
)

cc_binary(
    name = "face_detection_onnx_cuda",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/face_detection:desktop_live_onnx_cuda_calculators",
    ],
)

cc_binary(
    name = "face_detection_onnx_cuda_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/face_detection:desktop_live_onnx_cuda_calculators",
    ],
)

cc_binary(
    name = "face_detection_onnx_tensorrt",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/face_detection:desktop_live_onnx_tensorrt_calculators",
    ],
)

cc_binary(
    name = "face_detection_onnx_tensorrt_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/face_detection:desktop_live_onnx_tensorrt_calculators",
    ],
)

# Linux only
cc_binary(
    name = "face_detection_gpu",
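For orientation, here is a minimal sketch of what the demo_run_graph_main runner behind these targets does: load the graph config, feed webcam frames, and poll the annotated output. It loosely follows mediapipe/examples/desktop/demo_run_graph_main.cc; flag parsing and most error handling are trimmed, the helper name is illustrative, and the macro spellings follow the demo sources of that era, so treat this as a sketch rather than the shipped code.

    #include "absl/memory/memory.h"
    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/framework/formats/image_frame.h"
    #include "mediapipe/framework/formats/image_frame_opencv.h"
    #include "mediapipe/framework/port/opencv_imgproc_inc.h"
    #include "mediapipe/framework/port/opencv_video_inc.h"
    #include "mediapipe/framework/port/parse_text_proto.h"

    absl::Status RunGraph(const std::string& graph_config_text) {
      auto config =
          mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
              graph_config_text);
      mediapipe::CalculatorGraph graph;
      MP_RETURN_IF_ERROR(graph.Initialize(config));
      ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                       graph.AddOutputStreamPoller("output_video"));
      MP_RETURN_IF_ERROR(graph.StartRun({}));

      cv::VideoCapture capture(0);  // Webcam, as in the desktop demos.
      cv::Mat frame_bgr;
      for (int64_t ts_us = 0; capture.read(frame_bgr); ts_us += 33333) {
        // Wrap the frame as an RGB ImageFrame packet on "input_video".
        auto input = absl::make_unique<mediapipe::ImageFrame>(
            mediapipe::ImageFormat::SRGB, frame_bgr.cols, frame_bgr.rows,
            mediapipe::ImageFrame::kDefaultAlignmentBoundary);
        cv::cvtColor(frame_bgr, mediapipe::formats::MatView(input.get()),
                     cv::COLOR_BGR2RGB);
        MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
            "input_video",
            mediapipe::Adopt(input.release()).At(mediapipe::Timestamp(ts_us))));
        mediapipe::Packet packet;
        if (!poller.Next(&packet)) break;
        const auto& annotated = packet.Get<mediapipe::ImageFrame>();
        (void)annotated;  // Convert back to cv::Mat and display/save as needed.
      }
      MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));
      return graph.WaitUntilDone();
    }

The `_fps` variants link demo_run_graph_main_fps instead, which by its name adds frame-rate reporting on top of the same loop.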
@@ -43,6 +43,26 @@ cc_library(
    ],
)

cc_library(
    name = "desktop_live_onnx_cuda_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_onnx_cuda",
    ],
)

cc_library(
    name = "desktop_live_onnx_tensorrt_calculators",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_onnx_tensorrt",
    ],
)

cc_library(
    name = "desktop_live_gpu_calculators",
    deps = [
@@ -93,3 +113,23 @@ cc_library(
        "//mediapipe/modules/face_detection:face_detection_full_range_cpu",
    ],
)

cc_library(
    name = "face_detection_full_range_desktop_live_onnx_cuda_deps",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_full_range_onnx_cuda",
    ],
)

cc_library(
    name = "face_detection_full_range_desktop_live_onnx_tensorrt_deps",
    deps = [
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/calculators/util:detections_to_render_data_calculator",
        "//mediapipe/modules/face_detection:face_detection_full_range_onnx_tensorrt",
    ],
)
@@ -0,0 +1,58 @@
# MediaPipe graph that performs face detection with onnxruntime CUDA.

# CPU buffer. (ImageFrame)
input_stream: "input_video"

# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects faces.
node {
  calculator: "FaceDetectionShortRangeOnnxCUDA"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:face_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:face_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
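Since the graph above also exposes the raw "face_detections" stream, a runner can poll it alongside output_video. A hedged sketch follows; the packet type std::vector<mediapipe::Detection> is an assumption based on the stream's documented type, and the poller is attached to the same graph object as in the earlier runner sketch.

    #include "mediapipe/framework/formats/detection.pb.h"

    // Attach before StartRun, next to the "output_video" poller.
    ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller detections_poller,
                     graph.AddOutputStreamPoller("face_detections"));

    mediapipe::Packet packet;
    while (detections_poller.Next(&packet)) {
      const auto& detections =
          packet.Get<std::vector<mediapipe::Detection>>();
      for (const mediapipe::Detection& d : detections) {
        const auto& box = d.location_data().relative_bounding_box();
        // Coordinates are normalized to the frame: box.xmin(), box.ymin(),
        // box.width(), box.height(); d.score(0) holds the face confidence.
      }
    }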
@@ -0,0 +1,58 @@
# MediaPipe graph that performs face detection with onnxruntime TensorRT.

# CPU buffer. (ImageFrame)
input_stream: "input_video"

# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Detected faces. (std::vector<Detection>)
output_stream: "face_detections"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects faces.
node {
  calculator: "FaceDetectionShortRangeOnnxTensorRT"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:face_detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:face_detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,58 @@
# MediaPipe graph that performs full-range face detection with onnxruntime CUDA.

# Images on CPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:detections"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects faces.
node {
  calculator: "FaceDetectionFullRangeOnnxCUDA"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
@@ -0,0 +1,58 @@
# MediaPipe graph that performs full-range face detection with onnxruntime
# TensorRT.

# Images on CPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:detections"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Detects faces.
node {
  calculator: "FaceDetectionFullRangeOnnxTensorRT"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "DETECTIONS:detections"
}

# Converts the detections to drawing primitives for annotation overlay.
node {
  calculator: "DetectionsToRenderDataCalculator"
  input_stream: "DETECTIONS:detections"
  output_stream: "RENDER_DATA:render_data"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
      thickness: 4.0
      color { r: 255 g: 0 b: 0 }
    }
  }
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "render_data"
  output_stream: "IMAGE:output_video"
}
@@ -17,7 +17,7 @@ load(
    "mediapipe_simple_subgraph",
)
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library")
-load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test")
+load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test")  #@unused

licenses(["notice"])
@@ -35,6 +35,24 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_short_range_by_roi_onnx_cuda",
    graph = "face_detection_short_range_by_roi_onnx_cuda.pbtxt",
    register_as = "FaceDetectionShortRangeByRoiOnnxCUDA",
    deps = [
        ":face_detection_short_range_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_short_range_by_roi_onnx_tensorrt",
    graph = "face_detection_short_range_by_roi_onnx_tensorrt.pbtxt",
    register_as = "FaceDetectionShortRangeByRoiOnnxTensorRT",
    deps = [
        ":face_detection_short_range_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_short_range_by_roi_gpu",
    graph = "face_detection_short_range_by_roi_gpu.pbtxt",
@@ -74,6 +92,24 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_short_range_onnx_cuda",
    graph = "face_detection_short_range_onnx_cuda.pbtxt",
    register_as = "FaceDetectionShortRangeOnnxCUDA",
    deps = [
        ":face_detection_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_short_range_onnx_tensorrt",
    graph = "face_detection_short_range_onnx_tensorrt.pbtxt",
    register_as = "FaceDetectionShortRangeOnnxTensorRT",
    deps = [
        ":face_detection_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_full_range",
    graph = "face_detection_full_range.pbtxt",
@@ -83,6 +119,24 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_full_range_onnx_cuda",
    graph = "face_detection_full_range_onnx_cuda.pbtxt",
    register_as = "FaceDetectionFullRangeOnnxCUDA",
    deps = [
        ":face_detection_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_full_range_onnx_tensorrt",
    graph = "face_detection_full_range_onnx_tensorrt.pbtxt",
    register_as = "FaceDetectionFullRangeOnnxTensorRT",
    deps = [
        ":face_detection_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_without_roi",
    graph = "face_detection_without_roi.pbtxt",
@@ -110,6 +164,42 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_onnx_cuda",
    graph = "face_detection_onnx_cuda.pbtxt",
    register_as = "FaceDetectionOnnxCUDA",
    deps = [
        ":face_detection_cc_proto",
        ":face_detection_options_lib",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/util:detection_projection_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
        "//mediapipe/calculators/util:to_image_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "face_detection_onnx_tensorrt",
    graph = "face_detection_onnx_tensorrt.pbtxt",
    register_as = "FaceDetectionOnnxTensorRT",
    deps = [
        ":face_detection_cc_proto",
        ":face_detection_options_lib",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/util:detection_projection_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
        "//mediapipe/calculators/util:to_image_calculator",
    ],
)

mediapipe_proto_library(
    name = "face_detection_proto",
    srcs = ["face_detection.proto"],
@@ -168,8 +258,11 @@ mediapipe_simple_subgraph(

exports_files(
    srcs = [
        "face_detection_full_range.onnx",
        "face_detection_full_range.tflite",
        "face_detection_full_range_sparse.onnx",
        "face_detection_full_range_sparse.tflite",
        "face_detection_short_range.onnx",
        "face_detection_short_range.tflite",
    ],
)

Binary file not shown.
@@ -0,0 +1,37 @@
type: "FaceDetectionFullRangeOnnxCUDA"

input_stream: "IMAGE:image"

input_stream: "ROI:roi"

output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

node {
  calculator: "FaceDetectionOnnxCUDA"
  input_stream: "IMAGE:image"
  input_stream: "ROI:roi"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
      model_path: "mediapipe/modules/face_detection/face_detection_full_range.onnx"
      tensor_width: 192
      tensor_height: 192

      num_layers: 1
      strides: 4
      interpolated_scale_aspect_ratio: 0.0

      num_boxes: 2304
      x_scale: 192.0
      y_scale: 192.0
      h_scale: 192.0
      w_scale: 192.0
      min_score_thresh: 0.6
    }
  }
  option_value: "OPTIONS:options"
}
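A quick consistency check on the numbers above (the per-cell anchor count is inferred from the options, not stated in this commit): with num_layers: 1, stride 4, a single aspect_ratios: 1.0 anchor per cell, and interpolated_scale_aspect_ratio: 0.0, a 192x192 input yields a (192/4) x (192/4) = 48 x 48 grid, and 48 * 48 * 1 = 2304 anchors, matching num_boxes: 2304. The x/y/h/w scales equal the input size, consistent with the model emitting box offsets in input-pixel units.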
@@ -0,0 +1,37 @@
type: "FaceDetectionFullRangeOnnxTensorRT"

input_stream: "IMAGE:image"

input_stream: "ROI:roi"

output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

node {
  calculator: "FaceDetectionOnnxTensorRT"
  input_stream: "IMAGE:image"
  input_stream: "ROI:roi"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
      model_path: "mediapipe/modules/face_detection/face_detection_full_range.onnx"
      tensor_width: 192
      tensor_height: 192

      num_layers: 1
      strides: 4
      interpolated_scale_aspect_ratio: 0.0

      num_boxes: 2304
      x_scale: 192.0
      y_scale: 192.0
      h_scale: 192.0
      w_scale: 192.0
      min_score_thresh: 0.6
    }
  }
  option_value: "OPTIONS:options"
}

mediapipe/modules/face_detection/face_detection_onnx_cuda.pbtxt (new file, 155 lines)
@@ -0,0 +1,155 @@
type: "FaceDetectionOnnxCUDA"

# The input image, either ImageFrame, GpuBuffer, or (multi-backend) Image.
input_stream: "IMAGE:image"

# ROI (region of interest) within the given image where faces should be
# detected. (NormalizedRect)
input_stream: "ROI:roi"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

# Converts the input CPU or GPU image to the multi-backend image type (Image).
node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE:image"
  output_stream: "IMAGE:multi_backend_image"
}

# Transforms the input image into a tensor of the size given in the options
# (e.g. 128x128 for the short-range model) while keeping the aspect ratio
# (what is expected by the corresponding face detection model), resulting in
# potential letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
    }
  }
  option_value: "gpu_origin:options/gpu_origin"
  option_value: "output_tensor_width:options/tensor_width"
  option_value: "output_tensor_height:options/tensor_height"
}

# Runs the ONNX face detection model through onnxruntime with the CUDA
# delegate. The model takes an image tensor and outputs a vector of tensors
# representing, for instance, detection boxes/keypoints and scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate { cuda {} }
    }
  }
  option_value: "model_path:options/model_path"
}

# Detection tensors. (std::vector<Tensor>)
#input_stream: "TENSORS:detection_tensors"

# A 4x4 row-major-order matrix that maps a point represented in the detection
# tensors to a desired coordinate system, e.g., in the original input image
# before scaling/cropping. (std::array<float, 16>)
#input_stream: "MATRIX:transform_matrix"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
#output_stream: "DETECTIONS:detections"

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 1
      min_scale: 0.1484375
      max_scale: 0.75
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
  option_value: "input_size_width:tensor_width"
  option_value: "input_size_height:tensor_height"
  option_value: "num_layers:num_layers"
  option_value: "strides:strides"
  option_value: "interpolated_scale_aspect_ratio:interpolated_scale_aspect_ratio"
}

# Decodes the detection tensors generated by the face detection model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_coords: 16
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 6
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
    }
  }
  option_value: "num_boxes:num_boxes"
  option_value: "x_scale:x_scale"
  option_value: "y_scale:y_scale"
  option_value: "h_scale:h_scale"
  option_value: "w_scale:w_scale"
  option_value: "min_score_thresh:min_score_thresh"
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
}

# Projects the detections from input tensor to the corresponding locations on
# the original image (input to the graph).
node {
  calculator: "DetectionProjectionCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "PROJECTION_MATRIX:transform_matrix"
  output_stream: "DETECTIONS:detections"
}
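To make the scale options concrete, here is a hedged sketch of the per-anchor box decode that TensorsToDetectionsCalculator applies under this configuration. It paraphrases the calculator's decode logic rather than reproducing its source; the struct and function names are illustrative. With reverse_output_order: true the model emits x before y.

    // Per-anchor SSD box decode, as configured above (sketch, not the actual
    // calculator source). raw[] is one box's 4-value slice of the tensor.
    struct Anchor { float x_center, y_center, w, h; };

    void DecodeBox(const float raw[4], const Anchor& a,
                   float x_scale, float y_scale, float w_scale, float h_scale,
                   float out[4] /* xmin, ymin, xmax, ymax (normalized) */) {
      // reverse_output_order: true -> raw[0] is x, raw[1] is y.
      const float x_center = raw[0] / x_scale * a.w + a.x_center;
      const float y_center = raw[1] / y_scale * a.h + a.y_center;
      const float w = raw[2] / w_scale * a.w;
      const float h = raw[3] / h_scale * a.h;
      out[0] = x_center - w / 2.f;
      out[1] = y_center - h / 2.f;
      out[2] = x_center + w / 2.f;
      out[3] = y_center + h / 2.f;
    }

With fixed_anchor_size: true each anchor's w and h are 1.0, so dividing raw pixel-unit outputs by scales equal to the input size yields normalized coordinates.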
@@ -0,0 +1,165 @@
# MediaPipe graph to detect faces.
#
# EXAMPLE:
#   node {
#     calculator: "FaceDetectionOnnxTensorRT"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:roi"
#     output_stream: "DETECTIONS:face_detections"
#   }

type: "FaceDetectionOnnxTensorRT"

# The input image, either ImageFrame, GpuBuffer, or (multi-backend) Image.
input_stream: "IMAGE:image"

# ROI (region of interest) within the given image where faces should be
# detected. (NormalizedRect)
input_stream: "ROI:roi"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

# Converts the input CPU or GPU image to the multi-backend image type (Image).
node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE:image"
  output_stream: "IMAGE:multi_backend_image"
}

# Transforms the input image into a tensor of the size given in the options
# (e.g. 128x128 for the short-range model) while keeping the aspect ratio
# (what is expected by the corresponding face detection model), resulting in
# potential letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
    }
  }
  option_value: "gpu_origin:options/gpu_origin"
  option_value: "output_tensor_width:options/tensor_width"
  option_value: "output_tensor_height:options/tensor_height"
}

# Runs the ONNX face detection model through onnxruntime with the TensorRT
# delegate. The model takes an image tensor and outputs a vector of tensors
# representing, for instance, detection boxes/keypoints and scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate { tensorrt {} }
    }
  }
  option_value: "model_path:options/model_path"
}

# Detection tensors. (std::vector<Tensor>)
#input_stream: "TENSORS:detection_tensors"

# A 4x4 row-major-order matrix that maps a point represented in the detection
# tensors to a desired coordinate system, e.g., in the original input image
# before scaling/cropping. (std::array<float, 16>)
#input_stream: "MATRIX:transform_matrix"

# Detected faces. (std::vector<Detection>)
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
#output_stream: "DETECTIONS:detections"

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 1
      min_scale: 0.1484375
      max_scale: 0.75
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
  option_value: "input_size_width:tensor_width"
  option_value: "input_size_height:tensor_height"
  option_value: "num_layers:num_layers"
  option_value: "strides:strides"
  option_value: "interpolated_scale_aspect_ratio:interpolated_scale_aspect_ratio"
}

# Decodes the detection tensors generated by the face detection model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_coords: 16
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 6
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
    }
  }
  option_value: "num_boxes:num_boxes"
  option_value: "x_scale:x_scale"
  option_value: "y_scale:y_scale"
  option_value: "h_scale:h_scale"
  option_value: "w_scale:w_scale"
  option_value: "min_score_thresh:min_score_thresh"
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
}

# Projects the detections from input tensor to the corresponding locations on
# the original image (input to the graph).
node {
  calculator: "DetectionProjectionCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "PROJECTION_MATRIX:transform_matrix"
  output_stream: "DETECTIONS:detections"
}

Binary file not shown.
@@ -0,0 +1,40 @@
type: "FaceDetectionShortRangeByRoiOnnxCUDA"

input_stream: "IMAGE:image"

input_stream: "ROI:roi"

output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

node {
  calculator: "FaceDetectionOnnxCUDA"
  input_stream: "IMAGE:image"
  input_stream: "ROI:roi"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
      model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx"
      tensor_width: 128
      tensor_height: 128

      num_layers: 4
      strides: 8
      strides: 16
      strides: 16
      strides: 16
      interpolated_scale_aspect_ratio: 1.0

      num_boxes: 896
      x_scale: 128.0
      y_scale: 128.0
      h_scale: 128.0
      w_scale: 128.0
      min_score_thresh: 0.5
    }
  }
  option_value: "OPTIONS:options"
}
@@ -0,0 +1,40 @@
type: "FaceDetectionShortRangeByRoiOnnxTensorRT"

input_stream: "IMAGE:image"

input_stream: "ROI:roi"

output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

node {
  calculator: "FaceDetectionOnnxTensorRT"
  input_stream: "IMAGE:image"
  input_stream: "ROI:roi"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
      model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx"
      tensor_width: 128
      tensor_height: 128

      num_layers: 4
      strides: 8
      strides: 16
      strides: 16
      strides: 16
      interpolated_scale_aspect_ratio: 1.0

      num_boxes: 896
      x_scale: 128.0
      y_scale: 128.0
      h_scale: 128.0
      w_scale: 128.0
      min_score_thresh: 0.5
    }
  }
  option_value: "OPTIONS:options"
}
@@ -0,0 +1,40 @@
type: "FaceDetectionShortRangeOnnxCUDA"

input_stream: "IMAGE:image"

input_stream: "ROI:roi"

output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

node {
  calculator: "FaceDetectionOnnxCUDA"
  input_stream: "IMAGE:image"
  input_stream: "ROI:roi"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
      model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx"
      tensor_width: 128
      tensor_height: 128

      num_layers: 4
      strides: 8
      strides: 16
      strides: 16
      strides: 16
      interpolated_scale_aspect_ratio: 1.0

      num_boxes: 896
      x_scale: 128.0
      y_scale: 128.0
      h_scale: 128.0
      w_scale: 128.0
      min_score_thresh: 0.5
    }
  }
  option_value: "OPTIONS:options"
}
@@ -0,0 +1,40 @@
type: "FaceDetectionShortRangeOnnxTensorRT"

input_stream: "IMAGE:image"

input_stream: "ROI:roi"

output_stream: "DETECTIONS:detections"

graph_options: {
  [type.googleapis.com/mediapipe.FaceDetectionOptions] {}
}

node {
  calculator: "FaceDetectionOnnxTensorRT"
  input_stream: "IMAGE:image"
  input_stream: "ROI:roi"
  output_stream: "DETECTIONS:detections"
  node_options: {
    [type.googleapis.com/mediapipe.FaceDetectionOptions] {
      model_path: "mediapipe/modules/face_detection/face_detection_short_range.onnx"
      tensor_width: 128
      tensor_height: 128

      num_layers: 4
      strides: 8
      strides: 16
      strides: 16
      strides: 16
      interpolated_scale_aspect_ratio: 1.0

      num_boxes: 896
      x_scale: 128.0
      y_scale: 128.0
      h_scale: 128.0
      w_scale: 128.0
      min_score_thresh: 0.5
    }
  }
  option_value: "OPTIONS:options"
}
@@ -42,6 +42,45 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmark_onnx_cuda",
    graph = "face_landmark_onnx_cuda.pbtxt",
    register_as = "FaceLandmarkOnnxCUDA",
    deps = [
        ":tensors_to_face_landmarks",
        ":tensors_to_face_landmarks_with_attention",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
        "//mediapipe/calculators/util:landmark_projection_calculator",
        "//mediapipe/calculators/util:thresholding_calculator",
        "//mediapipe/framework/tool:switch_container",
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmark_onnx_tensorrt",
    graph = "face_landmark_onnx_tensorrt.pbtxt",
    register_as = "FaceLandmarkOnnxTensorRT",
    deps = [
        ":tensors_to_face_landmarks",
        ":tensors_to_face_landmarks_with_attention",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
        "//mediapipe/calculators/util:landmark_projection_calculator",
        "//mediapipe/calculators/util:thresholding_calculator",
        "//mediapipe/framework/tool:switch_container",
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmark_gpu",
    graph = "face_landmark_gpu.pbtxt",
@@ -84,6 +123,48 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmark_front_onnx_cuda",
    graph = "face_landmark_front_onnx_cuda.pbtxt",
    register_as = "FaceLandmarkFrontOnnxCUDA",
    deps = [
        ":face_detection_front_detection_to_roi",
        ":face_landmark_landmarks_to_roi",
        ":face_landmark_onnx_cuda",
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:clip_vector_size_calculator",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:association_norm_rect_calculator",
        "//mediapipe/calculators/util:collection_has_min_size_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmark_front_onnx_tensorrt",
    graph = "face_landmark_front_onnx_tensorrt.pbtxt",
    register_as = "FaceLandmarkFrontOnnxTensorRT",
    deps = [
        ":face_detection_front_detection_to_roi",
        ":face_landmark_landmarks_to_roi",
        ":face_landmark_onnx_tensorrt",
        "//mediapipe/calculators/core:begin_loop_calculator",
        "//mediapipe/calculators/core:clip_vector_size_calculator",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:end_loop_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:association_norm_rect_calculator",
        "//mediapipe/calculators/util:collection_has_min_size_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmark_front_gpu",
    graph = "face_landmark_front_gpu.pbtxt",
@ -0,0 +1,247 @@
|
||||||
|
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is
|
||||||
|
# executed with onnxruntime on cuda.) This graph tries to skip face detection as much as possible
|
||||||
|
# by using previously detected/predicted landmarks for new images.
|
||||||
|
#
|
||||||
|
# It is required that "face_detection_short_range.onnxruntime" is available at
|
||||||
|
# "mediapipe/modules/face_detection/face_detection_short_range.onnxruntime"
|
||||||
|
# path during execution.
|
||||||
|
#
|
||||||
|
# It is required that "face_landmark.onnxruntime" is available at
|
||||||
|
# "mediapipe/modules/face_landmark/face_landmark.onnxruntime"
|
||||||
|
# path during execution if `with_attention` is not set or set to `false`.
|
||||||
|
#
|
||||||
|
# It is required that "face_landmark_with_attention.onnxruntime" is available at
|
||||||
|
# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnxruntime"
|
||||||
|
# path during execution if `with_attention` is set to `true`.
|
||||||
|
#
|
||||||
|
# EXAMPLE:
|
||||||
|
# node {
|
||||||
|
# calculator: "FaceLandmarkFrontOnnxCUDA"
|
||||||
|
# input_stream: "IMAGE:image"
|
||||||
|
# input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||||
|
# input_side_packet: "WITH_ATTENTION:with_attention"
|
||||||
|
# output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
# }
|
||||||
|
|
||||||
|
type: "FaceLandmarkFrontOnnxCUDA"
|
||||||
|
|
||||||
|
# CPU image. (ImageFrame)
|
||||||
|
input_stream: "IMAGE:image"
|
||||||
|
|
||||||
|
# Max number of faces to detect/track. (int)
|
||||||
|
input_side_packet: "NUM_FACES:num_faces"
|
||||||
|
|
||||||
|
# Whether landmarks on the previous image should be used to help localize
|
||||||
|
# landmarks on the current image. (bool)
|
||||||
|
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
|
||||||
|
|
||||||
|
# Whether to run face mesh model with attention on lips and eyes. (bool)
|
||||||
|
# Attention provides more accuracy on lips and eye regions as well as iris
|
||||||
|
# landmarks.
|
||||||
|
input_side_packet: "WITH_ATTENTION:with_attention"
|
||||||
|
|
||||||
|
# Collection of detected/predicted faces, each represented as a list of 468 face
|
||||||
|
# landmarks. (std::vector<NormalizedLandmarkList>)
|
||||||
|
# NOTE: there will not be an output packet in the LANDMARKS stream for this
|
||||||
|
# particular timestamp if none of faces detected. However, the MediaPipe
|
||||||
|
# framework will internally inform the downstream calculators of the absence of
|
||||||
|
# this packet so that they don't wait for it unnecessarily.
|
||||||
|
output_stream: "LANDMARKS:multi_face_landmarks"
|
||||||
|
|
||||||
|
# Extra outputs (for debugging, for instance).
|
||||||
|
# Detected faces. (std::vector<Detection>)
|
||||||
|
output_stream: "DETECTIONS:face_detections"
|
||||||
|
# Regions of interest calculated based on landmarks.
|
||||||
|
# (std::vector<NormalizedRect>)
|
||||||
|
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
|
||||||
|
# Regions of interest calculated based on face detections.
|
||||||
|
# (std::vector<NormalizedRect>)
|
||||||
|
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"
|
||||||
|
|
||||||
|
# When the optional input side packet "use_prev_landmarks" is either absent or
|
||||||
|
# set to true, uses the landmarks on the previous image to help localize
|
||||||
|
# landmarks on the current image.
|
||||||
|
node {
|
||||||
|
calculator: "GateCalculator"
|
||||||
|
input_side_packet: "ALLOW:use_prev_landmarks"
|
||||||
|
input_stream: "prev_face_rects_from_landmarks"
|
||||||
|
output_stream: "gated_prev_face_rects_from_landmarks"
|
||||||
|
options: {
|
||||||
|
[mediapipe.GateCalculatorOptions.ext] {
|
||||||
|
allow: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Determines if an input vector of NormalizedRect has a size greater than or
|
||||||
|
# equal to the provided num_faces.
|
||||||
|
node {
|
||||||
|
calculator: "NormalizedRectVectorHasMinSizeCalculator"
|
||||||
|
input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
|
||||||
|
input_side_packet: "num_faces"
|
||||||
|
output_stream: "prev_has_enough_faces"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Drops the incoming image if enough faces have already been identified from the
|
||||||
|
# previous image. Otherwise, passes the incoming image through to trigger a new
|
||||||
|
# round of face detection.
|
||||||
|
node {
|
||||||
|
calculator: "GateCalculator"
|
||||||
|
input_stream: "image"
|
||||||
|
input_stream: "DISALLOW:prev_has_enough_faces"
|
||||||
|
output_stream: "gated_image"
|
||||||
|
options: {
|
||||||
|
[mediapipe.GateCalculatorOptions.ext] {
|
||||||
|
empty_packets_as_allow: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detects faces.
|
||||||
|
node {
|
||||||
|
calculator: "FaceDetectionShortRangeOnnxCUDA"
|
||||||
|
input_stream: "IMAGE:gated_image"
|
||||||
|
output_stream: "DETECTIONS:all_face_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Makes sure there are no more detections than the provided num_faces.
|
||||||
|
node {
|
||||||
|
calculator: "ClipDetectionVectorSizeCalculator"
|
||||||
|
input_stream: "all_face_detections"
|
||||||
|
output_stream: "face_detections"
|
||||||
|
input_side_packet: "num_faces"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate size of the image.
|
||||||
|
node {
|
||||||
|
calculator: "ImagePropertiesCalculator"
|
||||||
|
input_stream: "IMAGE:gated_image"
|
||||||
|
output_stream: "SIZE:gated_image_size"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Outputs each element of face_detections at a fake timestamp for the rest of
|
||||||
|
# the graph to process. Clones the image size packet for each face_detection at
|
||||||
|
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
|
||||||
|
# for downstream calculators to inform them that all elements in the vector have
|
||||||
|
# been processed.
|
||||||
|
node {
|
||||||
|
calculator: "BeginLoopDetectionCalculator"
|
||||||
|
input_stream: "ITERABLE:face_detections"
|
||||||
|
input_stream: "CLONE:gated_image_size"
|
||||||
|
output_stream: "ITEM:face_detection"
|
||||||
|
output_stream: "CLONE:detections_loop_image_size"
|
||||||
|
output_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculates region of interest based on face detections, so that can be used
|
||||||
|
# to detect landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "FaceDetectionFrontDetectionToRoi"
|
||||||
|
input_stream: "DETECTION:face_detection"
|
||||||
|
input_stream: "IMAGE_SIZE:detections_loop_image_size"
|
||||||
|
output_stream: "ROI:face_rect_from_detection"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Collects a NormalizedRect for each face into a vector. Upon receiving the
|
||||||
|
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
|
||||||
|
# timestamp.
|
||||||
|
node {
|
||||||
|
calculator: "EndLoopNormalizedRectCalculator"
|
||||||
|
input_stream: "ITEM:face_rect_from_detection"
|
||||||
|
input_stream: "BATCH_END:detections_loop_end_timestamp"
|
||||||
|
output_stream: "ITERABLE:face_rects_from_detections"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Performs association between NormalizedRect vector elements from previous
|
||||||
|
# image and rects based on face detections from the current image. This
|
||||||
|
# calculator ensures that the output face_rects vector doesn't contain
|
||||||
|
# overlapping regions based on the specified min_similarity_threshold.
|
||||||
|
node {
|
||||||
|
calculator: "AssociationNormRectCalculator"
|
||||||
|
input_stream: "face_rects_from_detections"
|
||||||
|
input_stream: "gated_prev_face_rects_from_landmarks"
|
||||||
|
output_stream: "face_rects"
|
||||||
|
options: {
|
||||||
|
[mediapipe.AssociationCalculatorOptions.ext] {
|
||||||
|
min_similarity_threshold: 0.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}

# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:image"
  output_stream: "SIZE:image_size"
}

# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
  calculator: "BeginLoopNormalizedRectCalculator"
  input_stream: "ITERABLE:face_rects"
  input_stream: "CLONE:0:image"
  input_stream: "CLONE:1:image_size"
  output_stream: "ITEM:face_rect"
  output_stream: "CLONE:0:landmarks_loop_image"
  output_stream: "CLONE:1:landmarks_loop_image_size"
  output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}

# Detects face landmarks within the specified region of interest of the image.
node {
  calculator: "FaceLandmarkOnnxCUDA"
  input_stream: "IMAGE:landmarks_loop_image"
  input_stream: "ROI:face_rect"
  input_side_packet: "WITH_ATTENTION:with_attention"
  output_stream: "LANDMARKS:face_landmarks"
}

# Calculates region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
  calculator: "FaceLandmarkLandmarksToRoi"
  input_stream: "LANDMARKS:face_landmarks"
  input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
  output_stream: "ROI:face_rect_from_landmarks"
}

# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITEM:face_landmarks"
  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
  output_stream: "ITERABLE:multi_face_landmarks"
}

# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedRectCalculator"
  input_stream: "ITEM:face_rect_from_landmarks"
  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
  output_stream: "ITERABLE:face_rects_from_landmarks"
}

# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump-start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:face_rects_from_landmarks"
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}
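
In timestamp terms, the loopback comment above amounts to:

$$\mathrm{PREV\_LOOP}(t_k) = \mathrm{LOOP}(t_{k-1}) \;\text{for}\; k > 1, \qquad \mathrm{PREV\_LOOP}(t_1) = \text{empty (timestamp bound update only)}.$$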
@ -0,0 +1,247 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference
# is executed with onnxruntime on TensorRT.) This graph tries to skip face
# detection as much as possible by using previously detected/predicted
# landmarks for new images.
#
# It is required that "face_detection_short_range.onnx" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.onnx"
# path during execution.
#
# It is required that "face_landmark.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark.onnx"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
#   node {
#     calculator: "FaceLandmarkFrontTensorRT"
#     input_stream: "IMAGE:image"
#     input_side_packet: "NUM_FACES:num_faces"
#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#     input_side_packet: "WITH_ATTENTION:with_attention"
#     output_stream: "LANDMARKS:multi_face_landmarks"
#   }

type: "FaceLandmarkFrontTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Max number of faces to detect/track. (int)
input_side_packet: "NUM_FACES:num_faces"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Whether to run the face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"

# Collection of detected/predicted faces, each represented as a list of 468
# face landmarks. (std::vector<NormalizedLandmarkList>)
# NOTE: there will not be an output packet in the LANDMARKS stream for this
# particular timestamp if no faces are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:multi_face_landmarks"

# Extra outputs (for debugging, for instance).
# Detected faces. (std::vector<Detection>)
output_stream: "DETECTIONS:face_detections"
# Regions of interest calculated based on landmarks.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
# Regions of interest calculated based on face detections.
# (std::vector<NormalizedRect>)
output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections"

# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  input_stream: "prev_face_rects_from_landmarks"
  output_stream: "gated_prev_face_rects_from_landmarks"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      allow: true
    }
  }
}

# Determines if an input vector of NormalizedRect has a size greater than or
# equal to the provided num_faces.
node {
  calculator: "NormalizedRectVectorHasMinSizeCalculator"
  input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks"
  input_side_packet: "num_faces"
  output_stream: "prev_has_enough_faces"
}

# Drops the incoming image if enough faces have already been identified from
# the previous image. Otherwise, passes the incoming image through to trigger a
# new round of face detection.
node {
  calculator: "GateCalculator"
  input_stream: "image"
  input_stream: "DISALLOW:prev_has_enough_faces"
  output_stream: "gated_image"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      empty_packets_as_allow: true
    }
  }
}

# Detects faces.
node {
  calculator: "FaceDetectionShortRangeOnnxTensorRT"
  input_stream: "IMAGE:gated_image"
  output_stream: "DETECTIONS:all_face_detections"
}

# Makes sure there are no more detections than the provided num_faces.
node {
  calculator: "ClipDetectionVectorSizeCalculator"
  input_stream: "all_face_detections"
  output_stream: "face_detections"
  input_side_packet: "num_faces"
}

# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:gated_image"
  output_stream: "SIZE:gated_image_size"
}

# Outputs each element of face_detections at a fake timestamp for the rest of
# the graph to process. Clones the image size packet for each face_detection at
# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp
# for downstream calculators to inform them that all elements in the vector
# have been processed.
node {
  calculator: "BeginLoopDetectionCalculator"
  input_stream: "ITERABLE:face_detections"
  input_stream: "CLONE:gated_image_size"
  output_stream: "ITEM:face_detection"
  output_stream: "CLONE:detections_loop_image_size"
  output_stream: "BATCH_END:detections_loop_end_timestamp"
}

# Calculates region of interest based on face detections, so that it can be
# used to detect landmarks.
node {
  calculator: "FaceDetectionFrontDetectionToRoi"
  input_stream: "DETECTION:face_detection"
  input_stream: "IMAGE_SIZE:detections_loop_image_size"
  output_stream: "ROI:face_rect_from_detection"
}

# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedRectCalculator"
  input_stream: "ITEM:face_rect_from_detection"
  input_stream: "BATCH_END:detections_loop_end_timestamp"
  output_stream: "ITERABLE:face_rects_from_detections"
}

# Performs association between NormalizedRect vector elements from the previous
# image and rects based on face detections from the current image. This
# calculator ensures that the output face_rects vector doesn't contain
# overlapping regions based on the specified min_similarity_threshold.
node {
  calculator: "AssociationNormRectCalculator"
  input_stream: "face_rects_from_detections"
  input_stream: "gated_prev_face_rects_from_landmarks"
  output_stream: "face_rects"
  options: {
    [mediapipe.AssociationCalculatorOptions.ext] {
      min_similarity_threshold: 0.5
    }
  }
}

# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:image"
  output_stream: "SIZE:image_size"
}

# Outputs each element of face_rects at a fake timestamp for the rest of the
# graph to process. Clones image and image size packets for each
# single_face_rect at the fake timestamp. At the end of the loop, outputs the
# BATCH_END timestamp for downstream calculators to inform them that all
# elements in the vector have been processed.
node {
  calculator: "BeginLoopNormalizedRectCalculator"
  input_stream: "ITERABLE:face_rects"
  input_stream: "CLONE:0:image"
  input_stream: "CLONE:1:image_size"
  output_stream: "ITEM:face_rect"
  output_stream: "CLONE:0:landmarks_loop_image"
  output_stream: "CLONE:1:landmarks_loop_image_size"
  output_stream: "BATCH_END:landmarks_loop_end_timestamp"
}

# Detects face landmarks within the specified region of interest of the image.
node {
  calculator: "FaceLandmarkOnnxTensorRT"
  input_stream: "IMAGE:landmarks_loop_image"
  input_stream: "ROI:face_rect"
  input_side_packet: "WITH_ATTENTION:with_attention"
  output_stream: "LANDMARKS:face_landmarks"
}

# Calculates region of interest based on face landmarks, so that it can be
# reused for the subsequent image.
node {
  calculator: "FaceLandmarkLandmarksToRoi"
  input_stream: "LANDMARKS:face_landmarks"
  input_stream: "IMAGE_SIZE:landmarks_loop_image_size"
  output_stream: "ROI:face_rect_from_landmarks"
}

# Collects a set of landmarks for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedLandmarkListVectorCalculator"
  input_stream: "ITEM:face_landmarks"
  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
  output_stream: "ITERABLE:multi_face_landmarks"
}

# Collects a NormalizedRect for each face into a vector. Upon receiving the
# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
# timestamp.
node {
  calculator: "EndLoopNormalizedRectCalculator"
  input_stream: "ITEM:face_rect_from_landmarks"
  input_stream: "BATCH_END:landmarks_loop_end_timestamp"
  output_stream: "ITERABLE:face_rects_from_landmarks"
}

# Caches face rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# face rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump-start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:face_rects_from_landmarks"
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_face_rects_from_landmarks"
}
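
For context, a minimal sketch of driving such a graph from C++, modeled on mediapipe/examples/desktop/demo_run_graph_main.cc (in the bundled demos the .pbtxt is instead passed via --calculator_graph_config_file). The config path, frame size, and side-packet values below are illustrative placeholders, not part of this commit; the stream and side-packet names are the ones declared by FaceLandmarkFrontTensorRT above.

// Sketch: load the graph config, supply the side packets, feed one frame,
// and poll the multi_face_landmarks output.
#include <cstdio>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

absl::Status RunFaceLandmarkFrontTensorRT() {
  std::string config_text;
  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(
      "path/to/face_landmark_front_onnx_tensorrt.pbtxt",  // placeholder path
      &config_text));
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          config_text)));

  auto poller_or = graph.AddOutputStreamPoller("multi_face_landmarks");
  if (!poller_or.ok()) return poller_or.status();
  mediapipe::OutputStreamPoller poller = std::move(poller_or).value();

  MP_RETURN_IF_ERROR(graph.StartRun({
      {"num_faces", mediapipe::MakePacket<int>(1)},
      {"use_prev_landmarks", mediapipe::MakePacket<bool>(true)},
      {"with_attention", mediapipe::MakePacket<bool>(false)},
  }));

  // Feed a single blank SRGB frame; a real caller would loop over camera
  // frames, incrementing the timestamp per frame.
  auto frame = std::make_unique<mediapipe::ImageFrame>(
      mediapipe::ImageFormat::SRGB, /*width=*/640, /*height=*/480);
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "image", mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.CloseInputStream("image"));

  // LANDMARKS packets are absent for frames with no face, so poll until the
  // graph finishes rather than expecting one packet per frame.
  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    const auto& faces =
        packet.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
    std::printf("faces at t=%ld: %zu\n",
                static_cast<long>(packet.Timestamp().Value()), faces.size());
  }
  return graph.WaitUntilDone();
}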
166  mediapipe/modules/face_landmark/face_landmark_onnx_cuda.pbtxt  Normal file
@ -0,0 +1,166 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference
# is executed with onnxruntime on CUDA.)
#
# It is required that "face_landmark.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark.onnx"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
#   node {
#     calculator: "FaceLandmarkOnnxCUDA"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:face_roi"
#     input_side_packet: "WITH_ATTENTION:with_attention"
#     output_stream: "LANDMARKS:face_landmarks"
#   }

type: "FaceLandmarkOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a face is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# Whether to run the face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"

# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
#
# The number of landmarks depends on the WITH_ATTENTION flag. If it's `true`,
# there will be 478 landmarks with refined lips, eyes and irises (10 extra
# landmarks are for irises); otherwise 468 non-refined landmarks are returned.
#
# NOTE: if a face is not present within the given ROI, there will not be an
# output packet in the LANDMARKS stream for this particular timestamp. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:face_landmarks"

# Transforms the input image into a 192x192 tensor.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 192
      output_tensor_height: 192
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}
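
With output_tensor_float_range set to [0.0, 1.0], the calculator scales each 8-bit input pixel $p \in [0, 255]$ linearly into the tensor range (the crop/resampling details are internal to ImageToTensorCalculator):

$$t = \mathrm{min} + (\mathrm{max} - \mathrm{min}) \cdot \frac{p}{255} = \frac{p}{255}.$$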

node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate { cuda {} }
      model_path: "mediapipe/modules/face_landmark/face_landmark.onnx"
    }
  }
}
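
The `cuda` (and, below, `tensorrt`) delegate variants appear to be extensions this commit adds to InferenceCalculatorOptions (stock MediaPipe offers e.g. `gpu` and `xnnpack`). Presumably they select the matching ONNX Runtime execution provider; in ONNX Runtime's own C++ API that selection looks roughly like the sketch below. The calculator's actual wiring is not shown in this diff, so this is an assumption, not the implementation.

// Sketch: an ONNX Runtime session configured with the CUDA execution
// provider, which is what `delegate { cuda {} }` presumably requests.
#include <onnxruntime_cxx_api.h>

Ort::Session MakeCudaSession() {
  static Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "face_landmark");
  Ort::SessionOptions options;
  OrtCUDAProviderOptions cuda_options;  // defaults, e.g. device_id = 0
  options.AppendExecutionProvider_CUDA(cuda_options);
  // Model path taken from the node options above.
  return Ort::Session(
      env, "mediapipe/modules/face_landmark/face_landmark.onnx", options);
}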

# Splits a vector of tensors into landmark tensors and face flag tensor.
node {
  calculator: "SwitchContainer"
  input_side_packet: "ENABLE:with_attention"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "face_flag_tensor"
  options: {
    [mediapipe.SwitchContainerOptions.ext] {
      contained_node: {
        calculator: "SplitTensorVectorCalculator"
        options: {
          [mediapipe.SplitVectorCalculatorOptions.ext] {
            ranges: { begin: 0 end: 1 }
            ranges: { begin: 1 end: 2 }
          }
        }
      }
      contained_node: {
        calculator: "SplitTensorVectorCalculator"
        options: {
          [mediapipe.SplitVectorCalculatorOptions.ext] {
            ranges: { begin: 0 end: 6 }
            ranges: { begin: 6 end: 7 }
          }
        }
      }
    }
  }
}

# Converts the face-flag tensor into a float that represents the confidence
# score of face presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:face_flag_tensor"
  output_stream: "FLOAT:face_presence_score"
  options {
    [mediapipe.TensorsToFloatsCalculatorOptions.ext] {
      activation: SIGMOID
    }
  }
}

# Applies a threshold to the confidence score to determine whether a face is
# present.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:face_presence_score"
  output_stream: "FLAG:face_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}
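
Taken together, the two nodes above reduce to a sign test on the raw face-flag logit $x$, since

$$\sigma(x) = \frac{1}{1 + e^{-x}} > 0.5 \iff x > 0.$$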

# Drops landmark tensors if the face is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:face_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "SwitchContainer"
  input_side_packet: "ENABLE:with_attention"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "LANDMARKS:landmarks"
  options: {
    [mediapipe.SwitchContainerOptions.ext] {
      contained_node: {
        calculator: "TensorsToFaceLandmarks"
      }
      contained_node: {
        calculator: "TensorsToFaceLandmarksWithAttention"
      }
    }
  }
}

# Projects the landmarks from the cropped face image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:face_landmarks"
}
@ -0,0 +1,166 @@
# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference
# is executed with onnxruntime on TensorRT.)
#
# It is required that "face_landmark.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark.onnx"
# path during execution if `with_attention` is not set or set to `false`.
#
# It is required that "face_landmark_with_attention.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark_with_attention.onnx"
# path during execution if `with_attention` is set to `true`.
#
# EXAMPLE:
#   node {
#     calculator: "FaceLandmarkOnnxTensorRT"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:face_roi"
#     input_side_packet: "WITH_ATTENTION:with_attention"
#     output_stream: "LANDMARKS:face_landmarks"
#   }

type: "FaceLandmarkOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a face is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# Whether to run the face mesh model with attention on lips and eyes. (bool)
# Attention provides more accuracy on lips and eye regions as well as iris
# landmarks.
input_side_packet: "WITH_ATTENTION:with_attention"

# 468 or 478 facial landmarks within the given ROI. (NormalizedLandmarkList)
#
# The number of landmarks depends on the WITH_ATTENTION flag. If it's `true`,
# there will be 478 landmarks with refined lips, eyes and irises (10 extra
# landmarks are for irises); otherwise 468 non-refined landmarks are returned.
#
# NOTE: if a face is not present within the given ROI, there will not be an
# output packet in the LANDMARKS stream for this particular timestamp. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:face_landmarks"

# Transforms the input image into a 192x192 tensor.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 192
      output_tensor_height: 192
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}

node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate { tensorrt {} }
      model_path: "mediapipe/modules/face_landmark/face_landmark.onnx"
    }
  }
}

# Splits a vector of tensors into landmark tensors and face flag tensor.
node {
  calculator: "SwitchContainer"
  input_side_packet: "ENABLE:with_attention"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "face_flag_tensor"
  options: {
    [mediapipe.SwitchContainerOptions.ext] {
      contained_node: {
        calculator: "SplitTensorVectorCalculator"
        options: {
          [mediapipe.SplitVectorCalculatorOptions.ext] {
            ranges: { begin: 0 end: 1 }
            ranges: { begin: 1 end: 2 }
          }
        }
      }
      contained_node: {
        calculator: "SplitTensorVectorCalculator"
        options: {
          [mediapipe.SplitVectorCalculatorOptions.ext] {
            ranges: { begin: 0 end: 6 }
            ranges: { begin: 6 end: 7 }
          }
        }
      }
    }
  }
}

# Converts the face-flag tensor into a float that represents the confidence
# score of face presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:face_flag_tensor"
  output_stream: "FLOAT:face_presence_score"
  options {
    [mediapipe.TensorsToFloatsCalculatorOptions.ext] {
      activation: SIGMOID
    }
  }
}

# Applies a threshold to the confidence score to determine whether a face is
# present.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:face_presence_score"
  output_stream: "FLAG:face_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops landmark tensors if the face is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:face_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "SwitchContainer"
  input_side_packet: "ENABLE:with_attention"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "LANDMARKS:landmarks"
  options: {
    [mediapipe.SwitchContainerOptions.ext] {
      contained_node: {
        calculator: "TensorsToFaceLandmarks"
      }
      contained_node: {
        calculator: "TensorsToFaceLandmarksWithAttention"
      }
    }
  }
}

# Projects the landmarks from the cropped face image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:face_landmarks"
}