Pose detection and landmark: add onnxruntime CUDA and TensorRT support

liuyulvv 2022-08-12 09:42:03 +08:00
parent f3bf3ab3e3
commit 008ed46ee0
13 changed files with 1485 additions and 1 deletion


@@ -24,6 +24,46 @@ cc_binary(
],
)
cc_binary(
name = "pose_tracking_cpu_fps",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main_fps",
"//mediapipe/graphs/pose_tracking:pose_tracking_cpu_deps",
],
)
cc_binary(
name = "pose_tracking_onnx_cuda",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main",
"//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps",
],
)
cc_binary(
name = "pose_tracking_onnx_cuda_fps",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main_fps",
"//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps",
],
)
cc_binary(
name = "pose_tracking_onnx_tensorrt",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main",
"//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps",
],
)
cc_binary(
name = "pose_tracking_onnx_tensorrt_fps",
deps = [
"//mediapipe/examples/desktop:demo_run_graph_main_fps",
"//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps",
],
)
# Linux only
cc_binary(
name = "pose_tracking_gpu",


@@ -54,3 +54,37 @@ mediapipe_binary_graph(
output_name = "pose_tracking_cpu.binarypb",
deps = [":pose_tracking_cpu_deps"],
)
cc_library(
name = "pose_tracking_onnx_cuda_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
"//mediapipe/modules/pose_landmark:pose_landmark_onnx_cuda",
],
)
mediapipe_binary_graph(
name = "pose_tracking_onnx_cuda_binary_graph",
graph = "pose_tracking_onnx_cuda.pbtxt",
output_name = "pose_tracking_onnx_cuda.binarypb",
deps = [":pose_tracking_onnx_cuda_deps"],
)
cc_library(
name = "pose_tracking_onnx_tensorrt_deps",
deps = [
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
"//mediapipe/modules/pose_landmark:pose_landmark_onnx_tensorrt",
],
)
mediapipe_binary_graph(
name = "pose_tracking_onnx_tensorrt_binary_graph",
graph = "pose_tracking_onnx_tensorrt.pbtxt",
output_name = "pose_tracking_onnx_tensorrt.binarypb",
deps = [":pose_tracking_onnx_tensorrt"],
)


@@ -14,7 +14,7 @@ node {
output_side_packet: "PACKET:enable_segmentation"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
-      packet { bool_value: true }
+      packet { bool_value: false }
}
}
}


@@ -0,0 +1,63 @@
# MediaPipe graph that performs pose tracking with onnxruntime on CUDA.
# CPU buffer. (ImageFrame)
input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"
# Generates side packet to enable segmentation.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:enable_segmentation"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: false }
}
}
}
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
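
The throttling described in the comment above can be pictured as an in-flight counter gated by the FINISHED back edge. The following standalone C++ sketch only illustrates that idea; the names and structure are ours, not MediaPipe's actual FlowLimiterCalculator internals.

#include <atomic>
#include <iostream>

// Illustrative stand-in for a frame; MediaPipe passes mediapipe::Packet.
struct Frame { int id; };

// Minimal flow limiter: admits a new frame only while fewer than
// max_in_flight frames are being processed downstream; otherwise drops it.
class FlowLimiter {
 public:
  explicit FlowLimiter(int max_in_flight) : max_in_flight_(max_in_flight) {}

  // Forward edge: a new input frame arrives.
  bool TryAdmit(const Frame& /*frame*/) {
    if (in_flight_.load() >= max_in_flight_) return false;  // drop the frame
    in_flight_.fetch_add(1);
    return true;
  }

  // Back edge: downstream signals FINISHED for an admitted frame.
  void MarkFinished() { in_flight_.fetch_sub(1); }

 private:
  const int max_in_flight_;
  std::atomic<int> in_flight_{0};
};

int main() {
  FlowLimiter limiter(/*max_in_flight=*/1);
  std::cout << limiter.TryAdmit({0}) << "\n";  // 1: admitted
  std::cout << limiter.TryAdmit({1}) << "\n";  // 0: dropped, frame 0 in flight
  limiter.MarkFinished();                      // FINISHED arrives for frame 0
  std::cout << limiter.TryAdmit({1}) << "\n";  // 1: admitted
}
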
# Subgraph that detects poses and corresponding landmarks.
node {
calculator: "PoseLandmarkOnnxCUDA"
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
input_stream: "IMAGE:throttled_input_video"
output_stream: "LANDMARKS:pose_landmarks"
output_stream: "SEGMENTATION_MASK:segmentation_mask"
output_stream: "DETECTION:pose_detection"
output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}
# Subgraph that renders pose-landmark annotation onto the input image.
node {
calculator: "PoseRendererCpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:pose_landmarks"
input_stream: "SEGMENTATION_MASK:segmentation_mask"
input_stream: "DETECTION:pose_detection"
input_stream: "ROI:roi_from_landmarks"
output_stream: "IMAGE:output_video"
}


@@ -0,0 +1,63 @@
# MediaPipe graph that performs pose tracking with onnxruntime on TensorRT.
# CPU buffer. (ImageFrame)
input_stream: "input_video"
# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"
# Generates side packet to enable segmentation.
node {
calculator: "ConstantSidePacketCalculator"
output_side_packet: "PACKET:enable_segmentation"
node_options: {
[type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
packet { bool_value: false }
}
}
}
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:output_video"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Subgraph that detects poses and corresponding landmarks.
node {
calculator: "PoseLandmarkOnnxTensorRT"
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
input_stream: "IMAGE:throttled_input_video"
output_stream: "LANDMARKS:pose_landmarks"
output_stream: "SEGMENTATION_MASK:segmentation_mask"
output_stream: "DETECTION:pose_detection"
output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}
# Subgraph that renders pose-landmark annotation onto the input image.
node {
calculator: "PoseRendererCpu"
input_stream: "IMAGE:throttled_input_video"
input_stream: "LANDMARKS:pose_landmarks"
input_stream: "SEGMENTATION_MASK:segmentation_mask"
input_stream: "DETECTION:pose_detection"
input_stream: "ROI:roi_from_landmarks"
output_stream: "IMAGE:output_video"
}


@@ -35,6 +35,34 @@ mediapipe_simple_subgraph(
],
)
mediapipe_simple_subgraph(
name = "pose_detection_onnx_cuda",
graph = "pose_detection_onnx_cuda.pbtxt",
register_as = "PoseDetectionOnnxCUDA",
deps = [
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "pose_detection_onnx_tensorrt",
graph = "pose_detection_onnx_tensorrt.pbtxt",
register_as = "PoseDetectionOnnxTensorRT",
deps = [
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
"//mediapipe/calculators/tensor:tensors_to_detections_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/util:detection_letterbox_removal_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "pose_detection_gpu",
graph = "pose_detection_gpu.pbtxt",


@@ -0,0 +1,157 @@
# MediaPipe graph to detect poses. (CPU input, and inference is executed
# with onnxruntime on CUDA.)
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "PoseDetectionOnnxCUDA"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:pose_detections"
# }
type: "PoseDetectionOnnxCUDA"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Detected poses. (std::vector<Detection>)
# Bounding box in each pose detection is currently set to the bounding box of
# the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the second two for upper body
# only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Transforms the input image into a 224x224 one while keeping the aspect ratio
# (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
output_stream: "TENSORS:input_tensors"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 224
output_tensor_height: 224
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
# If this calculator truly operates on the CPU, then gpu_origin is ignored,
# but if some build switch insists on GPU inference, then we will still need
# to set this.
gpu_origin: TOP_LEFT
}
}
}
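
The output_tensor_float_range above rescales 8-bit pixels into [-1, 1] before inference. The mapping follows directly from the min/max options; this small C++ sketch (the function name is ours) shows the arithmetic:

#include <cstdint>
#include <cstdio>

// Maps a pixel in [0, 255] to [min, max], as configured by
// output_tensor_float_range. For min = -1, max = 1 this is p / 127.5 - 1.
float NormalizePixel(uint8_t p, float min, float max) {
  return min + (max - min) * (static_cast<float>(p) / 255.0f);
}

int main() {
  std::printf("%f %f %f\n",
              NormalizePixel(0, -1.0f, 1.0f),     // -1.0
              NormalizePixel(128, -1.0f, 1.0f),   // ~0.004
              NormalizePixel(255, -1.0f, 1.0f));  //  1.0
}
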
# Runs an ONNX model via onnxruntime (CUDA execution provider) that takes an
# image tensor and outputs a vector of tensors representing, for instance,
# detection boxes/keypoints and scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/pose_detection/pose_detection.onnx"
delegate { cuda {} }
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 5
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 224
input_size_width: 224
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
strides: 16
strides: 32
strides: 32
strides: 32
aspect_ratios: 1.0
fixed_anchor_size: true
}
}
}
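
This anchor spec determines the num_boxes: 2254 that the decoder below expects: each stride yields a square grid of 224/stride cells, and with one aspect ratio plus the usual interpolated-scale anchor, SSD-style generation places two anchors per cell per layer. A quick arithmetic check under that assumption (the two-anchors-per-cell figure is inferred from standard SSD anchor generation, not stated in this file):

#include <cstdio>

int main() {
  const int input_size = 224;
  const int strides[] = {8, 16, 32, 32, 32};
  // One aspect ratio plus one interpolated-scale anchor per cell per layer
  // (assumed from standard SSD anchor generation).
  const int anchors_per_cell = 2;
  int total = 0;
  for (int s : strides) {
    const int grid = input_size / s;  // 28, 14, 7, 7, 7
    total += grid * grid * anchors_per_cell;
  }
  std::printf("%d\n", total);  // 2254, matching num_boxes below
}
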
# Decodes the detection tensors generated by the ONNX model, based on the SSD
# anchors and the specification in the options, into a vector of detections.
# Each detection describes a detected object.
node {
calculator: "TensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 2254
num_coords: 12
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 4
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 224.0
y_scale: 224.0
h_scale: 224.0
w_scale: 224.0
min_score_thresh: 0.5
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:detections"
}
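
The padding removal above is a plain affine rescale of normalized coordinates: subtract the leading padding fraction and divide by the unpadded fraction of that axis. A sketch of the one-dimensional case (the helper name is ours; the same formula is applied per axis):

#include <cstdio>

// Letterbox padding arrives as normalized fractions of the tensor
// (left, top, right, bottom); this handles one axis.
float RemoveLetterbox1D(float coord, float pad_lo, float pad_hi) {
  return (coord - pad_lo) / (1.0f - pad_lo - pad_hi);
}

int main() {
  // A 16:9 frame fit into a square tensor leaves 0.21875 padding on each of
  // the top and bottom; y coordinates must be rescaled accordingly.
  const float pad = 0.21875f;
  std::printf("%f\n", RemoveLetterbox1D(0.5f, pad, pad));  // 0.5: center stays
  std::printf("%f\n", RemoveLetterbox1D(pad, pad, pad));   // 0.0: content edge
}
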


@@ -0,0 +1,157 @@
# MediaPipe graph to detect poses. (CPU input, and inference is executed
# with onnxruntime on TensorRT.)
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# EXAMPLE:
# node {
# calculator: "PoseDetectionOnnxTensorRT"
# input_stream: "IMAGE:image"
# output_stream: "DETECTIONS:pose_detections"
# }
type: "PoseDetectionOnnxTensorRT"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Detected poses. (std::vector<Detection>)
# Bounding box in each pose detection is currently set to the bounding box of
# the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the second two for upper body
# only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"
# Transforms the input image into a 224x224 one while keeping the aspect ratio
# (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
output_stream: "TENSORS:input_tensors"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 224
output_tensor_height: 224
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
# If this calculator truly operates on the CPU, then gpu_origin is ignored,
# but if some build switch insists on GPU inference, then we will still need
# to set this.
gpu_origin: TOP_LEFT
}
}
}
# Runs an ONNX model via onnxruntime (TensorRT execution provider) that takes
# an image tensor and outputs a vector of tensors representing, for instance,
# detection boxes/keypoints and scores.
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:detection_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/pose_detection/pose_detection.onnx"
delegate { tensorrt {} }
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
options: {
[mediapipe.SsdAnchorsCalculatorOptions.ext] {
num_layers: 5
min_scale: 0.1484375
max_scale: 0.75
input_size_height: 224
input_size_width: 224
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 8
strides: 16
strides: 32
strides: 32
strides: 32
aspect_ratios: 1.0
fixed_anchor_size: true
}
}
}
# Decodes the detection tensors generated by the ONNX model, based on the SSD
# anchors and the specification in the options, into a vector of detections.
# Each detection describes a detected object.
node {
calculator: "TensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:unfiltered_detections"
options: {
[mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
num_classes: 1
num_boxes: 2254
num_coords: 12
box_coord_offset: 0
keypoint_coord_offset: 4
num_keypoints: 4
num_values_per_keypoint: 2
sigmoid_score: true
score_clipping_thresh: 100.0
reverse_output_order: true
x_scale: 224.0
y_scale: 224.0
h_scale: 224.0
w_scale: 224.0
min_score_thresh: 0.5
}
}
}
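
With sigmoid_score: true and score_clipping_thresh: 100 above, the raw logit is clamped to +/-100 before the sigmoid, which keeps exp() well-behaved for extreme logits. A minimal sketch of that scoring step (illustrative, not the calculator's code):

#include <algorithm>
#include <cmath>
#include <cstdio>

float DetectionScore(float raw_logit, float clip = 100.0f) {
  const float clipped = std::min(std::max(raw_logit, -clip), clip);
  return 1.0f / (1.0f + std::exp(-clipped));  // sigmoid
}

int main() {
  std::printf("%f\n", DetectionScore(0.0f));     // 0.5
  std::printf("%f\n", DetectionScore(2.0f));     // ~0.88
  std::printf("%f\n", DetectionScore(1000.0f));  // clamped: ~1.0, no overflow
}
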
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "unfiltered_detections"
output_stream: "filtered_detections"
options: {
[mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
min_suppression_threshold: 0.3
overlap_type: INTERSECTION_OVER_UNION
algorithm: WEIGHTED
}
}
}
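
overlap_type: INTERSECTION_OVER_UNION above means candidate pairs are compared by IoU before suppression. A standalone sketch of that measure on axis-aligned normalized boxes (the struct layout is ours):

#include <algorithm>
#include <cstdio>

struct Box { float xmin, ymin, xmax, ymax; };

float Area(const Box& b) {
  return std::max(0.0f, b.xmax - b.xmin) * std::max(0.0f, b.ymax - b.ymin);
}

float IoU(const Box& a, const Box& b) {
  const Box inter{std::max(a.xmin, b.xmin), std::max(a.ymin, b.ymin),
                  std::min(a.xmax, b.xmax), std::min(a.ymax, b.ymax)};
  const float i = Area(inter);
  const float u = Area(a) + Area(b) - i;
  return u > 0.0f ? i / u : 0.0f;
}

int main() {
  Box a{0.0f, 0.0f, 0.5f, 0.5f}, b{0.25f, 0.25f, 0.75f, 0.75f};
  std::printf("%f\n", IoU(a, b));  // ~0.1429: below 0.3, so neither suppressed
}
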
# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
calculator: "DetectionLetterboxRemovalCalculator"
input_stream: "DETECTIONS:filtered_detections"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "DETECTIONS:detections"
}


@@ -61,6 +61,35 @@ mediapipe_simple_subgraph(
],
)
mediapipe_simple_subgraph(
name = "pose_landmark_by_roi_onnx_cuda",
graph = "pose_landmark_by_roi_onnx_cuda.pbtxt",
register_as = "PoseLandmarkByRoiOnnxCUDA",
deps = [
":pose_landmark_model_loader",
":pose_landmarks_and_segmentation_inverse_projection",
":tensors_to_pose_landmarks_and_segmentation",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
],
)
mediapipe_simple_subgraph(
name = "pose_landmark_by_roi_onnx_tensorrt",
graph = "pose_landmark_by_roi_onnx_tensorrt.pbtxt",
register_as = "PoseLandmarkByRoiOnnxTensorRT",
deps = [
":pose_landmark_model_loader",
":pose_landmarks_and_segmentation_inverse_projection",
":tensors_to_pose_landmarks_and_segmentation",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/tensor:image_to_tensor_calculator",
"//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
],
)
mediapipe_simple_subgraph(
name = "tensors_to_pose_landmarks_and_segmentation",
graph = "tensors_to_pose_landmarks_and_segmentation.pbtxt",
@@ -159,10 +188,57 @@ mediapipe_simple_subgraph(
],
)
mediapipe_simple_subgraph(
name = "pose_landmark_onnx_cuda",
graph = "pose_landmark_onnx_cuda.pbtxt",
register_as = "PoseLandmarkOnnxCUDA",
deps = [
":pose_detection_to_roi",
":pose_landmark_by_roi_onnx_cuda",
":pose_landmark_filtering",
":pose_landmarks_to_roi",
":pose_segmentation_filtering",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:merge_calculator",
"//mediapipe/calculators/core:packet_presence_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/modules/pose_detection:pose_detection_onnx_cuda",
],
)
mediapipe_simple_subgraph(
name = "pose_landmark_onnx_tensorrt",
graph = "pose_landmark_onnx_tensorrt.pbtxt",
register_as = "PoseLandmarkOnnxTensorRT",
deps = [
":pose_detection_to_roi",
":pose_landmark_by_roi_onnx_tensorrt",
":pose_landmark_filtering",
":pose_landmarks_to_roi",
":pose_segmentation_filtering",
"//mediapipe/calculators/core:constant_side_packet_calculator",
"//mediapipe/calculators/core:gate_calculator",
"//mediapipe/calculators/core:merge_calculator",
"//mediapipe/calculators/core:packet_presence_calculator",
"//mediapipe/calculators/core:previous_loopback_calculator",
"//mediapipe/calculators/core:split_vector_calculator",
"//mediapipe/calculators/image:image_properties_calculator",
"//mediapipe/calculators/util:from_image_calculator",
"//mediapipe/modules/pose_detection:pose_detection_onnx_tensorrt",
],
)
exports_files(
srcs = [
"pose_landmark_full.onnx",
"pose_landmark_full.tflite",
"pose_landmark_heavy.onnx",
"pose_landmark_heavy.tflite",
"pose_landmark_lite.onnx",
"pose_landmark_lite.tflite",
],
)


@@ -0,0 +1,165 @@
# MediaPipe graph to detect/predict pose landmarks and optionally segmentation
# within an ROI. (CPU input, and inference is executed with onnxruntime on
# CUDA.)
#
# It is required that "pose_landmark_lite.onnx" or
# "pose_landmark_full.onnx" or "pose_landmark_heavy.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
# node {
# calculator: "PoseLandmarkByRoiOnnxCUDA"
# input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_stream: "IMAGE:image"
# input_stream: "ROI:roi"
# output_stream: "LANDMARKS:landmarks"
# output_stream: "SEGMENTATION_MASK:segmentation_mask"
# }
type: "PoseLandmarkByRoiOnnxCUDA"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg) and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: If a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
# Pose world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:world_landmarks"
# Segmentation mask on CPU in ImageFormat::VEC32F1. (Image)
output_stream: "SEGMENTATION_MASK:segmentation_mask"
# Retrieves the image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "SIZE:image_size"
}
# Crops and transforms the specified ROI in the input image into an image patch
# represented as a tensor of dimension expected by the corresponding ML model,
# while maintaining the aspect ratio of the ROI (which can be different from
# that of the image patch). Therefore, there can be letterboxing around the ROI
# in the generated tensor representation.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "MATRIX:transformation_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 256
output_tensor_height: 256
keep_aspect_ratio: true
output_tensor_float_range {
min: 0.0
max: 1.0
}
}
}
}
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
delegate { cuda {} }
}
}
}
# Decodes the tensors into the corresponding landmark and segmentation mask
# representation.
node {
calculator: "TensorsToPoseLandmarksAndSegmentation"
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
input_stream: "TENSORS:output_tensors"
output_stream: "LANDMARKS:roi_landmarks"
output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
}
# Projects the landmarks and segmentation mask in the local coordinates of the
# (potentially letterboxed) ROI back to the global coordinates of the full input
# image.
node {
calculator: "PoseLandmarksAndSegmentationInverseProjection"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "NORM_RECT:roi"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
input_stream: "MATRIX:transformation_matrix"
input_stream: "LANDMARKS:roi_landmarks"
input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
output_stream: "LANDMARKS:landmarks"
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
output_stream: "WORLD_LANDMARKS:world_landmarks"
output_stream: "SEGMENTATION_MASK:segmentation_mask"
}
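
Conceptually, the inverse projection maps each landmark from crop-local coordinates back into full-image coordinates: undo the letterbox padding, then apply the crop-to-image transform (translation, scale, and rotation about the ROI center). A simplified sketch for an unrotated ROI, with our own struct names (the real subgraph also handles rotation and the segmentation mask):

#include <cstdio>

struct NormLandmark { float x, y; };  // normalized to [0, 1]
struct NormRect { float x_center, y_center, width, height; };  // axis-aligned

// Maps a landmark expressed in crop coordinates back to image coordinates,
// assuming the ROI is not rotated.
NormLandmark ProjectToImage(NormLandmark lm, const NormRect& roi) {
  return {roi.x_center + (lm.x - 0.5f) * roi.width,
          roi.y_center + (lm.y - 0.5f) * roi.height};
}

int main() {
  NormRect roi{0.5f, 0.6f, 0.4f, 0.8f};
  NormLandmark out = ProjectToImage({0.5f, 0.5f}, roi);
  std::printf("%f %f\n", out.x, out.y);  // 0.5 0.6: crop center -> ROI center
}
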


@@ -0,0 +1,165 @@
# MediaPipe graph to detect/predict pose landmarks and optionally segmentation
# within an ROI. (CPU input, and inference is executed with onnxruntime on
# TensorRT.)
#
# It is required that "pose_landmark_lite.onnx" or
# "pose_landmark_full.onnx" or "pose_landmark_heavy.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
# node {
# calculator: "PoseLandmarkByRoiOnnxTensorRT"
# input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_stream: "IMAGE:image"
# input_stream: "ROI:roi"
# output_stream: "LANDMARKS:landmarks"
# output_stream: "SEGMENTATION_MASK:segmentation_mask"
# }
type: "PoseLandmarkByRoiOnnxTensorRT"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"
# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg) and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: If a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
# Pose world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:world_landmarks"
# Segmentation mask on CPU in ImageFormat::VEC32F1. (Image)
output_stream: "SEGMENTATION_MASK:segmentation_mask"
# Retrieves the image size.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "SIZE:image_size"
}
# Crops and transforms the specified ROI in the input image into an image patch
# represented as a tensor of dimension expected by the corresponding ML model,
# while maintaining the aspect ratio of the ROI (which can be different from
# that of the image patch). Therefore, there can be letterboxing around the ROI
# in the generated tensor representation.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:image"
input_stream: "NORM_RECT:roi"
output_stream: "TENSORS:input_tensors"
output_stream: "LETTERBOX_PADDING:letterbox_padding"
output_stream: "MATRIX:transformation_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 256
output_tensor_height: 256
keep_aspect_ratio: true
output_tensor_float_range {
min: 0.0
max: 1.0
}
}
}
}
node {
calculator: "InferenceCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "TENSORS:output_tensors"
options: {
[mediapipe.InferenceCalculatorOptions.ext] {
model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
delegate { tensorrt {} }
}
}
}
# Decodes the tensors into the corresponding landmark and segmentation mask
# representation.
node {
calculator: "TensorsToPoseLandmarksAndSegmentation"
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
input_stream: "TENSORS:output_tensors"
output_stream: "LANDMARKS:roi_landmarks"
output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
}
# Projects the landmarks and segmentation mask in the local coordinates of the
# (potentially letterboxed) ROI back to the global coordinates of the full input
# image.
node {
calculator: "PoseLandmarksAndSegmentationInverseProjection"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "NORM_RECT:roi"
input_stream: "LETTERBOX_PADDING:letterbox_padding"
input_stream: "MATRIX:transformation_matrix"
input_stream: "LANDMARKS:roi_landmarks"
input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
output_stream: "LANDMARKS:landmarks"
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
output_stream: "WORLD_LANDMARKS:world_landmarks"
output_stream: "SEGMENTATION_MASK:segmentation_mask"
}


@@ -0,0 +1,268 @@
# MediaPipe graph to detect/predict pose landmarks. (CPU input, and inference
# is executed with onnxruntime on CUDA.) This graph tries to skip pose
# detection as much as possible by using previously detected/predicted
# landmarks for new images.
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# It is required that "pose_landmark_lite.onnx" or
# "pose_landmark_full.onnx" or "pose_landmark_heavy.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
# node {
# calculator: "PoseLandmarkOnnxCUDA"
# input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# input_stream: "IMAGE:image"
# output_stream: "LANDMARKS:pose_landmarks"
# output_stream: "SEGMENTATION_MASK:segmentation_mask"
# }
type: "PoseLandmarkOnnxCUDA"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Pose landmarks. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg), and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:pose_landmarks"
# Pose world landmarks. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
output_stream: "SEGMENTATION_MASK:segmentation_mask"
# Extra outputs (for debugging, for instance).
# Detected poses. (Detection)
output_stream: "DETECTION:pose_detection"
# Regions of interest calculated based on landmarks. (NormalizedRect)
output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
# Regions of interest calculated based on pose detections. (NormalizedRect)
output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_pose_rect_from_landmarks"
output_stream: "gated_prev_pose_rect_from_landmarks"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
}
# Checks if there's previous pose rect calculated from landmarks.
node: {
calculator: "PacketPresenceCalculator"
input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
}
# Calculates size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "SIZE:image_size"
}
# Drops the incoming image if the pose has already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of pose detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "image_size"
input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
output_stream: "image_for_pose_detection"
output_stream: "image_size_for_pose_detection"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
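
Together, the two gates above implement the skip-detection-while-tracking policy: the heavy detector only runs when no ROI was carried over from the previous frame. A condensed C++ sketch of the per-frame decision (plain illustration, not calculator code):

#include <cstdio>
#include <optional>

struct Roi { float x_center, y_center, width, height; };

// The routing decision made by the gates: reuse the ROI fed back from the
// previous frame's landmarks, or fall back to running the pose detector.
Roi ChooseRoi(const std::optional<Roi>& prev_roi_from_landmarks,
              Roi (*run_detector)()) {
  if (prev_roi_from_landmarks) {
    return *prev_roi_from_landmarks;  // tracking: detector is skipped
  }
  return run_detector();  // (re)detection: PoseDetectionOnnxCUDA runs
}

static Roi FakeDetector() { return {0.5f, 0.5f, 0.6f, 0.6f}; }

int main() {
  std::optional<Roi> prev;                  // first frame: nothing cached yet
  Roi r1 = ChooseRoi(prev, &FakeDetector);  // detector runs
  prev = r1;                                // fed back via PreviousLoopback
  Roi r2 = ChooseRoi(prev, &FakeDetector);  // detector skipped
  std::printf("%f %f\n", r1.width, r2.width);
}
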
# Detects poses.
node {
calculator: "PoseDetectionOnnxCUDA"
input_stream: "IMAGE:image_for_pose_detection"
output_stream: "DETECTIONS:pose_detections"
}
# Gets the very first detection from "pose_detections" vector.
node {
calculator: "SplitDetectionVectorCalculator"
input_stream: "pose_detections"
output_stream: "pose_detection"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Calculates region of interest based on pose detection, so that it can be
# used to detect landmarks.
node {
calculator: "PoseDetectionToRoi"
input_stream: "DETECTION:pose_detection"
input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
output_stream: "ROI:pose_rect_from_detection"
}
# Selects either pose rect (or ROI) calculated from detection or from previously
# detected landmarks if available (in this case, calculation of pose rect from
# detection is skipped).
node {
calculator: "MergeCalculator"
input_stream: "pose_rect_from_detection"
input_stream: "gated_prev_pose_rect_from_landmarks"
output_stream: "pose_rect"
}
# Detects pose landmarks within specified region of interest of the image.
node {
calculator: "PoseLandmarkByRoiOnnxCUDA"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
input_stream: "IMAGE:image"
input_stream: "ROI:pose_rect"
output_stream: "LANDMARKS:unfiltered_pose_landmarks"
output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
}
# Smoothes landmarks to reduce jitter.
node {
calculator: "PoseLandmarkFiltering"
input_side_packet: "ENABLE:smooth_landmarks"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
}
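
PoseLandmarkFiltering suppresses temporal jitter. MediaPipe's actual filters are velocity-adaptive (one-euro style), so the following exponential-moving-average sketch is only the simplest version of the idea; the class name and alpha value are illustrative:

#include <cstdio>

struct Point { float x, y; };

// Exponential moving average across frames. Smaller alpha is smoother but
// laggier; adaptive filters tune this trade-off to landmark velocity.
class LandmarkSmoother {
 public:
  explicit LandmarkSmoother(float alpha) : alpha_(alpha) {}
  Point Apply(Point p) {
    if (!has_prev_) { prev_ = p; has_prev_ = true; return p; }
    prev_ = {alpha_ * p.x + (1.0f - alpha_) * prev_.x,
             alpha_ * p.y + (1.0f - alpha_) * prev_.y};
    return prev_;
  }

 private:
  const float alpha_;
  Point prev_{};
  bool has_prev_ = false;
};

int main() {
  LandmarkSmoother smoother(/*alpha=*/0.5f);
  smoother.Apply({0.0f, 0.0f});
  Point p = smoother.Apply({1.0f, 1.0f});  // a jittery jump is halved
  std::printf("%f %f\n", p.x, p.y);        // 0.5 0.5
}
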
# Calculates region of interest based on the auxiliary landmarks, to be used in
# the subsequent image.
node {
calculator: "PoseLandmarksToRoi"
input_stream: "LANDMARKS:auxiliary_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:pose_rect_from_landmarks"
}
# Caches pose rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# pose rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:pose_rect_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
}
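
PreviousLoopbackCalculator behaves like a one-frame delay line on the LOOP input, keyed to MAIN timestamps. A minimal sketch of that behavior (ours; it ignores the timestamp-bound subtleties the comment mentions):

#include <cstdio>
#include <optional>

template <typename T>
class PreviousLoopback {
 public:
  // Called once per MAIN (image) timestamp: returns what was cached from the
  // previous frame, then stores this frame's LOOP value for the next call.
  std::optional<T> Step(const T& loop_value) {
    std::optional<T> prev = cached_;
    cached_ = loop_value;
    return prev;  // empty on the very first frame
  }

 private:
  std::optional<T> cached_;
};

int main() {
  PreviousLoopback<int> loopback;
  auto a = loopback.Step(10);  // frame 0: no previous value
  auto b = loopback.Step(20);  // frame 1: sees frame 0's value
  std::printf("%d %d\n", a.value_or(-1), b.value_or(-1));  // -1 10
}
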
# Smoothes segmentation to reduce jitter.
node {
calculator: "PoseSegmentationFiltering"
input_side_packet: "ENABLE:smooth_segmentation"
input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
}
# Converts the incoming segmentation mask represented as an Image into the
# corresponding ImageFrame type.
node: {
calculator: "FromImageCalculator"
input_stream: "IMAGE:filtered_segmentation_mask"
output_stream: "IMAGE_CPU:segmentation_mask"
}


@@ -0,0 +1,268 @@
# MediaPipe graph to detect/predict pose landmarks. (CPU input, and inference
# is executed with onnxruntime on TensorRT.) This graph tries to skip pose
# detection as much as possible by using previously detected/predicted
# landmarks for new images.
#
# It is required that "pose_detection.tflite" is available at
# "mediapipe/modules/pose_detection/pose_detection.tflite"
# path during execution.
#
# It is required that "pose_landmark_lite.tflite" or
# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
# node {
# calculator: "PoseLandmarkOnnxTensorRT"
# input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# input_stream: "IMAGE:image"
# output_stream: "LANDMARKS:pose_landmarks"
# output_stream: "SEGMENTATION_MASK:segmentation_mask"
# }
type: "PoseLandmarkOnnxTensorRT"
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
# Pose landmarks. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg), and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:pose_landmarks"
# Pose world landmarks. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
output_stream: "SEGMENTATION_MASK:segmentation_mask"
# Extra outputs (for debugging, for instance).
# Detected poses. (Detection)
output_stream: "DETECTION:pose_detection"
# Regions of interest calculated based on landmarks. (NormalizedRect)
output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
# Regions of interest calculated based on pose detections. (NormalizedRect)
output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"
# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
calculator: "GateCalculator"
input_side_packet: "ALLOW:use_prev_landmarks"
input_stream: "prev_pose_rect_from_landmarks"
output_stream: "gated_prev_pose_rect_from_landmarks"
options: {
[mediapipe.GateCalculatorOptions.ext] {
allow: true
}
}
}
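
The allow: true default above makes the USE_PREV_LANDMARKS side packet optional: when it is absent the gate stays open, and tracking is only disabled when the packet is explicitly false. A one-line sketch of that rule (the function name is ours):

#include <cstdio>
#include <optional>

bool PassPrevRoi(const std::optional<bool>& use_prev_landmarks) {
  return use_prev_landmarks.value_or(true);  // absent => allow (default true)
}

int main() {
  std::printf("%d %d %d\n",
              PassPrevRoi(std::nullopt),  // 1
              PassPrevRoi(true),          // 1
              PassPrevRoi(false));        // 0
}
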
# Checks if there's previous pose rect calculated from landmarks.
node: {
calculator: "PacketPresenceCalculator"
input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
}
# Calculates size of the image.
node {
calculator: "ImagePropertiesCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "SIZE:image_size"
}
# Drops the incoming image if the pose has already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of pose detection.
node {
calculator: "GateCalculator"
input_stream: "image"
input_stream: "image_size"
input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
output_stream: "image_for_pose_detection"
output_stream: "image_size_for_pose_detection"
options: {
[mediapipe.GateCalculatorOptions.ext] {
empty_packets_as_allow: true
}
}
}
# Detects poses.
node {
calculator: "PoseDetectionOnnxTensorRT"
input_stream: "IMAGE:image_for_pose_detection"
output_stream: "DETECTIONS:pose_detections"
}
# Gets the very first detection from "pose_detections" vector.
node {
calculator: "SplitDetectionVectorCalculator"
input_stream: "pose_detections"
output_stream: "pose_detection"
options: {
[mediapipe.SplitVectorCalculatorOptions.ext] {
ranges: { begin: 0 end: 1 }
element_only: true
}
}
}
# Calculates region of interest based on pose detection, so that it can be
# used to detect landmarks.
node {
calculator: "PoseDetectionToRoi"
input_stream: "DETECTION:pose_detection"
input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
output_stream: "ROI:pose_rect_from_detection"
}
# Selects either pose rect (or ROI) calculated from detection or from previously
# detected landmarks if available (in this case, calculation of pose rect from
# detection is skipped).
node {
calculator: "MergeCalculator"
input_stream: "pose_rect_from_detection"
input_stream: "gated_prev_pose_rect_from_landmarks"
output_stream: "pose_rect"
}
# Detects pose landmarks within specified region of interest of the image.
node {
calculator: "PoseLandmarkByRoiOnnxTensorRT"
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
input_stream: "IMAGE:image"
input_stream: "ROI:pose_rect"
output_stream: "LANDMARKS:unfiltered_pose_landmarks"
output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
}
# Smoothes landmarks to reduce jitter.
node {
calculator: "PoseLandmarkFiltering"
input_side_packet: "ENABLE:smooth_landmarks"
input_stream: "IMAGE_SIZE:image_size"
input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
}
# Calculates region of interest based on the auxiliary landmarks, to be used in
# the subsequent image.
node {
calculator: "PoseLandmarksToRoi"
input_stream: "LANDMARKS:auxiliary_landmarks"
input_stream: "IMAGE_SIZE:image_size"
output_stream: "ROI:pose_rect_from_landmarks"
}
# Caches pose rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# pose rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:image"
input_stream: "LOOP:pose_rect_from_landmarks"
input_stream_info: {
tag_index: "LOOP"
back_edge: true
}
output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
}
# Smoothes segmentation to reduce jitter.
node {
calculator: "PoseSegmentationFiltering"
input_side_packet: "ENABLE:smooth_segmentation"
input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
}
# Converts the incoming segmentation mask represented as an Image into the
# corresponding ImageFrame type.
node: {
calculator: "FromImageCalculator"
input_stream: "IMAGE:filtered_segmentation_mask"
output_stream: "IMAGE_CPU:segmentation_mask"
}