pose detection and landmark: support onnxruntime CUDA and TensorRT
This commit is contained in:
parent f3bf3ab3e3
commit 008ed46ee0
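Note: the new pose_tracking_onnx_cuda / pose_tracking_onnx_tensorrt binaries below reuse the existing demo_run_graph_main driver. For orientation only, that driver reduces to roughly the following MediaPipe C++ pattern; this sketch is not part of the commit, and GrabFrame() is a placeholder for the demo's OpenCV capture loop:

    #include <cstdint>
    #include <memory>
    #include <string>

    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/framework/formats/image_frame.h"
    #include "mediapipe/framework/port/parse_text_proto.h"
    #include "mediapipe/framework/port/status.h"

    // Placeholder for the demo's camera/video capture (illustrative only).
    std::unique_ptr<mediapipe::ImageFrame> GrabFrame();

    // Runs a graph such as pose_tracking_onnx_cuda.pbtxt, whose text is
    // passed in via `graph_text`.
    absl::Status RunPoseTracking(const std::string& graph_text) {
      auto config =
          mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
              graph_text);
      mediapipe::CalculatorGraph graph;
      MP_RETURN_IF_ERROR(graph.Initialize(config));

      // Poll rendered frames from the graph's "output_video" stream.
      ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                       graph.AddOutputStreamPoller("output_video"));
      MP_RETURN_IF_ERROR(graph.StartRun({}));

      int64_t ts = 0;
      while (auto frame = GrabFrame()) {
        MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
            "input_video",
            mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(ts++))));
        mediapipe::Packet packet;
        if (!poller.Next(&packet)) break;
        const auto& output = packet.Get<mediapipe::ImageFrame>();
        (void)output;  // displayed / encoded in the real demo
      }
      MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));
      return graph.WaitUntilDone();
    }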
@@ -24,6 +24,46 @@ cc_binary(
    ],
)

cc_binary(
    name = "pose_tracking_cpu_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/pose_tracking:pose_tracking_cpu_deps",
    ],
)

cc_binary(
    name = "pose_tracking_onnx_cuda",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps",
    ],
)

cc_binary(
    name = "pose_tracking_onnx_cuda_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps",
    ],
)

cc_binary(
    name = "pose_tracking_onnx_tensorrt",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps",
    ],
)

cc_binary(
    name = "pose_tracking_onnx_tensorrt_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps",
    ],
)

# Linux only
cc_binary(
    name = "pose_tracking_gpu",
@@ -54,3 +54,37 @@ mediapipe_binary_graph(
    output_name = "pose_tracking_cpu.binarypb",
    deps = [":pose_tracking_cpu_deps"],
)

cc_library(
    name = "pose_tracking_onnx_cuda_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_cuda",
    ],
)

mediapipe_binary_graph(
    name = "pose_tracking_onnx_cuda_binary_graph",
    graph = "pose_tracking_onnx_cuda.pbtxt",
    output_name = "pose_tracking_onnx_cuda.binarypb",
    deps = [":pose_tracking_onnx_cuda_deps"],
)

cc_library(
    name = "pose_tracking_onnx_tensorrt_deps",
    deps = [
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu",
        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_tensorrt",
    ],
)

mediapipe_binary_graph(
    name = "pose_tracking_onnx_tensorrt_binary_graph",
    graph = "pose_tracking_onnx_tensorrt.pbtxt",
    output_name = "pose_tracking_onnx_tensorrt.binarypb",
    deps = [":pose_tracking_onnx_tensorrt_deps"],
)
@@ -14,7 +14,7 @@ node {
   output_side_packet: "PACKET:enable_segmentation"
   node_options: {
     [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
-      packet { bool_value: true }
+      packet { bool_value: false }
     }
   }
 }
mediapipe/graphs/pose_tracking/pose_tracking_onnx_cuda.pbtxt (new file, 63 lines)

@@ -0,0 +1,63 @@
# MediaPipe graph that performs pose tracking with onnxruntime on CUDA.

# CPU buffer. (ImageFrame)
input_stream: "input_video"

# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"

# Generates side packet to enable segmentation.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { bool_value: false }
    }
  }
}

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkOnnxCUDA"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}

# Subgraph that renders pose-landmark annotation onto the input image.
node {
  calculator: "PoseRendererCpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "SEGMENTATION_MASK:segmentation_mask"
  input_stream: "DETECTION:pose_detection"
  input_stream: "ROI:roi_from_landmarks"
  output_stream: "IMAGE:output_video"
}
mediapipe/graphs/pose_tracking/pose_tracking_onnx_tensorrt.pbtxt (new file, 63 lines)

@@ -0,0 +1,63 @@
# MediaPipe graph that performs pose tracking with onnxruntime on TensorRT.

# CPU buffer. (ImageFrame)
input_stream: "input_video"

# Output image with rendered results. (ImageFrame)
output_stream: "output_video"
# Pose landmarks. (NormalizedLandmarkList)
output_stream: "pose_landmarks"

# Generates side packet to enable segmentation.
node {
  calculator: "ConstantSidePacketCalculator"
  output_side_packet: "PACKET:enable_segmentation"
  node_options: {
    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
      packet { bool_value: false }
    }
  }
}

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most part of the graph to
# 1. This prevents the downstream nodes from queuing up incoming images and data
# excessively, which leads to increased latency and memory usage, unwanted in
# real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
}

# Subgraph that detects poses and corresponding landmarks.
node {
  calculator: "PoseLandmarkOnnxTensorRT"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "DETECTION:pose_detection"
  output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
}

# Subgraph that renders pose-landmark annotation onto the input image.
node {
  calculator: "PoseRendererCpu"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "LANDMARKS:pose_landmarks"
  input_stream: "SEGMENTATION_MASK:segmentation_mask"
  input_stream: "DETECTION:pose_detection"
  input_stream: "ROI:roi_from_landmarks"
  output_stream: "IMAGE:output_video"
}
@@ -35,6 +35,34 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "pose_detection_onnx_cuda",
    graph = "pose_detection_onnx_cuda.pbtxt",
    register_as = "PoseDetectionOnnxCUDA",
    deps = [
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "pose_detection_onnx_tensorrt",
    graph = "pose_detection_onnx_tensorrt.pbtxt",
    register_as = "PoseDetectionOnnxTensorRT",
    deps = [
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
        "//mediapipe/calculators/tensor:tensors_to_detections_calculator",
        "//mediapipe/calculators/tflite:ssd_anchors_calculator",
        "//mediapipe/calculators/util:detection_letterbox_removal_calculator",
        "//mediapipe/calculators/util:non_max_suppression_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "pose_detection_gpu",
    graph = "pose_detection_gpu.pbtxt",
mediapipe/modules/pose_detection/pose_detection_onnx_cuda.pbtxt (new file, 157 lines)

@@ -0,0 +1,157 @@
# MediaPipe graph to detect poses. (CPU input, and inference is executed with
# onnxruntime on CUDA.)
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "PoseDetectionOnnxCUDA"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:pose_detections"
#   }

type: "PoseDetectionOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Detected poses. (std::vector<Detection>)
# Bounding box in each pose detection is currently set to the bounding box of
# the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the second two for upper body
# only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Transforms the input image into a 224x224 one while keeping the aspect ratio
# (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      # If this calculator truly operates in the CPU, then gpu_origin is
      # ignored, but if some build switch insists on GPU inference, then we will
      # still need to set this.
      gpu_origin: TOP_LEFT
    }
  }
}

# Runs the pose detection ONNX model with the onnxruntime CUDA delegate. The
# model takes an image tensor and outputs a vector of tensors representing,
# for instance, detection boxes/keypoints and scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_detection/pose_detection.onnx"
      delegate { cuda {} }
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
}

# Decodes the detection tensors generated by the detection model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 2254
      num_coords: 12
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 4
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      x_scale: 224.0
      y_scale: 224.0
      h_scale: 224.0
      w_scale: 224.0
      min_score_thresh: 0.5
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
}

# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
  calculator: "DetectionLetterboxRemovalCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "DETECTIONS:detections"
}
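For orientation, the TensorsToDetectionsCalculator options above (and the identical ones in the TensorRT variant below) amount to roughly the following per-box decode. This is a paraphrase of the calculator's behavior under these options, not code from this commit:

    #include <algorithm>
    #include <cmath>

    struct Box { float xmin, ymin, xmax, ymax; };

    // Decodes one raw box given the anchor center (ax, ay) produced by
    // SsdAnchorsCalculator. With fixed_anchor_size: true the anchor width and
    // height are 1.0, and with reverse_output_order: true the model emits
    // (x, y, w, h) rather than (y, x, h, w). raw[4..11] hold the 4 keypoints.
    Box DecodeBox(const float raw[12], float ax, float ay) {
      const float x_center = raw[0] / 224.0f + ax;  // x_scale: 224
      const float y_center = raw[1] / 224.0f + ay;  // y_scale: 224
      const float w = raw[2] / 224.0f;              // w_scale: 224
      const float h = raw[3] / 224.0f;              // h_scale: 224
      return {x_center - w / 2.0f, y_center - h / 2.0f,
              x_center + w / 2.0f, y_center + h / 2.0f};
    }

    // sigmoid_score: true with score_clipping_thresh: 100.0.
    float DecodeScore(float raw_score) {
      raw_score = std::clamp(raw_score, -100.0f, 100.0f);
      return 1.0f / (1.0f + std::exp(-raw_score));
    }

Boxes whose decoded score falls below min_score_thresh (0.5) are dropped before non-max suppression.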
mediapipe/modules/pose_detection/pose_detection_onnx_tensorrt.pbtxt (new file, 157 lines)

@@ -0,0 +1,157 @@
# MediaPipe graph to detect poses. (CPU input, and inference is executed with
# onnxruntime on TensorRT.)
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "PoseDetectionOnnxTensorRT"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:pose_detections"
#   }

type: "PoseDetectionOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Detected poses. (std::vector<Detection>)
# Bounding box in each pose detection is currently set to the bounding box of
# the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the second two for upper body
# only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Transforms the input image into a 224x224 one while keeping the aspect ratio
# (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      # If this calculator truly operates in the CPU, then gpu_origin is
      # ignored, but if some build switch insists on GPU inference, then we will
      # still need to set this.
      gpu_origin: TOP_LEFT
    }
  }
}

# Runs the pose detection ONNX model with the onnxruntime TensorRT delegate.
# The model takes an image tensor and outputs a vector of tensors representing,
# for instance, detection boxes/keypoints and scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_detection/pose_detection.onnx"
      delegate { tensorrt {} }
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
}

# Decodes the detection tensors generated by the detection model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
  calculator: "TensorsToDetectionsCalculator"
  input_stream: "TENSORS:detection_tensors"
  input_side_packet: "ANCHORS:anchors"
  output_stream: "DETECTIONS:unfiltered_detections"
  options: {
    [mediapipe.TensorsToDetectionsCalculatorOptions.ext] {
      num_classes: 1
      num_boxes: 2254
      num_coords: 12
      box_coord_offset: 0
      keypoint_coord_offset: 4
      num_keypoints: 4
      num_values_per_keypoint: 2
      sigmoid_score: true
      score_clipping_thresh: 100.0
      reverse_output_order: true
      x_scale: 224.0
      y_scale: 224.0
      h_scale: 224.0
      w_scale: 224.0
      min_score_thresh: 0.5
    }
  }
}

# Performs non-max suppression to remove excessive detections.
node {
  calculator: "NonMaxSuppressionCalculator"
  input_stream: "unfiltered_detections"
  output_stream: "filtered_detections"
  options: {
    [mediapipe.NonMaxSuppressionCalculatorOptions.ext] {
      min_suppression_threshold: 0.3
      overlap_type: INTERSECTION_OVER_UNION
      algorithm: WEIGHTED
    }
  }
}

# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
# letterboxed image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (the
# input image to the graph before image transformation).
node {
  calculator: "DetectionLetterboxRemovalCalculator"
  input_stream: "DETECTIONS:filtered_detections"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "DETECTIONS:detections"
}
@@ -61,6 +61,35 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "pose_landmark_by_roi_onnx_cuda",
    graph = "pose_landmark_by_roi_onnx_cuda.pbtxt",
    register_as = "PoseLandmarkByRoiOnnxCUDA",
    deps = [
        ":pose_landmark_model_loader",
        ":pose_landmarks_and_segmentation_inverse_projection",
        ":tensors_to_pose_landmarks_and_segmentation",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "pose_landmark_by_roi_onnx_tensorrt",
    graph = "pose_landmark_by_roi_onnx_tensorrt.pbtxt",
    register_as = "PoseLandmarkByRoiOnnxTensorRT",
    deps = [
        ":pose_landmark_model_loader",
        ":pose_landmarks_and_segmentation_inverse_projection",
        ":tensors_to_pose_landmarks_and_segmentation",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "tensors_to_pose_landmarks_and_segmentation",
    graph = "tensors_to_pose_landmarks_and_segmentation.pbtxt",
@@ -159,10 +188,57 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "pose_landmark_onnx_cuda",
    graph = "pose_landmark_onnx_cuda.pbtxt",
    register_as = "PoseLandmarkOnnxCUDA",
    deps = [
        ":pose_detection_to_roi",
        ":pose_landmark_by_roi_onnx_cuda",
        ":pose_landmark_filtering",
        ":pose_landmarks_to_roi",
        ":pose_segmentation_filtering",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:merge_calculator",
        "//mediapipe/calculators/core:packet_presence_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:from_image_calculator",
        "//mediapipe/modules/pose_detection:pose_detection_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "pose_landmark_onnx_tensorrt",
    graph = "pose_landmark_onnx_tensorrt.pbtxt",
    register_as = "PoseLandmarkOnnxTensorRT",
    deps = [
        ":pose_detection_to_roi",
        ":pose_landmark_by_roi_onnx_tensorrt",
        ":pose_landmark_filtering",
        ":pose_landmarks_to_roi",
        ":pose_segmentation_filtering",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:merge_calculator",
        "//mediapipe/calculators/core:packet_presence_calculator",
        "//mediapipe/calculators/core:previous_loopback_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:from_image_calculator",
        "//mediapipe/modules/pose_detection:pose_detection_onnx_tensorrt",
    ],
)

exports_files(
    srcs = [
        "pose_landmark_full.onnx",
        "pose_landmark_full.tflite",
        "pose_landmark_heavy.onnx",
        "pose_landmark_heavy.tflite",
        "pose_landmark_lite.onnx",
        "pose_landmark_lite.tflite",
    ],
)
mediapipe/modules/pose_landmark/pose_landmark_by_roi_onnx_cuda.pbtxt (new file, 165 lines)

@@ -0,0 +1,165 @@
# MediaPipe graph to detect/predict pose landmarks and optionally segmentation
# within an ROI. (CPU input; inference is executed with onnxruntime on CUDA.)
#
# It is required that "pose_landmark_full.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
# path during execution. (The model path is fixed in this graph; it is not
# selected via a MODEL_COMPLEXITY side packet.)
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkByRoiOnnxCUDA"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:roi"
#     output_stream: "LANDMARKS:landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

type: "PoseLandmarkByRoiOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg) and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: If a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"

# Pose world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:world_landmarks"

# Segmentation mask on CPU in ImageFormat::VEC32F1. (Image)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Retrieves the image size.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "SIZE:image_size"
}

# Crops and transforms the specified ROI in the input image into an image patch
# represented as a tensor of dimension expected by the corresponding ML model,
# while maintaining the aspect ratio of the ROI (which can be different from
# that of the image patch). Therefore, there can be letterboxing around the ROI
# in the generated tensor representation.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "MATRIX:transformation_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}

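# Runs the pose landmark ONNX model with the onnxruntime CUDA delegate. The
# model path below is fixed to the "full" model variant.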
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
      delegate { cuda {} }
    }
  }
}

# Decodes the tensors into the corresponding landmark and segmentation mask
# representation.
node {
  calculator: "TensorsToPoseLandmarksAndSegmentation"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "TENSORS:output_tensors"
  output_stream: "LANDMARKS:roi_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
}

# Projects the landmarks and segmentation mask in the local coordinates of the
# (potentially letterboxed) ROI back to the global coordinates of the full input
# image.
node {
  calculator: "PoseLandmarksAndSegmentationInverseProjection"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_RECT:roi"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  input_stream: "MATRIX:transformation_matrix"
  input_stream: "LANDMARKS:roi_landmarks"
  input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
}
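The inverse-projection subgraph above undoes the letterboxing and the ROI crop. As a rough illustration of the two un-mappings involved (a paraphrase of MediaPipe's letterbox-removal and landmark-projection logic, not code from this commit):

    #include <cmath>

    struct Landmark { float x, y, z; };
    struct Padding { float left, top, right, bottom; };  // LETTERBOX_PADDING
    struct Roi { float x_center, y_center, width, height, rotation; };  // NORM_RECT

    Landmark InverseProject(Landmark lm, const Padding& pad, const Roi& roi) {
      // 1) Remove the letterbox: rescale from the padded tensor back to the crop.
      lm.x = (lm.x - pad.left) / (1.0f - pad.left - pad.right);
      lm.y = (lm.y - pad.top) / (1.0f - pad.top - pad.bottom);
      // 2) Project crop-local coordinates into the full image: rotate about the
      // crop center, then scale and translate by the ROI size and center.
      const float dx = lm.x - 0.5f;
      const float dy = lm.y - 0.5f;
      const float c = std::cos(roi.rotation);
      const float s = std::sin(roi.rotation);
      lm.x = roi.x_center + (dx * c - dy * s) * roi.width;
      lm.y = roi.y_center + (dx * s + dy * c) * roi.height;
      lm.z *= roi.width;  // z is scaled like x, by convention
      return lm;
    }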
mediapipe/modules/pose_landmark/pose_landmark_by_roi_onnx_tensorrt.pbtxt (new file, 165 lines)

@@ -0,0 +1,165 @@
# MediaPipe graph to detect/predict pose landmarks and optionally segmentation
# within an ROI. (CPU input; inference is executed with onnxruntime on
# TensorRT.)
#
# It is required that "pose_landmark_full.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
# path during execution. (The model path is fixed in this graph; it is not
# selected via a MODEL_COMPLEXITY side packet.)
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkByRoiOnnxTensorRT"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:roi"
#     output_stream: "LANDMARKS:landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

type: "PoseLandmarkByRoiOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg) and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: If a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"

# Pose world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:world_landmarks"

# Segmentation mask on CPU in ImageFormat::VEC32F1. (Image)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Retrieves the image size.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "SIZE:image_size"
}

# Crops and transforms the specified ROI in the input image into an image patch
# represented as a tensor of dimension expected by the corresponding ML model,
# while maintaining the aspect ratio of the ROI (which can be different from
# that of the image patch). Therefore, there can be letterboxing around the ROI
# in the generated tensor representation.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "MATRIX:transformation_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}

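# Runs the pose landmark ONNX model with the onnxruntime TensorRT delegate. The
# model path below is fixed to the "full" model variant.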
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
      delegate { tensorrt {} }
    }
  }
}

# Decodes the tensors into the corresponding landmark and segmentation mask
# representation.
node {
  calculator: "TensorsToPoseLandmarksAndSegmentation"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "TENSORS:output_tensors"
  output_stream: "LANDMARKS:roi_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
}

# Projects the landmarks and segmentation mask in the local coordinates of the
# (potentially letterboxed) ROI back to the global coordinates of the full input
# image.
node {
  calculator: "PoseLandmarksAndSegmentationInverseProjection"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_RECT:roi"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  input_stream: "MATRIX:transformation_matrix"
  input_stream: "LANDMARKS:roi_landmarks"
  input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
}
mediapipe/modules/pose_landmark/pose_landmark_onnx_cuda.pbtxt (new file, 268 lines)

@@ -0,0 +1,268 @@
# MediaPipe graph to detect/predict pose landmarks. (CPU input; inference is
# executed with onnxruntime on CUDA.) This graph tries to skip pose detection
# as much as possible by using previously detected/predicted landmarks for new
# images.
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# It is required that "pose_landmark_full.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
# path during execution (the ONNX landmark subgraph currently loads this model
# regardless of the MODEL_COMPLEXITY input side packet).
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkOnnxCUDA"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#     input_stream: "IMAGE:image"
#     output_stream: "LANDMARKS:pose_landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

type: "PoseLandmarkOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Pose landmarks. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg), and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:pose_landmarks"

# Pose world landmarks. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"

# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Extra outputs (for debugging, for instance).
# Detected poses. (Detection)
output_stream: "DETECTION:pose_detection"
# Regions of interest calculated based on landmarks. (NormalizedRect)
output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
# Regions of interest calculated based on pose detections. (NormalizedRect)
output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"

# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  input_stream: "prev_pose_rect_from_landmarks"
  output_stream: "gated_prev_pose_rect_from_landmarks"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      allow: true
    }
  }
}

# Checks if there's previous pose rect calculated from landmarks.
node: {
  calculator: "PacketPresenceCalculator"
  input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
  output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
}

# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "SIZE:image_size"
}

# Drops the incoming image if the pose has already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of pose detection.
node {
  calculator: "GateCalculator"
  input_stream: "image"
  input_stream: "image_size"
  input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
  output_stream: "image_for_pose_detection"
  output_stream: "image_size_for_pose_detection"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      empty_packets_as_allow: true
    }
  }
}

# Detects poses.
node {
  calculator: "PoseDetectionOnnxCUDA"
  input_stream: "IMAGE:image_for_pose_detection"
  output_stream: "DETECTIONS:pose_detections"
}

# Gets the very first detection from "pose_detections" vector.
node {
  calculator: "SplitDetectionVectorCalculator"
  input_stream: "pose_detections"
  output_stream: "pose_detection"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      element_only: true
    }
  }
}

# Calculates region of interest based on pose detection, so that it can be used
# to detect landmarks.
node {
  calculator: "PoseDetectionToRoi"
  input_stream: "DETECTION:pose_detection"
  input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
  output_stream: "ROI:pose_rect_from_detection"
}

# Selects either pose rect (or ROI) calculated from detection or from previously
# detected landmarks if available (in this case, calculation of pose rect from
# detection is skipped).
node {
  calculator: "MergeCalculator"
  input_stream: "pose_rect_from_detection"
  input_stream: "gated_prev_pose_rect_from_landmarks"
  output_stream: "pose_rect"
}

# Detects pose landmarks within specified region of interest of the image.
node {
  calculator: "PoseLandmarkByRoiOnnxCUDA"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:image"
  input_stream: "ROI:pose_rect"
  output_stream: "LANDMARKS:unfiltered_pose_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
}

# Smoothes landmarks to reduce jitter.
node {
  calculator: "PoseLandmarkFiltering"
  input_side_packet: "ENABLE:smooth_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
  input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
  output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
  output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
}

# Calculates region of interest based on the auxiliary landmarks, to be used in
# the subsequent image.
node {
  calculator: "PoseLandmarksToRoi"
  input_stream: "LANDMARKS:auxiliary_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:pose_rect_from_landmarks"
}

# Caches pose rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# pose rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:pose_rect_from_landmarks"
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
}

# Smoothes segmentation to reduce jitter.
node {
  calculator: "PoseSegmentationFiltering"
  input_side_packet: "ENABLE:smooth_segmentation"
  input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
  output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
}

# Converts the incoming segmentation mask represented as an Image into the
# corresponding ImageFrame type.
node: {
  calculator: "FromImageCalculator"
  input_stream: "IMAGE:filtered_segmentation_mask"
  output_stream: "IMAGE_CPU:segmentation_mask"
}
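Taken together, the gate, presence, merge and loopback nodes above implement detection-skipping tracking. A compact restatement of the per-frame control flow in this graph (and its TensorRT twin below); this is illustrative pseudologic with placeholder types, not MediaPipe API and not code from this commit:

    #include <optional>

    struct Frame {};
    struct NormRect {};
    struct Landmarks {};

    // Placeholders for the subgraphs wired up in this file.
    NormRect DetectPoseAndComputeRoi(const Frame&);   // PoseDetectionOnnx* + PoseDetectionToRoi
    std::optional<Landmarks> LandmarksByRoi(const Frame&, const NormRect&);  // PoseLandmarkByRoiOnnx*
    NormRect RoiFromLandmarks(const Landmarks&);      // PoseLandmarksToRoi

    // Fed back across frames by PreviousLoopbackCalculator.
    std::optional<NormRect> prev_roi;

    void ProcessFrame(const Frame& frame) {
      // The GateCalculator drops the frame for the detector whenever a
      // previous ROI exists (empty_packets_as_allow lets the very first
      // frame through).
      const NormRect roi =
          prev_roi.has_value() ? *prev_roi : DetectPoseAndComputeRoi(frame);
      const std::optional<Landmarks> lms = LandmarksByRoi(frame, roi);
      // On success the auxiliary landmarks yield the next frame's ROI; on
      // failure the loopback stays empty, so the next frame re-runs detection.
      prev_roi = lms.has_value()
                     ? std::optional<NormRect>(RoiFromLandmarks(*lms))
                     : std::nullopt;
    }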
mediapipe/modules/pose_landmark/pose_landmark_onnx_tensorrt.pbtxt (new file, 268 lines)

@@ -0,0 +1,268 @@
# MediaPipe graph to detect/predict pose landmarks. (CPU input; inference is
# executed with onnxruntime on TensorRT.) This graph tries to skip pose
# detection as much as possible by using previously detected/predicted
# landmarks for new images.
#
# It is required that "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
# path during execution.
#
# It is required that "pose_landmark_full.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx"
# path during execution (the ONNX landmark subgraph currently loads this model
# regardless of the MODEL_COMPLEXITY input side packet).
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkOnnxTensorRT"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#     input_stream: "IMAGE:image"
#     output_stream: "LANDMARKS:pose_landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

type: "PoseLandmarkOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Pose landmarks. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg), and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:pose_landmarks"

# Pose world landmarks. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"

# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Extra outputs (for debugging, for instance).
# Detected poses. (Detection)
output_stream: "DETECTION:pose_detection"
# Regions of interest calculated based on landmarks. (NormalizedRect)
output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
# Regions of interest calculated based on pose detections. (NormalizedRect)
output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"

# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  input_stream: "prev_pose_rect_from_landmarks"
  output_stream: "gated_prev_pose_rect_from_landmarks"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      allow: true
    }
  }
}

# Checks if there's previous pose rect calculated from landmarks.
node: {
  calculator: "PacketPresenceCalculator"
  input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
  output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
}

# Calculates size of the image.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "SIZE:image_size"
}

# Drops the incoming image if the pose has already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of pose detection.
node {
  calculator: "GateCalculator"
  input_stream: "image"
  input_stream: "image_size"
  input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
  output_stream: "image_for_pose_detection"
  output_stream: "image_size_for_pose_detection"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      empty_packets_as_allow: true
    }
  }
}

# Detects poses.
node {
  calculator: "PoseDetectionOnnxTensorRT"
  input_stream: "IMAGE:image_for_pose_detection"
  output_stream: "DETECTIONS:pose_detections"
}

# Gets the very first detection from "pose_detections" vector.
node {
  calculator: "SplitDetectionVectorCalculator"
  input_stream: "pose_detections"
  output_stream: "pose_detection"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      element_only: true
    }
  }
}

# Calculates region of interest based on pose detection, so that it can be used
# to detect landmarks.
node {
  calculator: "PoseDetectionToRoi"
  input_stream: "DETECTION:pose_detection"
  input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
  output_stream: "ROI:pose_rect_from_detection"
}

# Selects either pose rect (or ROI) calculated from detection or from previously
# detected landmarks if available (in this case, calculation of pose rect from
# detection is skipped).
node {
  calculator: "MergeCalculator"
  input_stream: "pose_rect_from_detection"
  input_stream: "gated_prev_pose_rect_from_landmarks"
  output_stream: "pose_rect"
}

# Detects pose landmarks within specified region of interest of the image.
node {
  calculator: "PoseLandmarkByRoiOnnxTensorRT"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:image"
  input_stream: "ROI:pose_rect"
  output_stream: "LANDMARKS:unfiltered_pose_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
}

# Smoothes landmarks to reduce jitter.
node {
  calculator: "PoseLandmarkFiltering"
  input_side_packet: "ENABLE:smooth_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
  input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
  output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
  output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
}

# Calculates region of interest based on the auxiliary landmarks, to be used in
# the subsequent image.
node {
  calculator: "PoseLandmarksToRoi"
  input_stream: "LANDMARKS:auxiliary_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:pose_rect_from_landmarks"
}

# Caches pose rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# pose rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:pose_rect_from_landmarks"
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
}

# Smoothes segmentation to reduce jitter.
node {
  calculator: "PoseSegmentationFiltering"
  input_side_packet: "ENABLE:smooth_segmentation"
  input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
  output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
}

# Converts the incoming segmentation mask represented as an Image into the
# corresponding ImageFrame type.
node: {
  calculator: "FromImageCalculator"
  input_stream: "IMAGE:filtered_segmentation_mask"
  output_stream: "IMAGE_CPU:segmentation_mask"
}