holistic: support ONNX Runtime CUDA and TensorRT
This commit is contained in:
parent 008ed46ee0
commit 8f7e36b344
@@ -32,6 +32,38 @@ cc_binary(
    ],
)

cc_binary(
    name = "holistic_tracking_onnx_cuda",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_cuda_graph_deps",
    ],
)

cc_binary(
    name = "holistic_tracking_onnx_cuda_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_cuda_graph_deps",
    ],
)

cc_binary(
    name = "holistic_tracking_onnx_tensorrt",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main",
        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_tensorrt_graph_deps",
    ],
)

cc_binary(
    name = "holistic_tracking_onnx_tensorrt_fps",
    deps = [
        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_tensorrt_graph_deps",
    ],
)

# Linux only
cc_binary(
    name = "holistic_tracking_gpu",
@@ -68,3 +68,27 @@ cc_library(
        "//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
    ],
)

cc_library(
    name = "holistic_tracking_onnx_cuda_graph_deps",
    deps = [
        ":holistic_tracking_to_render_data",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/modules/holistic_landmark:holistic_landmark_onnx_cuda",
    ],
)

cc_library(
    name = "holistic_tracking_onnx_tensorrt_graph_deps",
    deps = [
        ":holistic_tracking_to_render_data",
        "//mediapipe/calculators/core:constant_side_packet_calculator",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/util:annotation_overlay_calculator",
        "//mediapipe/modules/holistic_landmark:holistic_landmark_onnx_tensorrt",
    ],
)
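The four binaries above reuse the existing demo_run_graph_main drivers, so building and running them presumably follows the same pattern as the current desktop holistic_tracking examples. A minimal sketch; the bazel invocation and flag names follow the existing MediaPipe desktop examples rather than anything stated in this commit, and the graph path is an assumption:

# Hypothetical build/run session for the new CUDA target.
bazel build -c opt //mediapipe/examples/desktop/holistic_tracking:holistic_tracking_onnx_cuda

# demo_run_graph_main reads the graph config and video paths from these flags.
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/holistic_tracking/holistic_tracking_onnx_cuda \
  --calculator_graph_config_file=mediapipe/graphs/holistic_tracking/holistic_tracking_onnx_cuda.pbtxt \
  --input_video_path=/path/to/input.mp4 \
  --output_video_path=/path/to/output.mp4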
holistic_tracking_onnx_cuda.pbtxt (new file)
@@ -0,0 +1,75 @@
# Tracks and renders pose + hands + face landmarks.

# CPU image. (ImageFrame)
input_stream: "input_video"

# CPU image with rendered results. (ImageFrame)
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
      max_in_flight: 1
      max_in_queue: 1
      # Timeout is disabled (set to 0) as first frame processing can take more
      # than 1 second.
      in_flight_timeout: 0
    }
  }
}

node {
  calculator: "HolisticLandmarkOnnxCUDA"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "POSE_ROI:pose_roi"
  output_stream: "POSE_DETECTION:pose_detection"
  output_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Gets image size.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "SIZE:image_size"
}

# Converts pose, hands and face landmarks to a render data vector.
node {
  calculator: "HolisticTrackingToRenderData"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  input_stream: "POSE_ROI:pose_roi"
  input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
  input_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "RENDER_DATA_VECTOR:render_data_vector"
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "VECTOR:render_data_vector"
  output_stream: "IMAGE:output_video"
}
holistic_tracking_onnx_tensorrt.pbtxt (new file)
@@ -0,0 +1,75 @@
# Tracks and renders pose + hands + face landmarks.

# CPU image. (ImageFrame)
input_stream: "input_video"

# CPU image with rendered results. (ImageFrame)
output_stream: "output_video"

# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for downstream nodes
# (calculators and subgraphs) in the graph to finish their tasks before it
# passes through another image. All images that come in while waiting are
# dropped, limiting the number of in-flight images in most parts of the graph
# to 1. This prevents the downstream nodes from queuing up incoming images and
# data excessively, which leads to increased latency and memory usage, unwanted
# in real-time mobile applications. It also eliminates unnecessary computation,
# e.g., the output produced by a node may get dropped downstream if the
# subsequent nodes are still busy processing previous inputs.
node {
  calculator: "FlowLimiterCalculator"
  input_stream: "input_video"
  input_stream: "FINISHED:output_video"
  input_stream_info: {
    tag_index: "FINISHED"
    back_edge: true
  }
  output_stream: "throttled_input_video"
  node_options: {
    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
      max_in_flight: 1
      max_in_queue: 1
      # Timeout is disabled (set to 0) as first frame processing can take more
      # than 1 second.
      in_flight_timeout: 0
    }
  }
}

node {
  calculator: "HolisticLandmarkOnnxTensorRT"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "POSE_ROI:pose_roi"
  output_stream: "POSE_DETECTION:pose_detection"
  output_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Gets image size.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:throttled_input_video"
  output_stream: "SIZE:image_size"
}

# Converts pose, hands and face landmarks to a render data vector.
node {
  calculator: "HolisticTrackingToRenderData"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  input_stream: "POSE_ROI:pose_roi"
  input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
  input_stream: "FACE_LANDMARKS:face_landmarks"
  output_stream: "RENDER_DATA_VECTOR:render_data_vector"
}

# Draws annotations and overlays them on top of the input images.
node {
  calculator: "AnnotationOverlayCalculator"
  input_stream: "IMAGE:throttled_input_video"
  input_stream: "VECTOR:render_data_vector"
  output_stream: "IMAGE:output_video"
}
@@ -59,6 +59,46 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmark_onnx_cuda",
    graph = "hand_landmark_onnx_cuda.pbtxt",
    register_as = "HandLandmarkOnnxCUDA",
    deps = [
        ":hand_landmark_model_loader",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
        "//mediapipe/calculators/tensor:tensors_to_classification_calculator",
        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
        "//mediapipe/calculators/util:landmark_projection_calculator",
        "//mediapipe/calculators/util:thresholding_calculator",
        "//mediapipe/calculators/util:world_landmark_projection_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmark_onnx_tensorrt",
    graph = "hand_landmark_onnx_tensorrt.pbtxt",
    register_as = "HandLandmarkOnnxTensorRT",
    deps = [
        ":hand_landmark_model_loader",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/core:split_vector_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
        "//mediapipe/calculators/tensor:tensors_to_classification_calculator",
        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
        "//mediapipe/calculators/util:landmark_projection_calculator",
        "//mediapipe/calculators/util:thresholding_calculator",
        "//mediapipe/calculators/util:world_landmark_projection_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmark_gpu",
    graph = "hand_landmark_gpu.pbtxt",
mediapipe/modules/hand_landmark/hand_landmark_onnx_cuda.pbtxt (new file, 205 lines)
@@ -0,0 +1,205 @@
# MediaPipe graph to detect/predict hand landmarks via ONNX Runtime (CUDA).

type: "HandLandmarkOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"

# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"

# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"

# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"

# Transforms a region of image into a 224x224 tensor while keeping the aspect
# ratio, and therefore may result in potential letterboxing.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "TENSORS:input_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}

# Runs the hand landmark ONNX model through ONNX Runtime with the CUDA
# execution provider. Takes an image tensor and outputs a vector of tensors
# representing, for instance, detection boxes/keypoints and scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/hand_landmark/hand_landmark_lite.onnx"
      delegate { cuda {} }
    }
  }
}

# Splits a vector of tensors into multiple vectors according to the ranges
# specified in the options.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  output_stream: "handedness_tensor"
  output_stream: "world_landmark_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 2 end: 3 }
      ranges: { begin: 3 end: 4 }
    }
  }
}

# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}

# Applies a threshold to the confidence score to determine whether a hand is
# present.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops handedness tensor if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "handedness_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_handedness_tensor"
}

# Converts the handedness tensor into a float that represents the
# classification score of handedness.
node {
  calculator: "TensorsToClassificationCalculator"
  input_stream: "TENSORS:ensured_handedness_tensor"
  output_stream: "CLASSIFICATIONS:handedness"
  options: {
    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
      top_k: 1
      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
      binary_classification: true
    }
  }
}

# Drops landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
      input_image_width: 224
      input_image_height: 224
      # The additional scaling factor is used to account for the Z coordinate
      # distribution in the training data.
      normalize_z: 0.4
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}

# Drops world landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "world_landmark_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_world_landmark_tensor"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:unprojected_world_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
    }
  }
}

# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "WorldLandmarkProjectionCalculator"
  input_stream: "LANDMARKS:unprojected_world_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "LANDMARKS:hand_world_landmarks"
}
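The graph above loads hand_landmark_lite.onnx, which is not a model MediaPipe ships; presumably the bundled hand_landmark_lite.tflite was converted to ONNX beforehand. A minimal conversion sketch, assuming the tf2onnx package (the output path simply mirrors the model_path used in the graph; the actual conversion route used by this commit is not stated):

# Hypothetical TFLite-to-ONNX conversion; tf2onnx accepts TFLite input directly.
pip install tf2onnx
python -m tf2onnx.convert \
  --tflite mediapipe/modules/hand_landmark/hand_landmark_lite.tflite \
  --output mediapipe/modules/hand_landmark/hand_landmark_lite.onnx \
  --opset 13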
mediapipe/modules/hand_landmark/hand_landmark_onnx_tensorrt.pbtxt (new file, 205 lines)
@@ -0,0 +1,205 @@
# MediaPipe graph to detect/predict hand landmarks via ONNX Runtime (TensorRT).

type: "HandLandmarkOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"

# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"

# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"

# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"

# Transforms a region of image into a 224x224 tensor while keeping the aspect
# ratio, and therefore may result in potential letterboxing.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "TENSORS:input_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}

# Runs the hand landmark ONNX model through ONNX Runtime with the TensorRT
# execution provider. Takes an image tensor and outputs a vector of tensors
# representing, for instance, detection boxes/keypoints and scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/hand_landmark/hand_landmark_lite.onnx"
      delegate { tensorrt {} }
    }
  }
}

# Splits a vector of tensors into multiple vectors according to the ranges
# specified in the options.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  output_stream: "handedness_tensor"
  output_stream: "world_landmark_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 2 end: 3 }
      ranges: { begin: 3 end: 4 }
    }
  }
}

# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}

# Applies a threshold to the confidence score to determine whether a hand is
# present.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops handedness tensor if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "handedness_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_handedness_tensor"
}

# Converts the handedness tensor into a float that represents the
# classification score of handedness.
node {
  calculator: "TensorsToClassificationCalculator"
  input_stream: "TENSORS:ensured_handedness_tensor"
  output_stream: "CLASSIFICATIONS:handedness"
  options: {
    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
      top_k: 1
      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
      binary_classification: true
    }
  }
}

# Drops landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
      input_image_width: 224
      input_image_height: 224
      # The additional scaling factor is used to account for the Z coordinate
      # distribution in the training data.
      normalize_z: 0.4
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}

# Drops world landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "world_landmark_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_world_landmark_tensor"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:unprojected_world_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
    }
  }
}

# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "WorldLandmarkProjectionCalculator"
  input_stream: "LANDMARKS:unprojected_world_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "LANDMARKS:hand_world_landmarks"
}
@@ -53,6 +53,36 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmarks_from_pose_onnx_cuda",
    graph = "face_landmarks_from_pose_onnx_cuda.pbtxt",
    register_as = "FaceLandmarksFromPoseOnnxCUDA",
    deps = [
        ":face_detection_front_detections_to_roi",
        ":face_landmarks_from_pose_to_recrop_roi",
        ":face_tracking",
        "//mediapipe/calculators/core:split_proto_list_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_by_roi_onnx_cuda",
        "//mediapipe/modules/face_landmark:face_landmark_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmarks_from_pose_onnx_tensorrt",
    graph = "face_landmarks_from_pose_onnx_tensorrt.pbtxt",
    register_as = "FaceLandmarksFromPoseOnnxTensorRT",
    deps = [
        ":face_detection_front_detections_to_roi",
        ":face_landmarks_from_pose_to_recrop_roi",
        ":face_tracking",
        "//mediapipe/calculators/core:split_proto_list_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/modules/face_detection:face_detection_short_range_by_roi_onnx_tensorrt",
        "//mediapipe/modules/face_landmark:face_landmark_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "face_landmarks_to_roi",
    graph = "face_landmarks_to_roi.pbtxt",
@@ -126,6 +156,36 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmarks_from_pose_onnx_cuda",
    graph = "hand_landmarks_from_pose_onnx_cuda.pbtxt",
    register_as = "HandLandmarksFromPoseOnnxCUDA",
    deps = [
        ":hand_landmarks_from_pose_to_recrop_roi",
        ":hand_recrop_by_roi_onnx_cuda",
        ":hand_tracking",
        ":hand_visibility_from_hand_landmarks_from_pose",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/modules/hand_landmark:hand_landmark_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmarks_from_pose_onnx_tensorrt",
    graph = "hand_landmarks_from_pose_onnx_tensorrt.pbtxt",
    register_as = "HandLandmarksFromPoseOnnxTensorRT",
    deps = [
        ":hand_landmarks_from_pose_to_recrop_roi",
        ":hand_recrop_by_roi_onnx_tensorrt",
        ":hand_tracking",
        ":hand_visibility_from_hand_landmarks_from_pose",
        "//mediapipe/calculators/core:gate_calculator",
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/modules/hand_landmark:hand_landmark_onnx_tensorrt",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmarks_to_roi",
    graph = "hand_landmarks_to_roi.pbtxt",
@@ -170,6 +230,40 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "hand_recrop_by_roi_onnx_cuda",
    graph = "hand_recrop_by_roi_onnx_cuda.pbtxt",
    register_as = "HandRecropByRoiOnnxCUDA",
    deps = [
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
        "//mediapipe/calculators/util:alignment_points_to_rects_calculator",
        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
        "//mediapipe/calculators/util:landmark_projection_calculator",
        "//mediapipe/calculators/util:landmarks_to_detection_calculator",
        "//mediapipe/calculators/util:rect_transformation_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_recrop_by_roi_onnx_tensorrt",
    graph = "hand_recrop_by_roi_onnx_tensorrt.pbtxt",
    register_as = "HandRecropByRoiOnnxTensorRT",
    deps = [
        "//mediapipe/calculators/image:image_properties_calculator",
        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
        "//mediapipe/calculators/util:alignment_points_to_rects_calculator",
        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
        "//mediapipe/calculators/util:landmark_projection_calculator",
        "//mediapipe/calculators/util:landmarks_to_detection_calculator",
        "//mediapipe/calculators/util:rect_transformation_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_tracking",
    graph = "hand_tracking.pbtxt",
@@ -215,6 +309,26 @@ mediapipe_simple_subgraph(
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmarks_left_and_right_onnx_cuda",
    graph = "hand_landmarks_left_and_right_onnx_cuda.pbtxt",
    register_as = "HandLandmarksLeftAndRightOnnxCUDA",
    deps = [
        ":hand_landmarks_from_pose_onnx_cuda",
        "//mediapipe/calculators/core:split_proto_list_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmarks_left_and_right_onnx_tensorrt",
    graph = "hand_landmarks_left_and_right_onnx_tensorrt.pbtxt",
    register_as = "HandLandmarksLeftAndRightOnnxTensorRT",
    deps = [
        ":hand_landmarks_from_pose_onnx_tensorrt",
        "//mediapipe/calculators/core:split_proto_list_calculator",
    ],
)

mediapipe_simple_subgraph(
    name = "hand_landmarks_from_pose_to_recrop_roi",
    graph = "hand_landmarks_from_pose_to_recrop_roi.pbtxt",
@@ -264,3 +378,31 @@ mediapipe_simple_subgraph(
        "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
    ],
)

mediapipe_simple_subgraph(
    name = "holistic_landmark_onnx_cuda",
    graph = "holistic_landmark_onnx_cuda.pbtxt",
    register_as = "HolisticLandmarkOnnxCUDA",
    visibility = ["//visibility:public"],
    deps = [
        ":face_landmarks_from_pose_onnx_cuda",
        ":hand_landmarks_left_and_right_onnx_cuda",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_transformation_calculator",
        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_cuda",
    ],
)

mediapipe_simple_subgraph(
    name = "holistic_landmark_onnx_tensorrt",
    graph = "holistic_landmark_onnx_tensorrt.pbtxt",
    register_as = "HolisticLandmarkOnnxTensorRT",
    visibility = ["//visibility:public"],
    deps = [
        ":face_landmarks_from_pose_onnx_tensorrt",
        ":hand_landmarks_left_and_right_onnx_tensorrt",
        "//mediapipe/calculators/core:flow_limiter_calculator",
        "//mediapipe/calculators/image:image_transformation_calculator",
        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_tensorrt",
    ],
)
mediapipe/modules/holistic_landmark/face_landmarks_from_pose_onnx_cuda.pbtxt (new file, 82 lines)
@@ -0,0 +1,82 @@
# Predicts face landmarks within an ROI derived from face-related pose
# landmarks.

type: "FaceLandmarksFromPoseOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"

# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"

# Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"

# Debug outputs.
# Face ROI derived from face-related pose landmarks, which defines the search
# region for the face detection model. (NormalizedRect)
output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
# Refined face crop rectangle predicted by face detection model.
# (NormalizedRect)
output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
# Rectangle used to predict face landmarks. (NormalizedRect)
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"

# TODO: do not predict face when most of the face landmarks from
# pose are invisible.

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Gets ROI for re-crop model from face-related pose landmarks.
node {
  calculator: "FaceLandmarksFromPoseToRecropRoi"
  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:face_roi_from_pose"
}

# Detects faces within the face ROI calculated from pose landmarks. This is done
# to refine face ROI for further landmark detection as ROI calculated from
# pose landmarks may be inaccurate.
node {
  calculator: "FaceDetectionShortRangeByRoiOnnxCUDA"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:face_roi_from_pose"
  output_stream: "DETECTIONS:face_detections"
}

# Calculates refined face ROI.
node {
  calculator: "FaceDetectionFrontDetectionsToRoi"
  input_stream: "DETECTIONS:face_detections"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:face_roi_from_detection"
}

# Gets face tracking rectangle (either face rectangle from the previous
# frame or face re-crop rectangle from the current frame) for face prediction.
node {
  calculator: "FaceTracking"
  input_stream: "LANDMARKS:face_landmarks"
  input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
}

# Predicts face landmarks from the tracking rectangle.
node {
  calculator: "FaceLandmarkOnnxCUDA"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:face_tracking_roi"
  input_side_packet: "WITH_ATTENTION:refine_landmarks"
  output_stream: "LANDMARKS:face_landmarks"
}
mediapipe/modules/holistic_landmark/face_landmarks_from_pose_onnx_tensorrt.pbtxt (new file, 82 lines)
@@ -0,0 +1,82 @@
# Predicts face landmarks within an ROI derived from face-related pose
# landmarks.

type: "FaceLandmarksFromPoseOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Face-related pose landmarks. (NormalizedLandmarkList)
input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"

# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_LANDMARKS:refine_landmarks"

# Face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"

# Debug outputs.
# Face ROI derived from face-related pose landmarks, which defines the search
# region for the face detection model. (NormalizedRect)
output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
# Refined face crop rectangle predicted by face detection model.
# (NormalizedRect)
output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
# Rectangle used to predict face landmarks. (NormalizedRect)
output_stream: "FACE_TRACKING_ROI:face_tracking_roi"

# TODO: do not predict face when most of the face landmarks from
# pose are invisible.

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Gets ROI for re-crop model from face-related pose landmarks.
node {
  calculator: "FaceLandmarksFromPoseToRecropRoi"
  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:face_roi_from_pose"
}

# Detects faces within the face ROI calculated from pose landmarks. This is done
# to refine face ROI for further landmark detection as ROI calculated from
# pose landmarks may be inaccurate.
node {
  calculator: "FaceDetectionShortRangeByRoiOnnxTensorRT"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:face_roi_from_pose"
  output_stream: "DETECTIONS:face_detections"
}

# Calculates refined face ROI.
node {
  calculator: "FaceDetectionFrontDetectionsToRoi"
  input_stream: "DETECTIONS:face_detections"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:face_roi_from_detection"
}

# Gets face tracking rectangle (either face rectangle from the previous
# frame or face re-crop rectangle from the current frame) for face prediction.
node {
  calculator: "FaceTracking"
  input_stream: "LANDMARKS:face_landmarks"
  input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
}

# Predicts face landmarks from the tracking rectangle.
node {
  calculator: "FaceLandmarkOnnxTensorRT"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:face_tracking_roi"
  input_side_packet: "WITH_ATTENTION:refine_landmarks"
  output_stream: "LANDMARKS:face_landmarks"
}
mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_onnx_cuda.pbtxt (new file, 78 lines)
@@ -0,0 +1,78 @@
# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.

type: "HandLandmarksFromPoseOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Hand-related pose landmarks in [wrist, pinky, index] order.
# (NormalizedLandmarkList)
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"

# Hand landmarks. (NormalizedLandmarkList)
output_stream: "HAND_LANDMARKS:hand_landmarks"

# Debug outputs.
# Hand ROI derived from hand-related landmarks, which defines the search region
# for the hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
# Rectangle used to predict hand landmarks. (NormalizedRect)
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"

# Gets hand visibility.
node {
  calculator: "HandVisibilityFromHandLandmarksFromPose"
  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
  output_stream: "VISIBILITY:hand_visibility"
}

# Drops hand-related pose landmarks if the pose wrist is not visible. This
# prevents predicting hand landmarks on the current frame.
node {
  calculator: "GateCalculator"
  input_stream: "hand_landmarks_from_pose"
  input_stream: "ALLOW:hand_visibility"
  output_stream: "ensured_hand_landmarks_from_pose"
}

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Gets ROI for re-crop model from hand-related pose landmarks.
node {
  calculator: "HandLandmarksFromPoseToRecropRoi"
  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:hand_roi_from_pose"
}

# Predicts hand re-crop rectangle on the current frame.
node {
  calculator: "HandRecropByRoiOnnxCUDA"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:hand_roi_from_pose"
  output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
}

# Gets hand tracking rectangle (either hand rectangle from the previous
# frame or hand re-crop rectangle from the current frame) for hand prediction.
node {
  calculator: "HandTracking"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
}

# Predicts hand landmarks from the tracking rectangle.
node {
  calculator: "HandLandmarkOnnxCUDA"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:hand_tracking_roi"
  output_stream: "LANDMARKS:hand_landmarks"
}
mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_onnx_tensorrt.pbtxt (new file, 78 lines)
@@ -0,0 +1,78 @@
# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.

type: "HandLandmarksFromPoseOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Hand-related pose landmarks in [wrist, pinky, index] order.
# (NormalizedLandmarkList)
input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"

# Hand landmarks. (NormalizedLandmarkList)
output_stream: "HAND_LANDMARKS:hand_landmarks"

# Debug outputs.
# Hand ROI derived from hand-related landmarks, which defines the search region
# for the hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
# Rectangle used to predict hand landmarks. (NormalizedRect)
output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"

# Gets hand visibility.
node {
  calculator: "HandVisibilityFromHandLandmarksFromPose"
  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
  output_stream: "VISIBILITY:hand_visibility"
}

# Drops hand-related pose landmarks if the pose wrist is not visible. This
# prevents predicting hand landmarks on the current frame.
node {
  calculator: "GateCalculator"
  input_stream: "hand_landmarks_from_pose"
  input_stream: "ALLOW:hand_visibility"
  output_stream: "ensured_hand_landmarks_from_pose"
}

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Gets ROI for re-crop model from hand-related pose landmarks.
node {
  calculator: "HandLandmarksFromPoseToRecropRoi"
  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:hand_roi_from_pose"
}

# Predicts hand re-crop rectangle on the current frame.
node {
  calculator: "HandRecropByRoiOnnxTensorRT"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:hand_roi_from_pose"
  output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
}

# Gets hand tracking rectangle (either hand rectangle from the previous
# frame or hand re-crop rectangle from the current frame) for hand prediction.
node {
  calculator: "HandTracking"
  input_stream: "LANDMARKS:hand_landmarks"
  input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
}

# Predicts hand landmarks from the tracking rectangle.
node {
  calculator: "HandLandmarkOnnxTensorRT"
  input_stream: "IMAGE:input_video"
  input_stream: "ROI:hand_tracking_roi"
  output_stream: "LANDMARKS:hand_landmarks"
}
mediapipe/modules/holistic_landmark/hand_landmarks_left_and_right_onnx_cuda.pbtxt (new file, 76 lines)
@@ -0,0 +1,76 @@
# Predicts left and right hand landmarks within corresponding ROIs derived from
# hand-related pose landmarks.

type: "HandLandmarksLeftAndRightOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList)
input_stream: "POSE_LANDMARKS:pose_landmarks"

# Left hand landmarks. (NormalizedLandmarkList)
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# Right hand landmarks. (NormalizedLandmarkList)
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"

# Debug outputs.
output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi"
output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi"

# Extracts left-hand-related landmarks from the pose landmarks.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "pose_landmarks"
  output_stream: "left_hand_landmarks_from_pose"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 15 end: 16 }
      ranges: { begin: 17 end: 18 }
      ranges: { begin: 19 end: 20 }
      combine_outputs: true
    }
  }
}

# Predicts left hand landmarks.
node {
  calculator: "HandLandmarksFromPoseOnnxCUDA"
  input_stream: "IMAGE:input_video"
  input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose"
  output_stream: "HAND_LANDMARKS:left_hand_landmarks"
  # Debug outputs.
  output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
  output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
  output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi"
}

# Extracts right-hand-related landmarks from the pose landmarks.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "pose_landmarks"
  output_stream: "right_hand_landmarks_from_pose"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 16 end: 17 }
      ranges: { begin: 18 end: 19 }
      ranges: { begin: 20 end: 21 }
      combine_outputs: true
    }
  }
}

# Predicts right hand landmarks.
node {
  calculator: "HandLandmarksFromPoseOnnxCUDA"
  input_stream: "IMAGE:input_video"
  input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose"
  output_stream: "HAND_LANDMARKS:right_hand_landmarks"
  # Debug outputs.
  output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
  output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
  output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi"
}
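The begin/end ranges in the two splitter nodes above each select a single pose landmark. For orientation, an annotation of what those indices mean in the standard 33-point BlazePose topology (this mapping is background knowledge about the pose model, not stated in the diff):

# BlazePose landmark indices selected by the splitters:
#   15: left wrist    17: left pinky    19: left index finger
#   16: right wrist   18: right pinky   20: right index finger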
@ -0,0 +1,76 @@
|
||||||
|
# Predicts left and right hand landmarks within corresponding ROIs derived from
|
||||||
|
# hand-related pose landmarks.
|
||||||
|
|
||||||
|
type: "HandLandmarksLeftAndRightOnnxTensorRT"
|
||||||
|
|
||||||
|
# CPU image. (ImageFrame)
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
# Pose landmarks to derive initial hand location from. (NormalizedLandmarkList)
|
||||||
|
input_stream: "POSE_LANDMARKS:pose_landmarks"
|
||||||
|
|
||||||
|
# Left hand landmarks. (NormalizedLandmarkList)
|
||||||
|
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
|
||||||
|
# RIght hand landmarks. (NormalizedLandmarkList)
|
||||||
|
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
|
||||||
|
|
||||||
|
# Debug outputs.
|
||||||
|
output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
|
||||||
|
output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
|
||||||
|
output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi"
|
||||||
|
output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
|
||||||
|
output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
|
||||||
|
output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi"
|
||||||
|
|
||||||
|
# Extracts left-hand-related landmarks from the pose landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "pose_landmarks"
|
||||||
|
output_stream: "left_hand_landmarks_from_pose"
|
||||||
|
options: {
|
||||||
|
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||||
|
ranges: { begin: 15 end: 16 }
|
||||||
|
ranges: { begin: 17 end: 18 }
|
||||||
|
ranges: { begin: 19 end: 20 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Predicts left hand landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "HandLandmarksFromPoseOnnxTensorRT"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose"
|
||||||
|
output_stream: "HAND_LANDMARKS:left_hand_landmarks"
|
||||||
|
# Debug outputs.
|
||||||
|
output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose"
|
||||||
|
output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop"
|
||||||
|
output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extracts right-hand-related landmarks from the pose landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "SplitNormalizedLandmarkListCalculator"
|
||||||
|
input_stream: "pose_landmarks"
|
||||||
|
output_stream: "right_hand_landmarks_from_pose"
|
||||||
|
options: {
|
||||||
|
[mediapipe.SplitVectorCalculatorOptions.ext] {
|
||||||
|
ranges: { begin: 16 end: 17 }
|
||||||
|
ranges: { begin: 18 end: 19 }
|
||||||
|
ranges: { begin: 20 end: 21 }
|
||||||
|
combine_outputs: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extracts right-hand-related landmarks from the pose landmarks.
|
||||||
|
node {
|
||||||
|
calculator: "HandLandmarksFromPoseOnnxTensorRT"
|
||||||
|
input_stream: "IMAGE:input_video"
|
||||||
|
input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose"
|
||||||
|
output_stream: "HAND_LANDMARKS:right_hand_landmarks"
|
||||||
|
# Debug outputs.
|
||||||
|
output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose"
|
||||||
|
output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop"
|
||||||
|
output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi"
|
||||||
|
}
|

@ -0,0 +1,137 @@
# Predicts more accurate hand location (re-crop ROI) within a given ROI.

type: "HandRecropByRoiOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Refined (more accurate) ROI to use for hand landmark prediction.
# (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined"

# Transforms hand ROI from the input image to a 256x256 tensor. Preserves aspect
# ratio, which results in a letterbox padding.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:initial_crop_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      # For OpenGL, the origin should be at the top left corner.
      gpu_origin: TOP_LEFT,
    }
  }
}
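
With keep_aspect_ratio, the ROI is scaled to fit the 256x256 tensor and the leftover border is reported on LETTERBOX_PADDING as normalized (left, top, right, bottom) values. A worked sketch of that arithmetic, assuming the padding is split evenly between the two edges:

# Hedged sketch of aspect-preserving FIT into a square tensor (not MediaPipe
# code): the longer ROI side fills the tensor, the shorter side is padded.
def letterbox_padding(roi_w, roi_h, dst=256):
    if roi_w >= roi_h:
        new_w, new_h = dst, roi_h * dst / roi_w
    else:
        new_w, new_h = roi_w * dst / roi_h, dst
    pad_x = (dst - new_w) / dst / 2.0  # normalized padding, left and right
    pad_y = (dst - new_h) / dst / 2.0  # normalized padding, top and bottom
    return (pad_x, pad_y, pad_x, pad_y)

# A 400x300 ROI scales to 256x192, leaving 12.5% padding on top and bottom.
print(letterbox_padding(400, 300))  # (0.0, 0.125, 0.0, 0.125)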

# Predicts hand re-crop rectangle.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:initial_crop_tensor"
  output_stream: "TENSORS:landmark_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/holistic_landmark/hand_recrop.onnx"
      delegate { cuda {} }
    }
  }
}
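
The delegate field selects the ONNX Runtime backend: cuda {} here, tensorrt {} in the TensorRT variant of this graph. In the onnxruntime Python API the same choice is expressed through execution providers, for example:

import onnxruntime as ort

# "CUDAExecutionProvider" and "TensorrtExecutionProvider" are the standard
# ONNX Runtime provider names; CPU is kept as a fallback.
session = ort.InferenceSession(
    "mediapipe/modules/holistic_landmark/hand_recrop.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    # For the TensorRT variant, use ["TensorrtExecutionProvider",
    # "CUDAExecutionProvider", "CPUExecutionProvider"] instead.
)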

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model. Two
# landmarks represent two virtual points: crop and scale of the new crop.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 2
      input_image_width: 256
      input_image_height: 256
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}
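
Numerically, the decode divides raw tensor coordinates by the 256x256 input size, and the letterbox removal then rescales each normalized coordinate into the unpadded range. A small sketch of the second step (padding values taken from the 400x300 example above):

# Sketch of letterbox removal on a normalized landmark, given the
# (left, top, right, bottom) padding from ImageToTensorCalculator.
def remove_letterbox(x, y, padding):
    left, top, right, bottom = padding
    return ((x - left) / (1.0 - left - right),
            (y - top) / (1.0 - top - bottom))

x, y = 128.0 / 256.0, 160.0 / 256.0  # tensor coords normalized by input size
print(remove_letterbox(x, y, (0.0, 0.125, 0.0, 0.125)))  # (0.5, 0.666...)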

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:alignment_landmarks"
}
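
Projection maps each ROI-local normalized landmark back through the (possibly rotated) NormalizedRect. A hedged sketch of the transform, assuming the rect is given as normalized center, size and rotation in radians:

import math

# Offsets from the ROI center are rotated, scaled by the ROI size, and
# re-centered on the full image (assumed rect layout, not MediaPipe code).
def project(x, y, rect):
    dx, dy = x - 0.5, y - 0.5
    c, s = math.cos(rect["rotation"]), math.sin(rect["rotation"])
    return (rect["x_center"] + (dx * c - dy * s) * rect["width"],
            rect["y_center"] + (dx * s + dy * c) * rect["height"])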

# Converts hand landmarks to a detection that tightly encloses all landmarks.
node {
  calculator: "LandmarksToDetectionCalculator"
  input_stream: "NORM_LANDMARKS:alignment_landmarks"
  output_stream: "DETECTION:hand_detection"
}

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Converts hand detection into a rectangle based on center and scale alignment
# points.
node {
  calculator: "AlignmentPointsRectsCalculator"
  input_stream: "DETECTION:hand_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECT:hand_roi_from_recrop"
  options: {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      rotation_vector_start_keypoint_index: 0
      rotation_vector_end_keypoint_index: 1
      rotation_vector_target_angle_degrees: -90
    }
  }
}
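
The two virtual landmarks act as alignment points: the first is the rect center, the second sets the scale, and the rect is rotated so that the center-to-scale direction lands at the target angle (-90 degrees, i.e. pointing up in image coordinates). A simplified sketch of that reading, ignoring the image-size aspect correction:

import math

# Hedged sketch of the alignment-points-to-rect step (simplified, not the
# calculator's exact code): side length is twice the point distance.
def alignment_points_to_rect(center, scale_pt, target_deg=-90.0):
    cx, cy = center
    sx, sy = scale_pt
    size = 2.0 * math.hypot(sx - cx, sy - cy)
    angle = math.atan2(-(sy - cy), sx - cx)  # minus: image y axis points down
    rotation = math.radians(target_deg) - angle
    return {"x_center": cx, "y_center": cy,
            "width": size, "height": size, "rotation": rotation}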

# TODO: revise hand recrop roi calculation.
# Slightly moves the hand re-crop rectangle from the wrist towards the
# fingertips. Due to the new hand cropping logic, the crop border is too close
# to the fingertips while a lot of space is left below the wrist, so when
# moving the hand up fast (with fingers pointing up) and using the hand rect
# from the previous frame for tracking, fingertips can be cropped. This
# adjustment partially solves that, but the hand cropping logic should be
# reviewed.
node {
  calculator: "RectTransformationCalculator"
  input_stream: "NORM_RECT:hand_roi_from_recrop"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "hand_roi_from_recrop_refined"
  options: {
    [mediapipe.RectTransformationCalculatorOptions.ext] {
      scale_x: 1.0
      scale_y: 1.0
      shift_y: -0.1
      square_long: true
    }
  }
}
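
The refinement shifts the rect by shift_y * height along its own axes (negative y moves it toward the fingertips given the -90 degree alignment) and squares it on the longer side. Ignoring rotation, the effect is roughly:

# Simplified sketch of RectTransformationCalculator with these options,
# ignoring rotation (the real calculator shifts along the rotated axes).
def refine(rect, shift_y=-0.1, square_long=True):
    out = dict(rect)
    out["y_center"] += shift_y * out["height"]
    if square_long:
        out["width"] = out["height"] = max(out["width"], out["height"])
    return out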

@ -0,0 +1,137 @@
# Predicts more accurate hand location (re-crop ROI) within a given ROI.

type: "HandRecropByRoiOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:input_video"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Refined (more accurate) ROI to use for hand landmark prediction.
# (NormalizedRect)
output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined"

# Transforms hand ROI from the input image to a 256x256 tensor. Preserves aspect
# ratio, which results in a letterbox padding.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:initial_crop_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      # For OpenGL, the origin should be at the top left corner.
      gpu_origin: TOP_LEFT,
    }
  }
}

# Predicts hand re-crop rectangle.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:initial_crop_tensor"
  output_stream: "TENSORS:landmark_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/holistic_landmark/hand_recrop.onnx"
      delegate { tensorrt {} }
    }
  }
}

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model. Two
# landmarks represent two virtual points: crop and scale of the new crop.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 2
      input_image_width: 256
      input_image_height: 256
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:alignment_landmarks"
}

# Converts hand landmarks to a detection that tightly encloses all landmarks.
node {
  calculator: "LandmarksToDetectionCalculator"
  input_stream: "NORM_LANDMARKS:alignment_landmarks"
  output_stream: "DETECTION:hand_detection"
}

# Extracts image size from the input images.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Converts hand detection into a rectangle based on center and scale alignment
# points.
node {
  calculator: "AlignmentPointsRectsCalculator"
  input_stream: "DETECTION:hand_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECT:hand_roi_from_recrop"
  options: {
    [mediapipe.DetectionsToRectsCalculatorOptions.ext] {
      rotation_vector_start_keypoint_index: 0
      rotation_vector_end_keypoint_index: 1
      rotation_vector_target_angle_degrees: -90
    }
  }
}

# TODO: revise hand recrop roi calculation.
# Slightly moves the hand re-crop rectangle from the wrist towards the
# fingertips. Due to the new hand cropping logic, the crop border is too close
# to the fingertips while a lot of space is left below the wrist, so when
# moving the hand up fast (with fingers pointing up) and using the hand rect
# from the previous frame for tracking, fingertips can be cropped. This
# adjustment partially solves that, but the hand cropping logic should be
# reviewed.
node {
  calculator: "RectTransformationCalculator"
  input_stream: "NORM_RECT:hand_roi_from_recrop"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "hand_roi_from_recrop_refined"
  options: {
    [mediapipe.RectTransformationCalculatorOptions.ext] {
      scale_x: 1.0
      scale_y: 1.0
      shift_y: -0.1
      square_long: true
    }
  }
}

@ -0,0 +1,146 @@
# Predicts pose + left/right hand + face landmarks.
#
# It is required that:
# - "face_detection_short_range.onnx" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.onnx"
#
# - "face_landmark.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark.onnx"
#
# - "hand_landmark_full.onnx" is available at
# "mediapipe/modules/hand_landmark/hand_landmark_full.onnx"
#
# - "hand_recrop.onnx" is available at
# "mediapipe/modules/holistic_landmark/hand_recrop.onnx"
#
# - "handedness.txt" is available at
# "mediapipe/modules/hand_landmark/handedness.txt"
#
# - "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
#
# - "pose_landmark_lite.onnx" or "pose_landmark_full.onnx" or
# "pose_landmark_heavy.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
# node {
#   calculator: "HolisticLandmarkOnnxCUDA"
#   input_stream: "IMAGE:input_video"
#   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#   input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
#   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#   input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
#   input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
#   input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#   output_stream: "POSE_LANDMARKS:pose_landmarks"
#   output_stream: "FACE_LANDMARKS:face_landmarks"
#   output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
#   output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# }
#
# NOTE: if a pose/hand/face output is not present in the image, for this
# particular timestamp there will not be an output packet in the corresponding
# output stream below. However, the MediaPipe framework will internally inform
# the downstream calculators of the absence of this packet so that they don't
# wait for it unnecessarily.

type: "HolisticLandmarkOnnxCUDA"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"

# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Pose landmarks. (NormalizedLandmarkList)
# 33 pose landmarks.
output_stream: "POSE_LANDMARKS:pose_landmarks"
# 33 pose world landmarks. (LandmarkList)
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
# 21 left hand landmarks. (NormalizedLandmarkList)
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# 21 right hand landmarks. (NormalizedLandmarkList)
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# 468 face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"

# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Debug outputs
output_stream: "POSE_ROI:pose_landmarks_roi"
output_stream: "POSE_DETECTION:pose_detection"

# Predicts pose landmarks.
node {
  calculator: "PoseLandmarkOnnxCUDA"
  input_stream: "IMAGE:image"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
  input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi"
  output_stream: "DETECTION:pose_detection"
}

# Predicts left and right hand landmarks based on the initial pose landmarks.
node {
  calculator: "HandLandmarksLeftAndRightOnnxCUDA"
  input_stream: "IMAGE:image"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Extracts face-related pose landmarks.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "pose_landmarks"
  output_stream: "face_landmarks_from_pose"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 11 }
    }
  }
}

# Predicts face landmarks based on the initial pose landmarks.
node {
  calculator: "FaceLandmarksFromPoseOnnxCUDA"
  input_stream: "IMAGE:image"
  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
  input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
  output_stream: "FACE_LANDMARKS:face_landmarks"
}

@ -0,0 +1,146 @@
# Predicts pose + left/right hand + face landmarks.
#
# It is required that:
# - "face_detection_short_range.onnx" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.onnx"
#
# - "face_landmark.onnx" is available at
# "mediapipe/modules/face_landmark/face_landmark.onnx"
#
# - "hand_landmark_full.onnx" is available at
# "mediapipe/modules/hand_landmark/hand_landmark_full.onnx"
#
# - "hand_recrop.onnx" is available at
# "mediapipe/modules/holistic_landmark/hand_recrop.onnx"
#
# - "handedness.txt" is available at
# "mediapipe/modules/hand_landmark/handedness.txt"
#
# - "pose_detection.onnx" is available at
# "mediapipe/modules/pose_detection/pose_detection.onnx"
#
# - "pose_landmark_lite.onnx" or "pose_landmark_full.onnx" or
# "pose_landmark_heavy.onnx" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
# node {
#   calculator: "HolisticLandmarkOnnxTensorRT"
#   input_stream: "IMAGE:input_video"
#   input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#   input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
#   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#   input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
#   input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
#   input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#   output_stream: "POSE_LANDMARKS:pose_landmarks"
#   output_stream: "FACE_LANDMARKS:face_landmarks"
#   output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
#   output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# }
#
# NOTE: if a pose/hand/face output is not present in the image, for this
# particular timestamp there will not be an output packet in the corresponding
# output stream below. However, the MediaPipe framework will internally inform
# the downstream calculators of the absence of this packet so that they don't
# wait for it unnecessarily.

type: "HolisticLandmarkOnnxTensorRT"

# CPU image. (ImageFrame)
input_stream: "IMAGE:image"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"

# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Pose landmarks. (NormalizedLandmarkList)
# 33 pose landmarks.
output_stream: "POSE_LANDMARKS:pose_landmarks"
# 33 pose world landmarks. (LandmarkList)
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
# 21 left hand landmarks. (NormalizedLandmarkList)
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# 21 right hand landmarks. (NormalizedLandmarkList)
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# 468 face landmarks. (NormalizedLandmarkList)
output_stream: "FACE_LANDMARKS:face_landmarks"

# Segmentation mask. (ImageFrame in ImageFormat::VEC32F1)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Debug outputs
output_stream: "POSE_ROI:pose_landmarks_roi"
output_stream: "POSE_DETECTION:pose_detection"

# Predicts pose landmarks.
node {
  calculator: "PoseLandmarkOnnxTensorRT"
  input_stream: "IMAGE:image"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
  input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi"
  output_stream: "DETECTION:pose_detection"
}

# Predicts left and right hand landmarks based on the initial pose landmarks.
node {
  calculator: "HandLandmarksLeftAndRightOnnxTensorRT"
  input_stream: "IMAGE:image"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Extracts face-related pose landmarks.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "pose_landmarks"
  output_stream: "face_landmarks_from_pose"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 11 }
    }
  }
}

# Predicts face landmarks based on the initial pose landmarks.
node {
  calculator: "FaceLandmarksFromPoseOnnxTensorRT"
  input_stream: "IMAGE:image"
  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
  input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
  output_stream: "FACE_LANDMARKS:face_landmarks"
}