holistic: support onnxruntime CUDA and TensorRT
This commit is contained in:
parent 008ed46ee0
commit 8f7e36b344
@@ -32,6 +32,38 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "holistic_tracking_onnx_cuda",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main",
+        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_cuda_graph_deps",
+    ],
+)
+
+cc_binary(
+    name = "holistic_tracking_onnx_cuda_fps",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
+        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_cuda_graph_deps",
+    ],
+)
+
+cc_binary(
+    name = "holistic_tracking_onnx_tensorrt",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main",
+        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_tensorrt_graph_deps",
+    ],
+)
+
+cc_binary(
+    name = "holistic_tracking_onnx_tensorrt_fps",
+    deps = [
+        "//mediapipe/examples/desktop:demo_run_graph_main_fps",
+        "//mediapipe/graphs/holistic_tracking:holistic_tracking_onnx_tensorrt_graph_deps",
+    ],
+)
+
 # Linux only
 cc_binary(
     name = "holistic_tracking_gpu",
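The new binaries reuse the existing demo_run_graph_main / demo_run_graph_main_fps runners and only swap in the linked graph dependencies. For context, a rough sketch of what such a runner does; this is illustrative only (the real helper additionally handles command-line flags, video files and window output):

#include <cstdint>
#include <memory>
#include <string>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/opencv_video_inc.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

// Illustrative runner: load the graph text, feed webcam frames into
// "input_video" and poll rendered frames from "output_video".
absl::Status RunHolisticGraph(const std::string& graph_config_contents) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          graph_config_contents);
  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("output_video"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  cv::VideoCapture capture(0);
  cv::Mat frame_bgr;
  int64_t frame_id = 0;
  while (capture.read(frame_bgr)) {
    // Wrap the camera frame as an RGB ImageFrame packet with an
    // increasing timestamp (required by FlowLimiterCalculator).
    auto input = std::make_unique<mediapipe::ImageFrame>(
        mediapipe::ImageFormat::SRGB, frame_bgr.cols, frame_bgr.rows,
        mediapipe::ImageFrame::kDefaultAlignmentBoundary);
    cv::Mat input_view = mediapipe::formats::MatView(input.get());
    cv::cvtColor(frame_bgr, input_view, cv::COLOR_BGR2RGB);
    MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
        "input_video", mediapipe::Adopt(input.release())
                           .At(mediapipe::Timestamp(frame_id++))));
    mediapipe::Packet packet;
    if (!poller.Next(&packet)) break;  // graph finished or failed
    const auto& output = packet.Get<mediapipe::ImageFrame>();
    (void)output;  // ... convert back to cv::Mat and display/save ...
  }
  MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));
  return graph.WaitUntilDone();
}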
@@ -68,3 +68,27 @@ cc_library(
         "//mediapipe/modules/holistic_landmark:holistic_landmark_cpu",
     ],
 )
+
+cc_library(
+    name = "holistic_tracking_onnx_cuda_graph_deps",
+    deps = [
+        ":holistic_tracking_to_render_data",
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/modules/holistic_landmark:holistic_landmark_onnx_cuda",
+    ],
+)
+
+cc_library(
+    name = "holistic_tracking_onnx_tensorrt_graph_deps",
+    deps = [
+        ":holistic_tracking_to_render_data",
+        "//mediapipe/calculators/core:constant_side_packet_calculator",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/calculators/util:annotation_overlay_calculator",
+        "//mediapipe/modules/holistic_landmark:holistic_landmark_onnx_tensorrt",
+    ],
+)
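These *_graph_deps libraries carry no graph logic of their own: a MediaPipe graph is data, and the binary resolves calculator names through a registry populated during static initialization, so every calculator and subgraph a .pbtxt references must simply be linked in. A toy calculator (not part of this commit) showing the registration mechanism:

#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Pass-through calculator; the important part is REGISTER_CALCULATOR, which
// makes the class findable by the name "ExamplePassThroughCalculator" from
// any .pbtxt graph in a binary that links this translation unit.
class ExamplePassThroughCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).SetAny();
    cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
    return absl::OkStatus();
  }
  absl::Status Process(CalculatorContext* cc) override {
    cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
    return absl::OkStatus();
  }
};
REGISTER_CALCULATOR(ExamplePassThroughCalculator);

}  // namespace mediapipe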
@@ -0,0 +1,75 @@
+# Tracks and renders pose + hands + face landmarks.
+
+# CPU image. (ImageFrame)
+input_stream: "input_video"
+
+# CPU image with rendered results. (ImageFrame)
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph to
+# 1. This prevents the downstream nodes from queuing up incoming images and data
+# excessively, which leads to increased latency and memory usage, unwanted in
+# real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
+      max_in_flight: 1
+      max_in_queue: 1
+      # Timeout is disabled (set to 0) as first frame processing can take more
+      # than 1 second.
+      in_flight_timeout: 0
+    }
+  }
+}
+
+node {
+  calculator: "HolisticLandmarkOnnxCUDA"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "POSE_LANDMARKS:pose_landmarks"
+  output_stream: "POSE_ROI:pose_roi"
+  output_stream: "POSE_DETECTION:pose_detection"
+  output_stream: "FACE_LANDMARKS:face_landmarks"
+  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
+  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
+}
+
+# Gets image size.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts pose, hands and face landmarks to a render data vector.
+node {
+  calculator: "HolisticTrackingToRenderData"
+  input_stream: "IMAGE_SIZE:image_size"
+  input_stream: "POSE_LANDMARKS:pose_landmarks"
+  input_stream: "POSE_ROI:pose_roi"
+  input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
+  input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
+  input_stream: "FACE_LANDMARKS:face_landmarks"
+  output_stream: "RENDER_DATA_VECTOR:render_data_vector"
+}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "VECTOR:render_data_vector"
+  output_stream: "IMAGE:output_video"
+}
@@ -0,0 +1,75 @@
+# Tracks and renders pose + hands + face landmarks.
+
+# CPU image. (ImageFrame)
+input_stream: "input_video"
+
+# CPU image with rendered results. (ImageFrame)
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph to
+# 1. This prevents the downstream nodes from queuing up incoming images and data
+# excessively, which leads to increased latency and memory usage, unwanted in
+# real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:output_video"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.FlowLimiterCalculatorOptions] {
+      max_in_flight: 1
+      max_in_queue: 1
+      # Timeout is disabled (set to 0) as first frame processing can take more
+      # than 1 second.
+      in_flight_timeout: 0
+    }
+  }
+}
+
+node {
+  calculator: "HolisticLandmarkOnnxTensorRT"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "POSE_LANDMARKS:pose_landmarks"
+  output_stream: "POSE_ROI:pose_roi"
+  output_stream: "POSE_DETECTION:pose_detection"
+  output_stream: "FACE_LANDMARKS:face_landmarks"
+  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
+  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
+}
+
+# Gets image size.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts pose, hands and face landmarks to a render data vector.
+node {
+  calculator: "HolisticTrackingToRenderData"
+  input_stream: "IMAGE_SIZE:image_size"
+  input_stream: "POSE_LANDMARKS:pose_landmarks"
+  input_stream: "POSE_ROI:pose_roi"
+  input_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
+  input_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
+  input_stream: "FACE_LANDMARKS:face_landmarks"
+  output_stream: "RENDER_DATA_VECTOR:render_data_vector"
+}
+
+# Draws annotations and overlays them on top of the input images.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "VECTOR:render_data_vector"
+  output_stream: "IMAGE:output_video"
+}
@@ -59,6 +59,46 @@ mediapipe_simple_subgraph(
     ],
 )
 
+mediapipe_simple_subgraph(
+    name = "hand_landmark_onnx_cuda",
+    graph = "hand_landmark_onnx_cuda.pbtxt",
+    register_as = "HandLandmarkOnnxCUDA",
+    deps = [
+        ":hand_landmark_model_loader",
+        "//mediapipe/calculators/core:gate_calculator",
+        "//mediapipe/calculators/core:split_vector_calculator",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
+        "//mediapipe/calculators/tensor:tensors_to_classification_calculator",
+        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
+        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
+        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
+        "//mediapipe/calculators/util:landmark_projection_calculator",
+        "//mediapipe/calculators/util:thresholding_calculator",
+        "//mediapipe/calculators/util:world_landmark_projection_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "hand_landmark_onnx_tensorrt",
+    graph = "hand_landmark_onnx_tensorrt.pbtxt",
+    register_as = "HandLandmarkOnnxTensorRT",
+    deps = [
+        ":hand_landmark_model_loader",
+        "//mediapipe/calculators/core:gate_calculator",
+        "//mediapipe/calculators/core:split_vector_calculator",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
+        "//mediapipe/calculators/tensor:tensors_to_classification_calculator",
+        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
+        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
+        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
+        "//mediapipe/calculators/util:landmark_projection_calculator",
+        "//mediapipe/calculators/util:thresholding_calculator",
+        "//mediapipe/calculators/util:world_landmark_projection_calculator",
+    ],
+)
+
 mediapipe_simple_subgraph(
     name = "hand_landmark_gpu",
     graph = "hand_landmark_gpu.pbtxt",
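mediapipe_simple_subgraph compiles each .pbtxt into a subgraph registered under its register_as name, so other graphs and C++ code can instantiate it exactly like a single calculator. A hypothetical minimal use of the name registered above (stream names chosen for illustration):

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// The subgraph registered as "HandLandmarkOnnxCUDA" is addressed by that
// name, just like a plain calculator node.
mediapipe::CalculatorGraphConfig MakeHandLandmarkConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "image"
    input_stream: "hand_rect"
    output_stream: "hand_landmarks"
    node {
      calculator: "HandLandmarkOnnxCUDA"
      input_stream: "IMAGE:image"
      input_stream: "ROI:hand_rect"
      output_stream: "LANDMARKS:hand_landmarks"
    }
  )pb");
}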
mediapipe/modules/hand_landmark/hand_landmark_onnx_cuda.pbtxt (new file, 205 lines)
@@ -0,0 +1,205 @@
+# MediaPipe graph to detect/predict hand landmarks with onnxruntime (CUDA).
+
+type: "HandLandmarkOnnxCUDA"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+# ROI (region of interest) within the given image where a palm/hand is located.
+# (NormalizedRect)
+input_stream: "ROI:hand_rect"
+
+# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
+# NOTE: if a hand is not present within the given ROI, for this particular
+# timestamp there will not be an output packet in the LANDMARKS stream. However,
+# the MediaPipe framework will internally inform the downstream calculators of
+# the absence of this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:hand_landmarks"
+
+# Hand world landmarks within the given ROI. (LandmarkList)
+# World landmarks are real-world 3D coordinates in meters with the origin in the
+# center of the given ROI.
+#
+# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
+# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
+# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
+# the 3D object itself.
+output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
+
+# Handedness of the detected hand (i.e., whether it is a left or right hand).
+# (ClassificationList)
+output_stream: "HANDEDNESS:handedness"
+
+# Transforms a region of the image into a 224x224 tensor while keeping the
+# aspect ratio, which may result in letterboxing.
+node {
+  calculator: "ImageToTensorCalculator"
+  input_stream: "IMAGE:image"
+  input_stream: "NORM_RECT:hand_rect"
+  output_stream: "TENSORS:input_tensor"
+  output_stream: "LETTERBOX_PADDING:letterbox_padding"
+  options: {
+    [mediapipe.ImageToTensorCalculatorOptions.ext] {
+      output_tensor_width: 224
+      output_tensor_height: 224
+      keep_aspect_ratio: true
+      output_tensor_float_range {
+        min: 0.0
+        max: 1.0
+      }
+    }
+  }
+}
+
+# Runs an ONNX model (via the onnxruntime CUDA delegate) that takes an image
+# tensor and outputs a vector of tensors representing, for instance, detection
+# boxes/keypoints and scores.
+node {
+  calculator: "InferenceCalculator"
+  input_stream: "TENSORS:input_tensor"
+  output_stream: "TENSORS:output_tensors"
+  options: {
+    [mediapipe.InferenceCalculatorOptions.ext] {
+      model_path: "mediapipe/modules/hand_landmark/hand_landmark_lite.onnx"
+      delegate { cuda {} }
+    }
+  }
+}
+
+# Splits a vector of tensors into multiple vectors according to the ranges
+# specified in the options.
+node {
+  calculator: "SplitTensorVectorCalculator"
+  input_stream: "output_tensors"
+  output_stream: "landmark_tensors"
+  output_stream: "hand_flag_tensor"
+  output_stream: "handedness_tensor"
+  output_stream: "world_landmark_tensor"
+  options: {
+    [mediapipe.SplitVectorCalculatorOptions.ext] {
+      ranges: { begin: 0 end: 1 }
+      ranges: { begin: 1 end: 2 }
+      ranges: { begin: 2 end: 3 }
+      ranges: { begin: 3 end: 4 }
+    }
+  }
+}
+
+# Converts the hand-flag tensor into a float that represents the confidence
+# score of hand presence.
+node {
+  calculator: "TensorsToFloatsCalculator"
+  input_stream: "TENSORS:hand_flag_tensor"
+  output_stream: "FLOAT:hand_presence_score"
+}
+
+# Applies a threshold to the confidence score to determine whether a hand is
+# present.
+node {
+  calculator: "ThresholdingCalculator"
+  input_stream: "FLOAT:hand_presence_score"
+  output_stream: "FLAG:hand_presence"
+  options: {
+    [mediapipe.ThresholdingCalculatorOptions.ext] {
+      threshold: 0.5
+    }
+  }
+}
+
+# Drops the handedness tensor if a hand is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "handedness_tensor"
+  input_stream: "ALLOW:hand_presence"
+  output_stream: "ensured_handedness_tensor"
+}
+
+# Converts the handedness tensor into a classification of handedness.
+node {
+  calculator: "TensorsToClassificationCalculator"
+  input_stream: "TENSORS:ensured_handedness_tensor"
+  output_stream: "CLASSIFICATIONS:handedness"
+  options: {
+    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
+      top_k: 1
+      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
+      binary_classification: true
+    }
+  }
+}
+
+# Drops landmark tensors if a hand is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "landmark_tensors"
+  input_stream: "ALLOW:hand_presence"
+  output_stream: "ensured_landmark_tensors"
+}
+
+# Decodes the landmark tensors into a list of landmarks, where the landmark
+# coordinates are normalized by the size of the input image to the model.
+node {
+  calculator: "TensorsToLandmarksCalculator"
+  input_stream: "TENSORS:ensured_landmark_tensors"
+  output_stream: "NORM_LANDMARKS:landmarks"
+  options: {
+    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
+      num_landmarks: 21
+      input_image_width: 224
+      input_image_height: 224
+      # The additional scaling factor is used to account for the Z coordinate
+      # distribution in the training data.
+      normalize_z: 0.4
+    }
+  }
+}
+
+# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
+# image (after image transformation with the FIT scale mode) to the
+# corresponding locations on the same image with the letterbox removed (hand
+# image before image transformation).
+node {
+  calculator: "LandmarkLetterboxRemovalCalculator"
+  input_stream: "LANDMARKS:landmarks"
+  input_stream: "LETTERBOX_PADDING:letterbox_padding"
+  output_stream: "LANDMARKS:scaled_landmarks"
+}
+
+# Projects the landmarks from the cropped hand image to the corresponding
+# locations on the full image before cropping (input to the graph).
+node {
+  calculator: "LandmarkProjectionCalculator"
+  input_stream: "NORM_LANDMARKS:scaled_landmarks"
+  input_stream: "NORM_RECT:hand_rect"
+  output_stream: "NORM_LANDMARKS:hand_landmarks"
+}
+
+# Drops world landmark tensors if a hand is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "world_landmark_tensor"
+  input_stream: "ALLOW:hand_presence"
+  output_stream: "ensured_world_landmark_tensor"
+}
+
+# Decodes the world landmark tensors into a list of landmarks.
+node {
+  calculator: "TensorsToLandmarksCalculator"
+  input_stream: "TENSORS:ensured_world_landmark_tensor"
+  output_stream: "LANDMARKS:unprojected_world_landmarks"
+  options: {
+    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
+      num_landmarks: 21
+    }
+  }
+}
+
+# Projects the world landmarks from the cropped hand image to the corresponding
+# locations on the full image before cropping (input to the graph).
+node {
+  calculator: "WorldLandmarkProjectionCalculator"
+  input_stream: "LANDMARKS:unprojected_world_landmarks"
+  input_stream: "NORM_RECT:hand_rect"
+  output_stream: "LANDMARKS:hand_world_landmarks"
+}
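The inference_calculator_onnx_cuda target and the delegate { cuda {} } option are additions of this fork rather than upstream MediaPipe. Under the hood they presumably construct an onnxruntime session with the CUDA execution provider, along the lines of this sketch (standard ONNX Runtime C++ API; the function name and defaults are illustrative):

#include <onnxruntime_cxx_api.h>

// Sketch: create an onnxruntime session that prefers the CUDA execution
// provider; onnxruntime falls back to the CPU provider for any operator
// the CUDA provider does not support.
Ort::Session MakeCudaSession(Ort::Env& env, const char* model_path) {
  Ort::SessionOptions options;
  OrtCUDAProviderOptions cuda_options{};  // device_id 0 by default
  options.AppendExecutionProvider_CUDA(cuda_options);
  return Ort::Session(env, model_path, options);
}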
mediapipe/modules/hand_landmark/hand_landmark_onnx_tensorrt.pbtxt (new file, 205 lines)
@@ -0,0 +1,205 @@
+# MediaPipe graph to detect/predict hand landmarks with onnxruntime (TensorRT).
+
+type: "HandLandmarkOnnxTensorRT"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:image"
+# ROI (region of interest) within the given image where a palm/hand is located.
+# (NormalizedRect)
+input_stream: "ROI:hand_rect"
+
+# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
+# NOTE: if a hand is not present within the given ROI, for this particular
+# timestamp there will not be an output packet in the LANDMARKS stream. However,
+# the MediaPipe framework will internally inform the downstream calculators of
+# the absence of this packet so that they don't wait for it unnecessarily.
+output_stream: "LANDMARKS:hand_landmarks"
+
+# Hand world landmarks within the given ROI. (LandmarkList)
+# World landmarks are real-world 3D coordinates in meters with the origin in the
+# center of the given ROI.
+#
+# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
+# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
+# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
+# the 3D object itself.
+output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
+
+# Handedness of the detected hand (i.e., whether it is a left or right hand).
+# (ClassificationList)
+output_stream: "HANDEDNESS:handedness"
+
+# Transforms a region of the image into a 224x224 tensor while keeping the
+# aspect ratio, which may result in letterboxing.
+node {
+  calculator: "ImageToTensorCalculator"
+  input_stream: "IMAGE:image"
+  input_stream: "NORM_RECT:hand_rect"
+  output_stream: "TENSORS:input_tensor"
+  output_stream: "LETTERBOX_PADDING:letterbox_padding"
+  options: {
+    [mediapipe.ImageToTensorCalculatorOptions.ext] {
+      output_tensor_width: 224
+      output_tensor_height: 224
+      keep_aspect_ratio: true
+      output_tensor_float_range {
+        min: 0.0
+        max: 1.0
+      }
+    }
+  }
+}
+
+# Runs an ONNX model (via the onnxruntime TensorRT delegate) that takes an
+# image tensor and outputs a vector of tensors representing, for instance,
+# detection boxes/keypoints and scores.
+node {
+  calculator: "InferenceCalculator"
+  input_stream: "TENSORS:input_tensor"
+  output_stream: "TENSORS:output_tensors"
+  options: {
+    [mediapipe.InferenceCalculatorOptions.ext] {
+      model_path: "mediapipe/modules/hand_landmark/hand_landmark_lite.onnx"
+      delegate { tensorrt {} }
+    }
+  }
+}
+
+# Splits a vector of tensors into multiple vectors according to the ranges
+# specified in the options.
+node {
+  calculator: "SplitTensorVectorCalculator"
+  input_stream: "output_tensors"
+  output_stream: "landmark_tensors"
+  output_stream: "hand_flag_tensor"
+  output_stream: "handedness_tensor"
+  output_stream: "world_landmark_tensor"
+  options: {
+    [mediapipe.SplitVectorCalculatorOptions.ext] {
+      ranges: { begin: 0 end: 1 }
+      ranges: { begin: 1 end: 2 }
+      ranges: { begin: 2 end: 3 }
+      ranges: { begin: 3 end: 4 }
+    }
+  }
+}
+
+# Converts the hand-flag tensor into a float that represents the confidence
+# score of hand presence.
+node {
+  calculator: "TensorsToFloatsCalculator"
+  input_stream: "TENSORS:hand_flag_tensor"
+  output_stream: "FLOAT:hand_presence_score"
+}
+
+# Applies a threshold to the confidence score to determine whether a hand is
+# present.
+node {
+  calculator: "ThresholdingCalculator"
+  input_stream: "FLOAT:hand_presence_score"
+  output_stream: "FLAG:hand_presence"
+  options: {
+    [mediapipe.ThresholdingCalculatorOptions.ext] {
+      threshold: 0.5
+    }
+  }
+}
+
+# Drops the handedness tensor if a hand is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "handedness_tensor"
+  input_stream: "ALLOW:hand_presence"
+  output_stream: "ensured_handedness_tensor"
+}
+
+# Converts the handedness tensor into a classification of handedness.
+node {
+  calculator: "TensorsToClassificationCalculator"
+  input_stream: "TENSORS:ensured_handedness_tensor"
+  output_stream: "CLASSIFICATIONS:handedness"
+  options: {
+    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
+      top_k: 1
+      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
+      binary_classification: true
+    }
+  }
+}
+
+# Drops landmark tensors if a hand is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "landmark_tensors"
+  input_stream: "ALLOW:hand_presence"
+  output_stream: "ensured_landmark_tensors"
+}
+
+# Decodes the landmark tensors into a list of landmarks, where the landmark
+# coordinates are normalized by the size of the input image to the model.
+node {
+  calculator: "TensorsToLandmarksCalculator"
+  input_stream: "TENSORS:ensured_landmark_tensors"
+  output_stream: "NORM_LANDMARKS:landmarks"
+  options: {
+    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
+      num_landmarks: 21
+      input_image_width: 224
+      input_image_height: 224
+      # The additional scaling factor is used to account for the Z coordinate
+      # distribution in the training data.
+      normalize_z: 0.4
+    }
+  }
+}
+
+# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
+# image (after image transformation with the FIT scale mode) to the
+# corresponding locations on the same image with the letterbox removed (hand
+# image before image transformation).
+node {
+  calculator: "LandmarkLetterboxRemovalCalculator"
+  input_stream: "LANDMARKS:landmarks"
+  input_stream: "LETTERBOX_PADDING:letterbox_padding"
+  output_stream: "LANDMARKS:scaled_landmarks"
+}
+
+# Projects the landmarks from the cropped hand image to the corresponding
+# locations on the full image before cropping (input to the graph).
+node {
+  calculator: "LandmarkProjectionCalculator"
+  input_stream: "NORM_LANDMARKS:scaled_landmarks"
+  input_stream: "NORM_RECT:hand_rect"
+  output_stream: "NORM_LANDMARKS:hand_landmarks"
+}
+
+# Drops world landmark tensors if a hand is not present.
+node {
+  calculator: "GateCalculator"
+  input_stream: "world_landmark_tensor"
+  input_stream: "ALLOW:hand_presence"
+  output_stream: "ensured_world_landmark_tensor"
+}
+
+# Decodes the world landmark tensors into a list of landmarks.
+node {
+  calculator: "TensorsToLandmarksCalculator"
+  input_stream: "TENSORS:ensured_world_landmark_tensor"
+  output_stream: "LANDMARKS:unprojected_world_landmarks"
+  options: {
+    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
+      num_landmarks: 21
+    }
+  }
+}
+
+# Projects the world landmarks from the cropped hand image to the corresponding
+# locations on the full image before cropping (input to the graph).
+node {
+  calculator: "WorldLandmarkProjectionCalculator"
+  input_stream: "LANDMARKS:unprojected_world_landmarks"
+  input_stream: "NORM_RECT:hand_rect"
+  output_stream: "LANDMARKS:hand_world_landmarks"
+}
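The TensorRT variant differs only in the delegate option. With onnxruntime, execution providers are tried in registration order, so a TensorRT-first session typically keeps CUDA as a fallback for operators TensorRT cannot handle. An illustrative sketch, again assuming the standard ONNX Runtime C++ API rather than this fork's actual calculator internals:

#include <onnxruntime_cxx_api.h>

// Sketch: TensorRT execution provider first, CUDA as fallback. Note that
// TensorRT builds an engine on first use, which is why the graphs above
// disable the FlowLimiter timeout for the first frame.
Ort::Session MakeTensorRTSession(Ort::Env& env, const char* model_path) {
  Ort::SessionOptions options;
  OrtTensorRTProviderOptions trt_options{};
  options.AppendExecutionProvider_TensorRT(trt_options);
  OrtCUDAProviderOptions cuda_options{};
  options.AppendExecutionProvider_CUDA(cuda_options);
  return Ort::Session(env, model_path, options);
}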
@@ -53,6 +53,36 @@ mediapipe_simple_subgraph(
     ],
 )
 
+mediapipe_simple_subgraph(
+    name = "face_landmarks_from_pose_onnx_cuda",
+    graph = "face_landmarks_from_pose_onnx_cuda.pbtxt",
+    register_as = "FaceLandmarksFromPoseOnnxCUDA",
+    deps = [
+        ":face_detection_front_detections_to_roi",
+        ":face_landmarks_from_pose_to_recrop_roi",
+        ":face_tracking",
+        "//mediapipe/calculators/core:split_proto_list_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/modules/face_detection:face_detection_short_range_by_roi_onnx_cuda",
+        "//mediapipe/modules/face_landmark:face_landmark_onnx_cuda",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "face_landmarks_from_pose_onnx_tensorrt",
+    graph = "face_landmarks_from_pose_onnx_tensorrt.pbtxt",
+    register_as = "FaceLandmarksFromPoseOnnxTensorRT",
+    deps = [
+        ":face_detection_front_detections_to_roi",
+        ":face_landmarks_from_pose_to_recrop_roi",
+        ":face_tracking",
+        "//mediapipe/calculators/core:split_proto_list_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/modules/face_detection:face_detection_short_range_by_roi_onnx_tensorrt",
+        "//mediapipe/modules/face_landmark:face_landmark_onnx_tensorrt",
+    ],
+)
+
 mediapipe_simple_subgraph(
     name = "face_landmarks_to_roi",
     graph = "face_landmarks_to_roi.pbtxt",
@@ -126,6 +156,36 @@ mediapipe_simple_subgraph(
     ],
 )
 
+mediapipe_simple_subgraph(
+    name = "hand_landmarks_from_pose_onnx_cuda",
+    graph = "hand_landmarks_from_pose_onnx_cuda.pbtxt",
+    register_as = "HandLandmarksFromPoseOnnxCUDA",
+    deps = [
+        ":hand_landmarks_from_pose_to_recrop_roi",
+        ":hand_recrop_by_roi_onnx_cuda",
+        ":hand_tracking",
+        ":hand_visibility_from_hand_landmarks_from_pose",
+        "//mediapipe/calculators/core:gate_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/modules/hand_landmark:hand_landmark_onnx_cuda",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "hand_landmarks_from_pose_onnx_tensorrt",
+    graph = "hand_landmarks_from_pose_onnx_tensorrt.pbtxt",
+    register_as = "HandLandmarksFromPoseOnnxTensorRT",
+    deps = [
+        ":hand_landmarks_from_pose_to_recrop_roi",
+        ":hand_recrop_by_roi_onnx_tensorrt",
+        ":hand_tracking",
+        ":hand_visibility_from_hand_landmarks_from_pose",
+        "//mediapipe/calculators/core:gate_calculator",
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/modules/hand_landmark:hand_landmark_onnx_tensorrt",
+    ],
+)
+
 mediapipe_simple_subgraph(
     name = "hand_landmarks_to_roi",
     graph = "hand_landmarks_to_roi.pbtxt",
@@ -170,6 +230,40 @@ mediapipe_simple_subgraph(
     ],
 )
 
+mediapipe_simple_subgraph(
+    name = "hand_recrop_by_roi_onnx_cuda",
+    graph = "hand_recrop_by_roi_onnx_cuda.pbtxt",
+    register_as = "HandRecropByRoiOnnxCUDA",
+    deps = [
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda",
+        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
+        "//mediapipe/calculators/util:alignment_points_to_rects_calculator",
+        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
+        "//mediapipe/calculators/util:landmark_projection_calculator",
+        "//mediapipe/calculators/util:landmarks_to_detection_calculator",
+        "//mediapipe/calculators/util:rect_transformation_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "hand_recrop_by_roi_onnx_tensorrt",
+    graph = "hand_recrop_by_roi_onnx_tensorrt.pbtxt",
+    register_as = "HandRecropByRoiOnnxTensorRT",
+    deps = [
+        "//mediapipe/calculators/image:image_properties_calculator",
+        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
+        "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt",
+        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
+        "//mediapipe/calculators/util:alignment_points_to_rects_calculator",
+        "//mediapipe/calculators/util:landmark_letterbox_removal_calculator",
+        "//mediapipe/calculators/util:landmark_projection_calculator",
+        "//mediapipe/calculators/util:landmarks_to_detection_calculator",
+        "//mediapipe/calculators/util:rect_transformation_calculator",
+    ],
+)
+
 mediapipe_simple_subgraph(
     name = "hand_tracking",
    graph = "hand_tracking.pbtxt",
@@ -215,6 +309,26 @@ mediapipe_simple_subgraph(
     ],
 )
 
+mediapipe_simple_subgraph(
+    name = "hand_landmarks_left_and_right_onnx_cuda",
+    graph = "hand_landmarks_left_and_right_onnx_cuda.pbtxt",
+    register_as = "HandLandmarksLeftAndRightOnnxCUDA",
+    deps = [
+        ":hand_landmarks_from_pose_onnx_cuda",
+        "//mediapipe/calculators/core:split_proto_list_calculator",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "hand_landmarks_left_and_right_onnx_tensorrt",
+    graph = "hand_landmarks_left_and_right_onnx_tensorrt.pbtxt",
+    register_as = "HandLandmarksLeftAndRightOnnxTensorRT",
+    deps = [
+        ":hand_landmarks_from_pose_onnx_tensorrt",
+        "//mediapipe/calculators/core:split_proto_list_calculator",
+    ],
+)
+
 mediapipe_simple_subgraph(
     name = "hand_landmarks_from_pose_to_recrop_roi",
     graph = "hand_landmarks_from_pose_to_recrop_roi.pbtxt",
@@ -264,3 +378,31 @@ mediapipe_simple_subgraph(
         "//mediapipe/modules/pose_landmark:pose_landmark_cpu",
     ],
 )
+
+mediapipe_simple_subgraph(
+    name = "holistic_landmark_onnx_cuda",
+    graph = "holistic_landmark_onnx_cuda.pbtxt",
+    register_as = "HolisticLandmarkOnnxCUDA",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":face_landmarks_from_pose_onnx_cuda",
+        ":hand_landmarks_left_and_right_onnx_cuda",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/image:image_transformation_calculator",
+        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_cuda",
+    ],
+)
+
+mediapipe_simple_subgraph(
+    name = "holistic_landmark_onnx_tensorrt",
+    graph = "holistic_landmark_onnx_tensorrt.pbtxt",
+    register_as = "HolisticLandmarkOnnxTensorRT",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":face_landmarks_from_pose_onnx_tensorrt",
+        ":hand_landmarks_left_and_right_onnx_tensorrt",
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/image:image_transformation_calculator",
+        "//mediapipe/modules/pose_landmark:pose_landmark_onnx_tensorrt",
+    ],
+)
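The HolisticLandmarkOnnxCUDA/OnnxTensorRT subgraphs expose landmark streams in addition to the rendered video, so a consumer does not have to go through AnnotationOverlayCalculator at all. A hedged sketch of polling pose landmarks from the top-level graph (stream names as declared above; MediaPipe pollers can observe any named stream):

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/status.h"

// Sketch: poll "pose_landmarks" directly instead of (or in addition to)
// the rendered "output_video" stream.
absl::Status ReadPoseLandmarks(mediapipe::CalculatorGraph& graph) {
  // Pollers must be attached after Initialize() and before StartRun().
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("pose_landmarks"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));
  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    const auto& landmarks = packet.Get<mediapipe::NormalizedLandmarkList>();
    for (const auto& lm : landmarks.landmark()) {
      // x()/y() are normalized to [0, 1]; z() is relative depth and
      // visibility() a [0, 1] presence score.
      (void)lm;
    }
  }
  return graph.WaitUntilDone();
}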
mediapipe/modules/holistic_landmark/face_landmarks_from_pose_onnx_cuda.pbtxt (new file)
@@ -0,0 +1,82 @@
+# Predicts face landmarks within an ROI derived from face-related pose
+# landmarks.
+
+type: "FaceLandmarksFromPoseOnnxCUDA"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:input_video"
+# Face-related pose landmarks. (NormalizedLandmarkList)
+input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
+
+# Whether to run the face landmark model with attention on lips and eyes to
+# provide more accuracy, and additionally output iris landmarks. If unspecified,
+# functions as set to false. (bool)
+input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
+
+# Face landmarks. (NormalizedLandmarkList)
+output_stream: "FACE_LANDMARKS:face_landmarks"
+
+# Debug outputs.
+# Face ROI derived from face-related pose landmarks, which defines the search
+# region for the face detection model. (NormalizedRect)
+output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
+# Refined face crop rectangle predicted by face detection model.
+# (NormalizedRect)
+output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
+# Rectangle used to predict face landmarks. (NormalizedRect)
+output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
+
+# TODO: do not predict face when most of the face landmarks from
+# pose are invisible.
+
+# Extracts image size from the input images.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Gets ROI for re-crop model from face-related pose landmarks.
+node {
+  calculator: "FaceLandmarksFromPoseToRecropRoi"
+  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:face_roi_from_pose"
+}
+
+# Detects faces within the face ROI calculated from pose landmarks. This is done
+# to refine face ROI for further landmark detection as ROI calculated from
+# pose landmarks may be inaccurate.
+node {
+  calculator: "FaceDetectionShortRangeByRoiOnnxCUDA"
+  input_stream: "IMAGE:input_video"
+  input_stream: "ROI:face_roi_from_pose"
+  output_stream: "DETECTIONS:face_detections"
+}
+
+# Calculates refined face ROI.
+node {
+  calculator: "FaceDetectionFrontDetectionsToRoi"
+  input_stream: "DETECTIONS:face_detections"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:face_roi_from_detection"
+}
+
+# Gets face tracking rectangle (either face rectangle from the previous
+# frame or face re-crop rectangle from the current frame) for face prediction.
+node {
+  calculator: "FaceTracking"
+  input_stream: "LANDMARKS:face_landmarks"
+  input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
+}
+
+# Predicts face landmarks from the tracking rectangle.
+node {
+  calculator: "FaceLandmarkOnnxCUDA"
+  input_stream: "IMAGE:input_video"
+  input_stream: "ROI:face_tracking_roi"
+  input_side_packet: "WITH_ATTENTION:refine_landmarks"
+  output_stream: "LANDMARKS:face_landmarks"
+}
mediapipe/modules/holistic_landmark/face_landmarks_from_pose_onnx_tensorrt.pbtxt (new file)
@@ -0,0 +1,82 @@
+# Predicts face landmarks within an ROI derived from face-related pose
+# landmarks.
+
+type: "FaceLandmarksFromPoseOnnxTensorRT"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:input_video"
+# Face-related pose landmarks. (NormalizedLandmarkList)
+input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
+
+# Whether to run the face landmark model with attention on lips and eyes to
+# provide more accuracy, and additionally output iris landmarks. If unspecified,
+# functions as set to false. (bool)
+input_side_packet: "REFINE_LANDMARKS:refine_landmarks"
+
+# Face landmarks. (NormalizedLandmarkList)
+output_stream: "FACE_LANDMARKS:face_landmarks"
+
+# Debug outputs.
+# Face ROI derived from face-related pose landmarks, which defines the search
+# region for the face detection model. (NormalizedRect)
+output_stream: "FACE_ROI_FROM_POSE:face_roi_from_pose"
+# Refined face crop rectangle predicted by face detection model.
+# (NormalizedRect)
+output_stream: "FACE_ROI_FROM_DETECTION:face_roi_from_detection"
+# Rectangle used to predict face landmarks. (NormalizedRect)
+output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
+
+# TODO: do not predict face when most of the face landmarks from
+# pose are invisible.
+
+# Extracts image size from the input images.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Gets ROI for re-crop model from face-related pose landmarks.
+node {
+  calculator: "FaceLandmarksFromPoseToRecropRoi"
+  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:face_roi_from_pose"
+}
+
+# Detects faces within the face ROI calculated from pose landmarks. This is done
+# to refine face ROI for further landmark detection as ROI calculated from
+# pose landmarks may be inaccurate.
+node {
+  calculator: "FaceDetectionShortRangeByRoiOnnxTensorRT"
+  input_stream: "IMAGE:input_video"
+  input_stream: "ROI:face_roi_from_pose"
+  output_stream: "DETECTIONS:face_detections"
+}
+
+# Calculates refined face ROI.
+node {
+  calculator: "FaceDetectionFrontDetectionsToRoi"
+  input_stream: "DETECTIONS:face_detections"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:face_roi_from_detection"
+}
+
+# Gets face tracking rectangle (either face rectangle from the previous
+# frame or face re-crop rectangle from the current frame) for face prediction.
+node {
+  calculator: "FaceTracking"
+  input_stream: "LANDMARKS:face_landmarks"
+  input_stream: "FACE_RECROP_ROI:face_roi_from_detection"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "FACE_TRACKING_ROI:face_tracking_roi"
+}
+
+# Predicts face landmarks from the tracking rectangle.
+node {
+  calculator: "FaceLandmarkOnnxTensorRT"
+  input_stream: "IMAGE:input_video"
+  input_stream: "ROI:face_tracking_roi"
+  input_side_packet: "WITH_ATTENTION:refine_landmarks"
+  output_stream: "LANDMARKS:face_landmarks"
+}
mediapipe/modules/holistic_landmark/hand_landmarks_from_pose_onnx_cuda.pbtxt (new file)
@@ -0,0 +1,78 @@
+# Predicts hand landmarks within a ROI derived from hand-related pose landmarks.
+
+type: "HandLandmarksFromPoseOnnxCUDA"
+
+# CPU image. (ImageFrame)
+input_stream: "IMAGE:input_video"
+# Hand-related pose landmarks in [wrist, pinky, index] order.
+# (NormalizedLandmarkList)
+input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
+
+# Hand landmarks. (NormalizedLandmarkList)
+output_stream: "HAND_LANDMARKS:hand_landmarks"
+
+# Debug outputs.
+# Hand ROI derived from hand-related landmarks, which defines the search region
+# for the hand re-crop model. (NormalizedRect)
+output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose"
+# Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect)
+output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
+# Rectangle used to predict hand landmarks. (NormalizedRect)
+output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
+
+# Gets hand visibility.
+node {
+  calculator: "HandVisibilityFromHandLandmarksFromPose"
+  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
+  output_stream: "VISIBILITY:hand_visibility"
+}
+
+# Drops hand-related pose landmarks if the pose wrist is not visible. This
+# prevents hand landmarks from being predicted on the current frame.
+node {
+  calculator: "GateCalculator"
+  input_stream: "hand_landmarks_from_pose"
+  input_stream: "ALLOW:hand_visibility"
+  output_stream: "ensured_hand_landmarks_from_pose"
+}
+
+# Extracts image size from the input images.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Gets ROI for re-crop model from hand-related pose landmarks.
+node {
+  calculator: "HandLandmarksFromPoseToRecropRoi"
+  input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "ROI:hand_roi_from_pose"
+}
+
+# Predicts hand re-crop rectangle on the current frame.
+node {
+  calculator: "HandRecropByRoiOnnxCUDA"
+  input_stream: "IMAGE:input_video"
+  input_stream: "ROI:hand_roi_from_pose"
+  output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
+}
+
+# Gets hand tracking rectangle (either hand rectangle from the previous
+# frame or hand re-crop rectangle from the current frame) for hand prediction.
+node {
+  calculator: "HandTracking"
+  input_stream: "LANDMARKS:hand_landmarks"
+  input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "HAND_TRACKING_ROI:hand_tracking_roi"
+}
+
+# Predicts hand landmarks from the tracking rectangle.
+node {
+  calculator: "HandLandmarkOnnxCUDA"
+  input_stream: "IMAGE:input_video"
+  input_stream: "ROI:hand_tracking_roi"
+  output_stream: "LANDMARKS:hand_landmarks"
+}
|  | @ -0,0 +1,78 @@ | ||||||
|  | # Predicts hand landmarks within a ROI derived from hand-related pose landmarks. | ||||||
|  | 
 | ||||||
|  | type: "HandLandmarksFromPoseOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:input_video" | ||||||
|  | # Hand-related pose landmarks in [wrist, pinky, index] order. | ||||||
|  | # (NormalizedLandmarkList) | ||||||
|  | input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" | ||||||
|  | 
 | ||||||
|  | # Hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "HAND_LANDMARKS:hand_landmarks" | ||||||
|  | 
 | ||||||
|  | # Debug outputs. | ||||||
|  | # Hand ROI derived from hand-related landmarks, which defines the search region | ||||||
|  | # for the hand re-crop model. (NormalizedRect) | ||||||
|  | output_stream: "HAND_ROI_FROM_POSE:hand_roi_from_pose" | ||||||
|  | # Refined hand crop rectangle predicted by hand re-crop model. (NormalizedRect) | ||||||
|  | output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" | ||||||
|  | # Rectangle used to predict hand landmarks. (NormalizedRect) | ||||||
|  | output_stream: "HAND_TRACKING_ROI:hand_tracking_roi" | ||||||
|  | 
 | ||||||
|  | # Gets hand visibility. | ||||||
|  | node { | ||||||
|  |   calculator: "HandVisibilityFromHandLandmarksFromPose" | ||||||
|  |   input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" | ||||||
|  |   output_stream: "VISIBILITY:hand_visibility" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Drops hand-related pose landmarks if the pose wrist is not visible. This | ||||||
|  | # prevents hand landmarks from being predicted on the current frame. | ||||||
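|  | # (GateCalculator forwards its input packets only while ALLOW is true.) | ||||||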
|  | node { | ||||||
|  |   calculator: "GateCalculator" | ||||||
|  |   input_stream: "hand_landmarks_from_pose" | ||||||
|  |   input_stream: "ALLOW:hand_visibility" | ||||||
|  |   output_stream: "ensured_hand_landmarks_from_pose" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts image size from the input images. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Gets ROI for re-crop model from hand-related pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksFromPoseToRecropRoi" | ||||||
|  |   input_stream: "HAND_LANDMARKS_FROM_POSE:hand_landmarks_from_pose" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "ROI:hand_roi_from_pose" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts hand re-crop rectangle on the current frame. | ||||||
|  | node { | ||||||
|  |   calculator: "HandRecropByRoiOnnxTensorRT", | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "ROI:hand_roi_from_pose" | ||||||
|  |   output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Gets hand tracking rectangle (either hand rectangle from the previous | ||||||
|  | # frame or hand re-crop rectangle from the current frame) for hand prediction. | ||||||
|  | node { | ||||||
|  |   calculator: "HandTracking" | ||||||
|  |   input_stream: "LANDMARKS:hand_landmarks" | ||||||
|  |   input_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "HAND_TRACKING_ROI:hand_tracking_roi" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts hand landmarks from the tracking rectangle. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarkOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "ROI:hand_tracking_roi" | ||||||
|  |   output_stream: "LANDMARKS:hand_landmarks" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,76 @@ | ||||||
|  | # Predicts left and right hand landmarks within corresponding ROIs derived from | ||||||
|  | # hand-related pose landmarks. | ||||||
|  | 
 | ||||||
|  | type: "HandLandmarksLeftAndRightOnnxCUDA" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:input_video" | ||||||
|  | # Pose landmarks to derive initial hand location from. (NormalizedLandmarkList) | ||||||
|  | input_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  | 
 | ||||||
|  | # Left hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  | # Right hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | 
 | ||||||
|  | # Debug outputs. | ||||||
|  | output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose" | ||||||
|  | output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" | ||||||
|  | output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi" | ||||||
|  | output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose" | ||||||
|  | output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" | ||||||
|  | output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi" | ||||||
|  | 
 | ||||||
|  | # Extracts left-hand-related landmarks from the pose landmarks. | ||||||
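|  | # Pose landmarks 15, 17 and 19 are the left wrist, pinky and index finger. | ||||||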
|  | node { | ||||||
|  |   calculator: "SplitNormalizedLandmarkListCalculator" | ||||||
|  |   input_stream: "pose_landmarks" | ||||||
|  |   output_stream: "left_hand_landmarks_from_pose" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 15 end: 16 } | ||||||
|  |       ranges: { begin: 17 end: 18 } | ||||||
|  |       ranges: { begin: 19 end: 20 } | ||||||
|  |       combine_outputs: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts left hand landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksFromPoseOnnxCUDA" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose" | ||||||
|  |   output_stream: "HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  |   # Debug outputs. | ||||||
|  |   output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose" | ||||||
|  |   output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" | ||||||
|  |   output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts right-hand-related landmarks from the pose landmarks. | ||||||
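|  | # Pose landmarks 16, 18 and 20 are the right wrist, pinky and index finger. | ||||||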
|  | node { | ||||||
|  |   calculator: "SplitNormalizedLandmarkListCalculator" | ||||||
|  |   input_stream: "pose_landmarks" | ||||||
|  |   output_stream: "right_hand_landmarks_from_pose" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 16 end: 17 } | ||||||
|  |       ranges: { begin: 18 end: 19 } | ||||||
|  |       ranges: { begin: 20 end: 21 } | ||||||
|  |       combine_outputs: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts right hand landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksFromPoseOnnxCUDA" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose" | ||||||
|  |   output_stream: "HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  |   # Debug outputs. | ||||||
|  |   output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose" | ||||||
|  |   output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" | ||||||
|  |   output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,76 @@ | ||||||
|  | # Predicts left and right hand landmarks within corresponding ROIs derived from | ||||||
|  | # hand-related pose landmarks. | ||||||
|  | 
 | ||||||
|  | type: "HandLandmarksLeftAndRightOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:input_video" | ||||||
|  | # Pose landmarks to derive initial hand location from. (NormalizedLandmarkList) | ||||||
|  | input_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  | 
 | ||||||
|  | # Left hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  | # Right hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | 
 | ||||||
|  | # Debug outputs. | ||||||
|  | output_stream: "LEFT_HAND_ROI_FROM_POSE:left_hand_roi_from_pose" | ||||||
|  | output_stream: "LEFT_HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" | ||||||
|  | output_stream: "LEFT_HAND_TRACKING_ROI:left_hand_tracking_roi" | ||||||
|  | output_stream: "RIGHT_HAND_ROI_FROM_POSE:right_hand_roi_from_pose" | ||||||
|  | output_stream: "RIGHT_HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" | ||||||
|  | output_stream: "RIGHT_HAND_TRACKING_ROI:right_hand_tracking_roi" | ||||||
|  | 
 | ||||||
|  | # Extracts left-hand-related landmarks from the pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "SplitNormalizedLandmarkListCalculator" | ||||||
|  |   input_stream: "pose_landmarks" | ||||||
|  |   output_stream: "left_hand_landmarks_from_pose" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 15 end: 16 } | ||||||
|  |       ranges: { begin: 17 end: 18 } | ||||||
|  |       ranges: { begin: 19 end: 20 } | ||||||
|  |       combine_outputs: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts left hand landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksFromPoseOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "HAND_LANDMARKS_FROM_POSE:left_hand_landmarks_from_pose" | ||||||
|  |   output_stream: "HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  |   # Debug outputs. | ||||||
|  |   output_stream: "HAND_ROI_FROM_POSE:left_hand_roi_from_pose" | ||||||
|  |   output_stream: "HAND_ROI_FROM_RECROP:left_hand_roi_from_recrop" | ||||||
|  |   output_stream: "HAND_TRACKING_ROI:left_hand_tracking_roi" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts right-hand-related landmarks from the pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "SplitNormalizedLandmarkListCalculator" | ||||||
|  |   input_stream: "pose_landmarks" | ||||||
|  |   output_stream: "right_hand_landmarks_from_pose" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 16 end: 17 } | ||||||
|  |       ranges: { begin: 18 end: 19 } | ||||||
|  |       ranges: { begin: 20 end: 21 } | ||||||
|  |       combine_outputs: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts right hand landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksFromPoseOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "HAND_LANDMARKS_FROM_POSE:right_hand_landmarks_from_pose" | ||||||
|  |   output_stream: "HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  |   # Debug outputs. | ||||||
|  |   output_stream: "HAND_ROI_FROM_POSE:right_hand_roi_from_pose" | ||||||
|  |   output_stream: "HAND_ROI_FROM_RECROP:right_hand_roi_from_recrop" | ||||||
|  |   output_stream: "HAND_TRACKING_ROI:right_hand_tracking_roi" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,137 @@ | ||||||
|  | # Predicts more accurate hand location (re-crop ROI) within a given ROI. | ||||||
|  | 
 | ||||||
|  | type: "HandRecropByRoiOnnxCUDA" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:input_video" | ||||||
|  | # ROI (region of interest) within the given image where a palm/hand is located. | ||||||
|  | # (NormalizedRect) | ||||||
|  | input_stream: "ROI:roi" | ||||||
|  | 
 | ||||||
|  | # Refined (more accurate) ROI to use for hand landmark prediction. | ||||||
|  | # (NormalizedRect) | ||||||
|  | output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined" | ||||||
|  | 
 | ||||||
|  | # Transforms hand ROI from the input image to a 256x256 tensor. Preserves aspect | ||||||
|  | # ratio, which results in a letterbox padding. | ||||||
|  | node { | ||||||
|  |   calculator: "ImageToTensorCalculator" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   output_stream: "TENSORS:initial_crop_tensor" | ||||||
|  |   output_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.ImageToTensorCalculatorOptions.ext] { | ||||||
|  |       output_tensor_width: 256 | ||||||
|  |       output_tensor_height: 256 | ||||||
|  |       keep_aspect_ratio: true | ||||||
|  |       output_tensor_float_range { | ||||||
|  |         min: 0.0 | ||||||
|  |         max: 1.0 | ||||||
|  |       } | ||||||
|  |       # For OpenGL, the origin should be at the top-left corner. | ||||||
|  |       gpu_origin: TOP_LEFT, | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts hand re-crop rectangle. | ||||||
|  | node { | ||||||
|  |   calculator: "InferenceCalculator" | ||||||
|  |   input_stream: "TENSORS:initial_crop_tensor" | ||||||
|  |   output_stream: "TENSORS:landmark_tensors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.InferenceCalculatorOptions.ext] { | ||||||
|  |       model_path: "mediapipe/modules/holistic_landmark/hand_recrop.onnx" | ||||||
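|  |       # Assumed to select the ONNX Runtime CUDA execution provider. | ||||||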
|  |       delegate { cuda {} } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Decodes the landmark tensors into a vector of landmarks, where the landmark | ||||||
|  | # coordinates are normalized by the size of the input image to the model. Two | ||||||
|  | # landmarks represent two virtual points: crop and scale of the new crop. | ||||||
|  | node { | ||||||
|  |   calculator: "TensorsToLandmarksCalculator" | ||||||
|  |   input_stream: "TENSORS:landmark_tensors" | ||||||
|  |   output_stream: "NORM_LANDMARKS:landmarks" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { | ||||||
|  |       num_landmarks: 2 | ||||||
|  |       input_image_width: 256 | ||||||
|  |       input_image_height: 256 | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand | ||||||
|  | # image (after image transformation with the FIT scale mode) to the | ||||||
|  | # corresponding locations on the same image with the letterbox removed (hand | ||||||
|  | # image before image transformation). | ||||||
|  | node { | ||||||
|  |   calculator: "LandmarkLetterboxRemovalCalculator" | ||||||
|  |   input_stream: "LANDMARKS:landmarks" | ||||||
|  |   input_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   output_stream: "LANDMARKS:scaled_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Projects the landmarks from the cropped hand image to the corresponding | ||||||
|  | # locations on the full image before cropping (input to the graph). | ||||||
|  | node { | ||||||
|  |   calculator: "LandmarkProjectionCalculator" | ||||||
|  |   input_stream: "NORM_LANDMARKS:scaled_landmarks" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   output_stream: "NORM_LANDMARKS:alignment_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Converts hand landmarks to a detection that tightly encloses all landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "LandmarksToDetectionCalculator" | ||||||
|  |   input_stream: "NORM_LANDMARKS:alignment_landmarks" | ||||||
|  |   output_stream: "DETECTION:hand_detection" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts image size from the input images. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Converts hand detection into a rectangle based on center and scale alignment | ||||||
|  | # points. | ||||||
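|  | # The rect is rotated so the vector from keypoint 0 to keypoint 1 aligns with | ||||||
|  | # the target angle of -90 degrees. | ||||||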
|  | node { | ||||||
|  |   calculator: "AlignmentPointsRectsCalculator" | ||||||
|  |   input_stream: "DETECTION:hand_detection" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "NORM_RECT:hand_roi_from_recrop" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.DetectionsToRectsCalculatorOptions.ext] { | ||||||
|  |       rotation_vector_start_keypoint_index: 0 | ||||||
|  |       rotation_vector_end_keypoint_index: 1 | ||||||
|  |       rotation_vector_target_angle_degrees: -90 | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # TODO: revise hand recrop roi calculation. | ||||||
|  | # Slightly moves the hand re-crop rectangle from the wrist towards the | ||||||
|  | # fingertips. Due to the new hand cropping logic, the crop border is too close | ||||||
|  | # to the fingertips while a lot of space is left below the wrist. When the hand | ||||||
|  | # moves up fast (with fingers pointing up) and the hand rect from the previous | ||||||
|  | # frame is used for tracking, the fingertips can get cropped. This adjustment | ||||||
|  | # partially solves the problem, but the hand cropping logic should be reviewed. | ||||||
|  | node { | ||||||
|  |   calculator: "RectTransformationCalculator" | ||||||
|  |   input_stream: "NORM_RECT:hand_roi_from_recrop" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "hand_roi_from_recrop_refined" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.RectTransformationCalculatorOptions.ext] { | ||||||
|  |       scale_x: 1.0 | ||||||
|  |       scale_y: 1.0 | ||||||
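|  |       # Shift by 10% of the rect size toward the fingertips (along the | ||||||
|  |       # rect's rotated y-axis). | ||||||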
|  |       shift_y: -0.1 | ||||||
|  |       square_long: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | @ -0,0 +1,137 @@ | ||||||
|  | # Predicts more accurate hand location (re-crop ROI) within a given ROI. | ||||||
|  | 
 | ||||||
|  | type: "HandRecropByRoiOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:input_video" | ||||||
|  | # ROI (region of interest) within the given image where a palm/hand is located. | ||||||
|  | # (NormalizedRect) | ||||||
|  | input_stream: "ROI:roi" | ||||||
|  | 
 | ||||||
|  | # Refined (more accurate) ROI to use for hand landmark prediction. | ||||||
|  | # (NormalizedRect) | ||||||
|  | output_stream: "HAND_ROI_FROM_RECROP:hand_roi_from_recrop_refined" | ||||||
|  | 
 | ||||||
|  | # Transforms hand ROI from the input image to a 256x256 tensor. Preserves aspect | ||||||
|  | # ratio, which results in a letterbox padding. | ||||||
|  | node { | ||||||
|  |   calculator: "ImageToTensorCalculator" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   output_stream: "TENSORS:initial_crop_tensor" | ||||||
|  |   output_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.ImageToTensorCalculatorOptions.ext] { | ||||||
|  |       output_tensor_width: 256 | ||||||
|  |       output_tensor_height: 256 | ||||||
|  |       keep_aspect_ratio: true | ||||||
|  |       output_tensor_float_range { | ||||||
|  |         min: 0.0 | ||||||
|  |         max: 1.0 | ||||||
|  |       } | ||||||
|  |       # For OpenGL, the origin should be at the top-left corner. | ||||||
|  |       gpu_origin: TOP_LEFT, | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts hand re-crop rectangle. | ||||||
|  | node { | ||||||
|  |   calculator: "InferenceCalculator" | ||||||
|  |   input_stream: "TENSORS:initial_crop_tensor" | ||||||
|  |   output_stream: "TENSORS:landmark_tensors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.InferenceCalculatorOptions.ext] { | ||||||
|  |       model_path: "mediapipe/modules/holistic_landmark/hand_recrop.onnx" | ||||||
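|  |       # Assumed to select the ONNX Runtime TensorRT execution provider. | ||||||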
|  |       delegate { tensorrt {} } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Decodes the landmark tensors into a vector of landmarks, where the landmark | ||||||
|  | # coordinates are normalized by the size of the input image to the model. Two | ||||||
|  | # landmarks represent two virtual points: crop and scale of the new crop. | ||||||
|  | node { | ||||||
|  |   calculator: "TensorsToLandmarksCalculator" | ||||||
|  |   input_stream: "TENSORS:landmark_tensors" | ||||||
|  |   output_stream: "NORM_LANDMARKS:landmarks" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { | ||||||
|  |       num_landmarks: 2 | ||||||
|  |       input_image_width: 256 | ||||||
|  |       input_image_height: 256 | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand | ||||||
|  | # image (after image transformation with the FIT scale mode) to the | ||||||
|  | # corresponding locations on the same image with the letterbox removed (hand | ||||||
|  | # image before image transformation). | ||||||
|  | node { | ||||||
|  |   calculator: "LandmarkLetterboxRemovalCalculator" | ||||||
|  |   input_stream: "LANDMARKS:landmarks" | ||||||
|  |   input_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   output_stream: "LANDMARKS:scaled_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Projects the landmarks from the cropped hand image to the corresponding | ||||||
|  | # locations on the full image before cropping (input to the graph). | ||||||
|  | node { | ||||||
|  |   calculator: "LandmarkProjectionCalculator" | ||||||
|  |   input_stream: "NORM_LANDMARKS:scaled_landmarks" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   output_stream: "NORM_LANDMARKS:alignment_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Converts hand landmarks to a detection that tightly encloses all landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "LandmarksToDetectionCalculator" | ||||||
|  |   input_stream: "NORM_LANDMARKS:alignment_landmarks" | ||||||
|  |   output_stream: "DETECTION:hand_detection" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts image size from the input images. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE:input_video" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Converts hand detection into a rectangle based on center and scale alignment | ||||||
|  | # points. | ||||||
|  | node { | ||||||
|  |   calculator: "AlignmentPointsRectsCalculator" | ||||||
|  |   input_stream: "DETECTION:hand_detection" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "NORM_RECT:hand_roi_from_recrop" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.DetectionsToRectsCalculatorOptions.ext] { | ||||||
|  |       rotation_vector_start_keypoint_index: 0 | ||||||
|  |       rotation_vector_end_keypoint_index: 1 | ||||||
|  |       rotation_vector_target_angle_degrees: -90 | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # TODO: revise hand recrop roi calculation. | ||||||
|  | # Slightly moves the hand re-crop rectangle from the wrist towards the | ||||||
|  | # fingertips. Due to the new hand cropping logic, the crop border is too close | ||||||
|  | # to the fingertips while a lot of space is left below the wrist. When the hand | ||||||
|  | # moves up fast (with fingers pointing up) and the hand rect from the previous | ||||||
|  | # frame is used for tracking, the fingertips can get cropped. This adjustment | ||||||
|  | # partially solves the problem, but the hand cropping logic should be reviewed. | ||||||
|  | node { | ||||||
|  |   calculator: "RectTransformationCalculator" | ||||||
|  |   input_stream: "NORM_RECT:hand_roi_from_recrop" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "hand_roi_from_recrop_refined" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.RectTransformationCalculatorOptions.ext] { | ||||||
|  |       scale_x: 1.0 | ||||||
|  |       scale_y: 1.0 | ||||||
|  |       shift_y: -0.1 | ||||||
|  |       square_long: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | @ -0,0 +1,146 @@ | ||||||
|  | # Predicts pose + left/right hand + face landmarks. | ||||||
|  | # | ||||||
|  | # It is required that: | ||||||
|  | # - "face_detection_short_range.onnx" is available at | ||||||
|  | # "mediapipe/modules/face_detection/face_detection_short_range.onnx" | ||||||
|  | # | ||||||
|  | # - "face_landmark.onnx" is available at | ||||||
|  | # "mediapipe/modules/face_landmark/face_landmark.onnx" | ||||||
|  | # | ||||||
|  | # - "hand_landmark_full.onnx" is available at | ||||||
|  | # "mediapipe/modules/hand_landmark/hand_landmark_full.onnx" | ||||||
|  | # | ||||||
|  | # - "hand_recrop.onnx" is available at | ||||||
|  | # "mediapipe/modules/holistic_landmark/hand_recrop.onnx" | ||||||
|  | # | ||||||
|  | # - "handedness.txt" is available at | ||||||
|  | # "mediapipe/modules/hand_landmark/handedness.txt" | ||||||
|  | # | ||||||
|  | # - "pose_detection.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  | # | ||||||
|  | # - "pose_landmark_lite.onnx" or "pose_landmark_full.onnx" or | ||||||
|  | # "pose_landmark_heavy.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx" | ||||||
|  | # path respectively during execution, depending on the specification in the | ||||||
|  | # MODEL_COMPLEXITY input side packet. | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "HolisticLandmarkOnnxCUDA" | ||||||
|  | #     input_stream: "IMAGE:input_video" | ||||||
|  | #     input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | #     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | #     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | #     input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" | ||||||
|  | #     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | #     output_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  | #     output_stream: "FACE_LANDMARKS:face_landmarks" | ||||||
|  | #     output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  | #     output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | #   } | ||||||
|  | # | ||||||
|  | # NOTE: if a pose/hand/face output is not present in the image, for this | ||||||
|  | # particular timestamp there will not be an output packet in the corresponding | ||||||
|  | # output stream below. However, the MediaPipe framework will internally inform | ||||||
|  | # the downstream calculators of the absence of this packet so that they don't | ||||||
|  | # wait for it unnecessarily. | ||||||
|  | 
 | ||||||
|  | type: "HolisticLandmarkOnnxCUDA" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | 
 | ||||||
|  | # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as | ||||||
|  | # inference latency generally go up with the model complexity. If unspecified, | ||||||
|  | # functions as set to 1. (int) | ||||||
|  | input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | 
 | ||||||
|  | # Whether to filter landmarks across different input images to reduce jitter. | ||||||
|  | # If unspecified, functions as set to true. (bool) | ||||||
|  | input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | 
 | ||||||
|  | # Whether to predict the segmentation mask. If unspecified, functions as set to | ||||||
|  | # false. (bool) | ||||||
|  | input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | 
 | ||||||
|  | # Whether to filter segmentation mask across different input images to reduce | ||||||
|  | # jitter. If unspecified, functions as set to true. (bool) | ||||||
|  | input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | 
 | ||||||
|  | # Whether to run the face landmark model with attention on lips and eyes to | ||||||
|  | # provide more accuracy, and additionally output iris landmarks. If unspecified, | ||||||
|  | # functions as set to false. (bool) | ||||||
|  | input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" | ||||||
|  | 
 | ||||||
|  | # Whether landmarks on the previous image should be used to help localize | ||||||
|  | # landmarks on the current image. (bool) | ||||||
|  | input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose landmarks. (NormalizedLandmarkList) | ||||||
|  | # 33 pose landmarks. | ||||||
|  | output_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  | # 33 pose world landmarks. (LandmarkList) | ||||||
|  | output_stream: "WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  | # 21 left hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  | # 21 right hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | # 468 face landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "FACE_LANDMARKS:face_landmarks" | ||||||
|  | 
 | ||||||
|  | # Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) | ||||||
|  | output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | 
 | ||||||
|  | # Debug outputs | ||||||
|  | output_stream: "POSE_ROI:pose_landmarks_roi" | ||||||
|  | output_stream: "POSE_DETECTION:pose_detection" | ||||||
|  | 
 | ||||||
|  | # Predicts pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkOnnxCUDA" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  |   input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  |   input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  |   output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  |   output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi" | ||||||
|  |   output_stream: "DETECTION:pose_detection" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts left and right hand landmarks based on the initial pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksLeftAndRightOnnxCUDA" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  |   output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts face-related pose landmarks. | ||||||
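|  | # Pose landmarks 0-10 cover the face: nose, eyes, ears and mouth corners. | ||||||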
|  | node { | ||||||
|  |   calculator: "SplitNormalizedLandmarkListCalculator" | ||||||
|  |   input_stream: "pose_landmarks" | ||||||
|  |   output_stream: "face_landmarks_from_pose" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 0 end: 11 } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts face landmarks based on the initial pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "FaceLandmarksFromPoseOnnxCUDA" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" | ||||||
|  |   input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks" | ||||||
|  |   output_stream: "FACE_LANDMARKS:face_landmarks" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,146 @@ | ||||||
|  | # Predicts pose + left/right hand + face landmarks. | ||||||
|  | # | ||||||
|  | # It is required that: | ||||||
|  | # - "face_detection_short_range.onnx" is available at | ||||||
|  | # "mediapipe/modules/face_detection/face_detection_short_range.onnx" | ||||||
|  | # | ||||||
|  | # - "face_landmark.onnx" is available at | ||||||
|  | # "mediapipe/modules/face_landmark/face_landmark.onnx" | ||||||
|  | # | ||||||
|  | # - "hand_landmark_full.onnx" is available at | ||||||
|  | # "mediapipe/modules/hand_landmark/hand_landmark_full.onnx" | ||||||
|  | # | ||||||
|  | # - "hand_recrop.onnx" is available at | ||||||
|  | # "mediapipe/modules/holistic_landmark/hand_recrop.onnx" | ||||||
|  | # | ||||||
|  | # - "handedness.txt" is available at | ||||||
|  | # "mediapipe/modules/hand_landmark/handedness.txt" | ||||||
|  | # | ||||||
|  | # - "pose_detection.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  | # | ||||||
|  | # - "pose_landmark_lite.onnx" or "pose_landmark_full.onnx" or | ||||||
|  | # "pose_landmark_heavy.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx" | ||||||
|  | # path respectively during execution, depending on the specification in the | ||||||
|  | # MODEL_COMPLEXITY input side packet. | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "HolisticLandmarkOnnxTensorRT" | ||||||
|  | #     input_stream: "IMAGE:input_video" | ||||||
|  | #     input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | #     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | #     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | #     input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" | ||||||
|  | #     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | #     output_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  | #     output_stream: "FACE_LANDMARKS:face_landmarks" | ||||||
|  | #     output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  | #     output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | #   } | ||||||
|  | # | ||||||
|  | # NOTE: if a pose/hand/face output is not present in the image, for this | ||||||
|  | # particular timestamp there will not be an output packet in the corresponding | ||||||
|  | # output stream below. However, the MediaPipe framework will internally inform | ||||||
|  | # the downstream calculators of the absence of this packet so that they don't | ||||||
|  | # wait for it unnecessarily. | ||||||
|  | 
 | ||||||
|  | type: "HolisticLandmarkOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | 
 | ||||||
|  | # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as | ||||||
|  | # inference latency generally go up with the model complexity. If unspecified, | ||||||
|  | # functions as set to 1. (int) | ||||||
|  | input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | 
 | ||||||
|  | # Whether to filter landmarks across different input images to reduce jitter. | ||||||
|  | # If unspecified, functions as set to true. (bool) | ||||||
|  | input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | 
 | ||||||
|  | # Whether to predict the segmentation mask. If unspecified, functions as set to | ||||||
|  | # false. (bool) | ||||||
|  | input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | 
 | ||||||
|  | # Whether to filter segmentation mask across different input images to reduce | ||||||
|  | # jitter. If unspecified, functions as set to true. (bool) | ||||||
|  | input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | 
 | ||||||
|  | # Whether to run the face landmark model with attention on lips and eyes to | ||||||
|  | # provide more accuracy, and additionally output iris landmarks. If unspecified, | ||||||
|  | # functions as set to false. (bool) | ||||||
|  | input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks" | ||||||
|  | 
 | ||||||
|  | # Whether landmarks on the previous image should be used to help localize | ||||||
|  | # landmarks on the current image. (bool) | ||||||
|  | input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose landmarks. (NormalizedLandmarkList) | ||||||
|  | # 33 pose landmarks. | ||||||
|  | output_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  | # 33 pose world landmarks. (LandmarkList) | ||||||
|  | output_stream: "WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  | # 21 left hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  | # 21 right hand landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | # 468 face landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "FACE_LANDMARKS:face_landmarks" | ||||||
|  | 
 | ||||||
|  | # Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) | ||||||
|  | output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | 
 | ||||||
|  | # Debug outputs | ||||||
|  | output_stream: "POSE_ROI:pose_landmarks_roi" | ||||||
|  | output_stream: "POSE_DETECTION:pose_detection" | ||||||
|  | 
 | ||||||
|  | # Predicts pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  |   input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  |   input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  |   output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  |   output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi" | ||||||
|  |   output_stream: "DETECTION:pose_detection" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts left and right hand landmarks based on the initial pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "HandLandmarksLeftAndRightOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "POSE_LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks" | ||||||
|  |   output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Extracts face-related pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "SplitNormalizedLandmarkListCalculator" | ||||||
|  |   input_stream: "pose_landmarks" | ||||||
|  |   output_stream: "face_landmarks_from_pose" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 0 end: 11 } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Predicts face landmarks based on the initial pose landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "FaceLandmarksFromPoseOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose" | ||||||
|  |   input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks" | ||||||
|  |   output_stream: "FACE_LANDMARKS:face_landmarks" | ||||||
|  | } | ||||||