pose detection和landmark支持onnxruntime的cuda和tensorrt
This commit is contained in:
		
							parent
							
								
									f3bf3ab3e3
								
							
						
					
					
						commit
						008ed46ee0
					
				|  | @ -24,6 +24,46 @@ cc_binary( | ||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | cc_binary( | ||||||
|  |     name = "pose_tracking_cpu_fps", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/examples/desktop:demo_run_graph_main_fps", | ||||||
|  |         "//mediapipe/graphs/pose_tracking:pose_tracking_cpu_deps", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cc_binary( | ||||||
|  |     name = "pose_tracking_onnx_cuda", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/examples/desktop:demo_run_graph_main", | ||||||
|  |         "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cc_binary( | ||||||
|  |     name = "pose_tracking_onnx_cuda_fps", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/examples/desktop:demo_run_graph_main_fps", | ||||||
|  |         "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_cuda_deps", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cc_binary( | ||||||
|  |     name = "pose_tracking_onnx_tensorrt", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/examples/desktop:demo_run_graph_main", | ||||||
|  |         "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cc_binary( | ||||||
|  |     name = "pose_tracking_onnx_tensorrt_fps", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/examples/desktop:demo_run_graph_main_fps", | ||||||
|  |         "//mediapipe/graphs/pose_tracking:pose_tracking_onnx_tensorrt_deps", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| # Linux only | # Linux only | ||||||
| cc_binary( | cc_binary( | ||||||
|     name = "pose_tracking_gpu", |     name = "pose_tracking_gpu", | ||||||
|  |  | ||||||
|  | @ -54,3 +54,37 @@ mediapipe_binary_graph( | ||||||
|     output_name = "pose_tracking_cpu.binarypb", |     output_name = "pose_tracking_cpu.binarypb", | ||||||
|     deps = [":pose_tracking_cpu_deps"], |     deps = [":pose_tracking_cpu_deps"], | ||||||
| ) | ) | ||||||
|  | 
 | ||||||
|  | cc_library( | ||||||
|  |     name = "pose_tracking_onnx_cuda_deps", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/calculators/core:constant_side_packet_calculator", | ||||||
|  |         "//mediapipe/calculators/core:flow_limiter_calculator", | ||||||
|  |         "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu", | ||||||
|  |         "//mediapipe/modules/pose_landmark:pose_landmark_onnx_cuda", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | mediapipe_binary_graph( | ||||||
|  |     name = "pose_tracking_onnx_cuda_binary_graph", | ||||||
|  |     graph = "pose_tracking_onnx_cuda.pbtxt", | ||||||
|  |     output_name = "pose_tracking_onnx_cuda.binarypb", | ||||||
|  |     deps = [":pose_tracking_onnx_cuda_deps"], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | cc_library( | ||||||
|  |     name = "pose_tracking_onnx_tensorrt_deps", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/calculators/core:constant_side_packet_calculator", | ||||||
|  |         "//mediapipe/calculators/core:flow_limiter_calculator", | ||||||
|  |         "//mediapipe/graphs/pose_tracking/subgraphs:pose_renderer_cpu", | ||||||
|  |         "//mediapipe/modules/pose_landmark:pose_landmark_onnx_tensorrt", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | # Compiles pose_tracking_onnx_tensorrt.pbtxt into a binary graph proto. | ||||||
|  | mediapipe_binary_graph( | ||||||
|  |     name = "pose_tracking_onnx_tensorrt_binary_graph", | ||||||
|  |     graph = "pose_tracking_onnx_tensorrt.pbtxt", | ||||||
|  |     output_name = "pose_tracking_onnx_tensorrt.binarypb", | ||||||
|  |     # Must reference the cc_library declared above in this package | ||||||
|  |     # (":pose_tracking_onnx_tensorrt_deps"), mirroring the CUDA variant. | ||||||
|  |     deps = [":pose_tracking_onnx_tensorrt_deps"], | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | @ -14,7 +14,7 @@ node { | ||||||
|   output_side_packet: "PACKET:enable_segmentation" |   output_side_packet: "PACKET:enable_segmentation" | ||||||
|   node_options: { |   node_options: { | ||||||
|     [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { |     [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { | ||||||
|       packet { bool_value: true } |       packet { bool_value: false } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
							
								
								
									
										63
									
								
								mediapipe/graphs/pose_tracking/pose_tracking_onnx_cuda.pbtxt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								mediapipe/graphs/pose_tracking/pose_tracking_onnx_cuda.pbtxt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,63 @@ | ||||||
|  | # MediaPipe graph that performs pose tracking with onnxruntime on cuda. | ||||||
|  | 
 | ||||||
|  | # CPU buffer. (ImageFrame) | ||||||
|  | input_stream: "input_video" | ||||||
|  | 
 | ||||||
|  | # Output image with rendered results. (ImageFrame) | ||||||
|  | output_stream: "output_video" | ||||||
|  | # Pose landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "pose_landmarks" | ||||||
|  | 
 | ||||||
|  | # Generates the side packet that toggles segmentation (set to false here). | ||||||
|  | node { | ||||||
|  |   calculator: "ConstantSidePacketCalculator" | ||||||
|  |   output_side_packet: "PACKET:enable_segmentation" | ||||||
|  |   node_options: { | ||||||
|  |     [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { | ||||||
|  |       packet { bool_value: false } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Throttles the images flowing downstream for flow control. It passes through | ||||||
|  | # the very first incoming image unaltered, and waits for downstream nodes | ||||||
|  | # (calculators and subgraphs) in the graph to finish their tasks before it | ||||||
|  | # passes through another image. All images that come in while waiting are | ||||||
|  | # dropped, limiting the number of in-flight images in most part of the graph to | ||||||
|  | # 1. This prevents the downstream nodes from queuing up incoming images and data | ||||||
|  | # excessively, which leads to increased latency and memory usage, unwanted in | ||||||
|  | # real-time mobile applications. It also eliminates unnecessary computation, | ||||||
|  | # e.g., the output produced by a node may get dropped downstream if the | ||||||
|  | # subsequent nodes are still busy processing previous inputs. | ||||||
|  | node { | ||||||
|  |   calculator: "FlowLimiterCalculator" | ||||||
|  |   input_stream: "input_video" | ||||||
|  |   input_stream: "FINISHED:output_video" | ||||||
|  |   input_stream_info: { | ||||||
|  |     tag_index: "FINISHED" | ||||||
|  |     back_edge: true | ||||||
|  |   } | ||||||
|  |   output_stream: "throttled_input_video" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Subgraph that detects poses and corresponding landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkOnnxCUDA" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_stream: "IMAGE:throttled_input_video" | ||||||
|  |   output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  |   output_stream: "DETECTION:pose_detection" | ||||||
|  |   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Subgraph that renders pose-landmark annotation onto the input image. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseRendererCpu" | ||||||
|  |   input_stream: "IMAGE:throttled_input_video" | ||||||
|  |   input_stream: "LANDMARKS:pose_landmarks" | ||||||
|  |   input_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  |   input_stream: "DETECTION:pose_detection" | ||||||
|  |   input_stream: "ROI:roi_from_landmarks" | ||||||
|  |   output_stream: "IMAGE:output_video" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,63 @@ | ||||||
|  | # MediaPipe graph that performs pose tracking with onnxruntime on tensorrt. | ||||||
|  | 
 | ||||||
|  | # CPU buffer. (ImageFrame) | ||||||
|  | input_stream: "input_video" | ||||||
|  | 
 | ||||||
|  | # Output image with rendered results. (ImageFrame) | ||||||
|  | output_stream: "output_video" | ||||||
|  | # Pose landmarks. (NormalizedLandmarkList) | ||||||
|  | output_stream: "pose_landmarks" | ||||||
|  | 
 | ||||||
|  | # Generates the side packet that toggles segmentation (set to false here). | ||||||
|  | node { | ||||||
|  |   calculator: "ConstantSidePacketCalculator" | ||||||
|  |   output_side_packet: "PACKET:enable_segmentation" | ||||||
|  |   node_options: { | ||||||
|  |     [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { | ||||||
|  |       packet { bool_value: false } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Throttles the images flowing downstream for flow control. It passes through | ||||||
|  | # the very first incoming image unaltered, and waits for downstream nodes | ||||||
|  | # (calculators and subgraphs) in the graph to finish their tasks before it | ||||||
|  | # passes through another image. All images that come in while waiting are | ||||||
|  | # dropped, limiting the number of in-flight images in most part of the graph to | ||||||
|  | # 1. This prevents the downstream nodes from queuing up incoming images and data | ||||||
|  | # excessively, which leads to increased latency and memory usage, unwanted in | ||||||
|  | # real-time mobile applications. It also eliminates unnecessary computation, | ||||||
|  | # e.g., the output produced by a node may get dropped downstream if the | ||||||
|  | # subsequent nodes are still busy processing previous inputs. | ||||||
|  | node { | ||||||
|  |   calculator: "FlowLimiterCalculator" | ||||||
|  |   input_stream: "input_video" | ||||||
|  |   input_stream: "FINISHED:output_video" | ||||||
|  |   input_stream_info: { | ||||||
|  |     tag_index: "FINISHED" | ||||||
|  |     back_edge: true | ||||||
|  |   } | ||||||
|  |   output_stream: "throttled_input_video" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Subgraph that detects poses and corresponding landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkOnnxTensorRT" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_stream: "IMAGE:throttled_input_video" | ||||||
|  |   output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  |   output_stream: "DETECTION:pose_detection" | ||||||
|  |   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Subgraph that renders pose-landmark annotation onto the input image. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseRendererCpu" | ||||||
|  |   input_stream: "IMAGE:throttled_input_video" | ||||||
|  |   input_stream: "LANDMARKS:pose_landmarks" | ||||||
|  |   input_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  |   input_stream: "DETECTION:pose_detection" | ||||||
|  |   input_stream: "ROI:roi_from_landmarks" | ||||||
|  |   output_stream: "IMAGE:output_video" | ||||||
|  | } | ||||||
|  | @ -35,6 +35,34 @@ mediapipe_simple_subgraph( | ||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | mediapipe_simple_subgraph( | ||||||
|  |     name = "pose_detection_onnx_cuda", | ||||||
|  |     graph = "pose_detection_onnx_cuda.pbtxt", | ||||||
|  |     register_as = "PoseDetectionOnnxCUDA", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/calculators/tensor:image_to_tensor_calculator", | ||||||
|  |         "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda", | ||||||
|  |         "//mediapipe/calculators/tensor:tensors_to_detections_calculator", | ||||||
|  |         "//mediapipe/calculators/tflite:ssd_anchors_calculator", | ||||||
|  |         "//mediapipe/calculators/util:detection_letterbox_removal_calculator", | ||||||
|  |         "//mediapipe/calculators/util:non_max_suppression_calculator", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | mediapipe_simple_subgraph( | ||||||
|  |     name = "pose_detection_onnx_tensorrt", | ||||||
|  |     graph = "pose_detection_onnx_tensorrt.pbtxt", | ||||||
|  |     register_as = "PoseDetectionOnnxTensorRT", | ||||||
|  |     deps = [ | ||||||
|  |         "//mediapipe/calculators/tensor:image_to_tensor_calculator", | ||||||
|  |         "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt", | ||||||
|  |         "//mediapipe/calculators/tensor:tensors_to_detections_calculator", | ||||||
|  |         "//mediapipe/calculators/tflite:ssd_anchors_calculator", | ||||||
|  |         "//mediapipe/calculators/util:detection_letterbox_removal_calculator", | ||||||
|  |         "//mediapipe/calculators/util:non_max_suppression_calculator", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| mediapipe_simple_subgraph( | mediapipe_simple_subgraph( | ||||||
|     name = "pose_detection_gpu", |     name = "pose_detection_gpu", | ||||||
|     graph = "pose_detection_gpu.pbtxt", |     graph = "pose_detection_gpu.pbtxt", | ||||||
|  |  | ||||||
							
								
								
									
										157
									
								
								mediapipe/modules/pose_detection/pose_detection_onnx_cuda.pbtxt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										157
									
								
								mediapipe/modules/pose_detection/pose_detection_onnx_cuda.pbtxt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,157 @@ | ||||||
|  | # MediaPipe graph to detect poses. (CPU input, and inference is executed with onnxruntime on | ||||||
|  | # cuda.) | ||||||
|  | # | ||||||
|  | # It is required that "pose_detection.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  | # path during execution. | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "PoseDetectionOnnxCUDA" | ||||||
|  | #     input_stream: "IMAGE:image" | ||||||
|  | #     output_stream: "DETECTIONS:pose_detections" | ||||||
|  | #   } | ||||||
|  | 
 | ||||||
|  | type: "PoseDetectionOnnxCUDA" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | 
 | ||||||
|  | # Detected poses. (std::vector<Detection>) | ||||||
|  | # Bounding box in each pose detection is currently set to the bounding box of | ||||||
|  | # the detected face. However, 4 additional key points are available in each | ||||||
|  | # detection, which are used to further calculate a (rotated) bounding box that | ||||||
|  | # encloses the body region of interest. Among the 4 key points, the first two | ||||||
|  | # are for identifying the full-body region, and the second two for upper body | ||||||
|  | # only: | ||||||
|  | # | ||||||
|  | # Key point 0 - mid hip center | ||||||
|  | # Key point 1 - point that encodes size & rotation (for full body) | ||||||
|  | # Key point 2 - mid shoulder center | ||||||
|  | # Key point 3 - point that encodes size & rotation (for upper body) | ||||||
|  | # | ||||||
|  | # NOTE: there will not be an output packet in the DETECTIONS stream for this | ||||||
|  | # particular timestamp if no poses are detected. However, the MediaPipe | ||||||
|  | # framework will internally inform the downstream calculators of the absence of | ||||||
|  | # this packet so that they don't wait for it unnecessarily. | ||||||
|  | output_stream: "DETECTIONS:detections" | ||||||
|  | 
 | ||||||
|  | # Transforms the input image into a 224x224 one while keeping the aspect ratio | ||||||
|  | # (what is expected by the corresponding model), resulting in potential | ||||||
|  | # letterboxing in the transformed image. | ||||||
|  | node: { | ||||||
|  |   calculator: "ImageToTensorCalculator" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   output_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.ImageToTensorCalculatorOptions.ext] { | ||||||
|  |       output_tensor_width: 224 | ||||||
|  |       output_tensor_height: 224 | ||||||
|  |       keep_aspect_ratio: true | ||||||
|  |       output_tensor_float_range { | ||||||
|  |         min: -1.0 | ||||||
|  |         max: 1.0 | ||||||
|  |       } | ||||||
|  |       border_mode: BORDER_ZERO | ||||||
|  |       # If this calculator truly operates in the CPU, then gpu_origin is | ||||||
|  |       # ignored, but if some build switch insists on GPU inference, then we will | ||||||
|  |       # still need to set this. | ||||||
|  |       gpu_origin: TOP_LEFT | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Runs the ONNX model with ONNX Runtime using the CUDA delegate. It takes an | ||||||
|  | # image tensor and outputs a vector of tensors representing, for instance, | ||||||
|  | # detection boxes/keypoints and scores. | ||||||
|  | node { | ||||||
|  |   calculator: "InferenceCalculator" | ||||||
|  |   input_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "TENSORS:detection_tensors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.InferenceCalculatorOptions.ext] { | ||||||
|  |       model_path: "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  |       delegate { cuda {} } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Generates a single side packet containing a vector of SSD anchors based on | ||||||
|  | # the specification in the options. | ||||||
|  | node { | ||||||
|  |   calculator: "SsdAnchorsCalculator" | ||||||
|  |   output_side_packet: "anchors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SsdAnchorsCalculatorOptions.ext] { | ||||||
|  |       num_layers: 5 | ||||||
|  |       min_scale: 0.1484375 | ||||||
|  |       max_scale: 0.75 | ||||||
|  |       input_size_height: 224 | ||||||
|  |       input_size_width: 224 | ||||||
|  |       anchor_offset_x: 0.5 | ||||||
|  |       anchor_offset_y: 0.5 | ||||||
|  |       strides: 8 | ||||||
|  |       strides: 16 | ||||||
|  |       strides: 32 | ||||||
|  |       strides: 32 | ||||||
|  |       strides: 32 | ||||||
|  |       aspect_ratios: 1.0 | ||||||
|  |       fixed_anchor_size: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Decodes the detection tensors generated by the ONNX model, based on | ||||||
|  | # the SSD anchors and the specification in the options, into a vector of | ||||||
|  | # detections. Each detection describes a detected object. | ||||||
|  | node { | ||||||
|  |   calculator: "TensorsToDetectionsCalculator" | ||||||
|  |   input_stream: "TENSORS:detection_tensors" | ||||||
|  |   input_side_packet: "ANCHORS:anchors" | ||||||
|  |   output_stream: "DETECTIONS:unfiltered_detections" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { | ||||||
|  |       num_classes: 1 | ||||||
|  |       num_boxes: 2254 | ||||||
|  |       num_coords: 12 | ||||||
|  |       box_coord_offset: 0 | ||||||
|  |       keypoint_coord_offset: 4 | ||||||
|  |       num_keypoints: 4 | ||||||
|  |       num_values_per_keypoint: 2 | ||||||
|  |       sigmoid_score: true | ||||||
|  |       score_clipping_thresh: 100.0 | ||||||
|  |       reverse_output_order: true | ||||||
|  |       x_scale: 224.0 | ||||||
|  |       y_scale: 224.0 | ||||||
|  |       h_scale: 224.0 | ||||||
|  |       w_scale: 224.0 | ||||||
|  |       min_score_thresh: 0.5 | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Performs non-max suppression to remove excessive detections. | ||||||
|  | node { | ||||||
|  |   calculator: "NonMaxSuppressionCalculator" | ||||||
|  |   input_stream: "unfiltered_detections" | ||||||
|  |   output_stream: "filtered_detections" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { | ||||||
|  |       min_suppression_threshold: 0.3 | ||||||
|  |       overlap_type: INTERSECTION_OVER_UNION | ||||||
|  |       algorithm: WEIGHTED | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Adjusts detection locations (already normalized to [0.f, 1.f]) on the | ||||||
|  | # letterboxed image (after image transformation with the FIT scale mode) to the | ||||||
|  | # corresponding locations on the same image with the letterbox removed (the | ||||||
|  | # input image to the graph before image transformation). | ||||||
|  | node { | ||||||
|  |   calculator: "DetectionLetterboxRemovalCalculator" | ||||||
|  |   input_stream: "DETECTIONS:filtered_detections" | ||||||
|  |   input_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   output_stream: "DETECTIONS:detections" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,157 @@ | ||||||
|  | # MediaPipe graph to detect poses. (CPU input, and inference is executed with onnxruntime on | ||||||
|  | # tensorrt.) | ||||||
|  | # | ||||||
|  | # It is required that "pose_detection.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  | # path during execution. | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "PoseDetectionOnnxTensorRT" | ||||||
|  | #     input_stream: "IMAGE:image" | ||||||
|  | #     output_stream: "DETECTIONS:pose_detections" | ||||||
|  | #   } | ||||||
|  | 
 | ||||||
|  | type: "PoseDetectionOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | 
 | ||||||
|  | # Detected poses. (std::vector<Detection>) | ||||||
|  | # Bounding box in each pose detection is currently set to the bounding box of | ||||||
|  | # the detected face. However, 4 additional key points are available in each | ||||||
|  | # detection, which are used to further calculate a (rotated) bounding box that | ||||||
|  | # encloses the body region of interest. Among the 4 key points, the first two | ||||||
|  | # are for identifying the full-body region, and the second two for upper body | ||||||
|  | # only: | ||||||
|  | # | ||||||
|  | # Key point 0 - mid hip center | ||||||
|  | # Key point 1 - point that encodes size & rotation (for full body) | ||||||
|  | # Key point 2 - mid shoulder center | ||||||
|  | # Key point 3 - point that encodes size & rotation (for upper body) | ||||||
|  | # | ||||||
|  | # NOTE: there will not be an output packet in the DETECTIONS stream for this | ||||||
|  | # particular timestamp if no poses are detected. However, the MediaPipe | ||||||
|  | # framework will internally inform the downstream calculators of the absence of | ||||||
|  | # this packet so that they don't wait for it unnecessarily. | ||||||
|  | output_stream: "DETECTIONS:detections" | ||||||
|  | 
 | ||||||
|  | # Transforms the input image into a 224x224 one while keeping the aspect ratio | ||||||
|  | # (what is expected by the corresponding model), resulting in potential | ||||||
|  | # letterboxing in the transformed image. | ||||||
|  | node: { | ||||||
|  |   calculator: "ImageToTensorCalculator" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   output_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.ImageToTensorCalculatorOptions.ext] { | ||||||
|  |       output_tensor_width: 224 | ||||||
|  |       output_tensor_height: 224 | ||||||
|  |       keep_aspect_ratio: true | ||||||
|  |       output_tensor_float_range { | ||||||
|  |         min: -1.0 | ||||||
|  |         max: 1.0 | ||||||
|  |       } | ||||||
|  |       border_mode: BORDER_ZERO | ||||||
|  |       # If this calculator truly operates in the CPU, then gpu_origin is | ||||||
|  |       # ignored, but if some build switch insists on GPU inference, then we will | ||||||
|  |       # still need to set this. | ||||||
|  |       gpu_origin: TOP_LEFT | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Runs the ONNX model with ONNX Runtime using the TensorRT delegate. It takes | ||||||
|  | # an image tensor and outputs a vector of tensors representing, for instance, | ||||||
|  | # detection boxes/keypoints and scores. | ||||||
|  | node { | ||||||
|  |   calculator: "InferenceCalculator" | ||||||
|  |   input_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "TENSORS:detection_tensors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.InferenceCalculatorOptions.ext] { | ||||||
|  |       model_path: "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  |       delegate { tensorrt {} } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Generates a single side packet containing a vector of SSD anchors based on | ||||||
|  | # the specification in the options. | ||||||
|  | node { | ||||||
|  |   calculator: "SsdAnchorsCalculator" | ||||||
|  |   output_side_packet: "anchors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SsdAnchorsCalculatorOptions.ext] { | ||||||
|  |       num_layers: 5 | ||||||
|  |       min_scale: 0.1484375 | ||||||
|  |       max_scale: 0.75 | ||||||
|  |       input_size_height: 224 | ||||||
|  |       input_size_width: 224 | ||||||
|  |       anchor_offset_x: 0.5 | ||||||
|  |       anchor_offset_y: 0.5 | ||||||
|  |       strides: 8 | ||||||
|  |       strides: 16 | ||||||
|  |       strides: 32 | ||||||
|  |       strides: 32 | ||||||
|  |       strides: 32 | ||||||
|  |       aspect_ratios: 1.0 | ||||||
|  |       fixed_anchor_size: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Decodes the detection tensors generated by the ONNX model, based on | ||||||
|  | # the SSD anchors and the specification in the options, into a vector of | ||||||
|  | # detections. Each detection describes a detected object. | ||||||
|  | node { | ||||||
|  |   calculator: "TensorsToDetectionsCalculator" | ||||||
|  |   input_stream: "TENSORS:detection_tensors" | ||||||
|  |   input_side_packet: "ANCHORS:anchors" | ||||||
|  |   output_stream: "DETECTIONS:unfiltered_detections" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { | ||||||
|  |       num_classes: 1 | ||||||
|  |       num_boxes: 2254 | ||||||
|  |       num_coords: 12 | ||||||
|  |       box_coord_offset: 0 | ||||||
|  |       keypoint_coord_offset: 4 | ||||||
|  |       num_keypoints: 4 | ||||||
|  |       num_values_per_keypoint: 2 | ||||||
|  |       sigmoid_score: true | ||||||
|  |       score_clipping_thresh: 100.0 | ||||||
|  |       reverse_output_order: true | ||||||
|  |       x_scale: 224.0 | ||||||
|  |       y_scale: 224.0 | ||||||
|  |       h_scale: 224.0 | ||||||
|  |       w_scale: 224.0 | ||||||
|  |       min_score_thresh: 0.5 | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Performs non-max suppression to remove excessive detections. | ||||||
|  | node { | ||||||
|  |   calculator: "NonMaxSuppressionCalculator" | ||||||
|  |   input_stream: "unfiltered_detections" | ||||||
|  |   output_stream: "filtered_detections" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { | ||||||
|  |       min_suppression_threshold: 0.3 | ||||||
|  |       overlap_type: INTERSECTION_OVER_UNION | ||||||
|  |       algorithm: WEIGHTED | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Adjusts detection locations (already normalized to [0.f, 1.f]) on the | ||||||
|  | # letterboxed image (after image transformation with the FIT scale mode) to the | ||||||
|  | # corresponding locations on the same image with the letterbox removed (the | ||||||
|  | # input image to the graph before image transformation). | ||||||
|  | node { | ||||||
|  |   calculator: "DetectionLetterboxRemovalCalculator" | ||||||
|  |   input_stream: "DETECTIONS:filtered_detections" | ||||||
|  |   input_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   output_stream: "DETECTIONS:detections" | ||||||
|  | } | ||||||
|  | @ -61,6 +61,35 @@ mediapipe_simple_subgraph( | ||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | mediapipe_simple_subgraph( | ||||||
|  |     name = "pose_landmark_by_roi_onnx_cuda", | ||||||
|  |     graph = "pose_landmark_by_roi_onnx_cuda.pbtxt", | ||||||
|  |     register_as = "PoseLandmarkByRoiOnnxCUDA", | ||||||
|  |     deps = [ | ||||||
|  |         ":pose_landmark_model_loader", | ||||||
|  |         ":pose_landmarks_and_segmentation_inverse_projection", | ||||||
|  |         ":tensors_to_pose_landmarks_and_segmentation", | ||||||
|  |         "//mediapipe/calculators/image:image_properties_calculator", | ||||||
|  |         "//mediapipe/calculators/tensor:image_to_tensor_calculator", | ||||||
|  |         "//mediapipe/calculators/tensor:inference_calculator_onnx_cuda", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | mediapipe_simple_subgraph( | ||||||
|  |     name = "pose_landmark_by_roi_onnx_tensorrt", | ||||||
|  |     graph = "pose_landmark_by_roi_onnx_tensorrt.pbtxt", | ||||||
|  |     register_as = "PoseLandmarkByRoiOnnxTensorRT", | ||||||
|  |     deps = [ | ||||||
|  |         ":pose_landmark_model_loader", | ||||||
|  |         ":pose_landmarks_and_segmentation_inverse_projection", | ||||||
|  |         ":tensors_to_pose_landmarks_and_segmentation", | ||||||
|  |         "//mediapipe/calculators/image:image_properties_calculator", | ||||||
|  |         "//mediapipe/calculators/tensor:image_to_tensor_calculator", | ||||||
|  |         "//mediapipe/calculators/tensor:inference_calculator_onnx_tensorrt", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| mediapipe_simple_subgraph( | mediapipe_simple_subgraph( | ||||||
|     name = "tensors_to_pose_landmarks_and_segmentation", |     name = "tensors_to_pose_landmarks_and_segmentation", | ||||||
|     graph = "tensors_to_pose_landmarks_and_segmentation.pbtxt", |     graph = "tensors_to_pose_landmarks_and_segmentation.pbtxt", | ||||||
|  | @ -159,10 +188,57 @@ mediapipe_simple_subgraph( | ||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | # Top-level pose landmark subgraph (pose_landmark_onnx_cuda.pbtxt), registered | ||||||
|  | # as "PoseLandmarkOnnxCUDA". Depends on the ONNX CUDA variants of the pose | ||||||
|  | # detection and landmark-by-ROI subgraphs. | ||||||
|  | mediapipe_simple_subgraph( | ||||||
|  |     name = "pose_landmark_onnx_cuda", | ||||||
|  |     graph = "pose_landmark_onnx_cuda.pbtxt", | ||||||
|  |     register_as = "PoseLandmarkOnnxCUDA", | ||||||
|  |     deps = [ | ||||||
|  |         ":pose_detection_to_roi", | ||||||
|  |         ":pose_landmark_by_roi_onnx_cuda", | ||||||
|  |         ":pose_landmark_filtering", | ||||||
|  |         ":pose_landmarks_to_roi", | ||||||
|  |         ":pose_segmentation_filtering", | ||||||
|  |         "//mediapipe/calculators/core:constant_side_packet_calculator", | ||||||
|  |         "//mediapipe/calculators/core:gate_calculator", | ||||||
|  |         "//mediapipe/calculators/core:merge_calculator", | ||||||
|  |         "//mediapipe/calculators/core:packet_presence_calculator", | ||||||
|  |         "//mediapipe/calculators/core:previous_loopback_calculator", | ||||||
|  |         "//mediapipe/calculators/core:split_vector_calculator", | ||||||
|  |         "//mediapipe/calculators/image:image_properties_calculator", | ||||||
|  |         "//mediapipe/calculators/util:from_image_calculator", | ||||||
|  |         "//mediapipe/modules/pose_detection:pose_detection_onnx_cuda", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | # Top-level pose landmark subgraph (pose_landmark_onnx_tensorrt.pbtxt), | ||||||
|  | # registered as "PoseLandmarkOnnxTensorRT". Depends on the ONNX TensorRT | ||||||
|  | # variants of the pose detection and landmark-by-ROI subgraphs. | ||||||
|  | mediapipe_simple_subgraph( | ||||||
|  |     name = "pose_landmark_onnx_tensorrt", | ||||||
|  |     graph = "pose_landmark_onnx_tensorrt.pbtxt", | ||||||
|  |     register_as = "PoseLandmarkOnnxTensorRT", | ||||||
|  |     deps = [ | ||||||
|  |         ":pose_detection_to_roi", | ||||||
|  |         ":pose_landmark_by_roi_onnx_tensorrt", | ||||||
|  |         ":pose_landmark_filtering", | ||||||
|  |         ":pose_landmarks_to_roi", | ||||||
|  |         ":pose_segmentation_filtering", | ||||||
|  |         "//mediapipe/calculators/core:constant_side_packet_calculator", | ||||||
|  |         "//mediapipe/calculators/core:gate_calculator", | ||||||
|  |         "//mediapipe/calculators/core:merge_calculator", | ||||||
|  |         "//mediapipe/calculators/core:packet_presence_calculator", | ||||||
|  |         "//mediapipe/calculators/core:previous_loopback_calculator", | ||||||
|  |         "//mediapipe/calculators/core:split_vector_calculator", | ||||||
|  |         "//mediapipe/calculators/image:image_properties_calculator", | ||||||
|  |         "//mediapipe/calculators/util:from_image_calculator", | ||||||
|  |         "//mediapipe/modules/pose_detection:pose_detection_onnx_tensorrt", | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| exports_files( | exports_files( | ||||||
|     srcs = [ |     srcs = [ | ||||||
|  |         "pose_landmark_full.onnx", | ||||||
|         "pose_landmark_full.tflite", |         "pose_landmark_full.tflite", | ||||||
|  |         "pose_landmark_heavy.onnx", | ||||||
|         "pose_landmark_heavy.tflite", |         "pose_landmark_heavy.tflite", | ||||||
|  |         "pose_landmark_lite.onnx", | ||||||
|         "pose_landmark_lite.tflite", |         "pose_landmark_lite.tflite", | ||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | @ -0,0 +1,165 @@ | ||||||
|  | # MediaPipe graph to detect/predict pose landmarks and optionally segmentation | ||||||
|  | # within an ROI. (CPU input, and inference is executed through the ONNX | ||||||
|  | # InferenceCalculator with the CUDA delegate.) | ||||||
|  | # | ||||||
|  | # It is required that "pose_landmark_full.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" | ||||||
|  | # path during execution. | ||||||
|  | # | ||||||
|  | # NOTE: The model path is hard-coded in the InferenceCalculator node below and | ||||||
|  | # this graph does not declare a MODEL_COMPLEXITY input side packet, so | ||||||
|  | # "pose_landmark_lite.onnx" and "pose_landmark_heavy.onnx" cannot be selected | ||||||
|  | # here. The MODEL_COMPLEXITY line in the example below does not correspond to | ||||||
|  | # a declared input (TODO: confirm and remove, or add model selection). | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "PoseLandmarkByRoiOnnxCUDA" | ||||||
|  | #     input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | #     input_stream: "IMAGE:image" | ||||||
|  | #     input_stream: "ROI:roi" | ||||||
|  | #     output_stream: "LANDMARKS:landmarks" | ||||||
|  | #     output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | #   } | ||||||
|  | 
 | ||||||
|  | type: "PoseLandmarkByRoiOnnxCUDA" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | # ROI (region of interest) within the given image where a pose is located. | ||||||
|  | # (NormalizedRect) | ||||||
|  | input_stream: "ROI:roi" | ||||||
|  | 
 | ||||||
|  | # Whether to predict the segmentation mask. If unspecified, functions as set to | ||||||
|  | # false. (bool) | ||||||
|  | input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | 
 | ||||||
|  | # Pose landmarks within the given ROI. (NormalizedLandmarkList) | ||||||
|  | # We have 33 landmarks (see pose_landmark_topology.svg) and there are other | ||||||
|  | # auxiliary key points. | ||||||
|  | # 0 - nose | ||||||
|  | # 1 - left eye (inner) | ||||||
|  | # 2 - left eye | ||||||
|  | # 3 - left eye (outer) | ||||||
|  | # 4 - right eye (inner) | ||||||
|  | # 5 - right eye | ||||||
|  | # 6 - right eye (outer) | ||||||
|  | # 7 - left ear | ||||||
|  | # 8 - right ear | ||||||
|  | # 9 - mouth (left) | ||||||
|  | # 10 - mouth (right) | ||||||
|  | # 11 - left shoulder | ||||||
|  | # 12 - right shoulder | ||||||
|  | # 13 - left elbow | ||||||
|  | # 14 - right elbow | ||||||
|  | # 15 - left wrist | ||||||
|  | # 16 - right wrist | ||||||
|  | # 17 - left pinky | ||||||
|  | # 18 - right pinky | ||||||
|  | # 19 - left index | ||||||
|  | # 20 - right index | ||||||
|  | # 21 - left thumb | ||||||
|  | # 22 - right thumb | ||||||
|  | # 23 - left hip | ||||||
|  | # 24 - right hip | ||||||
|  | # 25 - left knee | ||||||
|  | # 26 - right knee | ||||||
|  | # 27 - left ankle | ||||||
|  | # 28 - right ankle | ||||||
|  | # 29 - left heel | ||||||
|  | # 30 - right heel | ||||||
|  | # 31 - left foot index | ||||||
|  | # 32 - right foot index | ||||||
|  | # | ||||||
|  | # NOTE: If a pose is not present within the given ROI, for this particular | ||||||
|  | # timestamp there will not be an output packet in the LANDMARKS stream. However, | ||||||
|  | # the MediaPipe framework will internally inform the downstream calculators of | ||||||
|  | # the absence of this packet so that they don't wait for it unnecessarily. | ||||||
|  | output_stream: "LANDMARKS:landmarks" | ||||||
|  | # Auxiliary landmarks for deriving the ROI in the subsequent image. | ||||||
|  | # (NormalizedLandmarkList) | ||||||
|  | output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose world landmarks within the given ROI. (LandmarkList) | ||||||
|  | # World landmarks are real-world 3D coordinates in meters with the origin at the | ||||||
|  | # center between hips. WORLD_LANDMARKS shares the same landmark topology as | ||||||
|  | # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object | ||||||
|  | # projected onto the 2D image surface, while WORLD_LANDMARKS provides | ||||||
|  | # coordinates (in meters) of the 3D object itself. | ||||||
|  | output_stream: "WORLD_LANDMARKS:world_landmarks" | ||||||
|  | 
 | ||||||
|  | # Segmentation mask on CPU in ImageFormat::VEC32F1. (Image) | ||||||
|  | output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | 
 | ||||||
|  | # Retrieves the image size. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE_CPU:image" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Crops and transforms the specified ROI in the input image into an image patch | ||||||
|  | # represented as a tensor of dimension expected by the corresponding ML model, | ||||||
|  | # while maintaining the aspect ratio of the ROI (which can be different from | ||||||
|  | # that of the image patch). Therefore, there can be letterboxing around the ROI | ||||||
|  | # in the generated tensor representation. | ||||||
|  | node: { | ||||||
|  |   calculator: "ImageToTensorCalculator" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   output_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   output_stream: "MATRIX:transformation_matrix" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.ImageToTensorCalculatorOptions.ext] { | ||||||
|  |       output_tensor_width: 256 | ||||||
|  |       output_tensor_height: 256 | ||||||
|  |       keep_aspect_ratio: true | ||||||
|  |       output_tensor_float_range { | ||||||
|  |         min: 0.0 | ||||||
|  |         max: 1.0 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Runs the pose landmark model on the input tensors. The model file is fixed to | ||||||
|  | # "pose_landmark_full.onnx" and the CUDA delegate is requested. | ||||||
|  | node { | ||||||
|  |   calculator: "InferenceCalculator" | ||||||
|  |   input_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "TENSORS:output_tensors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.InferenceCalculatorOptions.ext] { | ||||||
|  |       model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" | ||||||
|  |       delegate { cuda {} } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Decodes the tensors into the corresponding landmark and segmentation mask | ||||||
|  | # representation. | ||||||
|  | node { | ||||||
|  |   calculator: "TensorsToPoseLandmarksAndSegmentation" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_stream: "TENSORS:output_tensors" | ||||||
|  |   output_stream: "LANDMARKS:roi_landmarks" | ||||||
|  |   output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:roi_world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:roi_segmentation_mask" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Projects the landmarks and segmentation mask in the local coordinates of the | ||||||
|  | # (potentially letterboxed) ROI back to the global coordinates of the full input | ||||||
|  | # image. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarksAndSegmentationInverseProjection" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   input_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   input_stream: "MATRIX:transformation_matrix" | ||||||
|  |   input_stream: "LANDMARKS:roi_landmarks" | ||||||
|  |   input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" | ||||||
|  |   input_stream: "WORLD_LANDMARKS:roi_world_landmarks" | ||||||
|  |   input_stream: "SEGMENTATION_MASK:roi_segmentation_mask" | ||||||
|  |   output_stream: "LANDMARKS:landmarks" | ||||||
|  |   output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,165 @@ | ||||||
|  | # MediaPipe graph to detect/predict pose landmarks and optionally segmentation | ||||||
|  | # within an ROI. (CPU input, and inference is executed through the ONNX | ||||||
|  | # InferenceCalculator with the TensorRT delegate.) | ||||||
|  | # | ||||||
|  | # It is required that "pose_landmark_full.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" | ||||||
|  | # path during execution. | ||||||
|  | # | ||||||
|  | # NOTE: The model path is hard-coded in the InferenceCalculator node below and | ||||||
|  | # this graph does not declare a MODEL_COMPLEXITY input side packet, so | ||||||
|  | # "pose_landmark_lite.onnx" and "pose_landmark_heavy.onnx" cannot be selected | ||||||
|  | # here. The MODEL_COMPLEXITY line in the example below does not correspond to | ||||||
|  | # a declared input (TODO: confirm and remove, or add model selection). | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "PoseLandmarkByRoiOnnxTensorRT" | ||||||
|  | #     input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | #     input_stream: "IMAGE:image" | ||||||
|  | #     input_stream: "ROI:roi" | ||||||
|  | #     output_stream: "LANDMARKS:landmarks" | ||||||
|  | #     output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | #   } | ||||||
|  | 
 | ||||||
|  | type: "PoseLandmarkByRoiOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | # ROI (region of interest) within the given image where a pose is located. | ||||||
|  | # (NormalizedRect) | ||||||
|  | input_stream: "ROI:roi" | ||||||
|  | 
 | ||||||
|  | # Whether to predict the segmentation mask. If unspecified, functions as set to | ||||||
|  | # false. (bool) | ||||||
|  | input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | 
 | ||||||
|  | # Pose landmarks within the given ROI. (NormalizedLandmarkList) | ||||||
|  | # We have 33 landmarks (see pose_landmark_topology.svg) and there are other | ||||||
|  | # auxiliary key points. | ||||||
|  | # 0 - nose | ||||||
|  | # 1 - left eye (inner) | ||||||
|  | # 2 - left eye | ||||||
|  | # 3 - left eye (outer) | ||||||
|  | # 4 - right eye (inner) | ||||||
|  | # 5 - right eye | ||||||
|  | # 6 - right eye (outer) | ||||||
|  | # 7 - left ear | ||||||
|  | # 8 - right ear | ||||||
|  | # 9 - mouth (left) | ||||||
|  | # 10 - mouth (right) | ||||||
|  | # 11 - left shoulder | ||||||
|  | # 12 - right shoulder | ||||||
|  | # 13 - left elbow | ||||||
|  | # 14 - right elbow | ||||||
|  | # 15 - left wrist | ||||||
|  | # 16 - right wrist | ||||||
|  | # 17 - left pinky | ||||||
|  | # 18 - right pinky | ||||||
|  | # 19 - left index | ||||||
|  | # 20 - right index | ||||||
|  | # 21 - left thumb | ||||||
|  | # 22 - right thumb | ||||||
|  | # 23 - left hip | ||||||
|  | # 24 - right hip | ||||||
|  | # 25 - left knee | ||||||
|  | # 26 - right knee | ||||||
|  | # 27 - left ankle | ||||||
|  | # 28 - right ankle | ||||||
|  | # 29 - left heel | ||||||
|  | # 30 - right heel | ||||||
|  | # 31 - left foot index | ||||||
|  | # 32 - right foot index | ||||||
|  | # | ||||||
|  | # NOTE: If a pose is not present within the given ROI, for this particular | ||||||
|  | # timestamp there will not be an output packet in the LANDMARKS stream. However, | ||||||
|  | # the MediaPipe framework will internally inform the downstream calculators of | ||||||
|  | # the absence of this packet so that they don't wait for it unnecessarily. | ||||||
|  | output_stream: "LANDMARKS:landmarks" | ||||||
|  | # Auxiliary landmarks for deriving the ROI in the subsequent image. | ||||||
|  | # (NormalizedLandmarkList) | ||||||
|  | output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose world landmarks within the given ROI. (LandmarkList) | ||||||
|  | # World landmarks are real-world 3D coordinates in meters with the origin at the | ||||||
|  | # center between hips. WORLD_LANDMARKS shares the same landmark topology as | ||||||
|  | # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object | ||||||
|  | # projected onto the 2D image surface, while WORLD_LANDMARKS provides | ||||||
|  | # coordinates (in meters) of the 3D object itself. | ||||||
|  | output_stream: "WORLD_LANDMARKS:world_landmarks" | ||||||
|  | 
 | ||||||
|  | # Segmentation mask on CPU in ImageFormat::VEC32F1. (Image) | ||||||
|  | output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | 
 | ||||||
|  | # Retrieves the image size. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE_CPU:image" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Crops and transforms the specified ROI in the input image into an image patch | ||||||
|  | # represented as a tensor of dimension expected by the corresponding ML model, | ||||||
|  | # while maintaining the aspect ratio of the ROI (which can be different from | ||||||
|  | # that of the image patch). Therefore, there can be letterboxing around the ROI | ||||||
|  | # in the generated tensor representation. | ||||||
|  | node: { | ||||||
|  |   calculator: "ImageToTensorCalculator" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   output_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   output_stream: "MATRIX:transformation_matrix" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.ImageToTensorCalculatorOptions.ext] { | ||||||
|  |       output_tensor_width: 256 | ||||||
|  |       output_tensor_height: 256 | ||||||
|  |       keep_aspect_ratio: true | ||||||
|  |       output_tensor_float_range { | ||||||
|  |         min: 0.0 | ||||||
|  |         max: 1.0 | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Runs the pose landmark model on the input tensors. The model file is fixed to | ||||||
|  | # "pose_landmark_full.onnx" and the TensorRT delegate is requested. | ||||||
|  | node { | ||||||
|  |   calculator: "InferenceCalculator" | ||||||
|  |   input_stream: "TENSORS:input_tensors" | ||||||
|  |   output_stream: "TENSORS:output_tensors" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.InferenceCalculatorOptions.ext] { | ||||||
|  |       model_path: "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" | ||||||
|  |       delegate { tensorrt {} } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Decodes the tensors into the corresponding landmark and segmentation mask | ||||||
|  | # representation. | ||||||
|  | node { | ||||||
|  |   calculator: "TensorsToPoseLandmarksAndSegmentation" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_stream: "TENSORS:output_tensors" | ||||||
|  |   output_stream: "LANDMARKS:roi_landmarks" | ||||||
|  |   output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:roi_world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:roi_segmentation_mask" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Projects the landmarks and segmentation mask in the local coordinates of the | ||||||
|  | # (potentially letterboxed) ROI back to the global coordinates of the full input | ||||||
|  | # image. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarksAndSegmentationInverseProjection" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   input_stream: "NORM_RECT:roi" | ||||||
|  |   input_stream: "LETTERBOX_PADDING:letterbox_padding" | ||||||
|  |   input_stream: "MATRIX:transformation_matrix" | ||||||
|  |   input_stream: "LANDMARKS:roi_landmarks" | ||||||
|  |   input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks" | ||||||
|  |   input_stream: "WORLD_LANDMARKS:roi_world_landmarks" | ||||||
|  |   input_stream: "SEGMENTATION_MASK:roi_segmentation_mask" | ||||||
|  |   output_stream: "LANDMARKS:landmarks" | ||||||
|  |   output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | } | ||||||
							
								
								
									
										268
									
								
								mediapipe/modules/pose_landmark/pose_landmark_onnx_cuda.pbtxt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										268
									
								
								mediapipe/modules/pose_landmark/pose_landmark_onnx_cuda.pbtxt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,268 @@ | ||||||
|  | # MediaPipe graph to detect/predict pose landmarks. (CPU input; inference is | ||||||
|  | # performed by the PoseDetectionOnnxCUDA and PoseLandmarkByRoiOnnxCUDA | ||||||
|  | # subgraphs.) This graph tries to skip pose detection as much as possible by | ||||||
|  | # using previously detected/predicted landmarks for new images. | ||||||
|  | # | ||||||
|  | # It is required that "pose_detection.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_detection/pose_detection.onnx" | ||||||
|  | # path during execution. | ||||||
|  | # | ||||||
|  | # It is required that "pose_landmark_lite.onnx" or | ||||||
|  | # "pose_landmark_full.onnx" or "pose_landmark_heavy.onnx" is available at | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_lite.onnx" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_full.onnx" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_heavy.onnx" | ||||||
|  | # path respectively during execution, depending on the specification in the | ||||||
|  | # MODEL_COMPLEXITY input side packet. | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "PoseLandmarkOnnxCUDA" | ||||||
|  | #     input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | #     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | #     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | #     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | #     input_stream: "IMAGE:image" | ||||||
|  | #     output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  | #     output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | #   } | ||||||
|  | 
 | ||||||
|  | type: "PoseLandmarkOnnxCUDA" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | 
 | ||||||
|  | # Whether to filter landmarks across different input images to reduce jitter. | ||||||
|  | # If unspecified, functions as set to true. (bool) | ||||||
|  | input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | 
 | ||||||
|  | # Whether to predict the segmentation mask. If unspecified, functions as set to | ||||||
|  | # false. (bool) | ||||||
|  | input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | 
 | ||||||
|  | # Whether to filter segmentation mask across different input images to reduce | ||||||
|  | # jitter. If unspecified, functions as set to true. (bool) | ||||||
|  | input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | 
 | ||||||
|  | # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as | ||||||
|  | # inference latency generally go up with the model complexity. If unspecified, | ||||||
|  | # functions as set to 1. (int) | ||||||
|  | input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | 
 | ||||||
|  | # Whether landmarks on the previous image should be used to help localize | ||||||
|  | # landmarks on the current image. (bool) | ||||||
|  | input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose landmarks. (NormalizedLandmarkList) | ||||||
|  | # We have 33 landmarks (see pose_landmark_topology.svg), and there are other | ||||||
|  | # auxiliary key points. | ||||||
|  | # 0 - nose | ||||||
|  | # 1 - left eye (inner) | ||||||
|  | # 2 - left eye | ||||||
|  | # 3 - left eye (outer) | ||||||
|  | # 4 - right eye (inner) | ||||||
|  | # 5 - right eye | ||||||
|  | # 6 - right eye (outer) | ||||||
|  | # 7 - left ear | ||||||
|  | # 8 - right ear | ||||||
|  | # 9 - mouth (left) | ||||||
|  | # 10 - mouth (right) | ||||||
|  | # 11 - left shoulder | ||||||
|  | # 12 - right shoulder | ||||||
|  | # 13 - left elbow | ||||||
|  | # 14 - right elbow | ||||||
|  | # 15 - left wrist | ||||||
|  | # 16 - right wrist | ||||||
|  | # 17 - left pinky | ||||||
|  | # 18 - right pinky | ||||||
|  | # 19 - left index | ||||||
|  | # 20 - right index | ||||||
|  | # 21 - left thumb | ||||||
|  | # 22 - right thumb | ||||||
|  | # 23 - left hip | ||||||
|  | # 24 - right hip | ||||||
|  | # 25 - left knee | ||||||
|  | # 26 - right knee | ||||||
|  | # 27 - left ankle | ||||||
|  | # 28 - right ankle | ||||||
|  | # 29 - left heel | ||||||
|  | # 30 - right heel | ||||||
|  | # 31 - left foot index | ||||||
|  | # 32 - right foot index | ||||||
|  | # | ||||||
|  | # NOTE: if a pose is not present within the given ROI, for this particular | ||||||
|  | # timestamp there will not be an output packet in the LANDMARKS stream. However, | ||||||
|  | # the MediaPipe framework will internally inform the downstream calculators of | ||||||
|  | # the absence of this packet so that they don't wait for it unnecessarily. | ||||||
|  | output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose world landmarks. (LandmarkList) | ||||||
|  | # World landmarks are real-world 3D coordinates in meters with the origin at the | ||||||
|  | # center between hips. WORLD_LANDMARKS shares the same landmark topology as | ||||||
|  | # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object | ||||||
|  | # projected onto the 2D image surface, while WORLD_LANDMARKS provides | ||||||
|  | # coordinates (in meters) of the 3D object itself. | ||||||
|  | output_stream: "WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  | 
 | ||||||
|  | # Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) | ||||||
|  | output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | 
 | ||||||
|  | # Extra outputs (for debugging, for instance). | ||||||
|  | # Detected poses. (Detection) | ||||||
|  | output_stream: "DETECTION:pose_detection" | ||||||
|  | # Regions of interest calculated based on landmarks. (NormalizedRect) | ||||||
|  | output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks" | ||||||
|  | # Regions of interest calculated based on pose detections. (NormalizedRect) | ||||||
|  | output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection" | ||||||
|  | 
 | ||||||
|  | # When the optional input side packet "use_prev_landmarks" is either absent or | ||||||
|  | # set to true, uses the landmarks on the previous image to help localize | ||||||
|  | # landmarks on the current image. | ||||||
|  | node { | ||||||
|  |   calculator: "GateCalculator" | ||||||
|  |   input_side_packet: "ALLOW:use_prev_landmarks" | ||||||
|  |   input_stream: "prev_pose_rect_from_landmarks" | ||||||
|  |   output_stream: "gated_prev_pose_rect_from_landmarks" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.GateCalculatorOptions.ext] { | ||||||
|  |       allow: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Checks if there's previous pose rect calculated from landmarks. | ||||||
|  | node: { | ||||||
|  |   calculator: "PacketPresenceCalculator" | ||||||
|  |   input_stream: "PACKET:gated_prev_pose_rect_from_landmarks" | ||||||
|  |   output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Calculates size of the image. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE_CPU:image" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Drops the incoming image if the pose has already been identified from the | ||||||
|  | # previous image. Otherwise, passes the incoming image through to trigger a new | ||||||
|  | # round of pose detection. | ||||||
|  | node { | ||||||
|  |   calculator: "GateCalculator" | ||||||
|  |   input_stream: "image" | ||||||
|  |   input_stream: "image_size" | ||||||
|  |   input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present" | ||||||
|  |   output_stream: "image_for_pose_detection" | ||||||
|  |   output_stream: "image_size_for_pose_detection" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.GateCalculatorOptions.ext] { | ||||||
|  |       empty_packets_as_allow: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Detects poses. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseDetectionOnnxCUDA" | ||||||
|  |   input_stream: "IMAGE:image_for_pose_detection" | ||||||
|  |   output_stream: "DETECTIONS:pose_detections" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Gets the very first detection from "pose_detections" vector. | ||||||
|  | node { | ||||||
|  |   calculator: "SplitDetectionVectorCalculator" | ||||||
|  |   input_stream: "pose_detections" | ||||||
|  |   output_stream: "pose_detection" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       ranges: { begin: 0 end: 1 } | ||||||
|  |       element_only: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Calculates the region of interest based on the pose detection, so that it | ||||||
|  | # can be used to detect landmarks. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseDetectionToRoi" | ||||||
|  |   input_stream: "DETECTION:pose_detection" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size_for_pose_detection" | ||||||
|  |   output_stream: "ROI:pose_rect_from_detection" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Selects either pose rect (or ROI) calculated from detection or from previously | ||||||
|  | # detected landmarks if available (in this case, calculation of pose rect from | ||||||
|  | # detection is skipped). | ||||||
|  | node { | ||||||
|  |   calculator: "MergeCalculator" | ||||||
|  |   input_stream: "pose_rect_from_detection" | ||||||
|  |   input_stream: "gated_prev_pose_rect_from_landmarks" | ||||||
|  |   output_stream: "pose_rect" | ||||||
|  | } | ||||||
|  | 
 | ||||||
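|  | # Detects pose landmarks within specified region of interest of the image. | ||||||
|  | # NOTE(review): MODEL_COMPLEXITY is forwarded here, but the | ||||||
|  | # PoseLandmarkByRoiOnnxCUDA graph added in this change declares only | ||||||
|  | # ENABLE_SEGMENTATION as an input side packet and hard-codes | ||||||
|  | # "pose_landmark_full.onnx" - confirm that subgraph expansion accepts the | ||||||
|  | # undeclared tag and that model_complexity is meant to have no effect. | ||||||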
|  | node { | ||||||
|  |   calculator: "PoseLandmarkByRoiOnnxCUDA" | ||||||
|  |   input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "ROI:pose_rect" | ||||||
|  |   output_stream: "LANDMARKS:unfiltered_pose_landmarks" | ||||||
|  |   output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Smoothes landmarks to reduce jitter. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkFiltering" | ||||||
|  |   input_side_packet: "ENABLE:smooth_landmarks" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks" | ||||||
|  |   input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks" | ||||||
|  |   input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" | ||||||
|  |   output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks" | ||||||
|  |   output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Calculates region of interest based on the auxiliary landmarks, to be used in | ||||||
|  | # the subsequent image. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarksToRoi" | ||||||
|  |   input_stream: "LANDMARKS:auxiliary_landmarks" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "ROI:pose_rect_from_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Caches pose rects calculated from landmarks, and upon the arrival of the next | ||||||
|  | # input image, sends out the cached rects with timestamps replaced by that of | ||||||
|  | # the input image, essentially generating a packet that carries the previous | ||||||
|  | # pose rects. Note that upon the arrival of the very first input image, a | ||||||
|  | # timestamp bound update occurs to jump start the feedback loop. | ||||||
|  | node { | ||||||
|  |   calculator: "PreviousLoopbackCalculator" | ||||||
|  |   input_stream: "MAIN:image" | ||||||
|  |   input_stream: "LOOP:pose_rect_from_landmarks" | ||||||
|  |   input_stream_info: { | ||||||
|  |     tag_index: "LOOP" | ||||||
|  |     back_edge: true | ||||||
|  |   } | ||||||
|  |   output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Smoothes segmentation to reduce jitter. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseSegmentationFiltering" | ||||||
|  |   input_side_packet: "ENABLE:smooth_segmentation" | ||||||
|  |   input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" | ||||||
|  |   output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Converts the incoming segmentation mask represented as an Image into the | ||||||
|  | # corresponding ImageFrame type. Only the IMAGE_CPU output is connected; it is | ||||||
|  | # published as "segmentation_mask". | ||||||
|  | node: { | ||||||
|  |   calculator: "FromImageCalculator" | ||||||
|  |   input_stream: "IMAGE:filtered_segmentation_mask" | ||||||
|  |   output_stream: "IMAGE_CPU:segmentation_mask" | ||||||
|  | } | ||||||
|  | @ -0,0 +1,268 @@ | ||||||
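|  | # MediaPipe graph to detect/predict pose landmarks. (CPU input; detection and | ||||||
|  | # landmark inference are delegated to the "PoseDetectionOnnxTensorRT" and | ||||||
|  | # "PoseLandmarkByRoiOnnxTensorRT" nodes used below.) This graph tries to skip | ||||||
|  | # pose detection as much as possible by using previously detected/predicted | ||||||
|  | # landmarks for new images. | ||||||
|  | # | ||||||
|  | # NOTE(review): the model requirements listed below still name ".tflite" files | ||||||
|  | # and look carried over from a TFLite-based graph; confirm which model files | ||||||
|  | # the ONNX/TensorRT nodes actually load and update the paths accordingly. | ||||||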
|  | # | ||||||
|  | # It is required that "pose_detection.tflite" is available at | ||||||
|  | # "mediapipe/modules/pose_detection/pose_detection.tflite" | ||||||
|  | # path during execution. | ||||||
|  | # | ||||||
|  | # It is required that "pose_landmark_lite.tflite" or | ||||||
|  | # "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or | ||||||
|  | # "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite" | ||||||
|  | # path respectively during execution, depending on the specification in the | ||||||
|  | # MODEL_COMPLEXITY input side packet. | ||||||
|  | # | ||||||
|  | # EXAMPLE: | ||||||
|  | #   node { | ||||||
|  | #     calculator: "PoseLandmarkOnnxTensorRT" | ||||||
|  | #     input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | #     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | #     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | #     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | #     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | #     input_stream: "IMAGE:image" | ||||||
|  | #     output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  | #     output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | #   } | ||||||
|  | 
 | ||||||
|  | # Registered name of this graph; other graphs reference it through the | ||||||
|  | # "calculator" field of a node, as in the EXAMPLE above. | ||||||
|  | type: "PoseLandmarkOnnxTensorRT" | ||||||
|  | 
 | ||||||
|  | # CPU image. (ImageFrame) | ||||||
|  | # Besides feeding detection and landmark inference, this stream is the MAIN | ||||||
|  | # input of PreviousLoopbackCalculator and the input of | ||||||
|  | # ImagePropertiesCalculator. | ||||||
|  | input_stream: "IMAGE:image" | ||||||
|  | 
 | ||||||
|  | # Whether to filter landmarks across different input images to reduce jitter. | ||||||
|  | # If unspecified, functions as set to true. (bool) | ||||||
|  | # Forwarded as ENABLE to the PoseLandmarkFiltering node. | ||||||
|  | input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks" | ||||||
|  | 
 | ||||||
|  | # Whether to predict the segmentation mask. If unspecified, functions as set to | ||||||
|  | # false. (bool) | ||||||
|  | # Forwarded as ENABLE_SEGMENTATION to the PoseLandmarkByRoiOnnxTensorRT node. | ||||||
|  | input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  | 
 | ||||||
|  | # Whether to filter segmentation mask across different input images to reduce | ||||||
|  | # jitter. If unspecified, functions as set to true. (bool) | ||||||
|  | # Forwarded as ENABLE to the PoseSegmentationFiltering node. | ||||||
|  | input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation" | ||||||
|  | 
 | ||||||
|  | # Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as | ||||||
|  | # inference latency generally go up with the model complexity. If unspecified, | ||||||
|  | # functions as set to 1. (int) | ||||||
|  | # Forwarded as MODEL_COMPLEXITY to the PoseLandmarkByRoiOnnxTensorRT node. | ||||||
|  | input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  | 
 | ||||||
|  | # Whether landmarks on the previous image should be used to help localize | ||||||
|  | # landmarks on the current image. If unspecified, functions as set to true | ||||||
|  | # (the GateCalculator node that consumes it sets "allow: true"). (bool) | ||||||
|  | input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose landmarks. (NormalizedLandmarkList) | ||||||
|  | # Emitted by the PoseLandmarkFiltering node (FILTERED_NORM_LANDMARKS output). | ||||||
|  | # We have 33 landmarks (see pose_landmark_topology.svg), and there are other | ||||||
|  | # auxiliary key points. | ||||||
|  | # 0 - nose | ||||||
|  | # 1 - left eye (inner) | ||||||
|  | # 2 - left eye | ||||||
|  | # 3 - left eye (outer) | ||||||
|  | # 4 - right eye (inner) | ||||||
|  | # 5 - right eye | ||||||
|  | # 6 - right eye (outer) | ||||||
|  | # 7 - left ear | ||||||
|  | # 8 - right ear | ||||||
|  | # 9 - mouth (left) | ||||||
|  | # 10 - mouth (right) | ||||||
|  | # 11 - left shoulder | ||||||
|  | # 12 - right shoulder | ||||||
|  | # 13 - left elbow | ||||||
|  | # 14 - right elbow | ||||||
|  | # 15 - left wrist | ||||||
|  | # 16 - right wrist | ||||||
|  | # 17 - left pinky | ||||||
|  | # 18 - right pinky | ||||||
|  | # 19 - left index | ||||||
|  | # 20 - right index | ||||||
|  | # 21 - left thumb | ||||||
|  | # 22 - right thumb | ||||||
|  | # 23 - left hip | ||||||
|  | # 24 - right hip | ||||||
|  | # 25 - left knee | ||||||
|  | # 26 - right knee | ||||||
|  | # 27 - left ankle | ||||||
|  | # 28 - right ankle | ||||||
|  | # 29 - left heel | ||||||
|  | # 30 - right heel | ||||||
|  | # 31 - left foot index | ||||||
|  | # 32 - right foot index | ||||||
|  | # | ||||||
|  | # NOTE: if a pose is not present within the given ROI, for this particular | ||||||
|  | # timestamp there will not be an output packet in the LANDMARKS stream. However, | ||||||
|  | # the MediaPipe framework will internally inform the downstream calculators of | ||||||
|  | # the absence of this packet so that they don't wait for it unnecessarily. | ||||||
|  | output_stream: "LANDMARKS:pose_landmarks" | ||||||
|  | 
 | ||||||
|  | # Pose world landmarks. (LandmarkList) | ||||||
|  | # World landmarks are real-world 3D coordinates in meters with the origin at the | ||||||
|  | # center between hips. WORLD_LANDMARKS shares the same landmark topology as | ||||||
|  | # LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object | ||||||
|  | # projected onto the 2D image surface, while WORLD_LANDMARKS provides | ||||||
|  | # coordinates (in meters) of the 3D object itself. | ||||||
|  | # NOTE(review): LANDMARKS is declared as a NormalizedLandmarkList, so "in | ||||||
|  | # pixels" presumably means image-space coordinates normalized by the image | ||||||
|  | # size rather than raw pixel values - confirm. | ||||||
|  | output_stream: "WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  | 
 | ||||||
|  | # Segmentation mask. (ImageFrame in ImageFormat::VEC32F1) | ||||||
|  | # Emitted by the FromImageCalculator node at the end of this graph. | ||||||
|  | output_stream: "SEGMENTATION_MASK:segmentation_mask" | ||||||
|  | 
 | ||||||
|  | # Extra outputs (for debugging, for instance). | ||||||
|  | # The two detection-based outputs derive from "image_for_pose_detection", which | ||||||
|  | # the image GateCalculator node drops once a pose was identified from the | ||||||
|  | # previous image. | ||||||
|  | # First detected pose, taken from the "pose_detections" vector. (Detection) | ||||||
|  | output_stream: "DETECTION:pose_detection" | ||||||
|  | # Regions of interest calculated based on landmarks. (NormalizedRect) | ||||||
|  | output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks" | ||||||
|  | # Regions of interest calculated based on pose detections. (NormalizedRect) | ||||||
|  | output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection" | ||||||
|  | 
 | ||||||
|  | # When the optional input side packet "use_prev_landmarks" is either absent or | ||||||
|  | # set to true, uses the landmarks on the previous image to help localize | ||||||
|  | # landmarks on the current image. | ||||||
|  | # The gated rect feeds both the PacketPresenceCalculator and the | ||||||
|  | # MergeCalculator nodes of this graph. | ||||||
|  | node { | ||||||
|  |   calculator: "GateCalculator" | ||||||
|  |   input_side_packet: "ALLOW:use_prev_landmarks" | ||||||
|  |   input_stream: "prev_pose_rect_from_landmarks" | ||||||
|  |   output_stream: "gated_prev_pose_rect_from_landmarks" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.GateCalculatorOptions.ext] { | ||||||
|  |       # Presumably the value used when the ALLOW side packet is absent, which | ||||||
|  |       # is what gives the "absent means true" behavior described above. | ||||||
|  |       allow: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Checks whether there is a previous pose rect calculated from landmarks. The | ||||||
|  | # resulting "prev_pose_rect_from_landmarks_is_present" stream is consumed as the | ||||||
|  | # DISALLOW input of the image GateCalculator node below. | ||||||
|  | node: { | ||||||
|  |   calculator: "PacketPresenceCalculator" | ||||||
|  |   input_stream: "PACKET:gated_prev_pose_rect_from_landmarks" | ||||||
|  |   output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Calculates the size of the input image. "image_size" is consumed by the image | ||||||
|  | # GateCalculator, PoseLandmarkFiltering and PoseLandmarksToRoi nodes. | ||||||
|  | node { | ||||||
|  |   calculator: "ImagePropertiesCalculator" | ||||||
|  |   input_stream: "IMAGE_CPU:image" | ||||||
|  |   output_stream: "SIZE:image_size" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Drops the incoming image if the pose has already been identified from the | ||||||
|  | # previous image. Otherwise, passes the incoming image through to trigger a new | ||||||
|  | # round of pose detection. | ||||||
|  | # The image and its size are gated together: the two untagged inputs map, in | ||||||
|  | # order, onto the two untagged outputs. | ||||||
|  | node { | ||||||
|  |   calculator: "GateCalculator" | ||||||
|  |   input_stream: "image" | ||||||
|  |   input_stream: "image_size" | ||||||
|  |   input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present" | ||||||
|  |   output_stream: "image_for_pose_detection" | ||||||
|  |   output_stream: "image_size_for_pose_detection" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.GateCalculatorOptions.ext] { | ||||||
|  |       # NOTE(review): presumably lets the image through when the DISALLOW | ||||||
|  |       # stream carries no packet for a timestamp - confirm against | ||||||
|  |       # GateCalculatorOptions. | ||||||
|  |       empty_packets_as_allow: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Detects poses. Only receives images that passed the gate above | ||||||
|  | # ("image_for_pose_detection"), and emits the detections as a vector. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseDetectionOnnxTensorRT" | ||||||
|  |   input_stream: "IMAGE:image_for_pose_detection" | ||||||
|  |   output_stream: "DETECTIONS:pose_detections" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Gets the very first detection from the "pose_detections" vector. | ||||||
|  | node { | ||||||
|  |   calculator: "SplitDetectionVectorCalculator" | ||||||
|  |   input_stream: "pose_detections" | ||||||
|  |   output_stream: "pose_detection" | ||||||
|  |   options: { | ||||||
|  |     [mediapipe.SplitVectorCalculatorOptions.ext] { | ||||||
|  |       # Selects index range [0, 1), i.e. only the first element. | ||||||
|  |       ranges: { begin: 0 end: 1 } | ||||||
|  |       # Presumably emits that element as a bare Detection instead of a | ||||||
|  |       # one-element vector, matching the (Detection) type documented for the | ||||||
|  |       # DETECTION output - confirm. | ||||||
|  |       element_only: true | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Calculates the region of interest based on the pose detection, so that it can | ||||||
|  | # be used to detect landmarks. Uses the gated image size, which is only | ||||||
|  | # available for images that went through pose detection. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseDetectionToRoi" | ||||||
|  |   input_stream: "DETECTION:pose_detection" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size_for_pose_detection" | ||||||
|  |   output_stream: "ROI:pose_rect_from_detection" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Selects either pose rect (or ROI) calculated from detection or from previously | ||||||
|  | # detected landmarks if available (in this case, calculation of pose rect from | ||||||
|  | # detection is skipped). | ||||||
|  | # The merged "pose_rect" is the ROI input of the landmark node below. | ||||||
|  | node { | ||||||
|  |   calculator: "MergeCalculator" | ||||||
|  |   input_stream: "pose_rect_from_detection" | ||||||
|  |   input_stream: "gated_prev_pose_rect_from_landmarks" | ||||||
|  |   output_stream: "pose_rect" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Detects pose landmarks within specified region of interest of the image. | ||||||
|  | # Consumes the ungated "image" stream together with the merged "pose_rect"; | ||||||
|  | # all four outputs are unfiltered and are post-processed by the filtering | ||||||
|  | # nodes below. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkByRoiOnnxTensorRT" | ||||||
|  |   input_side_packet: "MODEL_COMPLEXITY:model_complexity" | ||||||
|  |   input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation" | ||||||
|  |   input_stream: "IMAGE:image" | ||||||
|  |   input_stream: "ROI:pose_rect" | ||||||
|  |   output_stream: "LANDMARKS:unfiltered_pose_landmarks" | ||||||
|  |   output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks" | ||||||
|  |   output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" | ||||||
|  |   output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Smooths landmarks to reduce jitter. The "smooth_landmarks" side packet is | ||||||
|  | # passed in as ENABLE. Produces the graph's LANDMARKS and WORLD_LANDMARKS | ||||||
|  | # outputs, plus the auxiliary landmarks used to compute the next ROI. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarkFiltering" | ||||||
|  |   input_side_packet: "ENABLE:smooth_landmarks" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks" | ||||||
|  |   input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks" | ||||||
|  |   input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks" | ||||||
|  |   output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks" | ||||||
|  |   output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks" | ||||||
|  |   output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Calculates the region of interest (ROI) from the filtered auxiliary landmarks | ||||||
|  | # and the image size, to be used in the subsequent image. The resulting | ||||||
|  | # "pose_rect_from_landmarks" is both the graph's ROI_FROM_LANDMARKS output and | ||||||
|  | # the LOOP input of the PreviousLoopbackCalculator node below. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseLandmarksToRoi" | ||||||
|  |   input_stream: "LANDMARKS:auxiliary_landmarks" | ||||||
|  |   input_stream: "IMAGE_SIZE:image_size" | ||||||
|  |   output_stream: "ROI:pose_rect_from_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Caches pose rects calculated from landmarks, and upon the arrival of the next | ||||||
|  | # input image, sends out the cached rects with timestamps replaced by that of | ||||||
|  | # the input image, essentially generating a packet that carries the previous | ||||||
|  | # pose rects. Note that upon the arrival of the very first input image, a | ||||||
|  | # timestamp bound update occurs to jump start the feedback loop. | ||||||
|  | node { | ||||||
|  |   calculator: "PreviousLoopbackCalculator" | ||||||
|  |   # MAIN supplies the timestamps of the incoming images; LOOP carries the rect | ||||||
|  |   # produced by PoseLandmarksToRoi above. | ||||||
|  |   input_stream: "MAIN:image" | ||||||
|  |   input_stream: "LOOP:pose_rect_from_landmarks" | ||||||
|  |   # LOOP is declared as a back edge because its producer sits downstream of | ||||||
|  |   # this node's own PREV_LOOP output (gate -> merge -> landmarks -> filtering | ||||||
|  |   # -> ROI), which closes the feedback loop. | ||||||
|  |   input_stream_info: { | ||||||
|  |     tag_index: "LOOP" | ||||||
|  |     back_edge: true | ||||||
|  |   } | ||||||
|  |   output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Smooths the segmentation mask to reduce jitter. The "smooth_segmentation" | ||||||
|  | # side packet is passed in as ENABLE; the input is the unfiltered mask produced | ||||||
|  | # by the PoseLandmarkByRoiOnnxTensorRT node. | ||||||
|  | node { | ||||||
|  |   calculator: "PoseSegmentationFiltering" | ||||||
|  |   input_side_packet: "ENABLE:smooth_segmentation" | ||||||
|  |   input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask" | ||||||
|  |   output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask" | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Converts the incoming segmentation mask represented as an Image into the | ||||||
|  | # corresponding ImageFrame type. The IMAGE_CPU output, "segmentation_mask", is | ||||||
|  | # the stream exposed as the graph's SEGMENTATION_MASK output. | ||||||
|  | node: { | ||||||
|  |   calculator: "FromImageCalculator" | ||||||
|  |   input_stream: "IMAGE:filtered_segmentation_mask" | ||||||
|  |   output_stream: "IMAGE_CPU:segmentation_mask" | ||||||
|  | } | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user