# MediaPipe graph to detect poses. (GPU input, and inference is executed on
# GPU.)
#
# It is required that "pose_detection.tflite" is available at
# "mediapipe/modules/pose_detection/pose_detection.tflite"
# path during execution.
#
# EXAMPLE:
#   node {
#     calculator: "PoseDetectionGpu"
#     input_stream: "IMAGE:image"
#     output_stream: "DETECTIONS:pose_detections"
#   }

type: "PoseDetectionGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"

# Detected poses. (std::vector<Detection>)
# Bounding box in each pose detection is currently set to the bounding box of
# the detected face. However, 4 additional key points are available in each
# detection, which are used to further calculate a (rotated) bounding box that
# encloses the body region of interest. Among the 4 key points, the first two
# are for identifying the full-body region, and the second two for upper body
# only:
#
# Key point 0 - mid hip center
# Key point 1 - point that encodes size & rotation (for full body)
# Key point 2 - mid shoulder center
# Key point 3 - point that encodes size & rotation (for upper body)
#
# NOTE: there will not be an output packet in the DETECTIONS stream for this
# particular timestamp if no poses are detected. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "DETECTIONS:detections"

# Transforms the input image into a 224x224 tensor while keeping the aspect
# ratio (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
      gpu_origin: TOP_LEFT
    }
  }
}

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"
      delegate: { gpu { use_advanced_gpu_api: true } }
    }
  }
}

# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
  calculator: "SsdAnchorsCalculator"
  output_side_packet: "anchors"
  options: {
    [mediapipe.SsdAnchorsCalculatorOptions.ext] {
      num_layers: 5
      min_scale: 0.1484375
      max_scale: 0.75
      input_size_height: 224
      input_size_width: 224
      anchor_offset_x: 0.5
      anchor_offset_y: 0.5
      strides: 8
      strides: 16
      strides: 32
      strides: 32
      strides: 32
      aspect_ratios: 1.0
      fixed_anchor_size: true
    }
  }
}

# Decodes the detection tensors generated by the TensorFlow Lite model, based
# on the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
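#
# As a sanity check on the options below (assuming SsdAnchorsCalculator's
# default interpolated_scale_aspect_ratio of 1.0, which yields 2 anchors per
# feature-map cell): strides 8/16/32/32/32 on a 224x224 input give feature
# maps of 28x28, 14x14, and three of 7x7, so
# num_boxes = 2*28^2 + 2*14^2 + 3*2*7^2 = 1568 + 392 + 294 = 2254.
# Similarly, num_coords = 4 box coordinates + 4 key points * 2 values = 12.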
node { calculator: "TensorsToDetectionsCalculator" input_stream: "TENSORS:detection_tensors" input_side_packet: "ANCHORS:anchors" output_stream: "DETECTIONS:unfiltered_detections" options: { [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { num_classes: 1 num_boxes: 2254 num_coords: 12 box_coord_offset: 0 keypoint_coord_offset: 4 num_keypoints: 4 num_values_per_keypoint: 2 sigmoid_score: true score_clipping_thresh: 100.0 reverse_output_order: true x_scale: 224.0 y_scale: 224.0 h_scale: 224.0 w_scale: 224.0 min_score_thresh: 0.5 } } } # Performs non-max suppression to remove excessive detections. node { calculator: "NonMaxSuppressionCalculator" input_stream: "unfiltered_detections" output_stream: "filtered_detections" options: { [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { min_suppression_threshold: 0.3 overlap_type: INTERSECTION_OVER_UNION algorithm: WEIGHTED } } } # Adjusts detection locations (already normalized to [0.f, 1.f]) on the # letterboxed image (after image transformation with the FIT scale mode) to the # corresponding locations on the same image with the letterbox removed (the # input image to the graph before image transformation). node { calculator: "DetectionLetterboxRemovalCalculator" input_stream: "DETECTIONS:filtered_detections" input_stream: "LETTERBOX_PADDING:letterbox_padding" output_stream: "DETECTIONS:detections" }