# MediaPipe graph to detect/predict pose landmarks. (GPU input, and inference is
# executed on GPU.)
#
# It is required that "pose_landmark_lite.tflite" or
# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkByRoiGpu"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:roi"
#     output_stream: "LANDMARKS:landmarks"
#     output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
#   }

type: "PoseLandmarkByRoiGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# the graph behaves as if it were set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg), and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"

# Transforms the input image into a 256x256 tensor while keeping the aspect
# ratio (what is expected by the corresponding model), resulting in potential
# letterboxing in the transformed image. The applied padding is published on
# "letterbox_padding" so it can be undone on the landmarks later.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      gpu_origin: TOP_LEFT
    }
  }
}

# Loads the pose landmark TF Lite model selected by MODEL_COMPLEXITY and
# exposes it as the "model" side packet.
node {
  calculator: "PoseLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs model inference on GPU.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate {
        gpu {
          # NOTE(review): presumably forces full-precision GPU inference;
          # confirm against InferenceCalculatorOptions.
          allow_precision_loss: false
        }
      }
    }
  }
}

# Splits a vector of TFLite tensors to multiple vectors according to the ranges
# specified in option. The tensor at index 2 is intentionally not routed to any
# output stream of this node.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "pose_flag_tensor"
  output_stream: "heatmap_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 3 end: 4 }
    }
  }
}

# Converts the pose-flag tensor into a float that represents the confidence
# score of pose presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:pose_flag_tensor"
  output_stream: "FLOAT:pose_presence_score"
}

# Applies a threshold to the confidence score to determine whether a pose is
# present.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:pose_presence_score"
  output_stream: "FLAG:pose_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops landmark tensors if pose is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:pose_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
# 39 landmarks are decoded here; only the first 35 are forwarded by the final
# split at the end of this graph.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:raw_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 39
      input_image_width: 256
      input_image_height: 256
      visibility_activation: SIGMOID
      presence_activation: SIGMOID
    }
  }
}

# Refines landmarks with the heatmap tensor.
node {
  calculator: "RefineLandmarksFromHeatmapCalculator"
  input_stream: "NORM_LANDMARKS:raw_landmarks"
  input_stream: "TENSORS:heatmap_tensor"
  output_stream: "NORM_LANDMARKS:refined_landmarks"
  options: {
    [mediapipe.RefineLandmarksFromHeatmapCalculatorOptions.ext] {
      kernel_size: 7
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed pose
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (pose
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:refined_landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:adjusted_landmarks"
}

# Projects the landmarks from the cropped pose image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:adjusted_landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:all_landmarks"
}

# Splits the landmarks into two sets: the actual pose landmarks (indices
# 0..32) and the auxiliary landmarks (indices 33..34). Any landmarks at index
# 35 and above are not forwarded to either output.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "all_landmarks"
  output_stream: "landmarks"
  output_stream: "auxiliary_landmarks"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 33 }
      ranges: { begin: 33 end: 35 }
    }
  }
}