# MediaPipe graph to detect/predict pose landmarks. (GPU input, and inference is
# executed on GPU.)
#
# It is required that "pose_landmark_lite.tflite" or
# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkByRoiGpu"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:roi"
#     output_stream: "LANDMARKS:landmarks"
#   }

type: "PoseLandmarkByRoiGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg); the additional
# auxiliary key points are emitted separately on AUXILIARY_LANDMARKS.
#    0 - nose
#    1 - left eye (inner)
#    2 - left eye
#    3 - left eye (outer)
#    4 - right eye (inner)
#    5 - right eye
#    6 - right eye (outer)
#    7 - left ear
#    8 - right ear
#    9 - mouth (left)
#   10 - mouth (right)
#   11 - left shoulder
#   12 - right shoulder
#   13 - left elbow
#   14 - right elbow
#   15 - left wrist
#   16 - right wrist
#   17 - left pinky
#   18 - right pinky
#   19 - left index
#   20 - right index
#   21 - left thumb
#   22 - right thumb
#   23 - left hip
#   24 - right hip
#   25 - left knee
#   26 - right knee
#   27 - left ankle
#   28 - right ankle
#   29 - left heel
#   30 - right heel
#   31 - left foot index
#   32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"

# Transforms the ROI of the input image into a 256x256 tensor while keeping the
# aspect ratio (what is expected by the corresponding model), resulting in
# potential letterboxing in the transformed image. The applied padding is
# published on "letterbox_padding" so that it can be undone on the landmarks
# later in the graph. Tensor values are scaled to the [0.0, 1.0] float range.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 256
      output_tensor_height: 256
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      gpu_origin: TOP_LEFT
    }
  }
}

# Loads the pose landmark TF Lite model selected by the MODEL_COMPLEXITY side
# packet and exposes it as the "model" side packet.
node {
  calculator: "PoseLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs model inference on GPU, using the model provided by the "model" side
# packet. The GPU delegate is configured with allow_precision_loss: false.
# NOTE(review): presumably this trades speed for full-precision computation;
# confirm against the InferenceCalculator options documentation.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      delegate {
        gpu {
          allow_precision_loss: false
        }
      }
    }
  }
}

# Splits a vector of TFLite tensors to multiple vectors according to the ranges
# specified in option. The ranges map to the output streams in order:
#   [0, 1) -> landmark_tensors
#   [1, 2) -> pose_flag_tensor
#   [3, 4) -> heatmap_tensor
# The tensor at index 2 is not selected by any range, so it is not forwarded to
# any downstream node of this graph.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "pose_flag_tensor"
  output_stream: "heatmap_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 3 end: 4 }
    }
  }
}

# Converts the pose-flag tensor into a float that represents the confidence
# score of pose presence. The score is thresholded by the next node.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:pose_flag_tensor"
  output_stream: "FLOAT:pose_presence_score"
}

# Applies a threshold (0.5) to the confidence score to determine whether a pose
# is present. The resulting boolean flag drives the gate on the landmark
# tensors.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:pose_presence_score"
  output_stream: "FLAG:pose_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops landmark tensors if pose is not present, i.e. "landmark_tensors" is
# only forwarded as "ensured_landmark_tensors" when the "pose_presence" flag
# allows it.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:pose_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a vector of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model
# (256x256). A sigmoid activation is applied to both the visibility and the
# presence values.
# Note: 39 landmarks are decoded here, while the final split node of this graph
# only emits indices 0-34 (33 pose landmarks + 2 auxiliary landmarks).
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:raw_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 39
      input_image_width: 256
      input_image_height: 256
      visibility_activation: SIGMOID
      presence_activation: SIGMOID
    }
  }
}

# Refines the decoded landmarks with the heatmap tensor produced by the model,
# using a kernel size of 7.
node {
  calculator: "RefineLandmarksFromHeatmapCalculator"
  input_stream: "NORM_LANDMARKS:raw_landmarks"
  input_stream: "TENSORS:heatmap_tensor"
  output_stream: "NORM_LANDMARKS:refined_landmarks"
  options: {
    [mediapipe.RefineLandmarksFromHeatmapCalculatorOptions.ext] {
      kernel_size: 7
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed pose
# image (after the aspect-ratio-preserving image transformation, see
# keep_aspect_ratio: true in the ImageToTensorCalculator node) to the
# corresponding locations on the same image with the letterbox removed (pose
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:refined_landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:adjusted_landmarks"
}

# Projects the landmarks from the cropped pose image to the corresponding
# locations on the full image before cropping (input to the graph), using the
# same ROI that was used to crop the image.
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:adjusted_landmarks"
  input_stream: "NORM_RECT:roi"
  output_stream: "NORM_LANDMARKS:all_landmarks"
}

# Splits the landmarks into two sets: the actual pose landmarks and the
# auxiliary landmarks. The ranges map to the output streams in order:
#   [0, 33)  -> landmarks (the 33 pose landmarks)
#   [33, 35) -> auxiliary_landmarks
# Landmarks at index 35 and above are not emitted by this graph.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "all_landmarks"
  output_stream: "landmarks"
  output_stream: "auxiliary_landmarks"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 33 }
      ranges: { begin: 33 end: 35 }
    }
  }
}