33d683c671
GitOrigin-RevId: 373e3ac1e5839befd95bf7d73ceff3c5f1171969
269 lines
9.3 KiB
Plaintext
269 lines
9.3 KiB
Plaintext
# MediaPipe graph to detect/predict pose landmarks. (GPU input, and inference is
# executed on GPU.) This graph tries to skip pose detection as much as possible
# by using previously detected/predicted landmarks for new images.
#
# It is required that "pose_detection.tflite" is available at
# "mediapipe/modules/pose_detection/pose_detection.tflite"
# path during execution.
#
# It is required that "pose_landmark_lite.tflite" or
# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
# path respectively during execution, depending on the specification in the
# MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkGpu"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#     input_stream: "IMAGE:image"
#     output_stream: "LANDMARKS:pose_landmarks"
#     output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

# Name under which this subgraph is registered and referenced as a calculator.
type: "PoseLandmarkGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"

# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. If unspecified, functions as set to true (the
# gate node consuming this side packet is configured with "allow: true").
# (bool)
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# Pose landmarks. (NormalizedLandmarkList)
# We have 33 landmarks (see pose_landmark_topology.svg), and there are other
# auxiliary key points.
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: if a pose is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:pose_landmarks"

# Pose world landmarks. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
# NOTE(review): LANDMARKS is declared as a NormalizedLandmarkList, so "in
# pixels" above presumably means image-space coordinates normalized by the
# image size rather than raw pixel values - confirm.
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"

# Segmentation mask. (GpuBuffer in RGBA, with the same mask values in R and A)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Extra outputs (for debugging, for instance).
# Detected poses. (Detection)
output_stream: "DETECTION:pose_detection"
# Regions of interest calculated based on landmarks. (NormalizedRect)
output_stream: "ROI_FROM_LANDMARKS:pose_rect_from_landmarks"
# Regions of interest calculated based on pose detections. (NormalizedRect)
output_stream: "ROI_FROM_DETECTION:pose_rect_from_detection"

# When the optional input side packet "use_prev_landmarks" is either absent or
# set to true, uses the landmarks on the previous image to help localize
# landmarks on the current image.
node {
  calculator: "GateCalculator"
  input_side_packet: "ALLOW:use_prev_landmarks"
  # Previous-frame ROI produced by the PreviousLoopbackCalculator node.
  input_stream: "prev_pose_rect_from_landmarks"
  output_stream: "gated_prev_pose_rect_from_landmarks"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      # Presumably the fallback applied when the ALLOW side packet is not
      # provided, which yields the "absent means true" behavior described
      # above - confirm against GateCalculatorOptions.
      allow: true
    }
  }
}

# Checks if there's a previous pose rect calculated from landmarks. The
# resulting presence flag drives the DISALLOW input of the image gate, which
# decides whether pose detection runs for the current image.
node {
  calculator: "PacketPresenceCalculator"
  input_stream: "PACKET:gated_prev_pose_rect_from_landmarks"
  output_stream: "PRESENCE:prev_pose_rect_from_landmarks_is_present"
}

# Calculates size of the image. The "image_size" stream is consumed by the
# detection gate, the landmark filtering node and the landmarks-to-ROI node.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "SIZE:image_size"
}

# Drops the incoming image if the pose has already been identified from the
# previous image. Otherwise, passes the incoming image through to trigger a new
# round of pose detection.
node {
  calculator: "GateCalculator"
  # Untagged inputs and outputs are listed in matching order:
  #   image      -> image_for_pose_detection
  #   image_size -> image_size_for_pose_detection
  input_stream: "image"
  input_stream: "image_size"
  input_stream: "DISALLOW:prev_pose_rect_from_landmarks_is_present"
  output_stream: "image_for_pose_detection"
  output_stream: "image_size_for_pose_detection"
  options: {
    [mediapipe.GateCalculatorOptions.ext] {
      # Presumably makes an empty DISALLOW packet count as "allow", so the
      # image is passed on to detection when no presence flag is available -
      # confirm against GateCalculatorOptions.
      empty_packets_as_allow: true
    }
  }
}

# Detects poses. Only runs on images that passed the detection gate, i.e. when
# no pose rect from the previous image's landmarks is available.
node {
  calculator: "PoseDetectionGpu"
  input_stream: "IMAGE:image_for_pose_detection"
  output_stream: "DETECTIONS:pose_detections"
}

# Gets the very first detection from "pose_detections" vector.
node {
  calculator: "SplitDetectionVectorCalculator"
  input_stream: "pose_detections"
  output_stream: "pose_detection"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      # Selects the range [0, 1), i.e. the first element only.
      ranges: { begin: 0 end: 1 }
      # Presumably emits the element itself (a Detection, as declared for the
      # DETECTION graph output) rather than a one-element vector - confirm
      # against SplitVectorCalculatorOptions.
      element_only: true
    }
  }
}

# Calculates region of interest based on pose detection, so that it can be used
# to detect landmarks.
node {
  calculator: "PoseDetectionToRoi"
  input_stream: "DETECTION:pose_detection"
  # Uses the gated image size so this node only fires when detection ran.
  input_stream: "IMAGE_SIZE:image_size_for_pose_detection"
  output_stream: "ROI:pose_rect_from_detection"
}

# Selects either pose rect (or ROI) calculated from detection or from previously
# detected landmarks if available (in this case, calculation of pose rect from
# detection is skipped).
node {
  calculator: "MergeCalculator"
  # ROI from a fresh pose detection on the current image.
  input_stream: "pose_rect_from_detection"
  # ROI carried over from the previous image's landmarks (after gating).
  input_stream: "gated_prev_pose_rect_from_landmarks"
  output_stream: "pose_rect"
}

# Detects pose landmarks within specified region of interest of the image.
# All outputs are "unfiltered": landmarks are smoothed by the
# PoseLandmarkFiltering node and the mask by the PoseSegmentationFiltering
# node further down the graph.
node {
  calculator: "PoseLandmarkByRoiGpu"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "IMAGE:image"
  input_stream: "ROI:pose_rect"
  output_stream: "LANDMARKS:unfiltered_pose_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:unfiltered_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
}

# Smooths landmarks to reduce jitter. Smoothing is controlled by the
# "smooth_landmarks" side packet. The filtered pose and world landmarks are the
# graph's LANDMARKS and WORLD_LANDMARKS outputs; the filtered auxiliary
# landmarks feed the ROI calculation for the next image.
node {
  calculator: "PoseLandmarkFiltering"
  input_side_packet: "ENABLE:smooth_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_LANDMARKS:unfiltered_pose_landmarks"
  input_stream: "AUX_NORM_LANDMARKS:unfiltered_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:unfiltered_world_landmarks"
  output_stream: "FILTERED_NORM_LANDMARKS:pose_landmarks"
  output_stream: "FILTERED_AUX_NORM_LANDMARKS:auxiliary_landmarks"
  output_stream: "FILTERED_WORLD_LANDMARKS:pose_world_landmarks"
}

# Calculates region of interest based on the auxiliary landmarks, to be used in
# the subsequent image. The result is also exposed as the ROI_FROM_LANDMARKS
# graph output.
node {
  calculator: "PoseLandmarksToRoi"
  input_stream: "LANDMARKS:auxiliary_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "ROI:pose_rect_from_landmarks"
}

# Caches pose rects calculated from landmarks, and upon the arrival of the next
# input image, sends out the cached rects with timestamps replaced by that of
# the input image, essentially generating a packet that carries the previous
# pose rects. Note that upon the arrival of the very first input image, a
# timestamp bound update occurs to jump start the feedback loop.
node {
  calculator: "PreviousLoopbackCalculator"
  input_stream: "MAIN:image"
  input_stream: "LOOP:pose_rect_from_landmarks"
  # "pose_rect_from_landmarks" is produced downstream of this node's own
  # output (gate -> merge -> landmarks -> filtering -> ROI), so the LOOP input
  # is declared as a back edge to close the feedback cycle.
  input_stream_info: {
    tag_index: "LOOP"
    back_edge: true
  }
  output_stream: "PREV_LOOP:prev_pose_rect_from_landmarks"
}

# Smooths segmentation to reduce jitter. Smoothing is controlled by the
# "smooth_segmentation" side packet.
node {
  calculator: "PoseSegmentationFiltering"
  input_side_packet: "ENABLE:smooth_segmentation"
  input_stream: "SEGMENTATION_MASK:unfiltered_segmentation_mask"
  output_stream: "FILTERED_SEGMENTATION_MASK:filtered_segmentation_mask"
}

# Converts the incoming segmentation mask represented as an Image into the
# corresponding GpuBuffer type. The result is the graph's SEGMENTATION_MASK
# output.
node {
  calculator: "FromImageCalculator"
  input_stream: "IMAGE:filtered_segmentation_mask"
  output_stream: "IMAGE_GPU:segmentation_mask"
}