# mediapipe-rs/mediapipe/modules/holistic_landmark/holistic_landmark_gpu.pbtxt

# Predicts pose + left/right hand + face landmarks.
#
# It is required that:
# - "face_detection_short_range.tflite" is available at
# "mediapipe/modules/face_detection/face_detection_short_range.tflite"
#
# - "face_landmark.tflite" is available at
# "mediapipe/modules/face_landmark/face_landmark.tflite"
#
# - "hand_landmark_full.tflite" is available at
# "mediapipe/modules/hand_landmark/hand_landmark_full.tflite"
#
# - "hand_recrop.tflite" is available at
# "mediapipe/modules/holistic_landmark/hand_recrop.tflite"
#
# - "handedness.txt" is available at
# "mediapipe/modules/hand_landmark/handedness.txt"
#
# - "pose_detection.tflite" is available at
# "mediapipe/modules/pose_detection/pose_detection.tflite"
#
# - "pose_landmark_lite.tflite", "pose_landmark_full.tflite" or
# "pose_landmark_heavy.tflite" is available during execution at
# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite",
# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or
# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
# respectively. Which of the three models is needed depends on the value of
# the MODEL_COMPLEXITY input side packet.
#
# EXAMPLE:
#   node {
#     calculator: "HolisticLandmarkGpu"
#     input_stream: "IMAGE:input_video"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
#     input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"
#     input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
#     output_stream: "POSE_LANDMARKS:pose_landmarks"
#     output_stream: "FACE_LANDMARKS:face_landmarks"
#     output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
#     output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
#   }
#
# NOTE: if a pose/hand/face output is not present in the image, for this
# particular timestamp there will not be an output packet in the corresponding
# output stream below. However, the MediaPipe framework will internally inform
# the downstream calculators of the absence of this packet so that they don't
# wait for it unnecessarily.

# Name under which this subgraph is registered; parent graphs instantiate it
# with `calculator: "HolisticLandmarkGpu"`.
type: "HolisticLandmarkGpu"

# GPU image. (GpuBuffer)
# Fed to the pose, hand and face nodes of this graph.
input_stream: "IMAGE:image"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
# Within this graph it is forwarded to the pose landmark node only.
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Whether to filter landmarks across different input images to reduce jitter.
# If unspecified, functions as set to true. (bool)
# Within this graph it is forwarded to the pose landmark node only.
input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"

# Whether to predict the segmentation mask. If unspecified, functions as set to
# false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Whether to filter segmentation mask across different input images to reduce
# jitter. If unspecified, functions as set to true. (bool)
input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"

# Whether to run the face landmark model with attention on lips and eyes to
# provide more accuracy, and additionally output iris landmarks. If unspecified,
# functions as set to false. (bool)
# Within this graph it is forwarded to the face landmark node only.
input_side_packet: "REFINE_FACE_LANDMARKS:refine_face_landmarks"

# Whether landmarks on the previous image should be used to help localize
# landmarks on the current image. (bool)
# Within this graph it is forwarded to the pose landmark node only.
# NOTE(review): the behavior when unspecified is not stated here -- confirm
# against the PoseLandmarkGpu subgraph.
input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"

# 33 pose landmarks. (NormalizedLandmarkList)
output_stream: "POSE_LANDMARKS:pose_landmarks"
# 33 pose world landmarks. (LandmarkList)
output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
# 21 left hand landmarks. (NormalizedLandmarkList)
output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
# 21 right hand landmarks. (NormalizedLandmarkList)
output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
# 468 face landmarks. (NormalizedLandmarkList)
# When REFINE_FACE_LANDMARKS is true, iris landmarks are output additionally
# (see the REFINE_FACE_LANDMARKS side packet above).
output_stream: "FACE_LANDMARKS:face_landmarks"

# Segmentation mask. (GpuBuffer in RGBA, with the same mask values in R and A)
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Debug outputs
# Both are produced by the pose landmark node: POSE_ROI is its
# ROI_FROM_LANDMARKS output and POSE_DETECTION is its DETECTION output.
# NOTE(review): packet types are not documented here -- presumably
# NormalizedRect and Detection; confirm against the PoseLandmarkGpu subgraph.
output_stream: "POSE_ROI:pose_landmarks_roi"
output_stream: "POSE_DETECTION:pose_detection"

# Predicts pose landmarks.
#
# Receives the graph's input image together with every graph side packet
# except REFINE_FACE_LANDMARKS. Each output of this node is exposed as a graph
# output:
#   LANDMARKS          -> POSE_LANDMARKS (also consumed by the hand node and
#                         the face-landmark split node below)
#   WORLD_LANDMARKS    -> WORLD_LANDMARKS
#   SEGMENTATION_MASK  -> SEGMENTATION_MASK
#   ROI_FROM_LANDMARKS -> POSE_ROI (debug)
#   DETECTION          -> POSE_DETECTION (debug)
node {
  calculator: "PoseLandmarkGpu"
  input_stream: "IMAGE:image"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  input_side_packet: "SMOOTH_LANDMARKS:smooth_landmarks"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_side_packet: "SMOOTH_SEGMENTATION:smooth_segmentation"
  input_side_packet: "USE_PREV_LANDMARKS:use_prev_landmarks"
  output_stream: "LANDMARKS:pose_landmarks"
  output_stream: "WORLD_LANDMARKS:pose_world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
  output_stream: "ROI_FROM_LANDMARKS:pose_landmarks_roi"
  output_stream: "DETECTION:pose_detection"
}

# Predicts left and right hand landmarks based on the initial pose landmarks.
#
# Consumes the graph's input image and the full pose landmark list produced by
# the pose landmark node. No side packets are passed to this node. Both
# outputs are exposed directly as the graph's LEFT_HAND_LANDMARKS and
# RIGHT_HAND_LANDMARKS output streams.
node {
  calculator: "HandLandmarksLeftAndRightGpu"
  input_stream: "IMAGE:image"
  input_stream: "POSE_LANDMARKS:pose_landmarks"
  output_stream: "LEFT_HAND_LANDMARKS:left_hand_landmarks"
  output_stream: "RIGHT_HAND_LANDMARKS:right_hand_landmarks"
}

# Slices the face-related landmarks out of the full pose landmark list.
#
# A single range with begin 0 and end 11 is taken from "pose_landmarks" and
# emitted as "face_landmarks_from_pose", which the face landmark node below
# consumes.
# NOTE(review): the range is presumably half-open, i.e. indices 0..10
# (11 landmarks) -- confirm against SplitVectorCalculatorOptions.
node {
  calculator: "SplitNormalizedLandmarkListCalculator"
  input_stream: "pose_landmarks"
  output_stream: "face_landmarks_from_pose"
  options {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges {
        begin: 0
        end: 11
      }
    }
  }
}

# Predicts face landmarks based on the initial pose landmarks.
#
# Consumes the graph's input image and the face-related subset of pose
# landmarks produced by the split node above. The graph-level
# REFINE_FACE_LANDMARKS side packet is bound to this node's REFINE_LANDMARKS
# tag. The output is exposed directly as the graph's FACE_LANDMARKS output
# stream.
node {
  calculator: "FaceLandmarksFromPoseGpu"
  input_stream: "IMAGE:image"
  input_stream: "FACE_LANDMARKS_FROM_POSE:face_landmarks_from_pose"
  input_side_packet: "REFINE_LANDMARKS:refine_face_landmarks"
  output_stream: "FACE_LANDMARKS:face_landmarks"
}