# File: mediapipe-rs/mediapipe/modules/pose_landmark/pose_landmark_by_roi_gpu.pbtxt

# MediaPipe graph to detect/predict pose landmarks and optionally segmentation
# within an ROI. (GPU input, and inference is executed on GPU.)
#
# Depending on the value of the MODEL_COMPLEXITY input side packet, one of the
# model files "pose_landmark_lite.tflite", "pose_landmark_full.tflite" or
# "pose_landmark_heavy.tflite" must be available during execution at the
# corresponding path:
#   "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite"
#   "mediapipe/modules/pose_landmark/pose_landmark_full.tflite"
#   "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"
#
# EXAMPLE:
#   node {
#     calculator: "PoseLandmarkByRoiGpu"
#     input_side_packet: "MODEL_COMPLEXITY:model_complexity"
#     input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
#     input_stream: "IMAGE:image"
#     input_stream: "ROI:roi"
#     output_stream: "LANDMARKS:landmarks"
#     output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
#     output_stream: "WORLD_LANDMARKS:world_landmarks"
#     output_stream: "SEGMENTATION_MASK:segmentation_mask"
#   }

# Name under which this subgraph is referenced as a calculator by other graphs
# (see the EXAMPLE above).
type: "PoseLandmarkByRoiGpu"

# Input GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a pose is located.
# (NormalizedRect)
input_stream: "ROI:roi"

# Whether to predict the segmentation mask. If unspecified, behaves as if set
# to false. (bool)
input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"

# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy and
# inference latency both generally increase with the model complexity. If
# unspecified, behaves as if set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# Pose landmarks within the given ROI. (NormalizedLandmarkList)
# There are 33 landmarks (see pose_landmark_topology.svg), plus other auxiliary
# key points. The landmark indices are:
# 0 - nose
# 1 - left eye (inner)
# 2 - left eye
# 3 - left eye (outer)
# 4 - right eye (inner)
# 5 - right eye
# 6 - right eye (outer)
# 7 - left ear
# 8 - right ear
# 9 - mouth (left)
# 10 - mouth (right)
# 11 - left shoulder
# 12 - right shoulder
# 13 - left elbow
# 14 - right elbow
# 15 - left wrist
# 16 - right wrist
# 17 - left pinky
# 18 - right pinky
# 19 - left index
# 20 - right index
# 21 - left thumb
# 22 - right thumb
# 23 - left hip
# 24 - right hip
# 25 - left knee
# 26 - right knee
# 27 - left ankle
# 28 - right ankle
# 29 - left heel
# 30 - right heel
# 31 - left foot index
# 32 - right foot index
#
# NOTE: If a pose is not present within the given ROI, there will be no output
# packet in the LANDMARKS stream for that timestamp. However, the MediaPipe
# framework will internally inform the downstream calculators of the absence of
# this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:landmarks"
# Auxiliary landmarks for deriving the ROI in the subsequent image.
# (NormalizedLandmarkList)
output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"

# Pose world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center between hips. WORLD_LANDMARKS shares the same landmark topology as
# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object
# projected onto the 2D image surface, while WORLD_LANDMARKS provides
# coordinates (in meters) of the 3D object itself.
# NOTE(review): LANDMARKS is declared above as NormalizedLandmarkList, so "in
# pixels" presumably means image-space coordinates rather than raw pixel
# values -- confirm against the landmark proto definition.
output_stream: "WORLD_LANDMARKS:world_landmarks"

# Segmentation mask on GPU in RGBA with the same mask values in R and A. (Image)
# NOTE(review): presumably only populated when ENABLE_SEGMENTATION is true --
# confirm against TensorsToPoseLandmarksAndSegmentation.
output_stream: "SEGMENTATION_MASK:segmentation_mask"

# Retrieves the size of the input GPU image. The resulting "image_size" stream
# is consumed by the inverse-projection node at the end of this graph.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:image"
  output_stream: "SIZE:image_size"
}

# Extracts the given ROI from the input image and converts it into an image
# patch, represented as a tensor with the dimensions expected by the
# corresponding ML model. The aspect ratio of the ROI is preserved; since it can
# differ from that of the tensor, the generated tensor may be letterboxed. The
# applied letterbox padding and the transformation matrix are emitted alongside
# the tensor and are consumed by the inverse-projection node at the end of this
# graph.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  input_stream: "NORM_RECT:roi"
  output_stream: "TENSORS:input_tensors"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "MATRIX:transformation_matrix"
  options {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      # Output tensor is 256x256.
      output_tensor_width: 256
      output_tensor_height: 256
      # Preserve the ROI aspect ratio (may introduce letterboxing).
      keep_aspect_ratio: true
      # Range of the float values in the output tensor.
      output_tensor_float_range { min: 0.0 max: 1.0 }
      gpu_origin: TOP_LEFT
    }
  }
}

# Loads the pose landmark TF Lite model chosen via the MODEL_COMPLEXITY input
# side packet and exposes it as the "model" side packet, which is consumed by
# the inference node below.
node {
  calculator: "PoseLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs model inference on GPU: feeds the tensors produced by
# ImageToTensorCalculator ("input_tensors") to the model supplied through the
# "model" side packet and emits the raw model outputs as "output_tensors".
# NOTE(review): no delegate is configured on this node; GPU execution
# presumably follows from the calculator's default behavior -- confirm.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:output_tensors"
}

# Decodes the inference output tensors into the corresponding landmark,
# auxiliary landmark, world landmark and segmentation mask representations.
# All outputs carry the "roi_" prefix because they are still expressed relative
# to the (potentially letterboxed) ROI; the next node projects them back to the
# full input image.
node {
  calculator: "TensorsToPoseLandmarksAndSegmentation"
  input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"
  input_stream: "TENSORS:output_tensors"
  output_stream: "LANDMARKS:roi_landmarks"
  output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
}

# Projects the landmarks and segmentation mask from the local coordinates of the
# (potentially letterboxed) ROI back to the global coordinates of the full input
# image. Besides the ROI-local results, this node takes the image size, the ROI,
# the letterbox padding and the transformation matrix produced earlier in the
# graph. Its four outputs are the output streams of this subgraph.
node {
  calculator: "PoseLandmarksAndSegmentationInverseProjection"
  input_stream: "IMAGE_SIZE:image_size"
  input_stream: "NORM_RECT:roi"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  input_stream: "MATRIX:transformation_matrix"
  input_stream: "LANDMARKS:roi_landmarks"
  input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"
  input_stream: "WORLD_LANDMARKS:roi_world_landmarks"
  input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"
  output_stream: "LANDMARKS:landmarks"
  output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"
  output_stream: "WORLD_LANDMARKS:world_landmarks"
  output_stream: "SEGMENTATION_MASK:segmentation_mask"
}
# Commit: add mediapipe modules to fix examples 2022-06-11 21:25:48 +02:00
			`# MediaPipe graph to detect/predict pose landmarks and optionally segmentation`
			`# within an ROI. (GPU input, and inference is executed on GPU.)`
			`#`
			`# It is required that "pose_landmark_lite.tflite" or`
			`# "pose_landmark_full.tflite" or "pose_landmark_heavy.tflite" is available at`
			`# "mediapipe/modules/pose_landmark/pose_landmark_lite.tflite" or`
			`# "mediapipe/modules/pose_landmark/pose_landmark_full.tflite" or`
			`# "mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite"`
			`# path respectively during execution, depending on the specification in the`
			`# MODEL_COMPLEXITY input side packet.`
			`#`
			`# EXAMPLE:`
			`# node {`
			`# calculator: "PoseLandmarkByRoiGpu"`
			`# input_side_packet: "MODEL_COMPLEXITY:model_complexity"`
			`# input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"`
			`# input_stream: "IMAGE:image"`
			`# input_stream: "ROI:roi"`
			`# output_stream: "LANDMARKS:landmarks"`
			`# output_stream: "SEGMENTATION_MASK:segmentation_mask"`
			`# }`

			`type: "PoseLandmarkByRoiGpu"`

			`# GPU image. (GpuBuffer)`
			`input_stream: "IMAGE:image"`
			`# ROI (region of interest) within the given image where a pose is located.`
			`# (NormalizedRect)`
			`input_stream: "ROI:roi"`

			`# Whether to predict the segmentation mask. If unspecified, functions as set to`
			`# false. (bool)`
			`input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"`

			`# Complexity of the pose landmark model: 0, 1 or 2. Landmark accuracy as well as`
			`# inference latency generally go up with the model complexity. If unspecified,`
			`# functions as set to 1. (int)`
			`input_side_packet: "MODEL_COMPLEXITY:model_complexity"`

			`# Pose landmarks within the given ROI. (NormalizedLandmarkList)`
			`# We have 33 landmarks (see pose_landmark_topology.svg), and there are other`
			`# auxiliary key points.`
			`# 0 - nose`
			`# 1 - left eye (inner)`
			`# 2 - left eye`
			`# 3 - left eye (outer)`
			`# 4 - right eye (inner)`
			`# 5 - right eye`
			`# 6 - right eye (outer)`
			`# 7 - left ear`
			`# 8 - right ear`
			`# 9 - mouth (left)`
			`# 10 - mouth (right)`
			`# 11 - left shoulder`
			`# 12 - right shoulder`
			`# 13 - left elbow`
			`# 14 - right elbow`
			`# 15 - left wrist`
			`# 16 - right wrist`
			`# 17 - left pinky`
			`# 18 - right pinky`
			`# 19 - left index`
			`# 20 - right index`
			`# 21 - left thumb`
			`# 22 - right thumb`
			`# 23 - left hip`
			`# 24 - right hip`
			`# 25 - left knee`
			`# 26 - right knee`
			`# 27 - left ankle`
			`# 28 - right ankle`
			`# 29 - left heel`
			`# 30 - right heel`
			`# 31 - left foot index`
			`# 32 - right foot index`
			`#`
			`# NOTE: If a pose is not present within the given ROI, for this particular`
			`# timestamp there will not be an output packet in the LANDMARKS stream. However,`
			`# the MediaPipe framework will internally inform the downstream calculators of`
			`# the absence of this packet so that they don't wait for it unnecessarily.`
			`output_stream: "LANDMARKS:landmarks"`
			`# Auxiliary landmarks for deriving the ROI in the subsequent image.`
			`# (NormalizedLandmarkList)`
			`output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"`

			`# Pose world landmarks within the given ROI. (LandmarkList)`
			`# World landmarks are real-world 3D coordinates in meters with the origin at the`
			`# center between hips. WORLD_LANDMARKS shares the same landmark topology as`
			`# LANDMARKS. However, LANDMARKS provides coordinates (in pixels) of a 3D object`
			`# projected onto the 2D image surface, while WORLD_LANDMARKS provides`
			`# coordinates (in meters) of the 3D object itself.`
			`output_stream: "WORLD_LANDMARKS:world_landmarks"`

			`# Segmentation mask on GPU in RGBA with the same mask values in R and A. (Image)`
			`output_stream: "SEGMENTATION_MASK:segmentation_mask"`

			`# Retrieves the image size.`
			`node {`
			`calculator: "ImagePropertiesCalculator"`
			`input_stream: "IMAGE_GPU:image"`
			`output_stream: "SIZE:image_size"`
			`}`

			`# Crops and transforms the specified ROI in the input image into an image patch`
			`# represented as a tensor of dimension expected by the corresponding ML model,`
			`# while maintaining the aspect ratio of the ROI (which can be different from`
			`# that of the image patch). Therefore, there can be letterboxing around the ROI`
			`# in the generated tensor representation.`
			`node: {`
			`calculator: "ImageToTensorCalculator"`
			`input_stream: "IMAGE_GPU:image"`
			`input_stream: "NORM_RECT:roi"`
			`output_stream: "TENSORS:input_tensors"`
			`output_stream: "LETTERBOX_PADDING:letterbox_padding"`
			`output_stream: "MATRIX:transformation_matrix"`
			`options: {`
			`[mediapipe.ImageToTensorCalculatorOptions.ext] {`
			`output_tensor_width: 256`
			`output_tensor_height: 256`
			`keep_aspect_ratio: true`
			`output_tensor_float_range {`
			`min: 0.0`
			`max: 1.0`
			`}`
			`gpu_origin: TOP_LEFT`
			`}`
			`}`
			`}`

			`# Loads the pose landmark TF Lite model.`
			`node {`
			`calculator: "PoseLandmarkModelLoader"`
			`input_side_packet: "MODEL_COMPLEXITY:model_complexity"`
			`output_side_packet: "MODEL:model"`
			`}`

			`# Runs model inference on GPU.`
			`node {`
			`calculator: "InferenceCalculator"`
			`input_side_packet: "MODEL:model"`
			`input_stream: "TENSORS:input_tensors"`
			`output_stream: "TENSORS:output_tensors"`
			`}`

			`# Decodes the tensors into the corresponding landmark and segmentation mask`
			`# representation.`
			`node {`
			`calculator: "TensorsToPoseLandmarksAndSegmentation"`
			`input_side_packet: "ENABLE_SEGMENTATION:enable_segmentation"`
			`input_stream: "TENSORS:output_tensors"`
			`output_stream: "LANDMARKS:roi_landmarks"`
			`output_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"`
			`output_stream: "WORLD_LANDMARKS:roi_world_landmarks"`
			`output_stream: "SEGMENTATION_MASK:roi_segmentation_mask"`
			`}`

			`# Projects the landmarks and segmentation mask in the local coordinates of the`
			`# (potentially letterboxed) ROI back to the global coordinates of the full input`
			`# image.`
			`node {`
			`calculator: "PoseLandmarksAndSegmentationInverseProjection"`
			`input_stream: "IMAGE_SIZE:image_size"`
			`input_stream: "NORM_RECT:roi"`
			`input_stream: "LETTERBOX_PADDING:letterbox_padding"`
			`input_stream: "MATRIX:transformation_matrix"`
			`input_stream: "LANDMARKS:roi_landmarks"`
			`input_stream: "AUXILIARY_LANDMARKS:roi_auxiliary_landmarks"`
			`input_stream: "WORLD_LANDMARKS:roi_world_landmarks"`
			`input_stream: "SEGMENTATION_MASK:roi_segmentation_mask"`
			`output_stream: "LANDMARKS:landmarks"`
			`output_stream: "AUXILIARY_LANDMARKS:auxiliary_landmarks"`
			`output_stream: "WORLD_LANDMARKS:world_landmarks"`
			`output_stream: "SEGMENTATION_MASK:segmentation_mask"`
			`}`