mediapipe-rs/mediapipe/modules/hand_landmark/hand_landmark_gpu.pbtxt

# MediaPipe graph to detect/predict hand landmarks on GPU.

# Name of this graph.
# NOTE(review): presumably the name other graphs use to reference this graph as
# a subgraph node - confirm against the graph registration in the build.
type: "HandLandmarkGpu"

# GPU image. (GpuBuffer)
# Consumed by ImageToTensorCalculator under the IMAGE_GPU tag.
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
# Used both to crop the image (ImageToTensorCalculator) and to map the decoded
# landmarks back (LandmarkProjectionCalculator,
# WorldLandmarkProjectionCalculator).
input_stream: "ROI:hand_rect"

# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# behaves as if set to 1. (int)
# Forwarded unchanged to HandLandmarkModelLoader.
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# Produced by the LandmarkProjectionCalculator node of this graph.
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"

# Hand world landmarks within the given ROI. (LandmarkList)
# Produced by the WorldLandmarkProjectionCalculator node of this graph.
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
# NOTE(review): LANDMARKS is declared above as a NormalizedLandmarkList, so
# "in pixels" presumably means image-space coordinates normalized by the image
# size rather than raw pixel values - confirm.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"

# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
# Produced by the TensorsToClassificationCalculator node of this graph.
output_stream: "HANDEDNESS:handedness"

# Transforms a region of image into a 224x224 tensor while keeping the aspect
# ratio, and therefore may result in potential letterboxing.
# Inputs: the graph's GPU image and the hand ROI. Outputs: the model input
# tensor and the letterbox padding, which LandmarkLetterboxRemovalCalculator
# consumes later in this graph to undo the padding.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "TENSORS:input_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      # The same 224x224 size is passed as input_image_width/height to the
      # TensorsToLandmarksCalculator node that decodes the image landmarks;
      # keep the two in sync.
      output_tensor_width: 224
      output_tensor_height: 224
      # Preserving the aspect ratio is what introduces the letterbox padding
      # reported on LETTERBOX_PADDING.
      keep_aspect_ratio: true
      # Requested float range of the output tensor values.
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      # Origin convention of the GPU image.
      # NOTE(review): confirm TOP_LEFT matches the platform feeding this graph.
      gpu_origin: TOP_LEFT
    }
  }
}

# Loads the hand landmark TF Lite model.
# Takes the graph's MODEL_COMPLEXITY side packet and emits the loaded model as
# the "model" side packet, which InferenceCalculator consumes.
node {
  calculator: "HandLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs the loaded TensorFlow Lite hand landmark model on the image tensor and
# outputs a vector of tensors. In this graph that vector is split downstream
# (SplitTensorVectorCalculator) into four tensors: landmarks, hand-presence
# flag, handedness and world landmarks.
# NOTE(review): no delegate is configured on this node, so GPU execution
# presumably relies on the calculator's default backend selection - confirm.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:output_tensors"
}

# Splits a vector of tensors to multiple vectors according to the ranges
# specified in option.
# The output streams are untagged and are paired with the ranges by position
# (first output stream <- first range, and so on), so the order of the
# output_stream lines and of the ranges lines must be kept in sync.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  output_stream: "handedness_tensor"
  output_stream: "world_landmark_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      # One range per output stream, each selecting a single tensor
      # (assuming "end" is exclusive - confirm against the calculator docs).
      ranges: { begin: 0 end: 1 }  # -> landmark_tensors
      ranges: { begin: 1 end: 2 }  # -> hand_flag_tensor
      ranges: { begin: 2 end: 3 }  # -> handedness_tensor
      ranges: { begin: 3 end: 4 }  # -> world_landmark_tensor
    }
  }
}

# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
# The score is consumed only by ThresholdingCalculator; it is not a graph
# output.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}

# Applies a threshold to the confidence score to determine whether a hand is
# present.
# The resulting hand_presence flag drives the ALLOW input of all three
# GateCalculator nodes in this graph (handedness, landmarks, world landmarks).
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      # Cut-off applied to hand_presence_score.
      # NOTE(review): whether the comparison is strict is defined by the
      # calculator and not visible here.
      threshold: 0.5
    }
  }
}

# Drops handedness tensor if hand is not present.
# The untagged stream is the gated data; ALLOW carries the hand_presence flag
# produced by ThresholdingCalculator.
node {
  calculator: "GateCalculator"
  input_stream: "handedness_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_handedness_tensor"
}

# Converts the gated handedness tensor into a classification result. Its output
# stream "handedness" is the graph's HANDEDNESS output (ClassificationList).
node {
  calculator: "TensorsToClassificationCalculator"
  input_stream: "TENSORS:ensured_handedness_tensor"
  output_stream: "CLASSIFICATIONS:handedness"
  options: {
    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
      # NOTE(review): presumably keeps only the single best-scoring class -
      # confirm against the calculator options documentation.
      top_k: 1
      # Label file for the handedness classes.
      # NOTE(review): the path is resolved at runtime, presumably relative to
      # the MediaPipe resource root - confirm it exists in this repo layout.
      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
      # NOTE(review): presumably tells the calculator the tensor holds a single
      # score for a two-class problem - confirm.
      binary_classification: true
    }
  }
}

# Drops landmarks tensors if hand is not present.
# The untagged stream is the gated data; ALLOW carries the hand_presence flag
# produced by ThresholdingCalculator.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
# The result still refers to the letterboxed 224x224 crop; the two following
# nodes remove the letterbox and project back to the full image.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      # Matches the "21 hand landmarks" documented for the LANDMARKS output.
      num_landmarks: 21
      # Same size as output_tensor_width/height in ImageToTensorCalculator;
      # keep the two in sync.
      input_image_width: 224
      input_image_height: 224
      # The additional scaling factor is used to account for the Z coordinate
      # distribution in the training data.
      normalize_z: 0.4
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (produced by ImageToTensorCalculator with keep_aspect_ratio: true) to
# the corresponding locations on the same image with the letterbox removed
# (hand image before image transformation).
# LETTERBOX_PADDING is the padding reported by ImageToTensorCalculator.
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
# Uses the same hand_rect ROI that was used for cropping. The output stream
# "hand_landmarks" is the graph's LANDMARKS output.
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}

# Drops world landmarks tensors if hand is not present.
# The untagged stream is the gated data; ALLOW carries the hand_presence flag
# produced by ThresholdingCalculator.
node {
  calculator: "GateCalculator"
  input_stream: "world_landmark_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_world_landmark_tensor"
}

# Decodes the gated world-landmark tensor into a list of landmarks.
# Unlike the image-landmark decoder earlier in this graph, no input image size
# and no Z normalization are configured here, and the output uses the LANDMARKS
# tag rather than NORM_LANDMARKS.
# NOTE(review): the coordinates are therefore presumably left un-normalized
# (metric, per the WORLD_LANDMARKS description at the top) - confirm.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:unprojected_world_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      # Same landmark count as the image-landmark decoder (shared topology).
      num_landmarks: 21
    }
  }
}

# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
# The output stream "hand_world_landmarks" is the graph's WORLD_LANDMARKS
# output.
# NOTE(review): world landmarks are described at the top of this file as metric
# coordinates centered on the ROI, so this step presumably applies only the
# ROI's rotation rather than an image-space translation/scale - confirm against
# the calculator implementation.
node {
  calculator: "WorldLandmarkProjectionCalculator"
  input_stream: "LANDMARKS:unprojected_world_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "LANDMARKS:hand_world_landmarks"
}
add mediapipe modules to fix examples 2022-06-11 21:25:48 +02:00			`# MediaPipe graph to detect/predict hand landmarks on CPU.`

			`type: "HandLandmarkGpu"`

			`# GPU image. (GpuBuffer)`
			`input_stream: "IMAGE:image"`
			`# ROI (region of interest) within the given image where a palm/hand is located.`
			`# (NormalizedRect)`
			`input_stream: "ROI:hand_rect"`

			`# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as`
			`# inference latency generally go up with the model complexity. If unspecified,`
			`# functions as set to 1. (int)`
			`input_side_packet: "MODEL_COMPLEXITY:model_complexity"`

			`# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)`
			`# NOTE: if a hand is not present within the given ROI, for this particular`
			`# timestamp there will not be an output packet in the LANDMARKS stream. However,`
			`# the MediaPipe framework will internally inform the downstream calculators of`
			`# the absence of this packet so that they don't wait for it unnecessarily.`
			`output_stream: "LANDMARKS:hand_landmarks"`

			`# Hand world landmarks within the given ROI. (LandmarkList)`
			`# World landmarks are real-world 3D coordinates in meters with the origin in the`
			`# center of the given ROI.`
			`#`
			`# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,`
			`# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the`
			`# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of`
			`# the 3D object itself.`
			`output_stream: "WORLD_LANDMARKS:hand_world_landmarks"`

			`# Handedness of the detected hand (i.e. is hand left or right).`
			`# (ClassificationList)`
			`output_stream: "HANDEDNESS:handedness"`

			`# Transforms a region of image into a 224x224 tensor while keeping the aspect`
			`# ratio, and therefore may result in potential letterboxing.`
			`node {`
			`calculator: "ImageToTensorCalculator"`
			`input_stream: "IMAGE_GPU:image"`
			`input_stream: "NORM_RECT:hand_rect"`
			`output_stream: "TENSORS:input_tensor"`
			`output_stream: "LETTERBOX_PADDING:letterbox_padding"`
			`options: {`
			`[mediapipe.ImageToTensorCalculatorOptions.ext] {`
			`output_tensor_width: 224`
			`output_tensor_height: 224`
			`keep_aspect_ratio: true`
			`output_tensor_float_range {`
			`min: 0.0`
			`max: 1.0`
			`}`
			`gpu_origin: TOP_LEFT`
			`}`
			`}`
			`}`

			`# Loads the hand landmark TF Lite model.`
			`node {`
			`calculator: "HandLandmarkModelLoader"`
			`input_side_packet: "MODEL_COMPLEXITY:model_complexity"`
			`output_side_packet: "MODEL:model"`
			`}`

			`# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a`
			`# vector of tensors representing, for instance, detection boxes/keypoints and`
			`# scores.`
			`node {`
			`calculator: "InferenceCalculator"`
			`input_side_packet: "MODEL:model"`
			`input_stream: "TENSORS:input_tensor"`
			`output_stream: "TENSORS:output_tensors"`
			`}`

			`# Splits a vector of tensors to multiple vectors according to the ranges`
			`# specified in option.`
			`node {`
			`calculator: "SplitTensorVectorCalculator"`
			`input_stream: "output_tensors"`
			`output_stream: "landmark_tensors"`
			`output_stream: "hand_flag_tensor"`
			`output_stream: "handedness_tensor"`
			`output_stream: "world_landmark_tensor"`
			`options: {`
			`[mediapipe.SplitVectorCalculatorOptions.ext] {`
			`ranges: { begin: 0 end: 1 }`
			`ranges: { begin: 1 end: 2 }`
			`ranges: { begin: 2 end: 3 }`
			`ranges: { begin: 3 end: 4 }`
			`}`
			`}`
			`}`

			`# Converts the hand-flag tensor into a float that represents the confidence`
			`# score of hand presence.`
			`node {`
			`calculator: "TensorsToFloatsCalculator"`
			`input_stream: "TENSORS:hand_flag_tensor"`
			`output_stream: "FLOAT:hand_presence_score"`
			`}`

			`# Applies a threshold to the confidence score to determine whether a hand is`
			`# present.`
			`node {`
			`calculator: "ThresholdingCalculator"`
			`input_stream: "FLOAT:hand_presence_score"`
			`output_stream: "FLAG:hand_presence"`
			`options: {`
			`[mediapipe.ThresholdingCalculatorOptions.ext] {`
			`threshold: 0.5`
			`}`
			`}`
			`}`

			`# Drops handedness tensor if hand is not present.`
			`node {`
			`calculator: "GateCalculator"`
			`input_stream: "handedness_tensor"`
			`input_stream: "ALLOW:hand_presence"`
			`output_stream: "ensured_handedness_tensor"`
			`}`

			`# Converts the handedness tensor into a float that represents the classification`
			`# score of handedness.`
			`node {`
			`calculator: "TensorsToClassificationCalculator"`
			`input_stream: "TENSORS:ensured_handedness_tensor"`
			`output_stream: "CLASSIFICATIONS:handedness"`
			`options: {`
			`[mediapipe.TensorsToClassificationCalculatorOptions.ext] {`
			`top_k: 1`
			`label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"`
			`binary_classification: true`
			`}`
			`}`
			`}`

			`# Drops landmarks tensors if hand is not present.`
			`node {`
			`calculator: "GateCalculator"`
			`input_stream: "landmark_tensors"`
			`input_stream: "ALLOW:hand_presence"`
			`output_stream: "ensured_landmark_tensors"`
			`}`

			`# Decodes the landmark tensors into a list of landmarks, where the landmark`
			`# coordinates are normalized by the size of the input image to the model.`
			`node {`
			`calculator: "TensorsToLandmarksCalculator"`
			`input_stream: "TENSORS:ensured_landmark_tensors"`
			`output_stream: "NORM_LANDMARKS:landmarks"`
			`options: {`
			`[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {`
			`num_landmarks: 21`
			`input_image_width: 224`
			`input_image_height: 224`
			`# The additional scaling factor is used to account for the Z coordinate`
			`# distribution in the training data.`
			`normalize_z: 0.4`
			`}`
			`}`
			`}`

			`# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand`
			`# image (after image transformation with the FIT scale mode) to the`
			`# corresponding locations on the same image with the letterbox removed (hand`
			`# image before image transformation).`
			`node {`
			`calculator: "LandmarkLetterboxRemovalCalculator"`
			`input_stream: "LANDMARKS:landmarks"`
			`input_stream: "LETTERBOX_PADDING:letterbox_padding"`
			`output_stream: "LANDMARKS:scaled_landmarks"`
			`}`

			`# Projects the landmarks from the cropped hand image to the corresponding`
			`# locations on the full image before cropping (input to the graph).`
			`node {`
			`calculator: "LandmarkProjectionCalculator"`
			`input_stream: "NORM_LANDMARKS:scaled_landmarks"`
			`input_stream: "NORM_RECT:hand_rect"`
			`output_stream: "NORM_LANDMARKS:hand_landmarks"`
			`}`

			`# Drops world landmarks tensors if hand is not present.`
			`node {`
			`calculator: "GateCalculator"`
			`input_stream: "world_landmark_tensor"`
			`input_stream: "ALLOW:hand_presence"`
			`output_stream: "ensured_world_landmark_tensor"`
			`}`

			`# Decodes the landmark tensors into a list of landmarks, where the landmark`
			`# coordinates are normalized by the size of the input image to the model.`
			`node {`
			`calculator: "TensorsToLandmarksCalculator"`
			`input_stream: "TENSORS:ensured_world_landmark_tensor"`
			`output_stream: "LANDMARKS:unprojected_world_landmarks"`
			`options: {`
			`[mediapipe.TensorsToLandmarksCalculatorOptions.ext] {`
			`num_landmarks: 21`
			`}`
			`}`
			`}`

			`# Projects the world landmarks from the cropped hand image to the corresponding`
			`# locations on the full image before cropping (input to the graph).`
			`node {`
			`calculator: "WorldLandmarkProjectionCalculator"`
			`input_stream: "LANDMARKS:unprojected_world_landmarks"`
			`input_stream: "NORM_RECT:hand_rect"`
			`output_stream: "LANDMARKS:hand_world_landmarks"`
			`}`