# MediaPipe graph to detect/predict hand landmarks on GPU.

type: "HandLandmarkGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"

# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# it behaves as if set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"

# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin at the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"

# Handedness of the detected hand (i.e. is the hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"

# Transforms a region of the image into a 224x224 tensor while keeping the
# aspect ratio, which may result in letterboxing.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "TENSORS:input_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      gpu_origin: TOP_LEFT
    }
  }
}
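
# NOTE (explanatory): LETTERBOX_PADDING carries the padding added on each side
# to preserve the aspect ratio, expressed as fractions of the output size in
# the order [left, top, right, bottom]. It is consumed further down by
# LandmarkLetterboxRemovalCalculator to undo the padding.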

# Loads the hand landmark TF Lite model.
node {
  calculator: "HandLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}
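
# NOTE (explanatory): MODEL_COMPLEXITY selects which TF Lite model file the
# loader provides as the MODEL side packet; in the standard MediaPipe
# hand_landmark module, 0 typically corresponds to the "lite" model and 1 to
# the "full" model.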

# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:output_tensors"
}

# Splits the vector of output tensors into multiple vectors according to the
# ranges specified in the options.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  output_stream: "handedness_tensor"
  output_stream: "world_landmark_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 2 end: 3 }
      ranges: { begin: 3 end: 4 }
    }
  }
}
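
# NOTE (explanatory): each half-open range [begin, end) selects a slice of the
# input tensor vector, and the i-th range feeds the i-th output stream, so
# tensor 0 -> landmark_tensors, 1 -> hand_flag_tensor, 2 -> handedness_tensor,
# and 3 -> world_landmark_tensor.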

# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}

# Applies a threshold to the confidence score to determine whether a hand is
# present.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}
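
# NOTE (explanatory): FLAG is a boolean packet, true when hand_presence_score
# exceeds the 0.5 threshold. It drives the GateCalculator nodes below so that
# landmark and handedness tensors are only decoded when a hand is present.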

# Drops handedness tensor if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "handedness_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_handedness_tensor"
}
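
# NOTE (explanatory): GateCalculator forwards its data packets only when the
# packet on the ALLOW stream is true at the same timestamp; otherwise the data
# packets are dropped.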

# Converts the handedness tensor into a classification of the hand (left or
# right) with an associated score.
node {
  calculator: "TensorsToClassificationCalculator"
  input_stream: "TENSORS:ensured_handedness_tensor"
  output_stream: "CLASSIFICATIONS:handedness"
  options: {
    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
      top_k: 1
      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
      binary_classification: true
    }
  }
}

# Drops landmark tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
      input_image_width: 224
      input_image_height: 224
      # The additional scaling factor is used to account for the Z coordinate
      # distribution in the training data.
      normalize_z: 0.4
    }
  }
}
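
# NOTE (explanatory sketch): the raw landmark values are roughly in pixel units
# of the 224x224 model input; x and y are divided by input_image_width and
# input_image_height to obtain normalized [0, 1] coordinates, and z is scaled
# similarly, with normalize_z applied as an additional divisor.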

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}
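
# NOTE (explanatory sketch): with letterbox padding [left, top, right, bottom],
# each landmark is remapped approximately as
#   x' = (x - left) / (1 - left - right)
#   y' = (y - top)  / (1 - top - bottom)
# which maps coordinates on the padded crop back onto the unpadded hand crop.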

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}
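
# NOTE (explanatory sketch): the projection rotates each normalized landmark
# around the crop center by the rect rotation, scales it by the rect
# width/height, and translates it to the rect center, yielding coordinates
# normalized to the full input image.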

# Drops world landmark tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "world_landmark_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_world_landmark_tensor"
}

# Decodes the world-landmark tensor into a list of world landmarks with
# real-world 3D coordinates in meters.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:unprojected_world_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
    }
  }
}
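
# NOTE (explanatory): since the output tag here is LANDMARKS rather than
# NORM_LANDMARKS and no input image size is configured, the decoded values are
# kept as metric 3D coordinates rather than being normalized to [0, 1].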

# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "WorldLandmarkProjectionCalculator"
  input_stream: "LANDMARKS:unprojected_world_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "LANDMARKS:hand_world_landmarks"
}
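
# NOTE (explanatory): world landmarks are metric coordinates centered on the
# ROI, so this projection only needs to undo the ROI rotation; no scaling or
# translation by the rect size is involved.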