220 lines
7.0 KiB
Plaintext
220 lines
7.0 KiB
Plaintext
|
# MediaPipe graph to detect/predict hand landmarks on CPU.

type: "HandLandmarkCpu"
|
||
|
|
||
|
# CPU image. (ImageFrame)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"
|
||
|
|
||
|
# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# the graph behaves as if this were set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"
|
||
|
|
||
|
# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"
|
||
|
|
||
|
# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (normalized by the image size, as its
# NormalizedLandmarkList type indicates) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"
|
||
|
|
||
|
# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"
|
||
|
|
||
|
# Transforms a region of image into a 224x224 tensor while keeping the aspect
# ratio, and therefore may result in potential letterboxing. The padding that
# was applied is published on LETTERBOX_PADDING so that it can be undone on the
# decoded landmarks later in the graph.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:image"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "TENSORS:input_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      # Float range of the values written to the output tensor.
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
    }
  }
}
|
||
|
|
||
|
# Loads the hand landmark TF Lite model selected by MODEL_COMPLEXITY and
# exposes it as the MODEL side packet.
node {
  calculator: "HandLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}
|
||
|
|
||
|
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors. The split node that consumes "output_tensors" separates
# that vector into the landmark, hand-flag, handedness and world-landmark
# tensors.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:output_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      # Selects the XNNPACK delegate with its default settings.
      delegate {
        xnnpack {}
      }
    }
  }
}
|
||
|
|
||
|
# Splits a vector of tensors into multiple vectors according to the ranges
# specified in the options.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  output_stream: "handedness_tensor"
  output_stream: "world_landmark_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      # One range per output stream declared above.
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 2 end: 3 }
      ranges: { begin: 3 end: 4 }
    }
  }
}
|
||
|
|
||
|
# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}
|
||
|
|
||
|
# Applies a threshold to the confidence score to determine whether a hand is
# present. The resulting flag drives the ALLOW input of the gate nodes in this
# graph.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}
|
||
|
|
||
|
# Drops handedness tensor if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "handedness_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_handedness_tensor"
}
|
||
|
|
||
|
# Converts the handedness tensor into the classification result published on
# the graph's HANDEDNESS output (ClassificationList). Only the top-1 class is
# kept, and class labels are read from the label map file.
node {
  calculator: "TensorsToClassificationCalculator"
  input_stream: "TENSORS:ensured_handedness_tensor"
  output_stream: "CLASSIFICATIONS:handedness"
  options: {
    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
      top_k: 1
      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
      binary_classification: true
    }
  }
}
|
||
|
|
||
|
# Drops landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_landmark_tensors"
}
|
||
|
|
||
|
# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
      # Must match the 224x224 tensor size produced by ImageToTensorCalculator.
      input_image_width: 224
      input_image_height: 224
      # The additional scaling factor is used to account for the Z coordinate
      # distribution in the training data.
      normalize_z: 0.4
    }
  }
}
|
||
|
|
||
|
# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}
|
||
|
|
||
|
# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}
|
||
|
|
||
|
# Drops world landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "world_landmark_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_world_landmark_tensor"
}
|
||
|
|
||
|
# Decodes the world landmark tensor into a list of world landmarks. Unlike the
# image-landmark decoding node, no input image size is configured here and the
# result is emitted on the LANDMARKS tag (not NORM_LANDMARKS), i.e. the
# coordinates are not normalized by the model input image size.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:unprojected_world_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
    }
  }
}
|
||
|
|
||
|
# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "WorldLandmarkProjectionCalculator"
  input_stream: "LANDMARKS:unprojected_world_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "LANDMARKS:hand_world_landmarks"
}
|