# MediaPipe hand landmark localization subgraph.

# Name under which this subgraph is registered.
type: "HandLandmarkSubgraph"

# Inputs: the video frame (consumed by the cropping and image-properties
# nodes) and the normalized rectangle locating the hand in that frame
# (consumed by the cropping and landmark-projection nodes).
input_stream: "IMAGE:input_video"
input_stream: "NORM_RECT:hand_rect"
# Outputs: landmarks projected back onto the full image
# (LandmarkProjectionCalculator), the expanded rectangle for the next frame
# (RectTransformationCalculator), and the thresholded presence flag
# (ThresholdingCalculator).
output_stream: "LANDMARKS:hand_landmarks"
output_stream: "NORM_RECT:hand_rect_for_next_frame"
output_stream: "PRESENCE:hand_presence"

# Crops the rectangle that contains a hand from the input image.
node {
  calculator: "ImageCroppingCalculator"
  input_stream: "IMAGE:input_video"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "IMAGE:hand_image"
  node_options: {
    [type.googleapis.com/mediapipe.ImageCroppingCalculatorOptions] {
      # NOTE(review): presumably controls how pixels outside the source image
      # are filled when the rectangle extends past its border - confirm
      # against ImageCroppingCalculatorOptions.
      border_mode: BORDER_REPLICATE
    }
  }
}

# Transforms the cropped hand image on CPU to a 256x256 image. To scale the
# input image, the scale_mode option is set to FIT to preserve the aspect
# ratio, resulting in potential letterboxing in the transformed image. The
# letterbox padding is emitted on its own stream so that
# LandmarkLetterboxRemovalCalculator can undo it later.
node {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE:hand_image"
  output_stream: "IMAGE:transformed_input_video"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      # Same 256x256 size as input_image_width/input_image_height configured
      # on TfLiteTensorsToLandmarksCalculator in this graph.
      output_width: 256
      output_height: 256
      scale_mode: FIT
    }
  }
}

# Converts the transformed input image on CPU into an image tensor stored in
# TfLiteTensor. The zero_center option is set to false to normalize the
# pixel values to [0.f, 1.f].
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE:transformed_input_video"
  output_stream: "TENSORS:image_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
      zero_center: false
    }
  }
}

# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors. The next node splits that vector into the landmark
# tensors and the hand-flag tensor.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS:image_tensor"
  output_stream: "TENSORS:output_tensors"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/hand_landmark.tflite"
    }
  }
}

# Splits a vector of TFLite tensors into multiple vectors according to the
# ranges specified in the options.
node {
  calculator: "SplitTfLiteTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      # NOTE(review): the ranges appear to pair with the output streams in
      # declaration order (first range -> landmark_tensors, second range ->
      # hand_flag_tensor), with `end` exclusive - confirm against
      # SplitVectorCalculatorOptions.
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
    }
  }
}

# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
  calculator: "TfLiteTensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}

# Applies a threshold to the confidence score to determine whether a hand is
# present. The resulting flag is the subgraph's PRESENCE output.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  node_options: {
    [type.googleapis.com/mediapipe.ThresholdingCalculatorOptions] {
      threshold: 0.1
    }
  }
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TfLiteTensorsToLandmarksCalculator"
  input_stream: "TENSORS:landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToLandmarksCalculatorOptions] {
      num_landmarks: 21
      # Same 256x256 size as output_width/output_height configured on
      # ImageTransformationCalculator in this graph.
      input_image_width: 256
      input_image_height: 256
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  # NOTE(review): this node uses the LANDMARKS tag while the neighbouring
  # nodes use NORM_LANDMARKS for the same streams; tags are defined per
  # calculator, so verify against this calculator's contract before changing.
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph). Uses the
# same hand_rect that ImageCroppingCalculator cropped with.
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}

# Extracts image size from the input images. The size is consumed by
# DetectionsToRectsCalculator and RectTransformationCalculator below.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE:input_video"
  output_stream: "SIZE:image_size"
}

# Converts hand landmarks to a detection that tightly encloses all landmarks.
node {
  calculator: "LandmarksToDetectionCalculator"
  input_stream: "NORM_LANDMARKS:hand_landmarks"
  output_stream: "DETECTION:hand_detection"
}

# Converts the hand detection into a rectangle (normalized by image size)
# that encloses the hand and is rotated such that the line connecting center of
# the wrist and MCP of the middle finger is aligned with the Y-axis of the
# rectangle.
node {
  calculator: "DetectionsToRectsCalculator"
  input_stream: "DETECTION:hand_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECT:hand_rect_from_landmarks"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] {
      rotation_vector_start_keypoint_index: 0  # Center of wrist.
      rotation_vector_end_keypoint_index: 9  # MCP of middle finger.
      rotation_vector_target_angle_degrees: 90
    }
  }
}

# Expands the hand rectangle so that in the next video frame it's likely to
# still contain the hand even with some motion. The result is the subgraph's
# NORM_RECT output.
node {
  calculator: "RectTransformationCalculator"
  input_stream: "NORM_RECT:hand_rect_from_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  # The output stream is declared without a tag here.
  output_stream: "hand_rect_for_next_frame"
  node_options: {
    [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] {
      scale_x: 1.6
      scale_y: 1.6
      # NOTE(review): presumably makes the rectangle square using its longer
      # side - confirm against RectTransformationCalculatorOptions.
      square_long: true
    }
  }
}