# MediaPipe graph to detect/predict hand landmarks on GPU.

type: "HandLandmarkGpu"

# GPU image. (GpuBuffer)
input_stream: "IMAGE:image"
# ROI (region of interest) within the given image where a palm/hand is located.
# (NormalizedRect)
input_stream: "ROI:hand_rect"

# Complexity of the hand landmark model: 0 or 1. Landmark accuracy as well as
# inference latency generally go up with the model complexity. If unspecified,
# functions as set to 1. (int)
input_side_packet: "MODEL_COMPLEXITY:model_complexity"

# 21 hand landmarks within the given ROI. (NormalizedLandmarkList)
# NOTE: if a hand is not present within the given ROI, for this particular
# timestamp there will not be an output packet in the LANDMARKS stream. However,
# the MediaPipe framework will internally inform the downstream calculators of
# the absence of this packet so that they don't wait for it unnecessarily.
output_stream: "LANDMARKS:hand_landmarks"

# Hand world landmarks within the given ROI. (LandmarkList)
# World landmarks are real-world 3D coordinates in meters with the origin in the
# center of the given ROI.
#
# WORLD_LANDMARKS shares the same landmark topology as LANDMARKS. However,
# LANDMARKS provides coordinates (in pixels) of a 3D object projected onto the
# 2D image surface, while WORLD_LANDMARKS provides coordinates (in meters) of
# the 3D object itself.
output_stream: "WORLD_LANDMARKS:hand_world_landmarks"

# Handedness of the detected hand (i.e. is hand left or right).
# (ClassificationList)
output_stream: "HANDEDNESS:handedness"

# Transforms a region of image into a 224x224 tensor while keeping the aspect
# ratio, and therefore may result in potential letterboxing.
node {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE_GPU:image"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "TENSORS:input_tensor"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 224
      output_tensor_height: 224
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: 0.0
        max: 1.0
      }
      gpu_origin: TOP_LEFT
    }
  }
}

# Loads the hand landmark TF Lite model selected by the MODEL_COMPLEXITY side
# packet and exposes it as the MODEL side packet.
node {
  calculator: "HandLandmarkModelLoader"
  input_side_packet: "MODEL_COMPLEXITY:model_complexity"
  output_side_packet: "MODEL:model"
}

# Runs the loaded TensorFlow Lite model on the image tensor and outputs a
# vector of tensors. The vector is split below into landmark, hand-flag,
# handedness and world-landmark tensors.
node {
  calculator: "InferenceCalculator"
  input_side_packet: "MODEL:model"
  input_stream: "TENSORS:input_tensor"
  output_stream: "TENSORS:output_tensors"
}

# Splits a vector of tensors to multiple vectors according to the ranges
# specified in option. Each range [begin, end) selects exactly one tensor, in
# the same order as the output streams are listed.
node {
  calculator: "SplitTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  output_stream: "handedness_tensor"
  output_stream: "world_landmark_tensor"
  options: {
    [mediapipe.SplitVectorCalculatorOptions.ext] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
      ranges: { begin: 2 end: 3 }
      ranges: { begin: 3 end: 4 }
    }
  }
}

# Converts the hand-flag tensor into a float that represents the confidence
# score of hand presence.
node {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}

# Applies a threshold to the confidence score to determine whether a hand is
# present. The resulting flag gates the landmark, handedness and world-landmark
# tensors downstream.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  options: {
    [mediapipe.ThresholdingCalculatorOptions.ext] {
      threshold: 0.5
    }
  }
}

# Drops handedness tensor if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "handedness_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_handedness_tensor"
}

# Converts the handedness tensor into a classification list holding the top
# handedness class, with labels read from the label map file.
node {
  calculator: "TensorsToClassificationCalculator"
  input_stream: "TENSORS:ensured_handedness_tensor"
  output_stream: "CLASSIFICATIONS:handedness"
  options: {
    [mediapipe.TensorsToClassificationCalculatorOptions.ext] {
      top_k: 1
      label_map_path: "mediapipe/modules/hand_landmark/handedness.txt"
      binary_classification: true
    }
  }
}

# Drops landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "landmark_tensors"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_landmark_tensors"
}

# Decodes the landmark tensors into a list of landmarks, where the landmark
# coordinates are normalized by the size of the input image to the model.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
      # Must match the tensor size produced by ImageToTensorCalculator.
      input_image_width: 224
      input_image_height: 224
      # The additional scaling factor is used to account for the Z coordinate
      # distribution in the training data.
      normalize_z: 0.4
    }
  }
}

# Adjusts landmarks (already normalized to [0.f, 1.f]) on the letterboxed hand
# image (after image transformation with the FIT scale mode) to the
# corresponding locations on the same image with the letterbox removed (hand
# image before image transformation).
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}

# Projects the landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}

# Drops world landmarks tensors if hand is not present.
node {
  calculator: "GateCalculator"
  input_stream: "world_landmark_tensor"
  input_stream: "ALLOW:hand_presence"
  output_stream: "ensured_world_landmark_tensor"
}

# Decodes the world landmark tensor into a list of 21 world landmarks. Unlike
# the image-space landmarks, no model input size is configured here and the
# result is emitted under the non-normalized LANDMARKS tag.
node {
  calculator: "TensorsToLandmarksCalculator"
  input_stream: "TENSORS:ensured_world_landmark_tensor"
  output_stream: "LANDMARKS:unprojected_world_landmarks"
  options: {
    [mediapipe.TensorsToLandmarksCalculatorOptions.ext] {
      num_landmarks: 21
    }
  }
}

# Projects the world landmarks from the cropped hand image to the corresponding
# locations on the full image before cropping (input to the graph).
node {
  calculator: "WorldLandmarkProjectionCalculator"
  input_stream: "LANDMARKS:unprojected_world_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "LANDMARKS:hand_world_landmarks"
}