165 lines
6.7 KiB
Python
165 lines
6.7 KiB
Python
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
"""MediaPipe Hands."""
|
|
|
|
import enum
|
|
from typing import NamedTuple
|
|
|
|
import numpy as np
|
|
|
|
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
|
# pylint: disable=unused-import
|
|
from mediapipe.calculators.core import gate_calculator_pb2
|
|
from mediapipe.calculators.core import split_vector_calculator_pb2
|
|
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
|
from mediapipe.calculators.tensor import inference_calculator_pb2
|
|
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
|
|
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
|
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
|
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
|
from mediapipe.calculators.util import association_calculator_pb2
|
|
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
|
from mediapipe.calculators.util import logic_calculator_pb2
|
|
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
|
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
|
from mediapipe.calculators.util import thresholding_calculator_pb2
|
|
# pylint: enable=unused-import
|
|
from mediapipe.python.solution_base import SolutionBase
|
|
|
|
|
|
@enum.unique
class HandLandmark(enum.IntEnum):
    """The 21 hand landmarks.

    Members are numbered consecutively from 0 to 20: the wrist first,
    followed by four landmarks for each of the thumb, index, middle, ring
    and pinky fingers, each finger ending with its ``*_TIP`` landmark.

    Because this is an ``IntEnum``, members compare equal to (and can be
    used as) plain integers. ``enum.unique`` makes a duplicated index fail
    at import time instead of silently creating an alias.
    """

    WRIST = 0

    # Thumb.
    THUMB_CMC = 1
    THUMB_MCP = 2
    THUMB_IP = 3
    THUMB_TIP = 4

    # Index finger.
    INDEX_FINGER_MCP = 5
    INDEX_FINGER_PIP = 6
    INDEX_FINGER_DIP = 7
    INDEX_FINGER_TIP = 8

    # Middle finger.
    MIDDLE_FINGER_MCP = 9
    MIDDLE_FINGER_PIP = 10
    MIDDLE_FINGER_DIP = 11
    MIDDLE_FINGER_TIP = 12

    # Ring finger.
    RING_FINGER_MCP = 13
    RING_FINGER_PIP = 14
    RING_FINGER_DIP = 15
    RING_FINGER_TIP = 16

    # Pinky.
    PINKY_MCP = 17
    PINKY_PIP = 18
    PINKY_DIP = 19
    PINKY_TIP = 20
|
|
|
|
|
|
# Binary graph file handed to SolutionBase as `binary_graph_path` by Hands.
BINARYPB_FILE_PATH = 'mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb'

# Immutable set of (start, end) HandLandmark pairs, one per connection
# between two landmarks: 21 pairs in total, grouped below by finger.
HAND_CONNECTIONS = frozenset([
    # Thumb.
    (HandLandmark.WRIST, HandLandmark.THUMB_CMC),
    (HandLandmark.THUMB_CMC, HandLandmark.THUMB_MCP),
    (HandLandmark.THUMB_MCP, HandLandmark.THUMB_IP),
    (HandLandmark.THUMB_IP, HandLandmark.THUMB_TIP),
    # Index finger.
    (HandLandmark.WRIST, HandLandmark.INDEX_FINGER_MCP),
    (HandLandmark.INDEX_FINGER_MCP, HandLandmark.INDEX_FINGER_PIP),
    (HandLandmark.INDEX_FINGER_PIP, HandLandmark.INDEX_FINGER_DIP),
    (HandLandmark.INDEX_FINGER_DIP, HandLandmark.INDEX_FINGER_TIP),
    # Middle finger (joined to the index finger at the MCP landmarks).
    (HandLandmark.INDEX_FINGER_MCP, HandLandmark.MIDDLE_FINGER_MCP),
    (HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.MIDDLE_FINGER_PIP),
    (HandLandmark.MIDDLE_FINGER_PIP, HandLandmark.MIDDLE_FINGER_DIP),
    (HandLandmark.MIDDLE_FINGER_DIP, HandLandmark.MIDDLE_FINGER_TIP),
    # Ring finger (joined to the middle finger at the MCP landmarks).
    (HandLandmark.MIDDLE_FINGER_MCP, HandLandmark.RING_FINGER_MCP),
    (HandLandmark.RING_FINGER_MCP, HandLandmark.RING_FINGER_PIP),
    (HandLandmark.RING_FINGER_PIP, HandLandmark.RING_FINGER_DIP),
    (HandLandmark.RING_FINGER_DIP, HandLandmark.RING_FINGER_TIP),
    # Pinky (joined to both the ring finger MCP and the wrist).
    (HandLandmark.RING_FINGER_MCP, HandLandmark.PINKY_MCP),
    (HandLandmark.WRIST, HandLandmark.PINKY_MCP),
    (HandLandmark.PINKY_MCP, HandLandmark.PINKY_PIP),
    (HandLandmark.PINKY_PIP, HandLandmark.PINKY_DIP),
    (HandLandmark.PINKY_DIP, HandLandmark.PINKY_TIP),
])
|
|
|
|
|
|
class Hands(SolutionBase):
    """MediaPipe Hands.

    MediaPipe Hands processes an RGB image and returns the hand landmarks and
    handedness (left vs. right hand) of each detected hand.

    Note that it determines handedness assuming the input image is mirrored,
    i.e., taken with a front-facing/selfie camera (
    https://en.wikipedia.org/wiki/Front-facing_camera) with images flipped
    horizontally. If that is not the case, use, for instance,
    cv2.flip(image, 1) to flip the image first for a correct handedness
    output.

    Please refer to https://solutions.mediapipe.dev/hands#python-solution-api
    for usage examples.
    """

    def __init__(self,
                 static_image_mode=False,
                 max_num_hands=2,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        """Initializes a MediaPipe Hand object.

        Args:
          static_image_mode: Whether to treat the input images as a batch of
            static and possibly unrelated images, or a video stream. See
            details in
            https://solutions.mediapipe.dev/hands#static_image_mode.
          max_num_hands: Maximum number of hands to detect. See details in
            https://solutions.mediapipe.dev/hands#max_num_hands.
          min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for
            hand detection to be considered successful. See details in
            https://solutions.mediapipe.dev/hands#min_detection_confidence.
          min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for
            the hand landmarks to be considered tracked successfully. See
            details in
            https://solutions.mediapipe.dev/hands#min_tracking_confidence.
        """
        super().__init__(
            binary_graph_path=BINARYPB_FILE_PATH,
            side_inputs={
                'num_hands': max_num_hands,
            },
            calculator_params={
                # The constant side packet carries the negation of
                # static_image_mode (presumably the graph's "reuse previous
                # landmarks" switch -- confirm against the graph config).
                'ConstantSidePacketCalculator.packet': [
                    constant_side_packet_calculator_pb2
                    .ConstantSidePacketCalculatorOptions.ConstantSidePacket(
                        bool_value=not static_image_mode)
                ],
                'palmdetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
                    min_detection_confidence,
                'handlandmarkcpu__ThresholdingCalculator.threshold':
                    min_tracking_confidence,
            },
            outputs=['multi_hand_landmarks', 'multi_handedness'])

    def process(self, image: np.ndarray) -> NamedTuple:
        """Processes an RGB image and returns the landmarks and handedness of each detected hand.

        The image is forwarded to SolutionBase.process() under the 'image'
        input key.

        Args:
          image: An RGB image represented as a numpy ndarray.

        Raises:
          RuntimeError: If the underlying graph throws any error.
          ValueError: If the input image is not three channel RGB.

        Returns:
          A NamedTuple object with two fields: a "multi_hand_landmarks" field
          that contains the hand landmarks on each detected hand and a
          "multi_handedness" field that contains the handedness (left vs.
          right hand) of the detected hand.
        """
        return super().process(input_data={'image': image})
|