diff --git a/mediapipe/python/solutions/face_mesh.py b/mediapipe/python/solutions/face_mesh.py index 2afcbfdf3..5f1073fd9 100644 --- a/mediapipe/python/solutions/face_mesh.py +++ b/mediapipe/python/solutions/face_mesh.py @@ -15,7 +15,7 @@ # Lint as: python3 """MediaPipe FaceMesh.""" -from typing import NamedTuple +from typing import NamedTuple, Optional, Tuple import numpy as np @@ -249,7 +249,8 @@ class FaceMesh(SolutionBase): static_image_mode=False, max_num_faces=2, min_detection_confidence=0.5, - min_tracking_confidence=0.5): + min_tracking_confidence=0.5, + outputs: Optional[Tuple[str]] = ('multi_face_landmarks',)): """Initializes a MediaPipe FaceMesh object. Args: @@ -274,6 +275,9 @@ class FaceMesh(SolutionBase): robustness of the solution, at the expense of a higher latency. Ignored if "static_image_mode" is True, where face detection simply runs on every image. Default to 0.5. + outputs: A tuple of the graph output stream names to observe. If the tuple + is empty, all the output streams listed in the graph config will be + automatically observed by default. """ super().__init__( binary_graph_path=BINARYPB_FILE_PATH, @@ -287,7 +291,7 @@ class FaceMesh(SolutionBase): 'facelandmarkcpu__ThresholdingCalculator.threshold': min_tracking_confidence, }, - outputs=['multi_face_landmarks']) + outputs=list(outputs) if outputs else []) def process(self, image: np.ndarray) -> NamedTuple: """Processes an RGB image and returns the face landmarks on each detected face. Args: image: An RGB image represented as a numpy ndarray. Raises: RuntimeError: If the underlying graph occurs any error. ValueError: If the input image is not three channel RGB. Returns: - A NamedTuple object with a "multi_face_landmarks" field that contains the - face landmarks on each detected face. + A NamedTuple object with fields corresponding to the set of outputs passed to the + constructor. 
Fields may include: + "multi_face_landmarks" The face landmarks on each detected face + "face_detections" The detected faces + "face_rects_from_landmarks" Regions of interest calculated based on landmarks + "face_rects_from_detections" Regions of interest calculated based on face detections """ return super().process(input_data={'image': image}) diff --git a/mediapipe/python/solutions/hands.py b/mediapipe/python/solutions/hands.py index 8253e344c..2895d08d7 100644 --- a/mediapipe/python/solutions/hands.py +++ b/mediapipe/python/solutions/hands.py @@ -16,7 +16,7 @@ """MediaPipe Hands.""" import enum -from typing import NamedTuple +from typing import NamedTuple, Optional, Tuple import numpy as np @@ -168,7 +168,8 @@ class Hands(SolutionBase): static_image_mode=False, max_num_hands=2, min_detection_confidence=0.7, - min_tracking_confidence=0.5): + min_tracking_confidence=0.5, + outputs: Optional[Tuple[str]] = ('multi_hand_landmarks', 'multi_handedness')): """Initializes a MediaPipe Hand object. Args: @@ -193,6 +194,9 @@ class Hands(SolutionBase): robustness of the solution, at the expense of a higher latency. Ignored if "static_image_mode" is True, where hand detection simply runs on every image. Default to 0.5. + outputs: A tuple of the graph output stream names to observe. If the tuple + is empty, all the output streams listed in the graph config will be + automatically observed by default. """ super().__init__( binary_graph_path=BINARYPB_FILE_PATH, @@ -206,7 +210,7 @@ class Hands(SolutionBase): 'handlandmarkcpu__ThresholdingCalculator.threshold': min_tracking_confidence, }, - outputs=['multi_hand_landmarks', 'multi_handedness']) + outputs=list(outputs) if outputs else []) def process(self, image: np.ndarray) -> NamedTuple: """Processes an RGB image and returns the hand landmarks and handedness of each detected hand. Args: image: An RGB image represented as a numpy ndarray. Raises: RuntimeError: If the underlying graph occurs any error. ValueError: If the input image is not three channel RGB. 
Returns: - A NamedTuple object with two fields: a "multi_hand_landmarks" field that - contains the hand landmarks on each detected hand and a "multi_handedness" - field that contains the handedness (left v.s. right hand) of the detected - hand. + A NamedTuple object with fields corresponding to the set of outputs passed to the + constructor. Fields may include: + "multi_hand_landmarks" The hand landmarks on each detected hand + "multi_handedness" The handedness (left v.s. right hand) of the detected hand + "palm_detections" The detected palms + "hand_rects" Regions of interest calculated based on landmarks + "hand_rects_from_palm_detections" Regions of interest calculated based on palm detections """ return super().process(input_data={'image': image}) diff --git a/mediapipe/python/solutions/pose.py b/mediapipe/python/solutions/pose.py index 2f60be8b0..ac46dec7a 100644 --- a/mediapipe/python/solutions/pose.py +++ b/mediapipe/python/solutions/pose.py @@ -16,7 +16,7 @@ """MediaPipe Pose.""" import enum -from typing import NamedTuple +from typing import NamedTuple, Optional, Tuple import numpy as np @@ -159,7 +159,8 @@ class Pose(SolutionBase): def __init__(self, static_image_mode=False, min_detection_confidence=0.5, - min_tracking_confidence=0.5): + min_tracking_confidence=0.5, + outputs: Optional[Tuple[str]] = ('pose_landmarks',)): """Initializes a MediaPipe Pose object. Args: @@ -181,6 +182,9 @@ class Pose(SolutionBase): increase robustness of the solution, at the expense of a higher latency. Ignored if "static_image_mode" is True, where person detection simply runs on every image. Default to 0.5. + outputs: A tuple of the graph output stream names to observe. If the tuple + is empty, all the output streams listed in the graph config will be + automatically observed by default. 
""" super().__init__( binary_graph_path=BINARYPB_FILE_PATH, @@ -193,7 +197,7 @@ class Pose(SolutionBase): 'poselandmarkupperbodycpu__poselandmarkupperbodybyroicpu__ThresholdingCalculator.threshold': min_tracking_confidence, }, - outputs=['pose_landmarks']) + outputs=list(outputs) if outputs else []) def process(self, image: np.ndarray) -> NamedTuple: """Processes an RGB image and returns the pose landmarks on the most prominent person detected. @@ -206,8 +210,12 @@ class Pose(SolutionBase): ValueError: If the input image is not three channel RGB. Returns: - A NamedTuple object with a "pose_landmarks" field that contains the pose - landmarks on the most prominent person detected. + A NamedTuple object with fields corresponding to the set of outputs passed to the + constructor. Fields may include: + "pose_landmarks" The pose landmarks on the most prominent person detected + "pose_detection" The detected pose + "pose_rect_from_landmarks" Region of interest calculated based on landmarks + "pose_rect_from_detection" Region of interest calculated based on pose detection """ return super().process(input_data={'image': image})