From 12502b6f96e5678979cbff71ff9979d9f6f9a801 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 11 Sep 2023 18:20:30 -0700 Subject: [PATCH] Add Handedness to JS, C++ and Android API PiperOrigin-RevId: 564559718 --- .../hand_landmarker/hand_landmarker_graph.cc | 4 ++-- .../hand_landmarks_detector_graph.cc | 6 +++--- .../hand_landmarks_detector_graph_test.cc | 6 +++--- .../GestureRecognizerResult.java | 14 ++++++++++++-- .../handlandmarker/HandLandmarkerResult.java | 14 ++++++++++++-- .../GestureRecognizerTest.java | 14 +++++++------- .../handlandmarker/HandLandmarkerTest.java | 10 +++++----- .../gesture_recognizer/gesture_recognizer.ts | 12 +++++++----- .../gesture_recognizer_result.d.ts | 6 ++++++ .../gesture_recognizer_test.ts | 18 +++++++++++------- .../vision/hand_landmarker/hand_landmarker.ts | 9 +++++---- .../hand_landmarker_result.d.ts | 8 +++++++- .../hand_landmarker/hand_landmarker_test.ts | 12 +++++++++--- 13 files changed, 89 insertions(+), 44 deletions(-) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc index 61bccb2a8..b051dc571 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -83,7 +83,7 @@ struct HandLandmarkerOutputs { Stream> landmark_lists; Stream> world_landmark_lists; Stream> hand_rects_next_frame; - Stream> handednesses; + Stream> handedness; Stream> palm_rects; Stream> palm_detections; Stream image; @@ -241,7 +241,7 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { graph[Output>(kWorldLandmarksTag)]; hand_landmarker_outputs.hand_rects_next_frame >> graph[Output>(kHandRectNextFrameTag)]; - hand_landmarker_outputs.handednesses >> + hand_landmarker_outputs.handedness >> graph[Output>(kHandednessTag)]; hand_landmarker_outputs.palm_rects >> graph[Output>(kPalmRectsTag)]; diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc index c3a4edecd..51cbc9e89 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph.cc @@ -93,7 +93,7 @@ struct HandLandmarkerOutputs { Source> hand_rects_next_frame; Source> presences; Source> presence_scores; - Source> handednesses; + Source> handedness; }; absl::Status SanityCheckOptions( @@ -478,7 +478,7 @@ class MultipleHandLandmarksDetectorGraph : public core::ModelTaskGraph { graph[Output>(kPresenceTag)]; hand_landmark_detection_outputs.presence_scores >> graph[Output>(kPresenceScoreTag)]; - hand_landmark_detection_outputs.handednesses >> + hand_landmark_detection_outputs.handedness >> graph[Output>(kHandednessTag)]; return graph.GetConfig(); @@ -562,7 +562,7 @@ class MultipleHandLandmarksDetectorGraph : public core::ModelTaskGraph { /* hand_rects_next_frame= */ hand_rects_next_frame, /* presences= */ presences, /* presence_scores= */ presence_scores, - /* handednesses= */ handednesses, + /* handedness= */ handednesses, }}; } }; diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc index b51381b17..5af62e11a 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarks_detector_graph_test.cc @@ -319,15 +319,15 @@ TEST_P(MultiHandLandmarkerTest, Succeeds) { const std::vector& presences = (*output_packets)[kPresenceName].Get>(); - const std::vector& handednesses = + const std::vector& handedness = (*output_packets)[kHandednessName].Get>(); const std::vector& landmark_lists = (*output_packets)[kLandmarksName] .Get>(); EXPECT_THAT(presences, ElementsAreArray(GetParam().expected_presences)); - EXPECT_THAT(handednesses, Pointwise(Partially(EqualsProto()), - GetParam().expected_handedness)); + EXPECT_THAT(handedness, Pointwise(Partially(EqualsProto()), + GetParam().expected_handedness)); EXPECT_THAT( landmark_lists, Pointwise(Approximately(Partially(EqualsProto()), /*margin=*/kAbsMargin, diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java index d4438efe7..c8d43e2ca 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerResult.java @@ -114,11 +114,21 @@ public abstract class GestureRecognizerResult implements TaskResult { /** Hand landmarks of detected hands. */ public abstract List> landmarks(); - /** Hand landmarks in world coordniates of detected hands. */ + /** Hand landmarks in world coordinates of detected hands. */ public abstract List> worldLandmarks(); + /** + * Handedness of detected hands. + * + * @deprecated Use {@link #handedness()} instead. + */ + @Deprecated + public List> handednesses() { + return handedness(); + } + /** Handedness of detected hands. */ - public abstract List> handednesses(); + public abstract List> handedness(); /** * Recognized hand gestures of detected hands. Note that the index of the gesture is always -1, diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java index b8b236d42..14d2fa926 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerResult.java @@ -108,9 +108,19 @@ public abstract class HandLandmarkerResult implements TaskResult { /** Hand landmarks of detected hands. */ public abstract List> landmarks(); - /** Hand landmarks in world coordniates of detected hands. */ + /** Hand landmarks in world coordinates of detected hands. */ public abstract List> worldLandmarks(); + /** + * Handedness of detected hands. + * + * @deprecated Use {@link #handedness()} instead. + */ + @Deprecated + public List> handednesses() { + return handedness(); + } + /** Handedness of detected hands. */ - public abstract List> handednesses(); + public abstract List> handedness(); } diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerTest.java b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerTest.java index 5f461a4c1..68d800fe9 100644 --- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerTest.java +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/gesturerecognizer/GestureRecognizerTest.java @@ -102,7 +102,7 @@ public class GestureRecognizerTest { gestureRecognizer.recognize(getImageFromAsset(NO_HANDS_IMAGE)); assertThat(actualResult.landmarks()).isEmpty(); assertThat(actualResult.worldLandmarks()).isEmpty(); - assertThat(actualResult.handednesses()).isEmpty(); + assertThat(actualResult.handedness()).isEmpty(); assertThat(actualResult.gestures()).isEmpty(); } @@ -143,7 +143,7 @@ public class GestureRecognizerTest { GestureRecognizer.createFromOptions(ApplicationProvider.getApplicationContext(), options); GestureRecognizerResult actualResult = gestureRecognizer.recognize(getImageFromAsset(TWO_HANDS_IMAGE)); - assertThat(actualResult.handednesses()).hasSize(2); + assertThat(actualResult.handedness()).hasSize(2); } @Test @@ -251,7 +251,7 @@ public class GestureRecognizerTest { gestureRecognizer.recognize(getImageFromAsset(FIST_IMAGE)); assertThat(actualResult.landmarks()).isEmpty(); assertThat(actualResult.worldLandmarks()).isEmpty(); - assertThat(actualResult.handednesses()).isEmpty(); + assertThat(actualResult.handedness()).isEmpty(); assertThat(actualResult.gestures()).isEmpty(); } @@ -284,7 +284,7 @@ public class GestureRecognizerTest { gestureRecognizer.recognize(getImageFromAsset(FIST_IMAGE)); assertThat(actualResult.landmarks()).isEmpty(); assertThat(actualResult.worldLandmarks()).isEmpty(); - assertThat(actualResult.handednesses()).isEmpty(); + assertThat(actualResult.handedness()).isEmpty(); assertThat(actualResult.gestures()).isEmpty(); } @@ -596,7 +596,7 @@ public class GestureRecognizerTest { // Expects to have the same number of hands detected. assertThat(actualResult.landmarks()).hasSize(expectedResult.landmarks().size()); assertThat(actualResult.worldLandmarks()).hasSize(expectedResult.worldLandmarks().size()); - assertThat(actualResult.handednesses()).hasSize(expectedResult.handednesses().size()); + assertThat(actualResult.handedness()).hasSize(expectedResult.handedness().size()); assertThat(actualResult.gestures()).hasSize(expectedResult.gestures().size()); // Actual landmarks match expected landmarks. @@ -614,8 +614,8 @@ public class GestureRecognizerTest { .containsExactlyElementsIn(expectedResult.landmarks().get(0)); // Actual handedness matches expected handedness. - Category actualTopHandedness = actualResult.handednesses().get(0).get(0); - Category expectedTopHandedness = expectedResult.handednesses().get(0).get(0); + Category actualTopHandedness = actualResult.handedness().get(0).get(0); + Category expectedTopHandedness = expectedResult.handedness().get(0).get(0); assertThat(actualTopHandedness.index()).isEqualTo(expectedTopHandedness.index()); assertThat(actualTopHandedness.categoryName()).isEqualTo(expectedTopHandedness.categoryName()); diff --git a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerTest.java b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerTest.java index 94434a217..ce60fa592 100644 --- a/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerTest.java +++ b/mediapipe/tasks/javatests/com/google/mediapipe/tasks/vision/handlandmarker/HandLandmarkerTest.java @@ -92,7 +92,7 @@ public class HandLandmarkerTest { handLandmarker.detect(getImageFromAsset(NO_HANDS_IMAGE)); assertThat(actualResult.landmarks()).isEmpty(); assertThat(actualResult.worldLandmarks()).isEmpty(); - assertThat(actualResult.handednesses()).isEmpty(); + assertThat(actualResult.handedness()).isEmpty(); } @Test @@ -109,7 +109,7 @@ public class HandLandmarkerTest { HandLandmarker.createFromOptions(ApplicationProvider.getApplicationContext(), options); HandLandmarkerResult actualResult = handLandmarker.detect(getImageFromAsset(TWO_HANDS_IMAGE)); - assertThat(actualResult.handednesses()).hasSize(2); + assertThat(actualResult.handedness()).hasSize(2); } @Test @@ -393,7 +393,7 @@ public class HandLandmarkerTest { // Expects to have the same number of hands detected. assertThat(actualResult.landmarks()).hasSize(expectedResult.landmarks().size()); assertThat(actualResult.worldLandmarks()).hasSize(expectedResult.worldLandmarks().size()); - assertThat(actualResult.handednesses()).hasSize(expectedResult.handednesses().size()); + assertThat(actualResult.handedness()).hasSize(expectedResult.handedness().size()); // Actual landmarks match expected landmarks. assertThat(actualResult.landmarks().get(0)) @@ -410,8 +410,8 @@ public class HandLandmarkerTest { .containsExactlyElementsIn(expectedResult.landmarks().get(0)); // Actual handedness matches expected handedness. - Category actualTopHandedness = actualResult.handednesses().get(0).get(0); - Category expectedTopHandedness = expectedResult.handednesses().get(0).get(0); + Category actualTopHandedness = actualResult.handedness().get(0).get(0); + Category expectedTopHandedness = expectedResult.handedness().get(0).get(0); assertThat(actualTopHandedness.index()).isEqualTo(expectedTopHandedness.index()); assertThat(actualTopHandedness.categoryName()).isEqualTo(expectedTopHandedness.categoryName()); } diff --git a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer.ts b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer.ts index 67f355f53..4127bc9ad 100644 --- a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer.ts +++ b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer.ts @@ -63,7 +63,7 @@ export class GestureRecognizer extends VisionTaskRunner { private gestures: Category[][] = []; private landmarks: NormalizedLandmark[][] = []; private worldLandmarks: Landmark[][] = []; - private handednesses: Category[][] = []; + private handedness: Category[][] = []; private readonly options: GestureRecognizerGraphOptions; private readonly handLandmarkerGraphOptions: HandLandmarkerGraphOptions; @@ -273,7 +273,7 @@ export class GestureRecognizer extends VisionTaskRunner { this.gestures = []; this.landmarks = []; this.worldLandmarks = []; - this.handednesses = []; + this.handedness = []; } private processResults(): GestureRecognizerResult { @@ -283,14 +283,16 @@ export class GestureRecognizer extends VisionTaskRunner { gestures: [], landmarks: [], worldLandmarks: [], - handednesses: [], + handedness: [], + handednesses: [] }; } else { return { gestures: this.gestures, landmarks: this.landmarks, worldLandmarks: this.worldLandmarks, - handednesses: this.handednesses + handedness: this.handedness, + handednesses: this.handedness }; } } @@ -416,7 +418,7 @@ export class GestureRecognizer extends VisionTaskRunner { this.graphRunner.attachProtoVectorListener( HANDEDNESS_STREAM, (binaryProto, timestamp) => { - this.handednesses.push(...this.toJsCategories(binaryProto)); + this.handedness.push(...this.toJsCategories(binaryProto)); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener(HANDEDNESS_STREAM, timestamp => { diff --git a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts index f3dadf413..7ad78aa67 100644 --- a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts +++ b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_result.d.ts @@ -30,6 +30,12 @@ export declare interface GestureRecognizerResult { worldLandmarks: Landmark[][]; /** Handedness of detected hands. */ + handedness: Category[][]; + + /** + * Handedness of detected hands. + * @deprecated Use `.handedness` instead. + */ handednesses: Category[][]; /** diff --git a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_test.ts b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_test.ts index 95ba06cca..ed4453b97 100644 --- a/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_test.ts +++ b/mediapipe/tasks/web/vision/gesture_recognizer/gesture_recognizer_test.ts @@ -28,7 +28,7 @@ import {GestureRecognizer, GestureRecognizerOptions} from './gesture_recognizer' type ProtoListener = ((binaryProtos: Uint8Array[], timestamp: number) => void); -function createHandednesses(): Uint8Array[] { +function createHandedness(): Uint8Array[] { const handsProto = new ClassificationList(); const classification = new Classification(); classification.setScore(0.1); @@ -282,8 +282,7 @@ describe('GestureRecognizer', () => { (createLandmarks(), 1337); gestureRecognizer.listeners.get('world_hand_landmarks')! (createWorldLandmarks(), 1337); - gestureRecognizer.listeners.get('handedness')! - (createHandednesses(), 1337); + gestureRecognizer.listeners.get('handedness')!(createHandedness(), 1337); gestureRecognizer.listeners.get('hand_gestures')!(createGestures(), 1337); }); @@ -304,6 +303,12 @@ describe('GestureRecognizer', () => { }]], 'landmarks': [[{'x': 0.3, 'y': 0.4, 'z': 0.5}]], 'worldLandmarks': [[{'x': 21, 'y': 22, 'z': 23}]], + 'handedness': [[{ + 'score': 0.1, + 'index': 1, + 'categoryName': 'handedness_label', + 'displayName': 'handedness_display_name' + }]], 'handednesses': [[{ 'score': 0.1, 'index': 1, @@ -320,8 +325,7 @@ describe('GestureRecognizer', () => { (createLandmarks(), 1337); gestureRecognizer.listeners.get('world_hand_landmarks')! (createWorldLandmarks(), 1337); - gestureRecognizer.listeners.get('handedness')! - (createHandednesses(), 1337); + gestureRecognizer.listeners.get('handedness')!(createHandedness(), 1337); gestureRecognizer.listeners.get('hand_gestures')!(createGestures(), 1337); }); @@ -342,8 +346,7 @@ describe('GestureRecognizer', () => { (createLandmarks(), 1337); gestureRecognizer.listeners.get('world_hand_landmarks')! (createWorldLandmarks(), 1337); - gestureRecognizer.listeners.get('handedness')! - (createHandednesses(), 1337); + gestureRecognizer.listeners.get('handedness')!(createHandedness(), 1337); gestureRecognizer.listeners.get('hand_gestures')!([], 1337); }); @@ -353,6 +356,7 @@ describe('GestureRecognizer', () => { 'gestures': [], 'landmarks': [], 'worldLandmarks': [], + 'handedness': [], 'handednesses': [] }); }); diff --git a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts index c2180b3d2..344eb5885 100644 --- a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts +++ b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker.ts @@ -58,7 +58,7 @@ const DEFAULT_CATEGORY_INDEX = -1; export class HandLandmarker extends VisionTaskRunner { private landmarks: NormalizedLandmark[][] = []; private worldLandmarks: Landmark[][] = []; - private handednesses: Category[][] = []; + private handedness: Category[][] = []; private readonly options: HandLandmarkerGraphOptions; private readonly handLandmarksDetectorGraphOptions: @@ -222,14 +222,15 @@ export class HandLandmarker extends VisionTaskRunner { private resetResults(): void { this.landmarks = []; this.worldLandmarks = []; - this.handednesses = []; + this.handedness = []; } private processResults(): HandLandmarkerResult { return { landmarks: this.landmarks, worldLandmarks: this.worldLandmarks, - handednesses: this.handednesses + handednesses: this.handedness, + handedness: this.handedness, }; } @@ -330,7 +331,7 @@ export class HandLandmarker extends VisionTaskRunner { this.graphRunner.attachProtoVectorListener( HANDEDNESS_STREAM, (binaryProto, timestamp) => { - this.handednesses.push(...this.toJsCategories(binaryProto)); + this.handedness.push(...this.toJsCategories(binaryProto)); this.setLatestOutputTimestamp(timestamp); }); this.graphRunner.attachEmptyPacketListener( diff --git a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.d.ts b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.d.ts index 69bf6a78f..ee1c53cd6 100644 --- a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.d.ts +++ b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_result.d.ts @@ -29,6 +29,12 @@ export declare interface HandLandmarkerResult { /** Hand landmarks in world coordinates of detected hands. */ worldLandmarks: Landmark[][]; - /** Handedness of detected hands. */ + /** + * Handedness of detected hands. + * @deprecated Use `.handedness` instead. + */ handednesses: Category[][]; + + /** Handedness of detected hands. */ + handedness: Category[][]; } diff --git a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_test.ts b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_test.ts index 91cba1f1f..3d5cd4274 100644 --- a/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_test.ts +++ b/mediapipe/tasks/web/vision/hand_landmarker/hand_landmarker_test.ts @@ -30,7 +30,7 @@ import {HandLandmarkerOptions} from './hand_landmarker_options'; type ProtoListener = ((binaryProtos: Uint8Array[], timestamp: number) => void); -function createHandednesses(): ClassificationList { +function createHandedness(): ClassificationList { const handsProto = new ClassificationList(); const classification = new Classification(); classification.setScore(0.1); @@ -198,7 +198,7 @@ describe('HandLandmarker', () => { it('transforms results', async () => { const landmarksProto = [createLandmarks().serializeBinary()]; const worldLandmarksProto = [createWorldLandmarks().serializeBinary()]; - const handednessProto = [createHandednesses().serializeBinary()]; + const handednessProto = [createHandedness().serializeBinary()]; // Pass the test data to our listener handLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => { @@ -220,6 +220,12 @@ describe('HandLandmarker', () => { expect(landmarks).toEqual({ 'landmarks': [[{'x': 0, 'y': 0, 'z': 0}]], 'worldLandmarks': [[{'x': 0, 'y': 0, 'z': 0}]], + 'handedness': [[{ + 'score': 0.1, + 'index': 1, + 'categoryName': 'handedness_label', + 'displayName': 'handedness_display_name' + }]], 'handednesses': [[{ 'score': 0.1, 'index': 1, @@ -232,7 +238,7 @@ describe('HandLandmarker', () => { it('clears results between invoations', async () => { const landmarks = [createLandmarks().serializeBinary()]; const worldLandmarks = [createWorldLandmarks().serializeBinary()]; - const handedness = [createHandednesses().serializeBinary()]; + const handedness = [createHandedness().serializeBinary()]; // Pass the test data to our listener handLandmarker.fakeWasmModule._waitUntilIdle.and.callFake(() => {