diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/BUILD b/mediapipe/tasks/ios/test/vision/gesture_recognizer/BUILD new file mode 100644 index 000000000..b4840741b --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/BUILD @@ -0,0 +1,62 @@ +load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test") +load( + "//mediapipe/framework/tool:ios.bzl", + "MPP_TASK_MINIMUM_OS_VERSION", +) +load( + "@org_tensorflow//tensorflow/lite:special_rules.bzl", + "tflite_ios_lab_runner", +) + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +# Default tags for filtering iOS targets. Targets are restricted to Apple platforms. +TFL_DEFAULT_TAGS = [ + "apple", +] + +# Following sanitizer tests are not supported by iOS test targets. +TFL_DISABLED_SANITIZER_TAGS = [ + "noasan", + "nomsan", + "notsan", +] + +objc_library( + name = "MPPGestureRecognizerObjcTestLibrary", + testonly = 1, + srcs = ["MPPGestureRecognizerTests.m"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + data = [ + "//mediapipe/tasks/testdata/vision:gesture_recognizer.task", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_protos", + ], + deps = [ + "//mediapipe/tasks/ios/common:MPPCommon", + "//mediapipe/tasks/ios/test/vision/gesture_recognizer/utils:MPPGestureRecognizerResultProtobufHelpers", + "//mediapipe/tasks/ios/test/vision/utils:MPPImageTestUtils", + "//mediapipe/tasks/ios/vision/gesture_recognizer:MPPGestureRecognizer", + ] + select({ + "//third_party:opencv_ios_sim_arm64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//third_party:opencv_ios_arm64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//third_party:opencv_ios_x86_64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//conditions:default": ["@ios_opencv//:OpencvFramework"], + }), +) + +ios_unit_test( + name = "MPPGestureRecognizerObjcTest", + minimum_os_version = 
MPP_TASK_MINIMUM_OS_VERSION, + runner = tflite_ios_lab_runner("IOS_LATEST"), + tags = TFL_DEFAULT_TAGS + TFL_DISABLED_SANITIZER_TAGS, + deps = [ + ":MPPGestureRecognizerObjcTestLibrary", + ], +) diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/MPPGestureRecognizerTests.m b/mediapipe/tasks/ios/test/vision/gesture_recognizer/MPPGestureRecognizerTests.m new file mode 100644 index 000000000..58e3bc9cf --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/MPPGestureRecognizerTests.m @@ -0,0 +1,282 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import + +#import "mediapipe/tasks/ios/common/sources/MPPCommon.h" +#import "mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.h" +#import "mediapipe/tasks/ios/test/vision/utils/sources/MPPImage+TestUtils.h" +#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h" + +static NSString *const kPbFileExtension = @"pbtxt"; + +typedef NSDictionary ResourceFileInfo; + +static ResourceFileInfo *const kGestureRecognizerBundleAssetFile = + @{@"name" : @"gesture_recognizer", @"type" : @"task"}; + +static ResourceFileInfo *const kTwoHandsImage = @{@"name" : @"right_hands", @"type" : @"jpg"}; +static ResourceFileInfo *const kFistImage = @{@"name" : @"fist", @"type" : @"jpg"}; +static ResourceFileInfo *const kNoHandsImage = @{@"name" : @"cats_and_dogs", @"type" : @"jpg"}; +static ResourceFileInfo *const kThumbUpImage = @{@"name" : @"thumb_up", @"type" : @"jpg"}; +static ResourceFileInfo *const kPointingUpRotatedImage = + @{@"name" : @"pointing_up_rotated", @"type" : @"jpg"}; + +static ResourceFileInfo *const kExpectedFistLandmarksFile = + @{@"name" : @"fist_landmarks", @"type" : kPbFileExtension}; +static ResourceFileInfo *const kExpectedThumbUpLandmarksFile = + @{@"name" : @"thumb_up_landmarks", @"type" : kPbFileExtension}; + +static NSString *const kFistLabel = @"Closed_Fist"; +static NSString *const kExpectedThumbUpLabel = @"Thumb_Up"; +static NSString *const kExpectedPointingUpLabel = @"Pointing_Up"; +static NSString *const kRockLabel = @"Rock"; + +static const NSInteger kGestureExpectedIndex = -1; + +static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks"; +static const float kLandmarksErrorTolerance = 0.03f; + +#define AssertEqualErrors(error, expectedError) \ + XCTAssertNotNil(error); \ + XCTAssertEqualObjects(error.domain, expectedError.domain); \ + XCTAssertEqual(error.code, expectedError.code); \ + XCTAssertEqualObjects(error.localizedDescription, 
expectedError.localizedDescription) + +#define AssertEqualGestures(gesture, expectedGesture, handIndex, gestureIndex) \ + XCTAssertEqual(gesture.index, kGestureExpectedIndex, @"hand index = %d gesture index j = %d", \ + handIndex, gestureIndex); \ + XCTAssertEqualObjects(gesture.categoryName, expectedGesture.categoryName, \ + @"hand index = %d gesture index j = %d", handIndex, gestureIndex); + +#define AssertApproximatelyEqualLandmarks(landmark, expectedLandmark, handIndex, landmarkIndex) \ + XCTAssertEqualWithAccuracy(landmark.x, expectedLandmark.x, kLandmarksErrorTolerance, \ + @"hand index = %d landmark index j = %d", handIndex, landmarkIndex); \ + XCTAssertEqualWithAccuracy(landmark.y, expectedLandmark.y, kLandmarksErrorTolerance, \ + @"hand index = %d landmark index j = %d", handIndex, landmarkIndex); + +#define AssertGestureRecognizerResultIsEmpty(gestureRecognizerResult) \ + XCTAssertTrue(gestureRecognizerResult.gestures.count == 0); \ + XCTAssertTrue(gestureRecognizerResult.handedness.count == 0); \ + XCTAssertTrue(gestureRecognizerResult.landmarks.count == 0); \ + XCTAssertTrue(gestureRecognizerResult.worldLandmarks.count == 0); + +@interface MPPGestureRecognizerTests : XCTestCase +@end + +@implementation MPPGestureRecognizerTests + +#pragma mark Results + ++ (MPPGestureRecognizerResult *)emptyGestureRecognizerResult { + return [[MPPGestureRecognizerResult alloc] initWithGestures:@[] + handedness:@[] + landmarks:@[] + worldLandmarks:@[] + timestampInMilliseconds:0]; +} + ++ (MPPGestureRecognizerResult *)thumbUpGestureRecognizerResult { + NSString *filePath = + [MPPGestureRecognizerTests filePathWithFileInfo:kExpectedThumbUpLandmarksFile]; + + return [MPPGestureRecognizerResult + gestureRecognizerResultsFromTextEncodedProtobufFileWithName:filePath + gestureLabel:kExpectedThumbUpLabel + shouldRemoveZPosition:YES]; +} + ++ (MPPGestureRecognizerResult *)fistGestureRecognizerResultWithLabel:(NSString *)gestureLabel { + NSString *filePath = 
[MPPGestureRecognizerTests filePathWithFileInfo:kExpectedFistLandmarksFile]; + + return [MPPGestureRecognizerResult + gestureRecognizerResultsFromTextEncodedProtobufFileWithName:filePath + gestureLabel:gestureLabel + shouldRemoveZPosition:YES]; +} + +- (void)assertMultiHandLandmarks:(NSArray *> *)multiHandLandmarks + areApproximatelyEqualToExpectedMultiHandLandmarks: + (NSArray *> *)expectedMultiHandLandmarks { + XCTAssertEqual(multiHandLandmarks.count, expectedMultiHandLandmarks.count); + if (multiHandLandmarks.count == 0 || expectedMultiHandLandmarks.count == 0) { + return; + } + + NSArray *topHandLandmarks = multiHandLandmarks[0]; + NSArray *expectedTopHandLandmarks = expectedMultiHandLandmarks[0]; + + XCTAssertEqual(topHandLandmarks.count, expectedTopHandLandmarks.count); + for (int i = 0; i < expectedTopHandLandmarks.count; i++) { + MPPNormalizedLandmark *landmark = topHandLandmarks[i]; + XCTAssertNotNil(landmark); + AssertApproximatelyEqualLandmarks(landmark, expectedTopHandLandmarks[i], 0, i); + } +} + +- (void)assertMultiHandWorldLandmarks:(NSArray *> *)multiHandWorldLandmarks + areApproximatelyEqualToExpectedMultiHandWorldLandmarks: + (NSArray *> *)expectedMultiHandWorldLandmarks { + XCTAssertEqual(multiHandWorldLandmarks.count, expectedMultiHandWorldLandmarks.count); + if (multiHandWorldLandmarks.count == 0 || expectedMultiHandWorldLandmarks.count == 0) { + return; + } + + NSArray *topHandWorldLandmarks = multiHandWorldLandmarks[0]; + NSArray *expectedTopHandWorldLandmarks = expectedMultiHandWorldLandmarks[0]; + + XCTAssertEqual(topHandWorldLandmarks.count, expectedTopHandWorldLandmarks.count); + for (int i = 0; i < expectedTopHandWorldLandmarks.count; i++) { + MPPLandmark *landmark = topHandWorldLandmarks[i]; + XCTAssertNotNil(landmark); + AssertApproximatelyEqualLandmarks(landmark, expectedTopHandWorldLandmarks[i], 0, i); + } +} + +- (void)assertMultiHandGestures:(NSArray *> *)multiHandGestures + areApproximatelyEqualToExpectedMultiHandGestures: + (NSArray *> *)expectedMultiHandGestures { + XCTAssertEqual(multiHandGestures.count, 
expectedMultiHandGestures.count); + if (multiHandGestures.count == 0 || expectedMultiHandGestures.count == 0) { + return; + } + + NSArray *topHandGestures = multiHandGestures[0]; + NSArray *expectedTopHandGestures = expectedMultiHandGestures[0]; + + XCTAssertEqual(topHandGestures.count, expectedTopHandGestures.count); + for (int i = 0; i < expectedTopHandGestures.count; i++) { + MPPCategory *gesture = topHandGestures[i]; + XCTAssertNotNil(gesture); + AssertEqualGestures(gesture, expectedTopHandGestures[i], 0, i); + } +} + +- (void)assertGestureRecognizerResult:(MPPGestureRecognizerResult *)gestureRecognizerResult + isApproximatelyEqualToExpectedResult: + (MPPGestureRecognizerResult *)expectedGestureRecognizerResult { + [self assertMultiHandLandmarks:gestureRecognizerResult.landmarks + areApproximatelyEqualToExpectedMultiHandLandmarks:expectedGestureRecognizerResult.landmarks]; + [self assertMultiHandWorldLandmarks:gestureRecognizerResult.worldLandmarks + areApproximatelyEqualToExpectedMultiHandWorldLandmarks:expectedGestureRecognizerResult + .worldLandmarks]; + [self assertMultiHandGestures:gestureRecognizerResult.gestures + areApproximatelyEqualToExpectedMultiHandGestures:expectedGestureRecognizerResult.gestures]; +} + +#pragma mark File + ++ (NSString *)filePathWithFileInfo:(ResourceFileInfo *)fileInfo { + NSString *filePath = [MPPGestureRecognizerTests filePathWithName:fileInfo[@"name"] + extension:fileInfo[@"type"]]; + return filePath; +} + ++ (NSString *)filePathWithName:(NSString *)fileName extension:(NSString *)extension { + NSString *filePath = [[NSBundle bundleForClass:self.class] pathForResource:fileName + ofType:extension]; + return filePath; +} + +#pragma mark Gesture Recognizer Initializers + +- (MPPGestureRecognizerOptions *)gestureRecognizerOptionsWithModelFileInfo: + (ResourceFileInfo *)modelFileInfo { + NSString *modelPath = [MPPGestureRecognizerTests filePathWithFileInfo:modelFileInfo]; + MPPGestureRecognizerOptions *gestureRecognizerOptions = + [[MPPGestureRecognizerOptions alloc] 
init]; + gestureRecognizerOptions.baseOptions.modelAssetPath = modelPath; + + return gestureRecognizerOptions; +} + +- (MPPGestureRecognizer *)createGestureRecognizerWithOptionsSucceeds: + (MPPGestureRecognizerOptions *)gestureRecognizerOptions { + MPPGestureRecognizer *gestureRecognizer = + [[MPPGestureRecognizer alloc] initWithOptions:gestureRecognizerOptions error:nil]; + XCTAssertNotNil(gestureRecognizer); + + return gestureRecognizer; +} + +- (void)assertCreateGestureRecognizerWithOptions: + (MPPGestureRecognizerOptions *)gestureRecognizerOptions + failsWithExpectedError:(NSError *)expectedError { + NSError *error = nil; + MPPGestureRecognizer *gestureRecognizer = + [[MPPGestureRecognizer alloc] initWithOptions:gestureRecognizerOptions error:&error]; + + XCTAssertNil(gestureRecognizer); + AssertEqualErrors(error, expectedError); +} + +#pragma mark Assert Gesture Recognizer Results + +- (MPPImage *)imageWithFileInfo:(ResourceFileInfo *)fileInfo { + MPPImage *image = [MPPImage imageFromBundleWithClass:[MPPGestureRecognizerTests class] + fileName:fileInfo[@"name"] + ofType:fileInfo[@"type"]]; + XCTAssertNotNil(image); + + return image; +} + +- (MPPImage *)imageWithFileInfo:(ResourceFileInfo *)fileInfo + orientation:(UIImageOrientation)orientation { + MPPImage *image = [MPPImage imageFromBundleWithClass:[MPPGestureRecognizerTests class] + fileName:fileInfo[@"name"] + ofType:fileInfo[@"type"] + orientation:orientation]; + XCTAssertNotNil(image); + + return image; +} + +- (MPPGestureRecognizerResult *)recognizeImageWithFileInfo:(ResourceFileInfo *)imageFileInfo + usingGestureRecognizer: + (MPPGestureRecognizer *)gestureRecognizer { + MPPImage *mppImage = [self imageWithFileInfo:imageFileInfo]; + MPPGestureRecognizerResult *gestureRecognizerResult = [gestureRecognizer recognizeImage:mppImage + error:nil]; + XCTAssertNotNil(gestureRecognizerResult); + + return gestureRecognizerResult; +} + +- (void)assertResultsOfRecognizeImageWithFileInfo:(ResourceFileInfo *)fileInfo 
+ usingGestureRecognizer:(MPPGestureRecognizer *)gestureRecognizer + approximatelyEqualsGestureRecognizerResult: + (MPPGestureRecognizerResult *)expectedGestureRecognizerResult { + MPPGestureRecognizerResult *gestureRecognizerResult = + [self recognizeImageWithFileInfo:fileInfo usingGestureRecognizer:gestureRecognizer]; + [self assertGestureRecognizerResult:gestureRecognizerResult + isApproximatelyEqualToExpectedResult:expectedGestureRecognizerResult]; +} + +#pragma mark General Tests + +- (void)testRecognizeWithModelPathSucceeds { + NSString *modelPath = + [MPPGestureRecognizerTests filePathWithFileInfo:kGestureRecognizerBundleAssetFile]; + MPPGestureRecognizer *gestureRecognizer = + [[MPPGestureRecognizer alloc] initWithModelPath:modelPath error:nil]; + XCTAssertNotNil(gestureRecognizer); + + [self assertResultsOfRecognizeImageWithFileInfo:kThumbUpImage + usingGestureRecognizer:gestureRecognizer + approximatelyEqualsGestureRecognizerResult:[MPPGestureRecognizerTests + thumbUpGestureRecognizerResult]]; +} + +@end diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/BUILD b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/BUILD new file mode 100644 index 000000000..c2972fc60 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/BUILD @@ -0,0 +1,22 @@ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPGestureRecognizerResultProtobufHelpers", + srcs = ["sources/MPPGestureRecognizerResult+ProtobufHelpers.mm"], + hdrs = ["sources/MPPGestureRecognizerResult+ProtobufHelpers.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + deps = [ + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/tasks/cc/components/containers/proto:landmarks_detection_result_cc_proto", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/test/vision/utils:parse_proto_utils", + 
"//mediapipe/tasks/ios/vision/gesture_recognizer:MPPGestureRecognizerResult", + "//mediapipe/tasks/ios/vision/gesture_recognizer/utils:MPPGestureRecognizerResultHelpers", + ], +) diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.h b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.h new file mode 100644 index 000000000..cfa0a5e53 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.h @@ -0,0 +1,28 @@ +// Copyright 2022 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import +#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h" + +NS_ASSUME_NONNULL_BEGIN +@interface MPPGestureRecognizerResult (ProtobufHelpers) + ++ (nullable MPPGestureRecognizerResult *) + gestureRecognizerResultsFromTextEncodedProtobufFileWithName:(NSString *)fileName + gestureLabel:(NSString *)gestureLabel + shouldRemoveZPosition:(BOOL)removeZPosition; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.mm b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.mm new file mode 100644 index 000000000..b115229c7 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.mm @@ -0,0 +1,65 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtobufHelpers.h" + +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h" + +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result.pb.h" +#include "mediapipe/tasks/ios/test/vision/utils/sources/parse_proto_utils.h" + +namespace { +using ClassificationListProto = ::mediapipe::ClassificationList; +using ClassificationProto = ::mediapipe::Classification; +using LandmarksDetectionResultProto = + ::mediapipe::tasks::containers::proto::LandmarksDetectionResult; +using ::mediapipe::tasks::ios::test::vision::utils::get_proto_from_pbtxt; +} // anonymous namespace + +@implementation MPPGestureRecognizerResult (ProtobufHelpers) + ++ (MPPGestureRecognizerResult *) + gestureRecognizerResultsFromTextEncodedProtobufFileWithName:(NSString *)fileName + gestureLabel:(NSString *)gestureLabel + shouldRemoveZPosition:(BOOL)removeZPosition { + LandmarksDetectionResultProto landmarkDetectionResultProto; + + if (!get_proto_from_pbtxt(fileName.cppString, landmarkDetectionResultProto).ok()) { + return nil; + } + + if (removeZPosition) { + // Remove z position of landmarks, because they are not used in correctness testing. For video + // or live stream mode, the z positions vary a lot during tracking from frame to frame. 
+ for (int i = 0; i < landmarkDetectionResultProto.landmarks().landmark().size(); i++) { + auto &landmark = *landmarkDetectionResultProto.mutable_landmarks()->mutable_landmark(i); + landmark.clear_z(); + } + } + + ClassificationListProto gesturesProto; + ClassificationProto *classificationProto = gesturesProto.add_classification(); + classificationProto->set_label([gestureLabel UTF8String]); + + return [MPPGestureRecognizerResult + gestureRecognizerResultWithHandGesturesProto:{gesturesProto} + handednessProto:{landmarkDetectionResultProto.classifications()} + handLandmarksProto:{landmarkDetectionResultProto.landmarks()} + worldLandmarksProto:{landmarkDetectionResultProto.world_landmarks()} + timestampInMilliSeconds:0]; +} + +@end diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD b/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD index 78a07e17d..d9a76afde 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD @@ -56,7 +56,6 @@ objc_library( "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/common/utils:NSStringHelpers", "//mediapipe/tasks/ios/core:MPPTaskInfo", - "//mediapipe/tasks/ios/core:MPPTaskOptions", "//mediapipe/tasks/ios/vision/core:MPPImage", "//mediapipe/tasks/ios/vision/core:MPPVisionPacketCreator", "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner", diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h index ed8ff30f9..65136dc83 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h @@ -14,7 +14,6 @@ #import -#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" #import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" #import 
"mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerOptions.h" #import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h" diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h index 6b0f8bf81..0e2eede03 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h @@ -43,6 +43,31 @@ static const int kMicroSecondsPerMilliSecond = 1000; handLandmarksPacket:(const mediapipe::Packet &)handLandmarksPacket worldLandmarksPacket:(const mediapipe::Packet &)worldLandmarksPacket; +/** + * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world + * landmarks proto vectors. + * + * @param handGesturesProto A `std::vector<::mediapipe::ClassificationList>` of hand gestures. + * @param handednessProto A `std::vector<::mediapipe::ClassificationList>` of handedness. + * @param handLandmarksProto A `std::vector<::mediapipe::NormalizedLandmarkList>` of hand landmarks. + * @param worldLandmarksProto A `std::vector<::mediapipe::LandmarkList>` of world landmarks. + * @param timestampInMilliseconds The timestamp of the result, in milliseconds. + * + * @return An `MPPGestureRecognizerResult` object that contains the hand gesture recognition + * results. 
+ */ ++ (MPPGestureRecognizerResult *) + gestureRecognizerResultWithHandGesturesProto: + (const std::vector<::mediapipe::ClassificationList> &)handGesturesProto + handednessProto: + (const std::vector<::mediapipe::ClassificationList> &) + handednessProto + handLandmarksProto: + (const std::vector<::mediapipe::NormalizedLandmarkList> &) + handLandmarksProto + worldLandmarksProto: + (const std::vector<::mediapipe::LandmarkList> &)worldLandmarksProto + timestampInMilliSeconds:(NSInteger)timestampInMilliseconds; @end NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm index 8eed2a923..f129da8e5 100644 --- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm +++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm @@ -17,12 +17,6 @@ #import "mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h" #import "mediapipe/tasks/ios/components/containers/utils/sources/MPPLandmark+Helpers.h" -#include "mediapipe/framework/formats/classification.pb.h" -#include "mediapipe/framework/formats/landmark.pb.h" -#include "mediapipe/framework/packet.h" - -static const NSInteger kDefaultGestureIndex = -1; - namespace { using ClassificationListProto = ::mediapipe::ClassificationList; using LandmarkListProto = ::mediapipe::LandmarkList; @@ -30,6 +24,8 @@ using NormalizedLandmarkListProto = ::mediapipe::NormalizedLandmarkList; using ::mediapipe::Packet; } // namespace +static const NSInteger kDefaultGestureIndex = -1; + @implementation MPPGestureRecognizerResult (Helpers) + (MPPGestureRecognizerResult *)emptyGestureRecognizerResultWithTimestampInMilliseconds: @@ -41,6 +37,80 @@ using ::mediapipe::Packet; timestampInMilliseconds:timestampInMilliseconds]; } ++ (MPPGestureRecognizerResult *) + 
gestureRecognizerResultWithHandGesturesProto: + (const std::vector &)handGesturesProto + handednessProto: + (const std::vector &)handednessProto + handLandmarksProto:(const std::vector &) + handLandmarksProto + worldLandmarksProto: + (const std::vector &)worldLandmarksProto + timestampInMilliSeconds:(NSInteger)timestampInMilliseconds { + NSMutableArray *> *multiHandGestures = + [NSMutableArray arrayWithCapacity:(NSUInteger)handGesturesProto.size()]; + + for (const auto &classificationListProto : handGesturesProto) { + NSMutableArray *gestures = [NSMutableArray + arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()]; + for (const auto &classificationProto : classificationListProto.classification()) { + MPPCategory *category = [MPPCategory categoryWithProto:classificationProto + index:kDefaultGestureIndex]; + [gestures addObject:category]; + } + [multiHandGestures addObject:gestures]; + } + + NSMutableArray *> *multiHandHandedness = + [NSMutableArray arrayWithCapacity:(NSUInteger)handednessProto.size()]; + + for (const auto &classificationListProto : handednessProto) { + NSMutableArray *handedness = [NSMutableArray + arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()]; + for (const auto &classificationProto : classificationListProto.classification()) { + MPPCategory *category = [MPPCategory categoryWithProto:classificationProto]; + [handedness addObject:category]; + } + [multiHandHandedness addObject:handedness]; + } + + NSMutableArray *> *multiHandLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarksProto.size()]; + + for (const auto &handLandmarkListProto : handLandmarksProto) { + NSMutableArray *handLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarkListProto.landmark().size()]; + for (const auto &normalizedLandmarkProto : handLandmarkListProto.landmark()) { + MPPNormalizedLandmark *normalizedLandmark = + [MPPNormalizedLandmark 
normalizedLandmarkWithProto:normalizedLandmarkProto]; + [handLandmarks addObject:normalizedLandmark]; + } + [multiHandLandmarks addObject:handLandmarks]; + } + + NSMutableArray *> *multiHandWorldLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarksProto.size()]; + + for (const auto &worldLandmarkListProto : worldLandmarksProto) { + NSMutableArray *worldLandmarks = + [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarkListProto.landmark().size()]; + for (const auto &landmarkProto : worldLandmarkListProto.landmark()) { + MPPLandmark *landmark = [MPPLandmark landmarkWithProto:landmarkProto]; + [worldLandmarks addObject:landmark]; + } + [multiHandWorldLandmarks addObject:worldLandmarks]; + } + + MPPGestureRecognizerResult *gestureRecognizerResult = + [[MPPGestureRecognizerResult alloc] initWithGestures:multiHandGestures + handedness:multiHandHandedness + landmarks:multiHandLandmarks + worldLandmarks:multiHandWorldLandmarks + timestampInMilliseconds:timestampInMilliseconds]; + + return gestureRecognizerResult; +} + + (MPPGestureRecognizerResult *) gestureRecognizerResultWithHandGesturesPacket:(const Packet &)handGesturesPacket handednessPacket:(const Packet &)handednessPacket @@ -62,76 +132,16 @@ using ::mediapipe::Packet; emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds]; } - const std::vector &handGesturesClassificationListProtos = - handGesturesPacket.Get>(); - NSMutableArray *> *multiHandGestures = - [NSMutableArray arrayWithCapacity:(NSUInteger)handGesturesClassificationListProtos.size()]; - - for (const auto &classificationListProto : handGesturesClassificationListProtos) { - NSMutableArray *gestures = [NSMutableArray - arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()]; - for (const auto &classificationProto : classificationListProto.classification()) { - MPPCategory *category = [MPPCategory categoryWithProto:classificationProto - index:kDefaultGestureIndex]; - [gestures 
addObject:category]; - } - [multiHandGestures addObject:gestures]; - } - - const std::vector &handednessClassificationListProtos = - handednessPacket.Get>(); - NSMutableArray *> *multiHandHandedness = - [NSMutableArray arrayWithCapacity:(NSUInteger)handednessClassificationListProtos.size()]; - - for (const auto &classificationListProto : handednessClassificationListProtos) { - NSMutableArray *handedness = [NSMutableArray - arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()]; - for (const auto &classificationProto : classificationListProto.classification()) { - MPPCategory *category = [MPPCategory categoryWithProto:classificationProto]; - [handedness addObject:category]; - } - [multiHandHandedness addObject:handedness]; - } - - const std::vector &handLandmarkListProtos = - handLandmarksPacket.Get>(); - NSMutableArray *> *multiHandLandmarks = - [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarkListProtos.size()]; - - for (const auto &handLandmarkListProto : handLandmarkListProtos) { - NSMutableArray *handLandmarks = - [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarkListProto.landmark().size()]; - for (const auto &normalizedLandmarkProto : handLandmarkListProto.landmark()) { - MPPNormalizedLandmark *normalizedLandmark = - [MPPNormalizedLandmark normalizedLandmarkWithProto:normalizedLandmarkProto]; - [handLandmarks addObject:normalizedLandmark]; - } - [multiHandLandmarks addObject:handLandmarks]; - } - - const std::vector &worldLandmarkListProtos = - worldLandmarksPacket.Get>(); - NSMutableArray *> *multiHandWorldLandmarks = - [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarkListProtos.size()]; - - for (const auto &worldLandmarkListProto : worldLandmarkListProtos) { - NSMutableArray *worldLandmarks = - [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarkListProto.landmark().size()]; - for (const auto &landmarkProto : worldLandmarkListProto.landmark()) { - MPPLandmark *landmark = [MPPLandmark 
landmarkWithProto:landmarkProto]; - [worldLandmarks addObject:landmark]; - } - [multiHandWorldLandmarks addObject:worldLandmarks]; - } - - MPPGestureRecognizerResult *gestureRecognizerResult = - [[MPPGestureRecognizerResult alloc] initWithGestures:multiHandGestures - handedness:multiHandHandedness - landmarks:multiHandLandmarks - worldLandmarks:multiHandWorldLandmarks - timestampInMilliseconds:timestampInMilliseconds]; - - return gestureRecognizerResult; + return [MPPGestureRecognizerResult + gestureRecognizerResultWithHandGesturesProto:handGesturesPacket + .Get>() + handednessProto:handednessPacket + .Get>() + handLandmarksProto:handLandmarksPacket.Get< + std::vector>() + worldLandmarksProto:worldLandmarksPacket + .Get>() + timestampInMilliSeconds:timestampInMilliseconds]; } @end diff --git a/mediapipe/tasks/testdata/vision/BUILD b/mediapipe/tasks/testdata/vision/BUILD index 632e8aa4e..e2622a3c8 100644 --- a/mediapipe/tasks/testdata/vision/BUILD +++ b/mediapipe/tasks/testdata/vision/BUILD @@ -50,6 +50,7 @@ mediapipe_files(srcs = [ "face_landmarker_v2.task", "fist.jpg", "fist.png", + "gesture_recognizer.task", "hair_segmentation.tflite", "hand_landmark_full.tflite", "hand_landmark_lite.tflite", @@ -104,7 +105,6 @@ exports_files( "expected_right_down_hand_landmarks.prototxt", "expected_right_up_hand_landmarks.prototxt", "face_geometry_expected_out.pbtxt", - "gesture_recognizer.task", "portrait_expected_detection.pbtxt", "portrait_expected_face_geometry.pbtxt", "portrait_rotated_expected_detection.pbtxt", diff --git a/third_party/external_files.bzl b/third_party/external_files.bzl index 652a2947f..722ec3426 100644 --- a/third_party/external_files.bzl +++ b/third_party/external_files.bzl @@ -646,6 +646,18 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/left_hands_rotated.jpg?generation=1666037068103465"], ) + http_file( + name = "com_google_mediapipe_leopard_bg_removal_result_png", + sha256 = 
"afd33f2058fd58d189cda86ec931647741a6139970c9bcbc637cdd151ec657c5", + urls = ["https://storage.googleapis.com/mediapipe-assets/leopard_bg_removal_result.png?generation=1685997278308542"], + ) + + http_file( + name = "com_google_mediapipe_leopard_jpg", + sha256 = "d66fda0aa655f87c9fe87965a642e7b33ec990a3d9ed5812f1e5513da9d7d744", + urls = ["https://storage.googleapis.com/mediapipe-assets/leopard.jpg?generation=1685997280368627"], + ) + http_file( name = "com_google_mediapipe_mobilebert_embedding_with_metadata_tflite", sha256 = "fa47142dcc6f446168bc672f2df9605b6da5d0c0d6264e9be62870282365b95c", @@ -664,6 +676,12 @@ def external_files(): urls = ["https://storage.googleapis.com/mediapipe-assets/mobilebert_with_metadata.tflite?generation=1661875806733025"], ) + http_file( + name = "com_google_mediapipe_mobile_bg_removal_tflite", + sha256 = "f85797391cd1ef03988441710781342a77a980665965771fba603e5aee940ee8", + urls = ["https://storage.googleapis.com/mediapipe-assets/mobile_bg_removal.tflite?generation=1685997284190857"], + ) + http_file( name = "com_google_mediapipe_mobile_ica_8bit-with-custom-metadata_tflite", sha256 = "31f34f0dd0dc39e69e9c3deb1e3f3278febeb82ecf57c235834348a75df8fb51",