diff --git a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h
index 9a11d1e29..9ad958479 100644
--- a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h
+++ b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h
@@ -19,8 +19,30 @@ NS_ASSUME_NONNULL_BEGIN
 @interface MPPCategory (Helpers)
 
+/**
+ * Creates an `MPPCategory` with the given MediaPipe `Classification` proto.
+ *
+ * @param classificationProto A MediaPipe `Classification` proto.
+ * @return An `MPPCategory` object created from the given MediaPipe `Classification` proto.
+ */
 + (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto;
 
+/**
+ * Creates an `MPPCategory` with the given MediaPipe `Classification` proto and the given category
+ * index. The resulting `MPPCategory` is created with the given category index instead of the
+ * category index specified in the `Classification` proto. This method is useful for tasks like
+ * the gesture recognizer, which always returns a default index for the recognized gestures.
+ *
+ * @param classificationProto A MediaPipe `Classification` proto.
+ * @param index The index to be used for creating the `MPPCategory` instead of the category index
+ * specified in the `Classification` proto.
+ *
+ * @return An `MPPCategory` object created from the given MediaPipe `Classification` proto with
+ * the given category index.
+ */
++ (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto
+                             index:(NSInteger)index;
+
 @end
 
 NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm
index 12cfa5627..542b8c41d 100644
--- a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm
+++ b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm
@@ -21,7 +21,8 @@ using ClassificationProto = ::mediapipe::Classification;
 
 @implementation MPPCategory (Helpers)
 
-+ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto {
++ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto
+                             index:(NSInteger)index {
   NSString *categoryName;
   NSString *displayName;
 
@@ -33,10 +34,14 @@ using ClassificationProto = ::mediapipe::Classification;
     displayName = [NSString stringWithCppString:classificationProto.display_name()];
   }
 
-  return [[MPPCategory alloc] initWithIndex:classificationProto.index()
+  return [[MPPCategory alloc] initWithIndex:index
                                       score:classificationProto.score()
                                categoryName:categoryName
                                 displayName:displayName];
 }
 
++ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto {
+  return [MPPCategory categoryWithProto:classificationProto index:classificationProto.index()];
+}
+
 @end
diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/BUILD b/mediapipe/tasks/ios/test/vision/gesture_recognizer/BUILD
new file mode 100644
index 000000000..5be17a26c
--- /dev/null
+++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/BUILD
@@ -0,0 +1,62 @@
+load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test")
+load(
+    "//mediapipe/framework/tool:ios.bzl",
+    "MPP_TASK_MINIMUM_OS_VERSION",
+)
+load(
+    "@org_tensorflow//tensorflow/lite:special_rules.bzl",
+    "tflite_ios_lab_runner",
+)
+
+package(default_visibility = ["//mediapipe/tasks:internal"])
+
+licenses(["notice"]) + +# Default tags for filtering iOS targets. Targets are restricted to Apple platforms. +TFL_DEFAULT_TAGS = [ + "apple", +] + +# Following sanitizer tests are not supported by iOS test targets. +TFL_DISABLED_SANITIZER_TAGS = [ + "noasan", + "nomsan", + "notsan", +] + +objc_library( + name = "MPPGestureRecognizerObjcTestLibrary", + testonly = 1, + srcs = ["MPPGestureRecognizerTests.m"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + data = [ + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:gesture_recognizer.task", + "//mediapipe/tasks/testdata/vision:test_protos", + ], + deps = [ + "//mediapipe/tasks/ios/common:MPPCommon", + "//mediapipe/tasks/ios/test/vision/utils:MPPImageTestUtils", + "//mediapipe/tasks/ios/vision/gesture_recognizer:MPPGestureRecognizer", + "//mediapipe/tasks/ios/test/vision/gesture_recognizer/utils:MPPGestureRecognizerResultProtoHelpers", + ] + select({ + "//third_party:opencv_ios_sim_arm64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//third_party:opencv_ios_arm64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//third_party:opencv_ios_x86_64_source_build": ["@ios_opencv_source//:opencv_xcframework"], + "//conditions:default": ["@ios_opencv//:OpencvFramework"], + }), +) + +ios_unit_test( + name = "MPPGestureRecognizerObjcTest", + minimum_os_version = MPP_TASK_MINIMUM_OS_VERSION, + runner = tflite_ios_lab_runner("IOS_LATEST"), + tags = TFL_DEFAULT_TAGS + TFL_DISABLED_SANITIZER_TAGS, + deps = [ + ":MPPGestureRecognizerObjcTestLibrary", + ], +) diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/MPPGestureRecognizerTests.m b/mediapipe/tasks/ios/test/vision/gesture_recognizer/MPPGestureRecognizerTests.m new file mode 100644 index 000000000..1a48322b4 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/MPPGestureRecognizerTests.m @@ -0,0 +1,287 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#import <XCTest/XCTest.h>
+
+#import "mediapipe/tasks/ios/common/sources/MPPCommon.h"
+#import "mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.h"
+#import "mediapipe/tasks/ios/test/vision/utils/sources/MPPImage+TestUtils.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h"
+
+static NSDictionary *const kGestureRecognizerBundleAssetFile =
+    @{@"name" : @"gesture_recognizer", @"type" : @"task"};
+
+static NSDictionary *const kTwoHandsImage = @{@"name" : @"right_hands", @"type" : @"jpg"};
+static NSDictionary *const kFistImage = @{@"name" : @"fist", @"type" : @"jpg"};
+static NSDictionary *const kNoHandsImage = @{@"name" : @"cats_and_dogs", @"type" : @"jpg"};
+static NSDictionary *const kThumbUpImage = @{@"name" : @"thumb_up", @"type" : @"jpg"};
+static NSDictionary *const kPointingUpRotatedImage =
+    @{@"name" : @"pointing_up_rotated", @"type" : @"jpg"};
+
+static NSDictionary *const kExpectedFistLandmarksFile =
+    @{@"name" : @"fist_landmarks", @"type" : @"pbtxt"};
+static NSDictionary *const kExpectedThumbUpLandmarksFile =
+    @{@"name" : @"thumb_up_landmarks", @"type" : @"pbtxt"};
+
+static NSString *const kFistLabel = @"Closed_Fist";
+static NSString *const kExpectedThumbUpLabel = @"Thumb_Up";
+static NSString *const kExpectedPointingUpLabel = @"Pointing_Up";
+static NSString *const kRockLabel = @"Rock";
+
+static const NSInteger kGestureExpectedIndex = -1;
+
+static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks";
+static const float kLandmarksErrorTolerance = 0.03f;
+
+#define AssertEqualErrors(error, expectedError)                                               \
+  XCTAssertNotNil(error);                                                                     \
+  XCTAssertEqualObjects(error.domain, expectedError.domain);                                  \
+  XCTAssertEqual(error.code, expectedError.code);                                             \
+  XCTAssertNotEqual(                                                                          \
+      [error.localizedDescription rangeOfString:expectedError.localizedDescription].location, \
+      NSNotFound)
+
+#define AssertEqualGestures(gesture, expectedGesture, handIndex, gestureIndex)                \
+  XCTAssertEqual(gesture.index, kGestureExpectedIndex, @"hand index = %d gesture index = %d", \
+                 handIndex, gestureIndex);                                                    \
+  XCTAssertEqualObjects(gesture.categoryName, expectedGesture.categoryName,                   \
+                        @"hand index = %d gesture index = %d", handIndex, gestureIndex);
+
+#define AssertApproximatelyEqualLandmarks(landmark, expectedLandmark, handIndex, landmarkIndex) \
+  XCTAssertEqualWithAccuracy(landmark.x, expectedLandmark.x, kLandmarksErrorTolerance,          \
+                             @"hand index = %d landmark index = %d", handIndex, landmarkIndex); \
+  XCTAssertEqualWithAccuracy(landmark.y, expectedLandmark.y, kLandmarksErrorTolerance,          \
+                             @"hand index = %d landmark index = %d", handIndex, landmarkIndex);
+
+// Element-wise comparison is done by the assert helper methods below; this macro only checks
+// that the number of hands matches.
+#define AssertApproximatelyEqualMultiHandLandmarks(multiHandLandmarks,          \
+                                                   expectedMultiHandLandmarks)  \
+  XCTAssertEqual(multiHandLandmarks.count, expectedMultiHandLandmarks.count);
+
+#define AssertGestureRecognizerResultIsEmpty(gestureRecognizerResult) \
+  XCTAssertTrue(gestureRecognizerResult.gestures.count == 0);         \
+  XCTAssertTrue(gestureRecognizerResult.handedness.count == 0);       \
+  XCTAssertTrue(gestureRecognizerResult.landmarks.count == 0);        \
+  XCTAssertTrue(gestureRecognizerResult.worldLandmarks.count == 0);
+
+@interface
MPPGestureRecognizerTests : XCTestCase
+@end
+
+@implementation MPPGestureRecognizerTests
+
+#pragma mark Results
+
++ (MPPGestureRecognizerResult *)emptyGestureRecognizerResult {
+  return [[MPPGestureRecognizerResult alloc] initWithGestures:@[]
+                                                    handedness:@[]
+                                                     landmarks:@[]
+                                                worldLandmarks:@[]
+                                       timestampInMilliseconds:0];
+}
+
++ (MPPGestureRecognizerResult *)thumbUpGestureRecognizerResult {
+  NSString *filePath =
+      [MPPGestureRecognizerTests filePathWithFileInfo:kExpectedThumbUpLandmarksFile];
+
+  return [MPPGestureRecognizerResult
+      gestureRecognizerResultsFromTextEncodedProtobufFileWithName:filePath
+                                                     gestureLabel:kExpectedThumbUpLabel
+                                            shouldRemoveZPosition:YES];
+}
+
++ (MPPGestureRecognizerResult *)fistGestureRecognizerResultWithLabel:(NSString *)gestureLabel {
+  NSString *filePath = [MPPGestureRecognizerTests filePathWithFileInfo:kExpectedFistLandmarksFile];
+
+  return [MPPGestureRecognizerResult
+      gestureRecognizerResultsFromTextEncodedProtobufFileWithName:filePath
+                                                     gestureLabel:gestureLabel
+                                            shouldRemoveZPosition:YES];
+}
+
+- (void)assertMultiHandLandmarks:(NSArray<NSArray<MPPNormalizedLandmark *> *> *)multiHandLandmarks
+    isApproximatelyEqualToExpectedMultiHandLandmarks:
+        (NSArray<NSArray<MPPNormalizedLandmark *> *> *)expectedMultiHandLandmarks {
+  XCTAssertEqual(multiHandLandmarks.count, expectedMultiHandLandmarks.count);
+  if (multiHandLandmarks.count == 0) {
+    return;
+  }
+
+  NSArray<MPPNormalizedLandmark *> *topHandLandmarks = multiHandLandmarks[0];
+  NSArray<MPPNormalizedLandmark *> *expectedTopHandLandmarks = expectedMultiHandLandmarks[0];
+
+  XCTAssertEqual(topHandLandmarks.count, expectedTopHandLandmarks.count);
+  for (int i = 0; i < expectedTopHandLandmarks.count; i++) {
+    MPPNormalizedLandmark *landmark = topHandLandmarks[i];
+    XCTAssertNotNil(landmark);
+    AssertApproximatelyEqualLandmarks(landmark, expectedTopHandLandmarks[i], 0, i);
+  }
+}
+
+- (void)assertMultiHandWorldLandmarks:
+            (NSArray<NSArray<MPPLandmark *> *> *)multiHandWorldLandmarks
+    isApproximatelyEqualToExpectedMultiHandWorldLandmarks:
+        (NSArray<NSArray<MPPLandmark *> *> *)expectedMultiHandWorldLandmarks {
+  XCTAssertEqual(multiHandWorldLandmarks.count, expectedMultiHandWorldLandmarks.count);
+  if (expectedMultiHandWorldLandmarks.count == 0) {
+    return;
+  }
+
+  NSArray<MPPLandmark *> *topHandWorldLandmarks = multiHandWorldLandmarks[0];
+  NSArray<MPPLandmark *> *expectedTopHandWorldLandmarks = expectedMultiHandWorldLandmarks[0];
+
+  XCTAssertEqual(topHandWorldLandmarks.count, expectedTopHandWorldLandmarks.count);
+  for (int i = 0; i < expectedTopHandWorldLandmarks.count; i++) {
+    MPPLandmark *landmark = topHandWorldLandmarks[i];
+    XCTAssertNotNil(landmark);
+    AssertApproximatelyEqualLandmarks(landmark, expectedTopHandWorldLandmarks[i], 0, i);
+  }
+}
+
+- (void)assertMultiHandGestures:(NSArray<NSArray<MPPCategory *> *> *)multiHandGestures
+    isApproximatelyEqualToExpectedMultiHandGestures:
+        (NSArray<NSArray<MPPCategory *> *> *)expectedMultiHandGestures {
+  XCTAssertEqual(multiHandGestures.count, expectedMultiHandGestures.count);
+  if (multiHandGestures.count == 0) {
+    return;
+  }
+
+  NSArray<MPPCategory *> *topHandGestures = multiHandGestures[0];
+  NSArray<MPPCategory *> *expectedTopHandGestures = expectedMultiHandGestures[0];
+
+  XCTAssertEqual(topHandGestures.count, expectedTopHandGestures.count);
+  for (int i = 0; i < expectedTopHandGestures.count; i++) {
+    MPPCategory *gesture = topHandGestures[i];
+    XCTAssertNotNil(gesture);
+    AssertEqualGestures(gesture, expectedTopHandGestures[i], 0, i);
+  }
+}
+
+- (void)assertGestureRecognizerResult:(MPPGestureRecognizerResult *)gestureRecognizerResult
+    isApproximatelyEqualToExpectedResult:
+        (MPPGestureRecognizerResult *)expectedGestureRecognizerResult {
+  [self
assertMultiHandLandmarks:gestureRecognizerResult.landmarks + isApproximatelyEqualToExpectedMultiHandLandmarks:expectedGestureRecognizerResult.landmarks]; + [self assertMultiHandWorldLandmarks:gestureRecognizerResult.worldLandmarks + isApproximatelyEqualToExpectedMultiHandWorldLandmarks:expectedGestureRecognizerResult + .worldLandmarks]; + [self assertMultiHandGestures:gestureRecognizerResult.gestures + isApproximatelyEqualToExpectedMultiHandGestures:expectedGestureRecognizerResult.gestures]; +} + +#pragma mark File + ++ (NSString *)filePathWithFileInfo:(NSDictionary *)fileInfo { + NSString *filePath = [MPPGestureRecognizerTests filePathWithName:fileInfo[@"name"] + extension:fileInfo[@"type"]]; + return filePath; +} + ++ (NSString *)filePathWithName:(NSString *)fileName extension:(NSString *)extension { + NSString *filePath = [[NSBundle bundleForClass:self.class] pathForResource:fileName + ofType:extension]; + return filePath; +} + +#pragma mark Gesture Recognizer Initializers + +- (MPPGestureRecognizerOptions *)gestureRecognizerOptionsWithModelFileInfo: + (NSDictionary *)modelFileInfo { + NSString *modelPath = [MPPGestureRecognizerTests filePathWithFileInfo:modelFileInfo]; + MPPGestureRecognizerOptions *gestureRecognizerOptions = + [[MPPGestureRecognizerOptions alloc] init]; + gestureRecognizerOptions.baseOptions.modelAssetPath = modelPath; + + return gestureRecognizerOptions; +} + +- (MPPGestureRecognizer *)createGestureRecognizerWithOptionsSucceeds: + (MPPGestureRecognizerOptions *)gestureRecognizerOptions { + MPPGestureRecognizer *gestureRecognizer = + [[MPPGestureRecognizer alloc] initWithOptions:gestureRecognizerOptions error:nil]; + XCTAssertNotNil(gestureRecognizer); + + return gestureRecognizer; +} + +- (void)assertCreateGestureRecognizerWithOptions: + (MPPGestureRecognizerOptions *)gestureRecognizerOptions + failsWithExpectedError:(NSError *)expectedError { + NSError *error = nil; + MPPGestureRecognizer *gestureRecognizer = + [[MPPGestureRecognizer alloc] initWithOptions:gestureRecognizerOptions error:&error]; + + XCTAssertNil(gestureRecognizer); + AssertEqualErrors(error, expectedError); +} + +#pragma mark Assert Gesture Recognizer Results + +- (MPPImage *)imageWithFileInfo:(NSDictionary *)fileInfo { + MPPImage *image = [MPPImage imageFromBundleWithClass:[MPPGestureRecognizerTests class] + fileName:fileInfo[@"name"] + ofType:fileInfo[@"type"]]; + XCTAssertNotNil(image); + + return image; +} + +- (MPPImage *)imageWithFileInfo:(NSDictionary *)fileInfo + orientation:(UIImageOrientation)orientation { + MPPImage *image = [MPPImage imageFromBundleWithClass:[MPPGestureRecognizerTests class] + fileName:fileInfo[@"name"] + ofType:fileInfo[@"type"] + orientation:orientation]; + XCTAssertNotNil(image); + + return image; +} + +- (MPPGestureRecognizerResult *)recognizeImageWithFileInfo:(NSDictionary *)imageFileInfo + usingGestureRecognizer: + (MPPGestureRecognizer *)gestureRecognizer { + MPPImage *mppImage = [self imageWithFileInfo:imageFileInfo]; + MPPGestureRecognizerResult *gestureRecognizerResult = [gestureRecognizer recognizeImage:mppImage + error:nil]; + XCTAssertNotNil(gestureRecognizerResult); + + return gestureRecognizerResult; +} + +- (void)assertResultsOfRecognizeImageWithFileInfo:(NSDictionary *)fileInfo + usingGestureRecognizer:(MPPGestureRecognizer *)gestureRecognizer + approximatelyEqualsGestureRecognizerResult: + (MPPGestureRecognizerResult *)expectedGestureRecognizerResult { + MPPGestureRecognizerResult *gestureRecognizerResult = + [self recognizeImageWithFileInfo:fileInfo 
usingGestureRecognizer:gestureRecognizer]; + [self assertGestureRecognizerResult:gestureRecognizerResult + isApproximatelyEqualToExpectedResult:expectedGestureRecognizerResult]; +} + +#pragma mark General Tests + +- (void)testRecognizeWithModelPathSucceeds { + NSString *modelPath = + [MPPGestureRecognizerTests filePathWithFileInfo:kGestureRecognizerBundleAssetFile]; + MPPGestureRecognizer *gestureRecognizer = + [[MPPGestureRecognizer alloc] initWithModelPath:modelPath error:nil]; + XCTAssertNotNil(gestureRecognizer); + + [self assertResultsOfRecognizeImageWithFileInfo:kThumbUpImage + usingGestureRecognizer:gestureRecognizer + approximatelyEqualsGestureRecognizerResult:[MPPGestureRecognizerTests + thumbUpGestureRecognizerResult]]; +} + +@end diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/BUILD b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/BUILD new file mode 100644 index 000000000..ddac21ed2 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/BUILD @@ -0,0 +1,21 @@ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPGestureRecognizerResultProtoHelpers", + srcs = ["sources/MPPGestureRecognizerResult+ProtoHelpers.mm"], + hdrs = ["sources/MPPGestureRecognizerResult+ProtoHelpers.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + deps = [ + "//mediapipe/tasks/ios/test/vision/utils:parse_proto_utils", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/tasks/cc/components/containers/proto:landmarks_detection_result_cc_proto", + "//mediapipe/tasks/ios/vision/gesture_recognizer/utils:MPPGestureRecognizerResultHelpers", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + ], +) diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.h b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.h new file mode 100644 index 000000000..6bb2e5182 --- /dev/null +++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.h @@ -0,0 +1,28 @@ +// Copyright 2022 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#import <Foundation/Foundation.h>
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h"
+
+NS_ASSUME_NONNULL_BEGIN
+@interface MPPGestureRecognizerResult (ProtoHelpers)
+
++ (MPPGestureRecognizerResult *)
+    gestureRecognizerResultsFromTextEncodedProtobufFileWithName:(NSString *)fileName
+                                                   gestureLabel:(NSString *)gestureLabel
+                                          shouldRemoveZPosition:(BOOL)removeZPosition;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.mm b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.mm
new file mode 100644
index 000000000..39002eb7e
--- /dev/null
+++ b/mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.mm
@@ -0,0 +1,66 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/test/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+ProtoHelpers.h"
+
+#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h"
+
+#include "mediapipe/framework/formats/classification.pb.h"
+#include "mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result.pb.h"
+#include "mediapipe/tasks/ios/test/vision/utils/sources/parse_proto_utils.h"
+
+namespace {
+using ClassificationListProto = ::mediapipe::ClassificationList;
+using ClassificationProto = ::mediapipe::Classification;
+using LandmarksDetectionResultProto =
+    ::mediapipe::tasks::containers::proto::LandmarksDetectionResult;
+using ::mediapipe::tasks::ios::test::vision::utils::get_proto_from_pbtxt;
+}  // namespace
+
+@implementation MPPGestureRecognizerResult (ProtoHelpers)
+
++ (MPPGestureRecognizerResult *)
+    gestureRecognizerResultsFromTextEncodedProtobufFileWithName:(NSString *)fileName
+                                                   gestureLabel:(NSString *)gestureLabel
+                                          shouldRemoveZPosition:(BOOL)removeZPosition {
+  LandmarksDetectionResultProto landmarkDetectionResultProto;
+
+  if (!get_proto_from_pbtxt(fileName.cppString, landmarkDetectionResultProto).ok()) {
+    return nil;
+  }
+
+  if (removeZPosition) {
+    // Remove the z position of landmarks, because they are not used in correctness
+    // testing. For video or live stream mode, the z positions vary a lot during
+    // tracking from frame to frame.
+    for (int i = 0; i < landmarkDetectionResultProto.landmarks().landmark().size(); i++) {
+      auto &landmark = *landmarkDetectionResultProto.mutable_landmarks()->mutable_landmark(i);
+      landmark.clear_z();
+    }
+  }
+
+  ClassificationListProto gesturesProto;
+  ClassificationProto *classificationProto = gesturesProto.add_classification();
+  classificationProto->set_label([gestureLabel UTF8String]);
+
+  return [MPPGestureRecognizerResult
+      gestureRecognizerResultWithHandGesturesProto:{gesturesProto}
+                                   handednessProto:{landmarkDetectionResultProto.classifications()}
+                                handLandmarksProto:{landmarkDetectionResultProto.landmarks()}
+                               worldLandmarksProto:{landmarkDetectionResultProto.world_landmarks()}
+                           timestampInMilliSeconds:0];
+}
+
+@end
diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h
index 318b24051..a79302446 100644
--- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h
+++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h
@@ -58,38 +58,57 @@ NS_ASSUME_NONNULL_BEGIN
                         error:(NSError **)error NS_DESIGNATED_INITIALIZER;
 
 /**
- * Creates a `NormalizedRect` from a region of interest and an image orientation, performing
- * sanity checks on-the-fly.
- * If the input region of interest equals `CGRectZero`, returns a default `NormalizedRect` covering
- * the whole image with rotation set according `imageOrientation`. If `ROIAllowed` is NO, an error
- * will be returned if the input region of interest is not equal to `CGRectZero`. Mirrored
- * orientations (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
+ * Creates a `NormalizedRect` from an image orientation, for a task that does not support a
+ * region of interest, performing sanity checks on-the-fly. Mirrored orientations
+ * (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
  * `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error
  * will be returned if `imageOrientation` is equal to any one of them.
  *
- * @param roi A `CGRect` specifying the region of interest. If the input region of interest equals
- * `CGRectZero`, the returned `NormalizedRect` covers the whole image. Make sure that `roi` equals
- * `CGRectZero` if `ROIAllowed` is NO. Otherwise, an error will be returned.
- * @param imageSize A `CGSize` specifying the size of the image within which normalized rect is
- * calculated.
  * @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the
  * image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise.
  * Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`,
  * `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error
  * will be returned if `imageOrientation` is equal to any one of them.
- * @param ROIAllowed Indicates if the `roi` field is allowed to be a value other than `CGRectZero`.
+ * @param imageSize A `CGSize` specifying the size of the image within which the normalized rect
+ * is calculated.
+ * @param error Pointer to the memory location where errors, if any, should be saved. If @c NULL,
+ * no error will be saved.
+ *
+ * @return An optional `NormalizedRect` from the given image orientation.
+ */
+- (std::optional<mediapipe::NormalizedRect>)
+    normalizedRectWithImageOrientation:(UIImageOrientation)imageOrientation
+                             imageSize:(CGSize)imageSize
+                                 error:(NSError **)error;
+
+/**
+ * Creates a `NormalizedRect` from a region of interest and an image orientation, for a task that
+ * supports a region of interest, performing sanity checks on-the-fly. If the input region of
+ * interest equals `CGRectZero`, returns a default `NormalizedRect` covering the whole image with
+ * rotation set according to `imageOrientation`. Mirrored orientations
+ * (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
+ * `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error
+ * will be returned if `imageOrientation` is equal to any one of them.
+ *
+ * @param roi A `CGRect` specifying the region of interest. If the input region of interest equals
+ * `CGRectZero`, the returned `NormalizedRect` covers the whole image.
+ * @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the
+ * image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise.
+ * Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`,
+ * `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error
+ * will be returned if `imageOrientation` is equal to any one of them.
+ * @param imageSize A `CGSize` specifying the size of the image within which the normalized rect
+ * is calculated.
+ * @param error Pointer to the memory location where errors, if any, should be saved. If @c NULL,
+ * no error will be saved.
+ *
+ * @return An optional `NormalizedRect` from the given region of interest and image orientation.
  */
 - (std::optional<mediapipe::NormalizedRect>)
-    normalizedRectFromRegionOfInterest:(CGRect)roi
-                             imageSize:(CGSize)imageSize
+    normalizedRectWithRegionOfInterest:(CGRect)roi
                       imageOrientation:(UIImageOrientation)imageOrientation
-                            ROIAllowed:(BOOL)ROIAllowed
+                             imageSize:(CGSize)imageSize
                                  error:(NSError **)error;
-
 /**
  * A synchronous method to invoke the C++ task runner to process single image inputs. The call
  * blocks the current thread until a failure status or a successful result is returned.
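Note on the API split above: the single `ROIAllowed`-flagged entry point is replaced by two
explicit variants, so a caller can no longer pass an inconsistent `roi`/`ROIAllowed` pair. A
minimal sketch of how a task implementation picks between them; the call shapes mirror the
image-classifier and object-detector call sites later in this patch, while the enclosing method
and variable names are hypothetical:

    // In a vision task that supports a region of interest:
    std::optional<NormalizedRect> rectWithROI =
        [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
                                             imageOrientation:image.orientation
                                                    imageSize:CGSizeMake(image.width, image.height)
                                                        error:error];

    // In a vision task that does not support a region of interest:
    std::optional<NormalizedRect> rectWithoutROI =
        [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
                                                    imageSize:CGSizeMake(image.width, image.height)
                                                        error:error];

    // Either variant returns std::nullopt on a failed sanity check (e.g. a mirrored
    // orientation) and, when the out-parameter is non-NULL, saves the failure into *error.

The `.mm` diff that follows implements both variants as thin wrappers over the existing private
`ROIAllowed:`-based method.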
diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm
index 0089e516f..c1b5d0587 100644
--- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm
+++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm
@@ -91,7 +91,30 @@ static NSString *const kTaskPrefix = @"com.mediapipe.tasks.vision";
   return self;
 }
 
-- (std::optional<NormalizedRect>)normalizedRectFromRegionOfInterest:(CGRect)roi
+- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
+                                                   imageOrientation:
+                                                       (UIImageOrientation)imageOrientation
+                                                          imageSize:(CGSize)imageSize
+                                                              error:(NSError **)error {
+  return [self normalizedRectWithRegionOfInterest:roi
+                                        imageSize:imageSize
+                                 imageOrientation:imageOrientation
+                                       ROIAllowed:YES
+                                            error:error];
+}
+
+- (std::optional<NormalizedRect>)normalizedRectWithImageOrientation:
+                                     (UIImageOrientation)imageOrientation
+                                                          imageSize:(CGSize)imageSize
+                                                              error:(NSError **)error {
+  return [self normalizedRectWithRegionOfInterest:CGRectZero
+                                        imageSize:imageSize
+                                 imageOrientation:imageOrientation
+                                       ROIAllowed:NO
+                                            error:error];
+}
+
+- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
                                                           imageSize:(CGSize)imageSize
                                                    imageOrientation:
                                                        (UIImageOrientation)imageOrientation
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD b/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD
index efe33718f..78a07e17d 100644
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD
@@ -41,6 +41,7 @@ objc_library(
 
 objc_library(
     name = "MPPGestureRecognizer",
+    srcs = ["sources/MPPGestureRecognizer.mm"],
     hdrs = ["sources/MPPGestureRecognizer.h"],
     copts = [
         "-ObjC++",
@@ -51,7 +52,15 @@ objc_library(
     deps = [
         ":MPPGestureRecognizerOptions",
         ":MPPGestureRecognizerResult",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_graph",
+        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
+        "//mediapipe/tasks/ios/common/utils:NSStringHelpers",
+        "//mediapipe/tasks/ios/core:MPPTaskInfo",
         "//mediapipe/tasks/ios/core:MPPTaskOptions",
         "//mediapipe/tasks/ios/vision/core:MPPImage",
+        "//mediapipe/tasks/ios/vision/core:MPPVisionPacketCreator",
+        "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner",
+        "//mediapipe/tasks/ios/vision/gesture_recognizer/utils:MPPGestureRecognizerOptionsHelpers",
+        "//mediapipe/tasks/ios/vision/gesture_recognizer/utils:MPPGestureRecognizerResultHelpers",
     ],
 )
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
new file mode 100644
index 000000000..b722163b2
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
@@ -0,0 +1,273 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h"
+
+#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
+#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
+#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerOptions+Helpers.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h"
+
+namespace {
+using ::mediapipe::NormalizedRect;
+using ::mediapipe::Packet;
+using ::mediapipe::Timestamp;
+using ::mediapipe::tasks::core::PacketMap;
+using ::mediapipe::tasks::core::PacketsCallback;
+}  // namespace
+
+static NSString *const kImageTag = @"IMAGE";
+static NSString *const kImageInStreamName = @"image_in";
+static NSString *const kNormRectTag = @"NORM_RECT";
+static NSString *const kNormRectInStreamName = @"norm_rect_in";
+static NSString *const kImageOutStreamName = @"image_out";
+static NSString *const kLandmarksTag = @"LANDMARKS";
+static NSString *const kLandmarksOutStreamName = @"hand_landmarks";
+static NSString *const kWorldLandmarksTag = @"WORLD_LANDMARKS";
+static NSString *const kWorldLandmarksOutStreamName = @"world_hand_landmarks";
+static NSString *const kHandednessTag = @"HANDEDNESS";
+static NSString *const kHandednessOutStreamName = @"handedness";
+static NSString *const kHandGesturesTag = @"HAND_GESTURES";
+static NSString *const kHandGesturesOutStreamName = @"hand_gestures";
+static NSString *const kTaskGraphName =
+    @"mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph";
+static NSString *const kTaskName = @"gestureRecognizer";
+
+#define InputPacketMap(imagePacket, normalizedRectPacket)   \
+  {                                                         \
+    {kImageInStreamName.cppString, imagePacket}, {          \
+      kNormRectInStreamName.cppString, normalizedRectPacket \
+    }                                                       \
+  }
+
+@interface MPPGestureRecognizer () {
+  /** iOS Vision Task Runner */
+  MPPVisionTaskRunner *_visionTaskRunner;
+  dispatch_queue_t _callbackQueue;
+}
+@property(nonatomic, weak) id<MPPGestureRecognizerLiveStreamDelegate>
+    gestureRecognizerLiveStreamDelegate;
+@end
+
+@implementation MPPGestureRecognizer
+
+- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOutputPacketMap:
+    (PacketMap &)outputPacketMap {
+  return [MPPGestureRecognizerResult
+      gestureRecognizerResultWithHandGesturesPacket:outputPacketMap[kHandGesturesOutStreamName
+                                                                        .cppString]
+                                   handednessPacket:outputPacketMap[kHandednessOutStreamName
+                                                                        .cppString]
+                                handLandmarksPacket:outputPacketMap[kLandmarksOutStreamName
+                                                                        .cppString]
+                               worldLandmarksPacket:outputPacketMap[kWorldLandmarksOutStreamName
+                                                                        .cppString]];
+}
+
+- (void)processLiveStreamResult:(absl::StatusOr<PacketMap>)liveStreamResult {
+  if (![self.gestureRecognizerLiveStreamDelegate
+          respondsToSelector:@selector(gestureRecognizer:
+                                 didFinishRecognitionWithResult:timestampInMilliseconds:error:)]) {
+    return;
+  }
+
+  NSError *callbackError = nil;
+  if (![MPPCommonUtils checkCppError:liveStreamResult.status() toError:&callbackError]) {
+    dispatch_async(_callbackQueue, ^{
+      [self.gestureRecognizerLiveStreamDelegate gestureRecognizer:self
+                                    didFinishRecognitionWithResult:nil
+                                           timestampInMilliseconds:Timestamp::Unset().Value()
+                                                             error:callbackError];
+    });
+    return;
+  }
+
+  PacketMap &outputPacketMap = liveStreamResult.value();
+  if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) {
return;
+  }
+
+  MPPGestureRecognizerResult *result =
+      [self gestureRecognizerResultWithOutputPacketMap:outputPacketMap];
+
+  NSInteger timeStampInMilliseconds =
+      outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() /
+      kMicroSecondsPerMilliSecond;
+  dispatch_async(_callbackQueue, ^{
+    [self.gestureRecognizerLiveStreamDelegate gestureRecognizer:self
+                                  didFinishRecognitionWithResult:result
+                                         timestampInMilliseconds:timeStampInMilliseconds
+                                                           error:callbackError];
+  });
+}
+
+- (instancetype)initWithOptions:(MPPGestureRecognizerOptions *)options error:(NSError **)error {
+  self = [super init];
+  if (self) {
+    MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc]
+        initWithTaskGraphName:kTaskGraphName
+                 inputStreams:@[
+                   [NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName],
+                   [NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectInStreamName]
+                 ]
+                outputStreams:@[
+                  [NSString stringWithFormat:@"%@:%@", kLandmarksTag, kLandmarksOutStreamName],
+                  [NSString
+                      stringWithFormat:@"%@:%@", kWorldLandmarksTag, kWorldLandmarksOutStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kHandednessTag, kHandednessOutStreamName],
+                  [NSString
+                      stringWithFormat:@"%@:%@", kHandGesturesTag, kHandGesturesOutStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName]
+                ]
+                  taskOptions:options
+           enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream
+                        error:error];
+
+    if (!taskInfo) {
+      return nil;
+    }
+
+    PacketsCallback packetsCallback = nullptr;
+
+    if (options.gestureRecognizerLiveStreamDelegate) {
+      _gestureRecognizerLiveStreamDelegate = options.gestureRecognizerLiveStreamDelegate;
+
+      // Create a private serial dispatch queue in which the delegate method will be called
+      // asynchronously. This is to ensure that if the client performs a long running operation in
+      // the delegate method, the queue on which the C++ callback is invoked is not blocked and is
+      // freed up to continue with its operations.
+      _callbackQueue = dispatch_queue_create(
+          [MPPVisionTaskRunner uniqueDispatchQueueNameWithSuffix:kTaskName], NULL);
+
+      // Capture `self` weakly to avoid keeping `self` in memory
+      // and causing a retain cycle after `self` is set to `nil`.
+      MPPGestureRecognizer *__weak weakSelf = self;
+      packetsCallback = [=](absl::StatusOr<PacketMap> liveStreamResult) {
+        [weakSelf processLiveStreamResult:liveStreamResult];
+      };
+    }
+
+    _visionTaskRunner =
+        [[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
+                                                       runningMode:options.runningMode
+                                                   packetsCallback:std::move(packetsCallback)
+                                                             error:error];
+    if (!_visionTaskRunner) {
+      return nil;
+    }
+  }
+  return self;
+}
+
+- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
+  MPPGestureRecognizerOptions *options = [[MPPGestureRecognizerOptions alloc] init];
+
+  options.baseOptions.modelAssetPath = modelPath;
+
+  return [self initWithOptions:options error:error];
+}
+
+- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOptionalOutputPacketMap:
+    (std::optional<PacketMap> &)outputPacketMap {
+  if (!outputPacketMap.has_value()) {
+    return nil;
+  }
+  MPPGestureRecognizerResult *result =
+      [self gestureRecognizerResultWithOutputPacketMap:outputPacketMap.value()];
+  return result;
+}
+
+- (nullable MPPGestureRecognizerResult *)recognizeImage:(MPPImage *)image error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
+                                                  imageSize:CGSizeMake(image.width, image.height)
+                                                      error:error];
+  if (!rect.has_value()) {
+    return nil;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
+  if (imagePacket.IsEmpty()) {
+    return nil;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processImagePacketMap:inputPacketMap error:error];
+  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
+}
+
+- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
+                               timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                                 error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
+                                                  imageSize:CGSizeMake(image.width, image.height)
+                                                      error:error];
+  if (!rect.has_value()) {
+    return std::nullopt;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
+                                                timestampInMilliseconds:timestampInMilliseconds
+                                                                  error:error];
+  if (imagePacket.IsEmpty()) {
+    return std::nullopt;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
+                                     timestampInMilliseconds:timestampInMilliseconds];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+  return inputPacketMap;
+}
+
+- (nullable MPPGestureRecognizerResult *)recognizeVideoFrame:(MPPImage *)image
+                                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                                        error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap =
+      [self inputPacketMapWithMPPImage:image
+               timestampInMilliseconds:timestampInMilliseconds
+                                 error:error];
+  if (!inputPacketMap.has_value()) {
+    return nil;
+  }
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
+
+  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
+}
+
+- (BOOL)recognizeAsyncImage:(MPPImage *)image
+    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                      error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
timestampInMilliseconds:timestampInMilliseconds
+                                                                       error:error];
+  if (!inputPacketMap.has_value()) {
+    return NO;
+  }
+
+  return [_visionTaskRunner processLiveStreamPacketMap:inputPacketMap.value() error:error];
+}
+
+@end
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h
index 649c11c8a..5e75febf3 100644
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h
@@ -14,6 +14,8 @@
 
 #import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h"
 
+#include "mediapipe/framework/formats/classification.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/packet.h"
 
 NS_ASSUME_NONNULL_BEGIN
@@ -23,14 +25,14 @@ static const int kMicroSecondsPerMilliSecond = 1000;
 @interface MPPGestureRecognizerResult (Helpers)
 
 /**
- * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world
+ * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world
  * landmarks packets.
  *
- * @param handGesturesPacket a MediaPipe packet wrapping a`std::vector<ClassificationListProto>`.
- * @param handednessPacket a MediaPipe packet wrapping a`std::vector<ClassificationListProto>`.
- * @param handLandmarksPacket a MediaPipe packet wrapping
+ * @param handGesturesPacket A MediaPipe packet wrapping a `std::vector<ClassificationListProto>`.
+ * @param handednessPacket A MediaPipe packet wrapping a `std::vector<ClassificationListProto>`.
+ * @param handLandmarksPacket A MediaPipe packet wrapping
  * a `std::vector<NormalizedLandmarkListProto>`.
- * @param handLandmarksPacket a MediaPipe packet wrapping a`std::vector<LandmarkListProto>`.
+ * @param worldLandmarksPacket A MediaPipe packet wrapping a `std::vector<LandmarkListProto>`.
 *
 * @return An `MPPGestureRecognizerResult` object that contains the hand gesture recognition
 * results.
@@ -41,6 +43,30 @@ static const int kMicroSecondsPerMilliSecond = 1000;
                 handLandmarksPacket:(const mediapipe::Packet &)handLandmarksPacket
                worldLandmarksPacket:(const mediapipe::Packet &)worldLandmarksPacket;
 
+/**
+ * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world
+ * landmarks proto vectors.
+ *
+ * @param handGesturesProto A vector of `ClassificationListProto` protos.
+ * @param handednessProto A vector of `ClassificationListProto` protos.
+ * @param handLandmarksProto A vector of `NormalizedLandmarkListProto` protos.
+ * @param worldLandmarksProto A vector of `LandmarkListProto` protos.
+ *
+ * @return An `MPPGestureRecognizerResult` object that contains the hand gesture recognition
+ * results.
+ */
++ (MPPGestureRecognizerResult *)
+    gestureRecognizerResultWithHandGesturesProto:
+        (const std::vector<mediapipe::ClassificationList> &)handGesturesProto
+                                 handednessProto:
+                                     (const std::vector<mediapipe::ClassificationList> &)
+                                         handednessProto
+                              handLandmarksProto:
+                                  (const std::vector<mediapipe::NormalizedLandmarkList> &)
+                                      handLandmarksProto
+                             worldLandmarksProto:
+                                 (const std::vector<mediapipe::LandmarkList> &)worldLandmarksProto
+                         timestampInMilliSeconds:(NSInteger)timestampInMilliseconds;
 @end
 
 NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm
index 70773a940..a775a961e 100644
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm
@@ -17,10 +17,6 @@
 #import "mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h"
 #import "mediapipe/tasks/ios/components/containers/utils/sources/MPPLandmark+Helpers.h"
 
-#include "mediapipe/framework/formats/classification.pb.h"
-#include "mediapipe/framework/formats/landmark.pb.h"
-#include "mediapipe/framework/packet.h"
-
 namespace {
 using ClassificationListProto = ::mediapipe::ClassificationList;
 using LandmarkListProto = ::mediapipe::LandmarkList;
@@ -28,52 +24,47 @@ using NormalizedLandmarkListProto = ::mediapipe::NormalizedLandmarkList;
 using ::mediapipe::Packet;
 }  // namespace
 
+static const NSInteger kDefaultGestureIndex = -1;
+
 @implementation MPPGestureRecognizerResult (Helpers)
 
++ (MPPGestureRecognizerResult *)emptyGestureRecognizerResultWithTimestampInMilliseconds:
+    (NSInteger)timestampInMilliseconds {
+  return [[MPPGestureRecognizerResult alloc] initWithGestures:@[]
+                                                    handedness:@[]
+                                                     landmarks:@[]
+                                                worldLandmarks:@[]
+                                       timestampInMilliseconds:timestampInMilliseconds];
+}
+
 + (MPPGestureRecognizerResult *)
-    gestureRecognizerResultWithHandGesturesPacket:(const Packet &)handGesturesPacket
-                                 handednessPacket:(const Packet &)handednessPacket
-                              handLandmarksPacket:(const Packet &)handLandmarksPacket
-                             worldLandmarksPacket:(const Packet &)worldLandmarksPacket {
-  NSInteger timestampInMilliseconds =
-      (NSInteger)(handGesturesPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond);
-
-  if (handGesturesPacket.IsEmpty()) {
-    return [[MPPGestureRecognizerResult alloc] initWithGestures:@[]
-                                                      handedness:@[]
-                                                       landmarks:@[]
-                                                  worldLandmarks:@[]
-                                         timestampInMilliseconds:timestampInMilliseconds];
-  }
-
-  if (!handGesturesPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
-      !handednessPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
-      !handLandmarksPacket.ValidateAsType<std::vector<NormalizedLandmarkListProto>>().ok() ||
-      !worldLandmarksPacket.ValidateAsType<std::vector<LandmarkListProto>>().ok()) {
-    return nil;
-  }
-
-  const std::vector<ClassificationListProto> &handGesturesClassificationListProtos =
-      handGesturesPacket.Get<std::vector<ClassificationListProto>>();
+    gestureRecognizerResultWithHandGesturesProto:
+        (const std::vector<ClassificationListProto> &)handGesturesProto
+                                 handednessProto:
+                                     (const std::vector<ClassificationListProto> &)handednessProto
+                              handLandmarksProto:
+                                  (const std::vector<NormalizedLandmarkListProto> &)
+                                      handLandmarksProto
+                             worldLandmarksProto:
+                                 (const std::vector<LandmarkListProto> &)worldLandmarksProto
+                         timestampInMilliSeconds:(NSInteger)timestampInMilliseconds {
   NSMutableArray<NSMutableArray<MPPCategory *> *> *multiHandGestures =
-      [NSMutableArray arrayWithCapacity:(NSUInteger)handGesturesClassificationListProtos.size()];
+      [NSMutableArray arrayWithCapacity:(NSUInteger)handGesturesProto.size()];
 
-  for (const auto &classificationListProto : handGesturesClassificationListProtos) {
+  for (const auto &classificationListProto : handGesturesProto) {
     NSMutableArray<MPPCategory *> *gestures
= [NSMutableArray
         arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()];
     for (const auto &classificationProto : classificationListProto.classification()) {
-      MPPCategory *category = [MPPCategory categoryWithProto:classificationProto];
+      MPPCategory *category = [MPPCategory categoryWithProto:classificationProto
+                                                       index:kDefaultGestureIndex];
       [gestures addObject:category];
     }
     [multiHandGestures addObject:gestures];
   }
 
-  const std::vector<ClassificationListProto> &handednessClassificationListProtos =
-      handednessPacket.Get<std::vector<ClassificationListProto>>();
   NSMutableArray<NSMutableArray<MPPCategory *> *> *multiHandHandedness =
-      [NSMutableArray arrayWithCapacity:(NSUInteger)handednessClassificationListProtos.size()];
+      [NSMutableArray arrayWithCapacity:(NSUInteger)handednessProto.size()];
 
-  for (const auto &classificationListProto : handednessClassificationListProtos) {
+  for (const auto &classificationListProto : handednessProto) {
     NSMutableArray<MPPCategory *> *handedness = [NSMutableArray
         arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()];
     for (const auto &classificationProto : classificationListProto.classification()) {
@@ -83,12 +74,10 @@ using ::mediapipe::Packet;
     [multiHandHandedness addObject:handedness];
   }
 
-  const std::vector<NormalizedLandmarkListProto> &handLandmarkListProtos =
-      handLandmarksPacket.Get<std::vector<NormalizedLandmarkListProto>>();
   NSMutableArray<NSMutableArray<MPPNormalizedLandmark *> *> *multiHandLandmarks =
-      [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarkListProtos.size()];
+      [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarksProto.size()];
 
-  for (const auto &handLandmarkListProto : handLandmarkListProtos) {
+  for (const auto &handLandmarkListProto : handLandmarksProto) {
     NSMutableArray<MPPNormalizedLandmark *> *handLandmarks =
         [NSMutableArray arrayWithCapacity:(NSUInteger)handLandmarkListProto.landmark().size()];
     for (const auto &normalizedLandmarkProto : handLandmarkListProto.landmark()) {
@@ -99,12 +88,10 @@ using ::mediapipe::Packet;
     [multiHandLandmarks addObject:handLandmarks];
   }
 
-  const std::vector<LandmarkListProto> &worldLandmarkListProtos =
-      worldLandmarksPacket.Get<std::vector<LandmarkListProto>>();
   NSMutableArray<NSMutableArray<MPPLandmark *> *> *multiHandWorldLandmarks =
-      [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarkListProtos.size()];
+      [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarksProto.size()];
 
-  for (const auto &worldLandmarkListProto : worldLandmarkListProtos) {
+  for (const auto &worldLandmarkListProto : worldLandmarksProto) {
     NSMutableArray<MPPLandmark *> *worldLandmarks =
         [NSMutableArray arrayWithCapacity:(NSUInteger)worldLandmarkListProto.landmark().size()];
     for (const auto &landmarkProto : worldLandmarkListProto.landmark()) {
@@ -124,4 +111,37 @@ using ::mediapipe::Packet;
   return gestureRecognizerResult;
 }
 
++ (MPPGestureRecognizerResult *)
+    gestureRecognizerResultWithHandGesturesPacket:(const Packet &)handGesturesPacket
+                                 handednessPacket:(const Packet &)handednessPacket
+                              handLandmarksPacket:(const Packet &)handLandmarksPacket
+                             worldLandmarksPacket:(const Packet &)worldLandmarksPacket {
+  NSInteger timestampInMilliseconds =
+      (NSInteger)(handGesturesPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond);
+
+  if (handGesturesPacket.IsEmpty()) {
+    return [MPPGestureRecognizerResult
+        emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds];
+  }
+
+  if (!handGesturesPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
+      !handednessPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
+      !handLandmarksPacket.ValidateAsType<std::vector<NormalizedLandmarkListProto>>().ok() ||
+      !worldLandmarksPacket.ValidateAsType<std::vector<LandmarkListProto>>().ok()) {
+    return [MPPGestureRecognizerResult
+        emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds];
+  }
+
+  return [MPPGestureRecognizerResult
gestureRecognizerResultWithHandGesturesProto:handGesturesPacket
+                                                       .Get<std::vector<ClassificationListProto>>()
+                                 handednessProto:handednessPacket
+                                                     .Get<std::vector<ClassificationListProto>>()
+                              handLandmarksProto:handLandmarksPacket.Get<
+                                                     std::vector<NormalizedLandmarkListProto>>()
+                             worldLandmarksProto:worldLandmarksPacket
+                                                     .Get<std::vector<LandmarkListProto>>()
+                         timestampInMilliSeconds:timestampInMilliseconds];
+}
+
 @end
diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm
index 3ad8d0ded..10f6fc267 100644
--- a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm
+++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm
@@ -166,10 +166,9 @@ static const int kMicroSecondsPerMilliSecond = 1000;
                                       regionOfInterest:(CGRect)roi
                                                  error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
-                                                   imageSize:CGSizeMake(image.width, image.height)
+      [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
                                             imageOrientation:image.orientation
-                                                  ROIAllowed:YES
+                                                   imageSize:CGSizeMake(image.width, image.height)
                                                        error:error];
   if (!rect.has_value()) {
     return nil;
@@ -196,15 +195,18 @@ static const int kMicroSecondsPerMilliSecond = 1000;
         outputPacketMap.value()[kClassificationsStreamName.cppString]];
 }
 
+- (nullable MPPImageClassifierResult *)classifyImage:(MPPImage *)image error:(NSError **)error {
+  return [self classifyImage:image regionOfInterest:CGRectZero error:error];
+}
+
 - (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
                                timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                       regionOfInterest:(CGRect)roi
                                                  error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
-                                                   imageSize:CGSizeMake(image.width, image.height)
+      [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
                                             imageOrientation:image.orientation
-                                                  ROIAllowed:YES
+                                                   imageSize:CGSizeMake(image.width, image.height)
                                                        error:error];
   if (!rect.has_value()) {
     return std::nullopt;
@@ -225,10 +227,6 @@ static const int kMicroSecondsPerMilliSecond = 1000;
   return inputPacketMap;
 }
 
-- (nullable MPPImageClassifierResult *)classifyImage:(MPPImage *)image error:(NSError **)error {
-  return [self classifyImage:image regionOfInterest:CGRectZero error:error];
-}
-
 - (nullable MPPImageClassifierResult *)classifyVideoFrame:(MPPImage *)image
                                    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                           regionOfInterest:(CGRect)roi
diff --git a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
index 27b196d7f..b7924996d 100644
--- a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
+++ b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
@@ -160,10 +160,8 @@ static NSString *const kTaskName = @"objectDetector";
                timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                  error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
                                                    imageSize:CGSizeMake(image.width, image.height)
-                                            imageOrientation:image.orientation
-                                                  ROIAllowed:NO
                                                        error:error];
   if (!rect.has_value()) {
     return std::nullopt;
@@ -188,10 +186,8 @@ static NSString *const kTaskName = @"objectDetector";
                               regionOfInterest:(CGRect)roi
                                          error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
imageSize:CGSizeMake(image.width, image.height) - imageOrientation:image.orientation - ROIAllowed:NO error:error]; if (!rect.has_value()) { return nil; diff --git a/mediapipe/tasks/testdata/vision/BUILD b/mediapipe/tasks/testdata/vision/BUILD index 632e8aa4e..f6153dd12 100644 --- a/mediapipe/tasks/testdata/vision/BUILD +++ b/mediapipe/tasks/testdata/vision/BUILD @@ -54,6 +54,7 @@ mediapipe_files(srcs = [ "hand_landmark_full.tflite", "hand_landmark_lite.tflite", "hand_landmarker.task", + "gesture_recognizer.task", "left_hands.jpg", "left_hands_rotated.jpg", "mobilenet_v1_0.25_192_quantized_1_default_1.tflite", @@ -104,7 +105,6 @@ exports_files( "expected_right_down_hand_landmarks.prototxt", "expected_right_up_hand_landmarks.prototxt", "face_geometry_expected_out.pbtxt", - "gesture_recognizer.task", "portrait_expected_detection.pbtxt", "portrait_expected_face_geometry.pbtxt", "portrait_rotated_expected_detection.pbtxt",
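
For reviewers, a minimal end-to-end sketch of the image-mode API this patch adds. The
`MPPGestureRecognizerOptions`, `initWithOptions:error:`, `recognizeImage:error:`, and
`result.gestures` names come from this diff; the `MPPImage` `initWithUIImage:error:` initializer
is assumed from the existing iOS task library, and the model path and `uiImage` variable are
placeholders:

    MPPGestureRecognizerOptions *options = [[MPPGestureRecognizerOptions alloc] init];
    options.baseOptions.modelAssetPath = @"/path/to/gesture_recognizer.task";

    NSError *error = nil;
    MPPGestureRecognizer *recognizer =
        [[MPPGestureRecognizer alloc] initWithOptions:options error:&error];

    MPPImage *image = [[MPPImage alloc] initWithUIImage:uiImage error:&error];
    MPPGestureRecognizerResult *result = [recognizer recognizeImage:image error:&error];

    // Each detected hand gets its own list of gestures. With this patch, every recognized
    // gesture carries the default category index -1 (see kDefaultGestureIndex above), which is
    // what the tests assert via kGestureExpectedIndex.
    for (NSArray<MPPCategory *> *handGestures in result.gestures) {
      NSLog(@"Top gesture: %@", handGestures.firstObject.categoryName);
    }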