From 9483ac4651ae1aa4a9f05752eab4536a3f0f6d91 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Thu, 25 May 2023 20:42:24 +0530
Subject: [PATCH 1/5] Updated iOS gesture recognizer results to initialize
 hand gestures with a default index

---
 .../utils/sources/MPPCategory+Helpers.h       | 22 ++++++++++++++++++
 .../utils/sources/MPPCategory+Helpers.mm      |  9 ++++++--
 .../MPPGestureRecognizerResult+Helpers.h      | 12 ++++++----
 .../MPPGestureRecognizerResult+Helpers.mm     | 23 +++++++++++++------
 4 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h
index 9a11d1e29..9ad958479 100644
--- a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h
+++ b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.h
@@ -19,8 +19,30 @@ NS_ASSUME_NONNULL_BEGIN
 
 @interface MPPCategory (Helpers)
 
+/**
+ * Creates an `MPPCategory` with the given MediaPipe `Classification` proto.
+ *
+ * @param classificationProto A MediaPipe `Classification` proto.
+ * @return An `MPPCategory` object created from the given MediaPipe `Classification` proto.
+ */
 + (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto;
 
+/**
+ * Creates an `MPPCategory` with the given MediaPipe `Classification` proto and the given category
+ * index. The resulting `MPPCategory` is created with the given category index instead of the
+ * category index specified in the `Classification` proto. This method is useful for tasks like
+ * the gesture recognizer which always return a default index for the recognized gestures.
+ *
+ * @param classificationProto A MediaPipe `Classification` proto.
+ * @param index The index to be used for creating the `MPPCategory` instead of the category index
+ * specified in the `Classification` proto.
+ *
+ * @return An `MPPCategory` object created with the given category index and the remaining fields
+ * of the given MediaPipe `Classification` proto.
+ */
++ (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto
+                             index:(NSInteger)index;
+
 @end
 
 NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm
index 12cfa5627..542b8c41d 100644
--- a/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm
+++ b/mediapipe/tasks/ios/components/containers/utils/sources/MPPCategory+Helpers.mm
@@ -21,7 +21,8 @@ using ClassificationProto = ::mediapipe::Classification;
 
 @implementation MPPCategory (Helpers)
 
-+ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto {
++ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto
+                             index:(NSInteger)index {
   NSString *categoryName;
   NSString *displayName;
 
@@ -33,10 +34,14 @@ using ClassificationProto = ::mediapipe::Classification;
     displayName = [NSString stringWithCppString:classificationProto.display_name()];
   }
 
-  return [[MPPCategory alloc] initWithIndex:classificationProto.index()
+  return [[MPPCategory alloc] initWithIndex:index
                                       score:classificationProto.score()
                                categoryName:categoryName
                                 displayName:displayName];
 }
 
++ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto {
+  return [MPPCategory categoryWithProto:classificationProto index:classificationProto.index()];
+}
+
 @end
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h
index 649c11c8a..6b0f8bf81 100644
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h
@@ -14,6 +14,8 @@
 
 #import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h"
 
+#include "mediapipe/framework/formats/classification.pb.h"
+#include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/packet.h"
 
 NS_ASSUME_NONNULL_BEGIN
@@ -23,14 +25,14 @@ static const int kMicroSecondsPerMilliSecond = 1000;
 @interface MPPGestureRecognizerResult (Helpers)
 
 /**
- * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world
+ * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world
  * landmarks packets.
  *
- * @param handGesturesPacket a MediaPipe packet wrapping a `std::vector<ClassificationListProto>`.
- * @param handednessPacket a MediaPipe packet wrapping a `std::vector<ClassificationListProto>`.
- * @param handLandmarksPacket a MediaPipe packet wrapping
- * a `std::vector<NormalizedLandmarkListProto>`.
- * @param handLandmarksPacket a MediaPipe packet wrapping a `std::vector<LandmarkListProto>`.
+ * @param handGesturesPacket A MediaPipe packet wrapping a `std::vector<ClassificationListProto>`.
+ * @param handednessPacket A MediaPipe packet wrapping a `std::vector<ClassificationListProto>`.
+ * @param handLandmarksPacket A MediaPipe packet wrapping
+ * a `std::vector<NormalizedLandmarkListProto>`.
+ * @param worldLandmarksPacket A MediaPipe packet wrapping a `std::vector<LandmarkListProto>`.
  *
  * @return An `MPPGestureRecognizerResult` object that contains the hand gesture recognition
  * results.
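
The following snippet is illustrative, not part of the patch: it sketches how the two
`MPPCategory` helpers added earlier in this patch are expected to behave. The
`Classification` setters are the standard protobuf accessors; the values are made up.

  mediapipe::Classification classificationProto;
  classificationProto.set_index(3);
  classificationProto.set_score(0.9f);
  classificationProto.set_label("Thumb_Up");

  // Uses the index stored in the proto (3 here).
  MPPCategory *category = [MPPCategory categoryWithProto:classificationProto];

  // Overrides the proto index; the gesture recognizer passes a fixed
  // default index (-1) for every recognized gesture.
  MPPCategory *gesture = [MPPCategory categoryWithProto:classificationProto index:-1];
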
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm
index 70773a940..b2e7ccded 100644
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.mm
@@ -21,6 +21,8 @@
 #include "mediapipe/framework/formats/landmark.pb.h"
 #include "mediapipe/framework/packet.h"
 
+static const NSInteger kDefaultGestureIndex = -1;
+
 namespace {
 using ClassificationListProto = ::mediapipe::ClassificationList;
 using LandmarkListProto = ::mediapipe::LandmarkList;
@@ -30,6 +32,15 @@ using ::mediapipe::Packet;
 
 @implementation MPPGestureRecognizerResult (Helpers)
 
++ (MPPGestureRecognizerResult *)emptyGestureRecognizerResultWithTimestampInMilliseconds:
+    (NSInteger)timestampInMilliseconds {
+  return [[MPPGestureRecognizerResult alloc] initWithGestures:@[]
+                                                    handedness:@[]
+                                                     landmarks:@[]
+                                                worldLandmarks:@[]
+                                       timestampInMilliseconds:timestampInMilliseconds];
+}
+
 + (MPPGestureRecognizerResult *)
     gestureRecognizerResultWithHandGesturesPacket:(const Packet &)handGesturesPacket
                                  handednessPacket:(const Packet &)handednessPacket
@@ -39,18 +50,16 @@
       (NSInteger)(handGesturesPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond);
 
   if (handGesturesPacket.IsEmpty()) {
-    return [[MPPGestureRecognizerResult alloc] initWithGestures:@[]
-                                                      handedness:@[]
-                                                       landmarks:@[]
-                                                  worldLandmarks:@[]
-                                         timestampInMilliseconds:timestampInMilliseconds];
+    return [MPPGestureRecognizerResult
+        emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds];
   }
 
   if (!handGesturesPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
       !handednessPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
       !handLandmarksPacket.ValidateAsType<std::vector<NormalizedLandmarkListProto>>().ok() ||
       !worldLandmarksPacket.ValidateAsType<std::vector<LandmarkListProto>>().ok()) {
-    return nil;
+    return [MPPGestureRecognizerResult
+        emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds];
   }
 
   const std::vector<ClassificationListProto> &handGesturesClassificationListProtos =
@@ -62,7 +71,7 @@ using ::mediapipe::Packet;
     NSMutableArray<MPPCategory *> *gestures = [NSMutableArray
         arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()];
     for (const auto &classificationProto : classificationListProto.classification()) {
-      MPPCategory *category = [MPPCategory categoryWithProto:classificationProto];
+      MPPCategory *category = [MPPCategory categoryWithProto:classificationProto index:kDefaultGestureIndex];
       [gestures addObject:category];
     }
     [multiHandGestures addObject:gestures];
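
Taken together, patch 1 means gesture categories always carry a sentinel index, and
malformed or empty packets now yield an empty timestamped result instead of nil. A
hedged consumer-side sketch (accessor names follow the initializer above; the logging
is illustrative):

  void LogTopGestures(MPPGestureRecognizerResult *result) {
    for (NSArray<MPPCategory *> *handGestures in result.gestures) {
      MPPCategory *topGesture = handGestures.firstObject;
      // topGesture.index is kDefaultGestureIndex (-1); identify gestures by
      // categoryName rather than by index.
      NSLog(@"Gesture: %@ (score: %.2f)", topGesture.categoryName, topGesture.score);
    }
  }
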
From e6fd39b3eed179c031d1825c48e93610f60eea7d Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Thu, 25 May 2023 20:43:51 +0530
Subject: [PATCH 2/5] Updated the vision task runner to split the method that
 creates normalized rect based on ROI

---
 .../vision/core/sources/MPPVisionTaskRunner.h | 51 +++++++++++++------
 .../core/sources/MPPVisionTaskRunner.mm       | 25 ++++++++++++++++++++++++-
 .../sources/MPPImageClassifier.mm             | 18 ++++++++----------
 .../sources/MPPObjectDetector.mm              |  8 ++------
 4 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h
index 318b24051..a79302446 100644
--- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h
+++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h
@@ -58,38 +58,57 @@ NS_ASSUME_NONNULL_BEGIN
                         error:(NSError **)error NS_DESIGNATED_INITIALIZER;
 
 /**
- * Creates a `NormalizedRect` from a region of interest and an image orientation, performing
- * sanity checks on-the-fly.
- * If the input region of interest equals `CGRectZero`, returns a default `NormalizedRect` covering
- * the whole image with rotation set according `imageOrientation`. If `ROIAllowed` is NO, an error
- * will be returned if the input region of interest is not equal to `CGRectZero`. Mirrored
- * orientations (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
+ * Creates a `NormalizedRect` from an image orientation for a task which does not support ROI,
+ * performing sanity checks on-the-fly. Mirrored orientations
+ * (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
  * `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error
  * will be returned if `imageOrientation` is equal to any one of them.
  *
- * @param roi A `CGRect` specifying the region of interest. If the input region of interest equals
- * `CGRectZero`, the returned `NormalizedRect` covers the whole image. Make sure that `roi` equals
- * `CGRectZero` if `ROIAllowed` is NO. Otherwise, an error will be returned.
- * @param imageSize A `CGSize` specifying the size of the image within which normalized rect is
- * calculated.
  * @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the
  * image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise.
  * Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`,
  * `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error
  * will be returned if `imageOrientation` is equal to any one of them.
- * @param ROIAllowed Indicates if the `roi` field is allowed to be a value other than `CGRectZero`.
+ * @param imageSize A `CGSize` specifying the size of the image within which the normalized rect
+ * is calculated.
+ * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
+ * error will be saved.
+ *
+ * @return An optional `NormalizedRect` from the given image orientation.
+ */
+- (std::optional<mediapipe::NormalizedRect>)normalizedRectWithImageOrientation:
+                                                (UIImageOrientation)imageOrientation
+                                                                      imageSize:(CGSize)imageSize
+                                                                          error:(NSError **)error;
+
+/**
+ * Creates a `NormalizedRect` from an ROI and an image orientation for a task which supports ROI,
+ * performing sanity checks on-the-fly. If the input region of interest equals `CGRectZero`,
+ * returns a default `NormalizedRect` covering the whole image with rotation set according to
+ * `imageOrientation`. Mirrored orientations
+ * (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
+ * `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error
+ * will be returned if `imageOrientation` is equal to any one of them.
+ *
+ * @param roi A `CGRect` specifying the region of interest. If the input region of interest equals
+ * `CGRectZero`, the returned `NormalizedRect` covers the whole image.
+ * @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the
+ * image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise.
+ * Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`,
+ * `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error
+ * will be returned if `imageOrientation` is equal to any one of them.
+ * @param imageSize A `CGSize` specifying the size of the image within which the normalized rect
+ * is calculated.
  * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
  * error will be saved.
 *
  * @return An optional `NormalizedRect` from the given region of interest and image orientation.
  */
 - (std::optional<mediapipe::NormalizedRect>)
-    normalizedRectFromRegionOfInterest:(CGRect)roi
-                             imageSize:(CGSize)imageSize
+    normalizedRectWithRegionOfInterest:(CGRect)roi
                       imageOrientation:(UIImageOrientation)imageOrientation
-                            ROIAllowed:(BOOL)ROIAllowed
+                             imageSize:(CGSize)imageSize
                                  error:(NSError **)error;
-
 /**
  * A synchronous method to invoke the C++ task runner to process single image inputs. The call
  * blocks the current thread until a failure status or a successful result is returned.
diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm
index 0089e516f..c1b5d0587 100644
--- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm
+++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm
@@ -91,7 +91,30 @@ static NSString *const kTaskPrefix = @"com.mediapipe.tasks.vision";
   return self;
 }
 
-- (std::optional<NormalizedRect>)normalizedRectFromRegionOfInterest:(CGRect)roi
+- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
+                                                   imageOrientation:
+                                                       (UIImageOrientation)imageOrientation
+                                                          imageSize:(CGSize)imageSize
+                                                              error:(NSError **)error {
+  return [self normalizedRectWithRegionOfInterest:roi
+                                        imageSize:imageSize
+                                 imageOrientation:imageOrientation
+                                       ROIAllowed:YES
+                                            error:error];
+}
+
+- (std::optional<NormalizedRect>)normalizedRectWithImageOrientation:
+                                     (UIImageOrientation)imageOrientation
+                                                           imageSize:(CGSize)imageSize
+                                                               error:(NSError **)error {
+  return [self normalizedRectWithRegionOfInterest:CGRectZero
+                                        imageSize:imageSize
+                                 imageOrientation:imageOrientation
+                                       ROIAllowed:NO
+                                            error:error];
+}
+
+- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
                                                           imageSize:(CGSize)imageSize
                                                    imageOrientation:
                                                        (UIImageOrientation)imageOrientation
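
The private helper that both wrappers delegate to keeps the old `ROIAllowed:` parameter;
only its signature is visible in this hunk. Below is a minimal sketch of its documented
contract, assuming normalized ROI coordinates and eliding error population and the
orientation-to-rotation conversion; it is not the shipped implementation:

  - (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
                                                            imageSize:(CGSize)imageSize
                                                     imageOrientation:(UIImageOrientation)imageOrientation
                                                           ROIAllowed:(BOOL)ROIAllowed
                                                                error:(NSError **)error {
    // Tasks without ROI support must pass CGRectZero.
    if (!ROIAllowed && !CGRectEqualToRect(roi, CGRectZero)) {
      return std::nullopt;  // the real helper also populates *error here
    }
    // Mirrored orientations are rejected for every task.
    switch (imageOrientation) {
      case UIImageOrientationUpMirrored:
      case UIImageOrientationDownMirrored:
      case UIImageOrientationLeftMirrored:
      case UIImageOrientationRightMirrored:
        return std::nullopt;  // the real helper also populates *error here
      default:
        break;
    }
    // CGRectZero selects the whole image; otherwise use the caller's ROI.
    NormalizedRect normalizedRect;
    BOOL wholeImage = CGRectEqualToRect(roi, CGRectZero);
    normalizedRect.set_x_center(wholeImage ? 0.5f : CGRectGetMidX(roi));
    normalizedRect.set_y_center(wholeImage ? 0.5f : CGRectGetMidY(roi));
    normalizedRect.set_width(wholeImage ? 1.0f : CGRectGetWidth(roi));
    normalizedRect.set_height(wholeImage ? 1.0f : CGRectGetHeight(roi));
    // Rotation is derived from imageOrientation (degrees clockwise per the
    // header docs); that conversion is elided in this sketch.
    return normalizedRect;
  }
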
diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm
index 3ad8d0ded..10f6fc267 100644
--- a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm
+++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.mm
@@ -166,10 +166,9 @@ static const int kMicroSecondsPerMilliSecond = 1000;
                             regionOfInterest:(CGRect)roi
                                        error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
-                                                  imageSize:CGSizeMake(image.width, image.height)
+      [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
                                            imageOrientation:image.orientation
-                                                 ROIAllowed:YES
+                                                  imageSize:CGSizeMake(image.width, image.height)
                                                       error:error];
   if (!rect.has_value()) {
     return nil;
@@ -196,15 +195,18 @@ static const int kMicroSecondsPerMilliSecond = 1000;
         outputPacketMap.value()[kClassificationsStreamName.cppString]];
 }
 
+- (nullable MPPImageClassifierResult *)classifyImage:(MPPImage *)image error:(NSError **)error {
+  return [self classifyImage:image regionOfInterest:CGRectZero error:error];
+}
+
 - (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
                                timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                       regionOfInterest:(CGRect)roi
                                                  error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
-                                                  imageSize:CGSizeMake(image.width, image.height)
+      [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
                                            imageOrientation:image.orientation
-                                                 ROIAllowed:YES
+                                                  imageSize:CGSizeMake(image.width, image.height)
                                                       error:error];
   if (!rect.has_value()) {
     return std::nullopt;
@@ -225,10 +227,6 @@ static const int kMicroSecondsPerMilliSecond = 1000;
   return inputPacketMap;
 }
 
-- (nullable MPPImageClassifierResult *)classifyImage:(MPPImage *)image error:(NSError **)error {
-  return [self classifyImage:image regionOfInterest:CGRectZero error:error];
-}
-
 - (nullable MPPImageClassifierResult *)classifyVideoFrame:(MPPImage *)image
                                    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                           regionOfInterest:(CGRect)roi
diff --git a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
index 27b196d7f..b7924996d 100644
--- a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
+++ b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
@@ -160,10 +160,8 @@ static NSString *const kTaskName = @"objectDetector";
            timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                              error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
                                                   imageSize:CGSizeMake(image.width, image.height)
-                                           imageOrientation:image.orientation
-                                                 ROIAllowed:NO
                                                       error:error];
   if (!rect.has_value()) {
     return std::nullopt;
@@ -188,10 +186,8 @@ static NSString *const kTaskName = @"objectDetector";
                   regionOfInterest:(CGRect)roi
                              error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
                                                   imageSize:CGSizeMake(image.width, image.height)
-                                           imageOrientation:image.orientation
-                                                 ROIAllowed:NO
                                                       error:error];
   if (!rect.has_value()) {
     return nil;
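
Caller-visible effect of the split, sketched with the image classifier. Both selectors
appear in the diff above; `image` and `imageClassifier` are assumed to already exist,
and the ROI is assumed to use normalized coordinates:

  // Explicit region of interest: only ROI-capable tasks accept this.
  CGRect roi = CGRectMake(0.25, 0.25, 0.5, 0.5);
  MPPImageClassifierResult *roiResult = [imageClassifier classifyImage:image
                                                      regionOfInterest:roi
                                                                 error:nil];

  // Whole-image convenience overload; forwards CGRectZero internally.
  MPPImageClassifierResult *fullResult = [imageClassifier classifyImage:image error:nil];
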
From 52f3333cc1ed3f77001fadbc3cacb5444d013343 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Thu, 25 May 2023 20:44:16 +0530
Subject: [PATCH 3/5] Added MPPGestureRecognizer implementation

---
 .../sources/MPPGestureRecognizer.mm           | 273 ++++++++++++++++++
 1 file changed, 273 insertions(+)
 create mode 100644 mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm

diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
new file mode 100644
index 000000000..b6acc05e8
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
@@ -0,0 +1,273 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h"
+
+#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
+#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
+#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerOptions+Helpers.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h"
+
+namespace {
+using ::mediapipe::NormalizedRect;
+using ::mediapipe::Packet;
+using ::mediapipe::Timestamp;
+using ::mediapipe::tasks::core::PacketMap;
+using ::mediapipe::tasks::core::PacketsCallback;
+}  // namespace
+
+static NSString *const kImageTag = @"IMAGE";
+static NSString *const kImageInStreamName = @"image_in";
+static NSString *const kNormRectTag = @"NORM_RECT";
+static NSString *const kNormRectInStreamName = @"norm_rect_in";
+static NSString *const kImageOutStreamName = @"image_out";
+static NSString *const kLandmarksTag = @"LANDMARKS";
+static NSString *const kLandmarksOutStreamName = @"hand_landmarks";
+static NSString *const kWorldLandmarksTag = @"WORLD_LANDMARKS";
+static NSString *const kWorldLandmarksOutStreamName = @"world_hand_landmarks";
+static NSString *const kHandednessTag = @"HANDEDNESS";
+static NSString *const kHandednessOutStreamName = @"handedness";
+static NSString *const kHandGesturesTag = @"HAND_GESTURES";
+static NSString *const kHandGesturesOutStreamName = @"hand_gestures";
+static NSString *const kTaskGraphName =
+    @"mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph";
+static NSString *const kTaskName = @"gestureRecognizer";
+
+#define InputPacketMap(imagePacket, normalizedRectPacket)   \
+  {                                                         \
+    {kImageInStreamName.cppString, imagePacket}, {          \
+      kNormRectInStreamName.cppString, normalizedRectPacket \
+    }                                                       \
+  }
+
+@interface MPPGestureRecognizer () {
+  /** iOS Vision Task Runner */
+  MPPVisionTaskRunner *_visionTaskRunner;
+  dispatch_queue_t _callbackQueue;
+}
+@property(nonatomic, weak) id<MPPGestureRecognizerLiveStreamDelegate>
+    gestureRecognizerLiveStreamDelegate;
+@end
+
+@implementation MPPGestureRecognizer
+
+- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOutputPacketMap:
+    (PacketMap &)outputPacketMap {
+  return [MPPGestureRecognizerResult
+      gestureRecognizerResultWithHandGesturesPacket:outputPacketMap[kHandGesturesOutStreamName
+                                                                        .cppString]
+                                   handednessPacket:outputPacketMap[kHandednessOutStreamName
+                                                                        .cppString]
+                                handLandmarksPacket:outputPacketMap[kLandmarksOutStreamName
+                                                                        .cppString]
+                               worldLandmarksPacket:outputPacketMap[kWorldLandmarksOutStreamName
+                                                                        .cppString]];
+}
+
+- (void)processLiveStreamResult:(absl::StatusOr<PacketMap>)liveStreamResult {
+  MPPGestureRecognizer *__weak weakSelf = self;
+  if (![weakSelf.gestureRecognizerLiveStreamDelegate
+          respondsToSelector:@selector(gestureRecognizer:
+                                 didFinishRecognitionWithResult:timestampInMilliseconds:error:)]) {
+    return;
+  }
+
+  NSError *callbackError = nil;
+  if (![MPPCommonUtils checkCppError:liveStreamResult.status() toError:&callbackError]) {
+    dispatch_async(_callbackQueue, ^{
+      [weakSelf.gestureRecognizerLiveStreamDelegate gestureRecognizer:weakSelf
+                                        didFinishRecognitionWithResult:nil
+                                               timestampInMilliseconds:Timestamp::Unset().Value()
+                                                                 error:callbackError];
+    });
+    return;
+  }
+
+  PacketMap &outputPacketMap = liveStreamResult.value();
+  if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) {
+    return;
+  }
+
+  MPPGestureRecognizerResult *result =
+      [weakSelf gestureRecognizerResultWithOutputPacketMap:outputPacketMap];
+
+  NSInteger timeStampInMilliseconds =
+      outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() /
+      kMicroSecondsPerMilliSecond;
+  dispatch_async(_callbackQueue, ^{
+    [weakSelf.gestureRecognizerLiveStreamDelegate gestureRecognizer:weakSelf
+                                      didFinishRecognitionWithResult:result
+                                             timestampInMilliseconds:timeStampInMilliseconds
+                                                               error:callbackError];
+  });
+}
+
+- (instancetype)initWithOptions:(MPPGestureRecognizerOptions *)options error:(NSError **)error {
+  self = [super init];
+  if (self) {
+    MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc]
+        initWithTaskGraphName:kTaskGraphName
+                 inputStreams:@[
+                   [NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName],
+                   [NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectInStreamName]
+                 ]
+                outputStreams:@[
+                  [NSString stringWithFormat:@"%@:%@", kLandmarksTag, kLandmarksOutStreamName],
+                  [NSString
+                      stringWithFormat:@"%@:%@", kWorldLandmarksTag, kWorldLandmarksOutStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kHandednessTag, kHandednessOutStreamName],
+                  [NSString
+                      stringWithFormat:@"%@:%@", kHandGesturesTag, kHandGesturesOutStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName]
+                ]
+                  taskOptions:options
+           enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream
+                        error:error];
+
+    if (!taskInfo) {
+      return nil;
+    }
+
+    PacketsCallback packetsCallback = nullptr;
+
+    if (options.gestureRecognizerLiveStreamDelegate) {
+      _gestureRecognizerLiveStreamDelegate = options.gestureRecognizerLiveStreamDelegate;
+      // Capturing `self` as weak in order to avoid `self` being kept in memory
+      // and causing a retain cycle, after self is set to `nil`.
+      MPPGestureRecognizer *__weak weakSelf = self;
+
+      // Create a private serial dispatch queue in which the delegate method will be called
+      // asynchronously. This is to ensure that if the client performs a long running operation in
+      // the delegate method, the queue on which the C++ callbacks are invoked is not blocked and
+      // is freed up to continue with its operations.
+      const char *queueName = [MPPVisionTaskRunner uniqueDispatchQueueNameWithSuffix:kTaskName];
+      _callbackQueue = dispatch_queue_create(queueName, NULL);
+      packetsCallback = [=](absl::StatusOr<PacketMap> liveStreamResult) {
+        [weakSelf processLiveStreamResult:liveStreamResult];
+      };
+    }
+
+    _visionTaskRunner =
+        [[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
+                                                       runningMode:options.runningMode
+                                                   packetsCallback:std::move(packetsCallback)
+                                                             error:error];
+    if (!_visionTaskRunner) {
+      return nil;
+    }
+  }
+  return self;
+}
+
+- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
+  MPPGestureRecognizerOptions *options = [[MPPGestureRecognizerOptions alloc] init];
+
+  options.baseOptions.modelAssetPath = modelPath;
+
+  return [self initWithOptions:options error:error];
+}
+
+- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOptionalOutputPacketMap:
+    (std::optional<PacketMap> &)outputPacketMap {
+  if (!outputPacketMap.has_value()) {
+    return nil;
+  }
+  MPPGestureRecognizerResult *result =
+      [self gestureRecognizerResultWithOutputPacketMap:outputPacketMap.value()];
+  return result;
+}
+
+- (nullable MPPGestureRecognizerResult *)recognizeImage:(MPPImage *)image error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
+                                                  imageSize:CGSizeMake(image.width, image.height)
+                                                      error:error];
+  if (!rect.has_value()) {
+    return nil;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
+  if (imagePacket.IsEmpty()) {
+    return nil;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processImagePacketMap:inputPacketMap error:error];
+  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
+}
+
+- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
+                               timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                                 error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
+                                                  imageSize:CGSizeMake(image.width, image.height)
+                                                      error:error];
+  if (!rect.has_value()) {
+    return std::nullopt;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
+                                                timestampInMilliseconds:timestampInMilliseconds
+                                                                  error:error];
+  if (imagePacket.IsEmpty()) {
+    return std::nullopt;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
+                                     timestampInMilliseconds:timestampInMilliseconds];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+  return inputPacketMap;
+}
+
+- (nullable MPPGestureRecognizerResult *)recognizeVideoFrame:(MPPImage *)image
+                                     timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                                       error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap =
+      [self inputPacketMapWithMPPImage:image
+               timestampInMilliseconds:timestampInMilliseconds
+                                 error:error];
+  if (!inputPacketMap.has_value()) {
+    return nil;
+  }
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
+
+  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
+}
+
+- (BOOL)recognizeAsyncImage:(MPPImage *)image
+    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                      error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap =
+      [self inputPacketMapWithMPPImage:image
+               timestampInMilliseconds:timestampInMilliseconds
+                                 error:error];
+  if (!inputPacketMap.has_value()) {
+    return NO;
+  }
+
+  return [_visionTaskRunner processLiveStreamPacketMap:inputPacketMap.value() error:error];
+}
+
+@end
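
A hedged usage sketch of the class added above. The model path is a placeholder,
`image` is an `MPPImage` the app already created, and all selectors come from the
implementation in this patch:

  MPPGestureRecognizerOptions *options = [[MPPGestureRecognizerOptions alloc] init];
  options.baseOptions.modelAssetPath = @"gesture_recognizer.task";

  NSError *error = nil;
  MPPGestureRecognizer *recognizer = [[MPPGestureRecognizer alloc] initWithOptions:options
                                                                              error:&error];

  // Image mode: synchronous, one-shot recognition.
  MPPGestureRecognizerResult *result = [recognizer recognizeImage:image error:&error];

  // Video mode instead takes monotonically increasing timestamps:
  // [recognizer recognizeVideoFrame:image timestampInMilliseconds:ms error:&error];
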
From 6fabc35ce77c6858d8f8bc27960457de46465eb7 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Fri, 26 May 2023 18:49:08 +0530
Subject: [PATCH 4/5] Removed gesture recognizer implementation

---
 .../sources/MPPGestureRecognizer.mm           | 273 ------------------
 1 file changed, 273 deletions(-)
 delete mode 100644 mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm

diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
deleted file mode 100644
index b6acc05e8..000000000
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
+++ /dev/null
@@ -1,273 +0,0 @@
-// Copyright 2023 The MediaPipe Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h"
-
-#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
-#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
-#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
-#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
-#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
-#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerOptions+Helpers.h"
-#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h"
-
-namespace {
-using ::mediapipe::NormalizedRect;
-using ::mediapipe::Packet;
-using ::mediapipe::Timestamp;
-using ::mediapipe::tasks::core::PacketMap;
-using ::mediapipe::tasks::core::PacketsCallback;
-}  // namespace
-
-static NSString *const kImageTag = @"IMAGE";
-static NSString *const kImageInStreamName = @"image_in";
-static NSString *const kNormRectTag = @"NORM_RECT";
-static NSString *const kNormRectInStreamName = @"norm_rect_in";
-static NSString *const kImageOutStreamName = @"image_out";
-static NSString *const kLandmarksTag = @"LANDMARKS";
-static NSString *const kLandmarksOutStreamName = @"hand_landmarks";
-static NSString *const kWorldLandmarksTag = @"WORLD_LANDMARKS";
-static NSString *const kWorldLandmarksOutStreamName = @"world_hand_landmarks";
-static NSString *const kHandednessTag = @"HANDEDNESS";
-static NSString *const kHandednessOutStreamName = @"handedness";
-static NSString *const kHandGesturesTag = @"HAND_GESTURES";
-static NSString *const kHandGesturesOutStreamName = @"hand_gestures";
-static NSString *const kTaskGraphName =
-    @"mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph";
-static NSString *const kTaskName = @"gestureRecognizer";
-
-#define InputPacketMap(imagePacket, normalizedRectPacket)   \
-  {                                                         \
-    {kImageInStreamName.cppString, imagePacket}, {          \
-      kNormRectInStreamName.cppString, normalizedRectPacket \
-    }                                                       \
-  }
-
-@interface MPPGestureRecognizer () {
-  /** iOS Vision Task Runner */
-  MPPVisionTaskRunner *_visionTaskRunner;
-  dispatch_queue_t _callbackQueue;
-}
-@property(nonatomic, weak) id<MPPGestureRecognizerLiveStreamDelegate>
-    gestureRecognizerLiveStreamDelegate;
-@end
-
-@implementation MPPGestureRecognizer
-
-- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOutputPacketMap:
-    (PacketMap &)outputPacketMap {
-  return [MPPGestureRecognizerResult
-      gestureRecognizerResultWithHandGesturesPacket:outputPacketMap[kHandGesturesOutStreamName
-                                                                        .cppString]
-                                   handednessPacket:outputPacketMap[kHandednessOutStreamName
-                                                                        .cppString]
-                                handLandmarksPacket:outputPacketMap[kLandmarksOutStreamName
-                                                                        .cppString]
-                               worldLandmarksPacket:outputPacketMap[kWorldLandmarksOutStreamName
-                                                                        .cppString]];
-}
-
-- (void)processLiveStreamResult:(absl::StatusOr<PacketMap>)liveStreamResult {
-  MPPGestureRecognizer *__weak weakSelf = self;
-  if (![weakSelf.gestureRecognizerLiveStreamDelegate
-          respondsToSelector:@selector(gestureRecognizer:
-                                 didFinishRecognitionWithResult:timestampInMilliseconds:error:)]) {
-    return;
-  }
-
-  NSError *callbackError = nil;
-  if (![MPPCommonUtils checkCppError:liveStreamResult.status() toError:&callbackError]) {
-    dispatch_async(_callbackQueue, ^{
-      [weakSelf.gestureRecognizerLiveStreamDelegate gestureRecognizer:weakSelf
-                                        didFinishRecognitionWithResult:nil
-                                               timestampInMilliseconds:Timestamp::Unset().Value()
-                                                                 error:callbackError];
-    });
-    return;
-  }
-
-  PacketMap &outputPacketMap = liveStreamResult.value();
-  if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) {
-    return;
-  }
-
-  MPPGestureRecognizerResult *result =
-      [weakSelf gestureRecognizerResultWithOutputPacketMap:outputPacketMap];
-
-  NSInteger timeStampInMilliseconds =
-      outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() /
-      kMicroSecondsPerMilliSecond;
-  dispatch_async(_callbackQueue, ^{
-    [weakSelf.gestureRecognizerLiveStreamDelegate gestureRecognizer:weakSelf
-                                      didFinishRecognitionWithResult:result
-                                             timestampInMilliseconds:timeStampInMilliseconds
-                                                               error:callbackError];
-  });
-}
-
-- (instancetype)initWithOptions:(MPPGestureRecognizerOptions *)options error:(NSError **)error {
-  self = [super init];
-  if (self) {
-    MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc]
-        initWithTaskGraphName:kTaskGraphName
-                 inputStreams:@[
-                   [NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName],
-                   [NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectInStreamName]
-                 ]
-                outputStreams:@[
-                  [NSString stringWithFormat:@"%@:%@", kLandmarksTag, kLandmarksOutStreamName],
-                  [NSString
-                      stringWithFormat:@"%@:%@", kWorldLandmarksTag, kWorldLandmarksOutStreamName],
-                  [NSString stringWithFormat:@"%@:%@", kHandednessTag, kHandednessOutStreamName],
-                  [NSString
-                      stringWithFormat:@"%@:%@", kHandGesturesTag, kHandGesturesOutStreamName],
-                  [NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName]
-                ]
-                  taskOptions:options
-           enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream
-                        error:error];
-
-    if (!taskInfo) {
-      return nil;
-    }
-
-    PacketsCallback packetsCallback = nullptr;
-
-    if (options.gestureRecognizerLiveStreamDelegate) {
-      _gestureRecognizerLiveStreamDelegate = options.gestureRecognizerLiveStreamDelegate;
-      // Capturing `self` as weak in order to avoid `self` being kept in memory
-      // and causing a retain cycle, after self is set to `nil`.
-      MPPGestureRecognizer *__weak weakSelf = self;
-
-      // Create a private serial dispatch queue in which the delegate method will be called
-      // asynchronously. This is to ensure that if the client performs a long running operation in
-      // the delegate method, the queue on which the C++ callbacks are invoked is not blocked and
-      // is freed up to continue with its operations.
-      const char *queueName = [MPPVisionTaskRunner uniqueDispatchQueueNameWithSuffix:kTaskName];
-      _callbackQueue = dispatch_queue_create(queueName, NULL);
-      packetsCallback = [=](absl::StatusOr<PacketMap> liveStreamResult) {
-        [weakSelf processLiveStreamResult:liveStreamResult];
-      };
-    }
-
-    _visionTaskRunner =
-        [[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
-                                                       runningMode:options.runningMode
-                                                   packetsCallback:std::move(packetsCallback)
-                                                             error:error];
-    if (!_visionTaskRunner) {
-      return nil;
-    }
-  }
-  return self;
-}
-
-- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
-  MPPGestureRecognizerOptions *options = [[MPPGestureRecognizerOptions alloc] init];
-
-  options.baseOptions.modelAssetPath = modelPath;
-
-  return [self initWithOptions:options error:error];
-}
-
-- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOptionalOutputPacketMap:
-    (std::optional<PacketMap> &)outputPacketMap {
-  if (!outputPacketMap.has_value()) {
-    return nil;
-  }
-  MPPGestureRecognizerResult *result =
-      [self gestureRecognizerResultWithOutputPacketMap:outputPacketMap.value()];
-  return result;
-}
-
-- (nullable MPPGestureRecognizerResult *)recognizeImage:(MPPImage *)image error:(NSError **)error {
-  std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
-                                                  imageSize:CGSizeMake(image.width, image.height)
-                                                      error:error];
-  if (!rect.has_value()) {
-    return nil;
-  }
-
-  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
-  if (imagePacket.IsEmpty()) {
-    return nil;
-  }
-
-  Packet normalizedRectPacket =
-      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
-
-  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
-
-  std::optional<PacketMap> outputPacketMap =
-      [_visionTaskRunner processImagePacketMap:inputPacketMap error:error];
-  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
-}
-
-- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
-                               timestampInMilliseconds:(NSInteger)timestampInMilliseconds
-                                                 error:(NSError **)error {
-  std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
-                                                  imageSize:CGSizeMake(image.width, image.height)
-                                                      error:error];
-  if (!rect.has_value()) {
-    return std::nullopt;
-  }
-
-  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
-                                                timestampInMilliseconds:timestampInMilliseconds
-                                                                  error:error];
-  if (imagePacket.IsEmpty()) {
-    return std::nullopt;
-  }
-
-  Packet normalizedRectPacket =
-      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
-                                     timestampInMilliseconds:timestampInMilliseconds];
-
-  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
-  return inputPacketMap;
-}
-
-- (nullable MPPGestureRecognizerResult *)recognizeVideoFrame:(MPPImage *)image
-                                     timestampInMilliseconds:(NSInteger)timestampInMilliseconds
-                                                       error:(NSError **)error {
-  std::optional<PacketMap> inputPacketMap =
-      [self inputPacketMapWithMPPImage:image
-               timestampInMilliseconds:timestampInMilliseconds
-                                 error:error];
-  if (!inputPacketMap.has_value()) {
-    return nil;
-  }
-
-  std::optional<PacketMap> outputPacketMap =
-      [_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
-
-  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
-}
-
-- (BOOL)recognizeAsyncImage:(MPPImage *)image
-    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
-                      error:(NSError **)error {
-  std::optional<PacketMap> inputPacketMap =
-      [self inputPacketMapWithMPPImage:image
-               timestampInMilliseconds:timestampInMilliseconds
-                                 error:error];
-  if (!inputPacketMap.has_value()) {
-    return NO;
-  }
-
-  return [_visionTaskRunner processLiveStreamPacketMap:inputPacketMap.value() error:error];
-}
-
-@end
From 23d97292a6740216376095fe947b0159ddd4afe0 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Fri, 26 May 2023 18:49:24 +0530
Subject: [PATCH 5/5] Updated face detector to use new methods from vision task
 runner

---
 .../ios/vision/face_detector/sources/MPPFaceDetector.mm | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/mediapipe/tasks/ios/vision/face_detector/sources/MPPFaceDetector.mm b/mediapipe/tasks/ios/vision/face_detector/sources/MPPFaceDetector.mm
index ceb5c957d..7cb525fb0 100644
--- a/mediapipe/tasks/ios/vision/face_detector/sources/MPPFaceDetector.mm
+++ b/mediapipe/tasks/ios/vision/face_detector/sources/MPPFaceDetector.mm
@@ -128,10 +128,8 @@ static NSString *const kTaskName = @"faceDetector";
            timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                              error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
                                                   imageSize:CGSizeMake(image.width, image.height)
-                                           imageOrientation:image.orientation
-                                                 ROIAllowed:NO
                                                       error:error];
   if (!rect.has_value()) {
     return std::nullopt;
@@ -154,10 +152,8 @@ static NSString *const kTaskName = @"faceDetector";
 
 - (nullable MPPFaceDetectorResult *)detectInImage:(MPPImage *)image error:(NSError **)error {
   std::optional<NormalizedRect> rect =
-      [_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
                                                   imageSize:CGSizeMake(image.width, image.height)
-                                           imageOrientation:image.orientation
-                                                 ROIAllowed:NO
                                                       error:error];
   if (!rect.has_value()) {
     return nil;
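
For completeness, a hedged sketch of the live-stream delegate that patches 3 and 4 add
and then back out. The protocol and the callback selector appear in the removed
implementation; the class name here is hypothetical:

  @interface GestureResultHandler : NSObject <MPPGestureRecognizerLiveStreamDelegate>
  @end

  @implementation GestureResultHandler

  - (void)gestureRecognizer:(MPPGestureRecognizer *)gestureRecognizer
      didFinishRecognitionWithResult:(MPPGestureRecognizerResult *)result
             timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                               error:(NSError *)error {
    // Invoked on the recognizer's private serial callback queue; dispatch to
    // the main queue before touching UI state.
    if (error) {
      NSLog(@"Recognition failed: %@", error);
      return;
    }
    NSLog(@"Recognized %lu hand(s) at %ld ms", (unsigned long)result.gestures.count,
          (long)timestampInMilliseconds);
  }

  @end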