diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD
index 4e32ed59f..297a416d9 100644
--- a/mediapipe/calculators/core/BUILD
+++ b/mediapipe/calculators/core/BUILD
@@ -297,8 +297,7 @@ cc_library(
         "//mediapipe/util:render_data_cc_proto",
         "@org_tensorflow//tensorflow/lite:framework",
     ] + select({
-        "//mediapipe/gpu:disable_gpu": [],
-        "//mediapipe:ios": [],
+        ":ios_or_disable_gpu": [],
         "//conditions:default": [
            "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
         ],
diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD
index 59102585c..2ad98f28d 100644
--- a/mediapipe/calculators/tensor/BUILD
+++ b/mediapipe/calculators/tensor/BUILD
@@ -655,6 +655,11 @@ cc_library(
     ] + select({
         "//mediapipe/gpu:disable_gpu": [],
         "//conditions:default": ["tensor_converter_calculator_gpu_deps"],
+    }) + select({
+        "//mediapipe:apple": [
+            "//third_party/apple_frameworks:MetalKit",
+        ],
+        "//conditions:default": [],
     }),
     alwayslink = 1,
 )
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD b/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD
index efe33718f..78a07e17d 100644
--- a/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/BUILD
@@ -41,6 +41,7 @@ objc_library(
 
 objc_library(
     name = "MPPGestureRecognizer",
+    srcs = ["sources/MPPGestureRecognizer.mm"],
     hdrs = ["sources/MPPGestureRecognizer.h"],
     copts = [
         "-ObjC++",
@@ -51,7 +52,15 @@ objc_library(
     deps = [
         ":MPPGestureRecognizerOptions",
         ":MPPGestureRecognizerResult",
+        "//mediapipe/tasks/cc/vision/gesture_recognizer:gesture_recognizer_graph",
+        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
+        "//mediapipe/tasks/ios/common/utils:NSStringHelpers",
+        "//mediapipe/tasks/ios/core:MPPTaskInfo",
         "//mediapipe/tasks/ios/core:MPPTaskOptions",
         "//mediapipe/tasks/ios/vision/core:MPPImage",
+        "//mediapipe/tasks/ios/vision/core:MPPVisionPacketCreator",
+        "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner",
+        "//mediapipe/tasks/ios/vision/gesture_recognizer/utils:MPPGestureRecognizerOptionsHelpers",
+        "//mediapipe/tasks/ios/vision/gesture_recognizer/utils:MPPGestureRecognizerResultHelpers",
     ],
 )
diff --git a/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
new file mode 100644
index 000000000..b722163b2
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.mm
@@ -0,0 +1,273 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h"
+
+#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
+#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
+#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerOptions+Helpers.h"
+#import "mediapipe/tasks/ios/vision/gesture_recognizer/utils/sources/MPPGestureRecognizerResult+Helpers.h"
+
+namespace {
+using ::mediapipe::NormalizedRect;
+using ::mediapipe::Packet;
+using ::mediapipe::Timestamp;
+using ::mediapipe::tasks::core::PacketMap;
+using ::mediapipe::tasks::core::PacketsCallback;
+}  // namespace
+
+static NSString *const kImageTag = @"IMAGE";
+static NSString *const kImageInStreamName = @"image_in";
+static NSString *const kNormRectTag = @"NORM_RECT";
+static NSString *const kNormRectInStreamName = @"norm_rect_in";
+static NSString *const kImageOutStreamName = @"image_out";
+static NSString *const kLandmarksTag = @"LANDMARKS";
+static NSString *const kLandmarksOutStreamName = @"hand_landmarks";
+static NSString *const kWorldLandmarksTag = @"WORLD_LANDMARKS";
+static NSString *const kWorldLandmarksOutStreamName = @"world_hand_landmarks";
+static NSString *const kHandednessTag = @"HANDEDNESS";
+static NSString *const kHandednessOutStreamName = @"handedness";
+static NSString *const kHandGesturesTag = @"HAND_GESTURES";
+static NSString *const kHandGesturesOutStreamName = @"hand_gestures";
+static NSString *const kTaskGraphName =
+    @"mediapipe.tasks.vision.gesture_recognizer.GestureRecognizerGraph";
+static NSString *const kTaskName = @"gestureRecognizer";
+
+// Conversion factor between packet timestamps (microseconds) and the
+// millisecond timestamps exposed in the public API.
+static const int kMicroSecondsPerMilliSecond = 1000;
+
+#define InputPacketMap(imagePacket, normalizedRectPacket)   \
+  {                                                         \
+    {kImageInStreamName.cppString, imagePacket}, {          \
+      kNormRectInStreamName.cppString, normalizedRectPacket \
+    }                                                       \
+  }
+
+@interface MPPGestureRecognizer () {
+  /** iOS Vision Task Runner */
+  MPPVisionTaskRunner *_visionTaskRunner;
+  dispatch_queue_t _callbackQueue;
+}
+@property(nonatomic, weak) id<MPPGestureRecognizerLiveStreamDelegate>
+    gestureRecognizerLiveStreamDelegate;
+@end
+
+@implementation MPPGestureRecognizer
+
+- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOutputPacketMap:
+    (PacketMap &)outputPacketMap {
+  return [MPPGestureRecognizerResult
+      gestureRecognizerResultWithHandGesturesPacket:outputPacketMap[kHandGesturesOutStreamName
+                                                                        .cppString]
+                                   handednessPacket:outputPacketMap[kHandednessOutStreamName
+                                                                        .cppString]
+                                handLandmarksPacket:outputPacketMap[kLandmarksOutStreamName
+                                                                        .cppString]
+                               worldLandmarksPacket:outputPacketMap[kWorldLandmarksOutStreamName
+                                                                        .cppString]];
+}
+
+- (void)processLiveStreamResult:(absl::StatusOr<PacketMap>)liveStreamResult {
+  if (![self.gestureRecognizerLiveStreamDelegate
+          respondsToSelector:@selector(gestureRecognizer:
+                                 didFinishRecognitionWithResult:timestampInMilliseconds:error:)]) {
+    return;
+  }
+
+  NSError *callbackError = nil;
+  if (![MPPCommonUtils checkCppError:liveStreamResult.status() toError:&callbackError]) {
+    dispatch_async(_callbackQueue, ^{
+      [self.gestureRecognizerLiveStreamDelegate gestureRecognizer:self
+                                   didFinishRecognitionWithResult:nil
+                                          timestampInMilliseconds:Timestamp::Unset().Value()
+                                                            error:callbackError];
+    });
+    return;
+  }
+
+  PacketMap &outputPacketMap = liveStreamResult.value();
+  if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) {
+    return;
+  }
+
+  MPPGestureRecognizerResult *result =
+      [self gestureRecognizerResultWithOutputPacketMap:outputPacketMap];
+
+  NSInteger timestampInMilliseconds =
+      outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() /
+      kMicroSecondsPerMilliSecond;
+  dispatch_async(_callbackQueue, ^{
+    [self.gestureRecognizerLiveStreamDelegate gestureRecognizer:self
+                                 didFinishRecognitionWithResult:result
+                                        timestampInMilliseconds:timestampInMilliseconds
+                                                          error:callbackError];
+  });
+}
+
+- (instancetype)initWithOptions:(MPPGestureRecognizerOptions *)options error:(NSError **)error {
+  self = [super init];
+  if (self) {
+    MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc]
+        initWithTaskGraphName:kTaskGraphName
+                 inputStreams:@[
+                   [NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName],
+                   [NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectInStreamName]
+                 ]
+                outputStreams:@[
+                  [NSString stringWithFormat:@"%@:%@", kLandmarksTag, kLandmarksOutStreamName],
+                  [NSString
+                      stringWithFormat:@"%@:%@", kWorldLandmarksTag, kWorldLandmarksOutStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kHandednessTag, kHandednessOutStreamName],
+                  [NSString
+                      stringWithFormat:@"%@:%@", kHandGesturesTag, kHandGesturesOutStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName]
+                ]
+                  taskOptions:options
+           enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream
+                        error:error];
+
+    if (!taskInfo) {
+      return nil;
+    }
+
+    PacketsCallback packetsCallback = nullptr;
+
+    if (options.gestureRecognizerLiveStreamDelegate) {
+      _gestureRecognizerLiveStreamDelegate = options.gestureRecognizerLiveStreamDelegate;
+
+      // Create a private serial dispatch queue on which the delegate method will be called
+      // asynchronously. This ensures that if the client performs a long-running operation in
+      // the delegate method, the queue on which the C++ callback is invoked is not blocked
+      // and is freed up to continue with its operations.
+      _callbackQueue = dispatch_queue_create(
+          [MPPVisionTaskRunner uniqueDispatchQueueNameWithSuffix:kTaskName], NULL);
+
+      // Capture `self` weakly to avoid a retain cycle that would keep `self` alive
+      // after it has been set to `nil`.
+      MPPGestureRecognizer *__weak weakSelf = self;
+      packetsCallback = [=](absl::StatusOr<PacketMap> liveStreamResult) {
+        [weakSelf processLiveStreamResult:liveStreamResult];
+      };
+    }
+
+    _visionTaskRunner =
+        [[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
+                                                       runningMode:options.runningMode
+                                                   packetsCallback:std::move(packetsCallback)
+                                                             error:error];
+    if (!_visionTaskRunner) {
+      return nil;
+    }
+  }
+  return self;
+}
+
+- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
+  MPPGestureRecognizerOptions *options = [[MPPGestureRecognizerOptions alloc] init];
+
+  options.baseOptions.modelAssetPath = modelPath;
+
+  return [self initWithOptions:options error:error];
+}
+
+- (nullable MPPGestureRecognizerResult *)gestureRecognizerResultWithOptionalOutputPacketMap:
+    (std::optional<PacketMap> &)outputPacketMap {
+  if (!outputPacketMap.has_value()) {
+    return nil;
+  }
+  MPPGestureRecognizerResult *result =
+      [self gestureRecognizerResultWithOutputPacketMap:outputPacketMap.value()];
+  return result;
+}
+
+- (nullable MPPGestureRecognizerResult *)recognizeImage:(MPPImage *)image error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
+                                                  imageSize:CGSizeMake(image.width, image.height)
+                                                      error:error];
+  if (!rect.has_value()) {
+    return nil;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
+  if (imagePacket.IsEmpty()) {
+    return nil;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processImagePacketMap:inputPacketMap error:error];
+  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
+}
+
+- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
+                               timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                                 error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
+                                                  imageSize:CGSizeMake(image.width, image.height)
+                                                      error:error];
+  if (!rect.has_value()) {
+    return std::nullopt;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
+                                                timestampInMilliseconds:timestampInMilliseconds
+                                                                  error:error];
+  if (imagePacket.IsEmpty()) {
+    return std::nullopt;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
+                                     timestampInMilliseconds:timestampInMilliseconds];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+  return inputPacketMap;
+}
+
+- (nullable MPPGestureRecognizerResult *)recognizeVideoFrame:(MPPImage *)image
+                                     timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                                       error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap =
+      [self inputPacketMapWithMPPImage:image
+               timestampInMilliseconds:timestampInMilliseconds
+                                 error:error];
+  if (!inputPacketMap.has_value()) {
+    return nil;
+  }
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
+
+  return [self gestureRecognizerResultWithOptionalOutputPacketMap:outputPacketMap];
+}
+
+- (BOOL)recognizeAsyncImage:(MPPImage *)image
+    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                      error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap =
+      [self inputPacketMapWithMPPImage:image
+               timestampInMilliseconds:timestampInMilliseconds
+                                 error:error];
+  if (!inputPacketMap.has_value()) {
+    return NO;
+  }
+
+  return [_visionTaskRunner processLiveStreamPacketMap:inputPacketMap.value() error:error];
+}
+
+@end
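
For reviewers: a minimal sketch of how a client would drive the new recognizer in
single-image mode, not part of this patch. The bundled model file name and the error
handling are illustrative assumptions; the calls themselves (initWithModelPath:error:
and recognizeImage:error:) are the ones added above.

    #import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizer.h"

    static MPPGestureRecognizerResult *RecognizeGestures(MPPImage *image) {
      // Hypothetical model asset bundled with the app as "gesture_recognizer.task".
      NSString *modelPath = [[NSBundle mainBundle] pathForResource:@"gesture_recognizer"
                                                            ofType:@"task"];
      NSError *error = nil;
      // Convenience initializer from this patch; uses default options.
      MPPGestureRecognizer *recognizer =
          [[MPPGestureRecognizer alloc] initWithModelPath:modelPath error:&error];
      if (!recognizer) {
        NSLog(@"Failed to create gesture recognizer: %@", error);
        return nil;
      }
      // Synchronous single-image call. For video or live-stream input, use
      // recognizeVideoFrame:timestampInMilliseconds:error: or
      // recognizeAsyncImage:timestampInMilliseconds:error: (with a
      // gestureRecognizerLiveStreamDelegate set on the options) instead.
      return [recognizer recognizeImage:image error:&error];
    }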