diff --git a/mediapipe/tasks/ios/vision/core/BUILD b/mediapipe/tasks/ios/vision/core/BUILD index a08b2dd6c..7c3124566 100644 --- a/mediapipe/tasks/ios/vision/core/BUILD +++ b/mediapipe/tasks/ios/vision/core/BUILD @@ -65,6 +65,30 @@ objc_library( ], ) +objc_library( + name = "MPPVisionTaskRunnerRefactored", + srcs = ["sources/MPPVisionTaskRunnerRefactored.mm"], + hdrs = ["sources/MPPVisionTaskRunnerRefactored.h"], + copts = [ + "-ObjC++", + "-std=c++17", + ], + deps = [ + ":MPPImage", + ":MPPRunningMode", + ":MPPVisionPacketCreator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/tasks/ios/common:MPPCommon", + "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/core:MPPTaskInfo", + "//mediapipe/tasks/ios/core:MPPTaskRunner", + "//third_party/apple_frameworks:UIKit", + "@com_google_absl//absl/status:statusor", + ], +) + objc_library( name = "MPPMask", srcs = ["sources/MPPMask.mm"], diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.h new file mode 100644 index 000000000..aa0307d71 --- /dev/null +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.h @@ -0,0 +1,218 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// NOTE(review): the two framework import targets were lost when this diff was extracted. They are
// restored as Foundation and UIKit based on the types used below (NSString, NSError, CGRect) --
// confirm against the upstream file.
#import <Foundation/Foundation.h>
#import <UIKit/UIKit.h>

#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
#import "mediapipe/tasks/ios/core/sources/MPPTaskRunner.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"

NS_ASSUME_NONNULL_BEGIN

/**
 * This class is used to create and call appropriate methods on the C++ Task Runner to initialize,
 * execute and terminate any MediaPipe vision task.
 */
@interface MPPVisionTaskRunner : MPPTaskRunner

/**
 * Initializes a new `MPPVisionTaskRunner` with the taskInfo, running mode, whether task supports
 * region of interest, packets callback, image and norm rect input stream names. Make sure that the
 * packets callback is set properly based on the vision task's running mode. In case of live stream
 * running mode, a C++ packets callback that is intended to deliver inference results must be
 * provided. In case of image or video running mode, the packets callback must not be set.
 *
 * @param taskInfo A `MPPTaskInfo` initialized by the task.
 * @param runningMode MediaPipe vision task running mode.
 * @param roiAllowed A `BOOL` indicating if the task supports region of interest.
 * @param packetsCallback An optional C++ callback function that takes a list of output packets as
 * the input argument. If provided, the callback must in turn call the block provided by the user in
 * the appropriate task options. It is required in live stream running mode and must not be set in
 * image or video running mode; violating either rule makes initialization fail with an invalid
 * argument error.
 * @param imageInputStreamName Name of the image input stream of the task.
 * @param normRectInputStreamName Name of the norm rect input stream of the task.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An instance of `MPPVisionTaskRunner` initialized with the given taskInfo, running
 * mode, whether task supports region of interest, packets callback, image and norm rect input
 * stream names, or `nil` if the arguments are inconsistent with the running mode.
 */
- (nullable instancetype)initWithTaskInfo:(MPPTaskInfo *)taskInfo
                              runningMode:(MPPRunningMode)runningMode
                               roiAllowed:(BOOL)roiAllowed
                          packetsCallback:(mediapipe::tasks::core::PacketsCallback)packetsCallback
                     imageInputStreamName:(NSString *)imageInputStreamName
                  normRectInputStreamName:(NSString *)normRectInputStreamName
                                    error:(NSError **)error NS_DESIGNATED_INITIALIZER;

/**
 * A synchronous method to invoke the C++ task runner to process single image inputs. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must not be factored in for inference.
 *
 * @param image An `MPPImage` input to the task.
 * @param error Pointer to the memory location where errors if any should be
 * saved. If @c NULL, no error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet.
 * `std::nullopt` is returned when the request is rejected (for example when the task runner was
 * not created in image mode).
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processImage:(MPPImage *)image
                                                           error:(NSError **)error;

/**
 * A synchronous method to invoke the C++ task runner to process single image inputs. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must be factored in for inference.
 * When a task which does not support region of interest calls this method in combination with any
 * roi other than `CGRectZero`, an error is returned.
 *
 * @param image An `MPPImage` input to the task.
 * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data
 * of type `MPPImage`, on which inference should be performed.
 * @param error Pointer to the memory location where errors if any should be
 * saved. If @c NULL, no error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet.
 * `std::nullopt` is returned when the request is rejected (for example wrong running mode or an
 * unsupported region of interest).
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processImage:(MPPImage *)image
                                                regionOfInterest:(CGRect)regionOfInterest
                                                           error:(NSError **)error;

/**
 * A synchronous method to invoke the C++ task runner to process continuous video frames. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must not be factored in for inference.
 *
 * @param videoFrame An `MPPImage` input to the task.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet.
 * `std::nullopt` is returned when the request is rejected (for example when the task runner was
 * not created in video mode).
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                                              timestampInMilliseconds:
                                                  (NSInteger)timestampInMilliseconds
                                                                error:(NSError **)error;

/**
 * A synchronous method to invoke the C++ task runner to process continuous video frames. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must be factored in for inference.
 * When a task which does not support region of interest calls this method in combination with any
 * roi other than `CGRectZero`, an error is returned.
 *
 * @param videoFrame An `MPPImage` input to the task.
 * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data
 * of type `MPPImage`, on which inference should be performed.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet.
 * `std::nullopt` is returned when the request is rejected (for example wrong running mode or an
 * unsupported region of interest).
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                                                     regionOfInterest:(CGRect)regionOfInterest
                                              timestampInMilliseconds:
                                                  (NSInteger)timestampInMilliseconds
                                                                error:(NSError **)error;

/**
 * Sends live stream data to the C++ task runner for asynchronous processing. The return value
 * only reports whether the data was sent; the inference results will be available in the
 * user-defined `packetsCallback` that was provided during initialization of the
 * `MPPVisionTaskRunner`.
 *
 * This method must be used by tasks when region of interest must not be factored in for inference.
 *
 * @param image An `MPPImage` input to the task.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return A `BOOL` indicating if the live stream data was sent to the C++ task runner successfully.
 * Please note that any errors during processing of the live stream packet map will only be
 * available in the user-defined `packetsCallback` that was provided during initialization of the
 * `MPPVisionTaskRunner`.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error;

/**
 * Sends live stream data to the C++ task runner for asynchronous processing. The return value
 * only reports whether the data was sent; the inference results will be available in the
 * user-defined `packetsCallback` that was provided during initialization of the
 * `MPPVisionTaskRunner`.
 *
 * This method must be used by tasks when region of interest must be factored in for inference.
 * When a task which does not support region of interest calls this method in combination with any
 * roi other than `CGRectZero`, an error is returned.
 *
 * @param image An `MPPImage` input to the task.
 * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data
 * of type `MPPImage`, on which inference should be performed.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return A `BOOL` indicating if the live stream data was sent to the C++ task runner successfully.
 * Please note that any errors during processing of the live stream packet map will only be
 * available in the user-defined `packetsCallback` that was provided during initialization of the
 * `MPPVisionTaskRunner`.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
              regionOfInterest:(CGRect)regionOfInterest
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error;

/**
 * This method returns a unique dispatch queue name by adding the given suffix and a `UUID` to the
 * pre-defined queue name prefix for vision tasks. The vision tasks can use this method to get
 * unique dispatch queue names which are consistent with other vision tasks.
 * Dispatch queue names need not be unique, but for easy debugging we ensure that the queue names
 * are unique.
 *
 * @param suffix A suffix that identifies a dispatch queue's functionality.
 *
 * @return A unique dispatch queue name by adding the given suffix and a `UUID` to the pre-defined
 * queue name prefix for vision tasks.
 */
+ (const char *)uniqueDispatchQueueNameWithSuffix:(NSString *)suffix;

/**
 * Unavailable. Use
 * `initWithTaskInfo:runningMode:roiAllowed:packetsCallback:imageInputStreamName:normRectInputStreamName:error:`
 * instead.
 */
- (instancetype)initWithCalculatorGraphConfig:(mediapipe::CalculatorGraphConfig)graphConfig
                              packetsCallback:
                                  (mediapipe::tasks::core::PacketsCallback)packetsCallback
                                        error:(NSError **)error NS_UNAVAILABLE;

- (instancetype)init NS_UNAVAILABLE;

+ (instancetype)new NS_UNAVAILABLE;

@end

NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.mm
new file mode 100644
index 000000000..8a42175c2
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.mm
@@ -0,0 +1,331 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.h"

#import "mediapipe/tasks/ios/common/sources/MPPCommon.h"
#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"

#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/rect.pb.h"

#include <optional>

namespace {
using ::mediapipe::NormalizedRect;
using ::mediapipe::Packet;
using ::mediapipe::tasks::core::PacketMap;
using ::mediapipe::tasks::core::PacketsCallback;
}  // namespace

/** Rotation degrees for a 90 degree rotation to the right. */
static const NSInteger kMPPOrientationDegreesRight = -270;

/** Rotation degrees for a 180 degree rotation. */
static const NSInteger kMPPOrientationDegreesDown = -180;

/** Rotation degrees for a 90 degree rotation to the left. */
static const NSInteger kMPPOrientationDegreesLeft = -90;

/** Prefix shared by the dispatch queue names of all vision tasks. */
static NSString *const kTaskPrefix = @"com.mediapipe.tasks.vision";

/**
 * Builds the brace-initializer for a `PacketMap` that maps the image and norm rect input stream
 * names of the receiver to the given packets. Relies on the `_imageInStreamName` and
 * `_normRectInStreamName` ivars, so it can only be used inside instance methods of
 * `MPPVisionTaskRunner`.
 */
#define InputPacketMap(imagePacket, normalizedRectPacket)                              \
  {                                                                                    \
    {_imageInStreamName, imagePacket}, { _normRectInStreamName, normalizedRectPacket } \
  }

@interface MPPVisionTaskRunner () {
  /** Running mode the task runner was created with; checked by every `process...` method. */
  MPPRunningMode _runningMode;
  /** Whether a region of interest other than `CGRectZero` is accepted. */
  BOOL _roiAllowed;
  /** Name of the graph input stream that receives the image packets. */
  std::string _imageInStreamName;
  /** Name of the graph input stream that receives the normalized rect packets. */
  std::string _normRectInStreamName;
}
@end

@implementation MPPVisionTaskRunner

/**
 * Designated initializer. Validates that the packets callback matches the running mode (required
 * for live stream mode, forbidden for image and video modes) before creating the underlying task
 * runner from the graph config generated by `taskInfo`.
 *
 * @return The initialized task runner, or `nil` (with `error` populated for the validation
 * failures detected here) if the arguments are invalid or the superclass initializer fails.
 */
- (nullable instancetype)initWithTaskInfo:(MPPTaskInfo *)taskInfo
                              runningMode:(MPPRunningMode)runningMode
                               roiAllowed:(BOOL)roiAllowed
                          packetsCallback:(PacketsCallback)packetsCallback
                     imageInputStreamName:(NSString *)imageInputStreamName
                  normRectInputStreamName:(NSString *)normRectInputStreamName
                                    error:(NSError **)error {
  // Validate the arguments first so that no graph is created for an invalid configuration.
  switch (runningMode) {
    case MPPRunningModeImage:
    case MPPRunningModeVideo: {
      if (packetsCallback) {
        [MPPCommonUtils createCustomError:error
                                 withCode:MPPTasksErrorCodeInvalidArgumentError
                              description:@"The vision task is in image or video mode. The "
                                          @"delegate must not be set in the task's options."];
        return nil;
      }
      break;
    }
    case MPPRunningModeLiveStream: {
      if (!packetsCallback) {
        [MPPCommonUtils
            createCustomError:error
                     withCode:MPPTasksErrorCodeInvalidArgumentError
                  description:
                      @"The vision task is in live stream mode. An object must be set as the "
                      @"delegate of the task in its options to ensure asynchronous delivery of "
                      @"results."];
        return nil;
      }
      break;
    }
    default: {
      [MPPCommonUtils createCustomError:error
                               withCode:MPPTasksErrorCodeInvalidArgumentError
                            description:@"Unrecognized running mode"];
      return nil;
    }
  }

  self = [super initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
                              packetsCallback:packetsCallback
                                        error:error];
  // Only touch the ivars once the superclass has handed back a valid instance.
  if (self) {
    _runningMode = runningMode;
    _roiAllowed = roiAllowed;
    _imageInStreamName = imageInputStreamName.cppString;
    _normRectInStreamName = normRectInputStreamName.cppString;
  }
  return self;
}

/**
 * Converts a region of interest and an image orientation into the `NormalizedRect` that is sent
 * to the graph alongside the image.
 *
 * @param roi Region of interest. `CGRectZero` means "whole image" and is replaced by the unit
 * rect. Any other value is rejected when the task does not support region of interest.
 * @param imageSize Size of the image; used to rescale width and height for 90°/270° rotations.
 * @param imageOrientation Orientation of the image. Only the four non-mirrored orientations are
 * supported.
 * @param error Populated when `std::nullopt` is returned.
 *
 * @return The normalized rect, or `std::nullopt` if the region of interest is not allowed or the
 * orientation is unsupported.
 */
- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
                                                          imageSize:(CGSize)imageSize
                                                   imageOrientation:
                                                       (UIImageOrientation)imageOrientation
                                                              error:(NSError **)error {
  if (!CGRectEqualToRect(roi, CGRectZero) && !_roiAllowed) {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:@"This task doesn't support region-of-interest."];
    return std::nullopt;
  }

  CGRect calculatedRoi = CGRectEqualToRect(roi, CGRectZero) ? CGRectMake(0.0, 0.0, 1.0, 1.0) : roi;

  NormalizedRect normalizedRect;
  normalizedRect.set_x_center(CGRectGetMidX(calculatedRoi));
  normalizedRect.set_y_center(CGRectGetMidY(calculatedRoi));

  NSInteger rotationDegrees = 0;
  switch (imageOrientation) {
    case UIImageOrientationUp:
      break;
    case UIImageOrientationRight: {
      rotationDegrees = kMPPOrientationDegreesRight;
      break;
    }
    case UIImageOrientationDown: {
      rotationDegrees = kMPPOrientationDegreesDown;
      break;
    }
    case UIImageOrientationLeft: {
      rotationDegrees = kMPPOrientationDegreesLeft;
      break;
    }
    default: {
      [MPPCommonUtils
          createCustomError:error
                   withCode:MPPTasksErrorCodeInvalidArgumentError
                description:
                    @"Unsupported UIImageOrientation. `imageOrientation` cannot be equal to "
                    @"any of the mirrored orientations "
                    @"(`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,`"
                    @"UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`)"];
      // Report the failure to the caller instead of silently continuing with a zero rotation
      // while `error` is populated.
      return std::nullopt;
    }
  }

  // Degrees to radians. The divisor is the (negative) 180 degree constant, which also flips the
  // sign of the negative rotation constants defined above.
  normalizedRect.set_rotation(rotationDegrees * M_PI / kMPPOrientationDegreesDown);

  // For 90° and 270° rotations, we need to swap width and height.
  // This is due to the internal behavior of ImageToTensorCalculator, which:
  // - first denormalizes the provided rect by multiplying the rect width or height by the image
  //   width or height, respectively.
  // - then rotates this denormalized rect by the provided rotation, and uses this for cropping,
  // - then finally rotates this back.
  if (rotationDegrees % 180 == 0) {
    normalizedRect.set_width(CGRectGetWidth(calculatedRoi));
    normalizedRect.set_height(CGRectGetHeight(calculatedRoi));
  } else {
    const float width = CGRectGetHeight(calculatedRoi) * imageSize.height / imageSize.width;
    const float height = CGRectGetWidth(calculatedRoi) * imageSize.width / imageSize.height;

    normalizedRect.set_width(width);
    normalizedRect.set_height(height);
  }

  return normalizedRect;
}

/**
 * Builds the input packet map (image packet plus normalized rect packet, without timestamps) for
 * image mode.
 *
 * @return The packet map, or `std::nullopt` if the normalized rect could not be computed or the
 * image packet is empty.
 */
- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
                                      regionOfInterest:(CGRect)roi
                                                 error:(NSError **)error {
  std::optional<NormalizedRect> rect =
      [self normalizedRectWithRegionOfInterest:roi
                                     imageSize:CGSizeMake(image.width, image.height)
                              imageOrientation:image.orientation
                                         error:error];
  if (!rect.has_value()) {
    return std::nullopt;
  }

  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
  if (imagePacket.IsEmpty()) {
    return std::nullopt;
  }

  Packet normalizedRectPacket =
      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];

  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
  return inputPacketMap;
}

/**
 * Builds the input packet map (image packet plus normalized rect packet, both created with the
 * given timestamp) for video and live stream modes.
 *
 * @return The packet map, or `std::nullopt` if the normalized rect could not be computed or the
 * image packet is empty.
 */
- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
                                      regionOfInterest:(CGRect)roi
                               timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                                 error:(NSError **)error {
  std::optional<NormalizedRect> rect =
      [self normalizedRectWithRegionOfInterest:roi
                                     imageSize:CGSizeMake(image.width, image.height)
                              imageOrientation:image.orientation
                                         error:error];
  if (!rect.has_value()) {
    return std::nullopt;
  }

  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
                                                timestampInMilliseconds:timestampInMilliseconds
                                                                  error:error];
  if (imagePacket.IsEmpty()) {
    return std::nullopt;
  }

  Packet normalizedRectPacket =
      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
                                     timestampInMilliseconds:timestampInMilliseconds];

  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
  return inputPacketMap;
}

/**
 * Processes a single image with the given region of interest. Fails with an invalid argument
 * error unless the task runner was created in image mode.
 */
- (std::optional<PacketMap>)processImage:(MPPImage *)image
                        regionOfInterest:(CGRect)regionOfInterest
                                   error:(NSError **)error {
  if (_runningMode != MPPRunningModeImage) {
    [MPPCommonUtils
        createCustomError:error
                 withCode:MPPTasksErrorCodeInvalidArgumentError
              description:[NSString stringWithFormat:@"The vision task is not initialized with "
                                                     @"image mode. Current Running Mode: %@",
                                                     MPPRunningModeDisplayName(_runningMode)]];
    return std::nullopt;
  }

  std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
                                                            regionOfInterest:regionOfInterest
                                                                       error:error];
  if (!inputPacketMap.has_value()) {
    return std::nullopt;
  }

  return [self processPacketMap:inputPacketMap.value() error:error];
}

/** Processes a single image without a region of interest (`CGRectZero` selects the whole image). */
- (std::optional<PacketMap>)processImage:(MPPImage *)image error:(NSError **)error {
  return [self processImage:image regionOfInterest:CGRectZero error:error];
}

/**
 * Processes a video frame with the given region of interest. Fails with an invalid argument
 * error unless the task runner was created in video mode.
 */
- (std::optional<PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                             regionOfInterest:(CGRect)regionOfInterest
                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                        error:(NSError **)error {
  if (_runningMode != MPPRunningModeVideo) {
    [MPPCommonUtils
        createCustomError:error
                 withCode:MPPTasksErrorCodeInvalidArgumentError
              description:[NSString stringWithFormat:@"The vision task is not initialized with "
                                                     @"video mode. Current Running Mode: %@",
                                                     MPPRunningModeDisplayName(_runningMode)]];
    return std::nullopt;
  }

  std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:videoFrame
                                                            regionOfInterest:regionOfInterest
                                                     timestampInMilliseconds:timestampInMilliseconds
                                                                       error:error];
  if (!inputPacketMap.has_value()) {
    return std::nullopt;
  }

  return [self processPacketMap:inputPacketMap.value() error:error];
}

/** Processes a video frame without a region of interest (`CGRectZero` selects the whole frame). */
- (std::optional<PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                        error:(NSError **)error {
  return [self processVideoFrame:videoFrame
                regionOfInterest:CGRectZero
         timestampInMilliseconds:timestampInMilliseconds
                           error:error];
}

/**
 * Sends a live stream image with the given region of interest to the task runner. Fails with an
 * invalid argument error unless the task runner was created in live stream mode.
 *
 * @return `YES` if the input packets were built and handed to `sendPacketMap:error:`
 * successfully, `NO` otherwise.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
              regionOfInterest:(CGRect)regionOfInterest
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error {
  if (_runningMode != MPPRunningModeLiveStream) {
    [MPPCommonUtils
        createCustomError:error
                 withCode:MPPTasksErrorCodeInvalidArgumentError
              description:[NSString stringWithFormat:@"The vision task is not initialized with "
                                                     @"live stream mode. Current Running Mode: %@",
                                                     MPPRunningModeDisplayName(_runningMode)]];
    return NO;
  }

  std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
                                                            regionOfInterest:regionOfInterest
                                                     timestampInMilliseconds:timestampInMilliseconds
                                                                       error:error];
  if (!inputPacketMap.has_value()) {
    return NO;
  }

  return [self sendPacketMap:inputPacketMap.value() error:error];
}

/** Sends a live stream image without a region of interest (`CGRectZero` selects the whole image). */
- (BOOL)processLiveStreamImage:(MPPImage *)image
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error {
  return [self processLiveStreamImage:image
                     regionOfInterest:CGRectZero
              timestampInMilliseconds:timestampInMilliseconds
                                error:error];
}

/**
 * Returns a queue name of the form `<kTaskPrefix>.<suffix>_<uuid>`.
 *
 * The returned C string is obtained from a temporary `NSString` via `UTF8String`, so callers
 * should use or copy it right away (for example by passing it straight to
 * `dispatch_queue_create`) rather than storing the pointer.
 */
+ (const char *)uniqueDispatchQueueNameWithSuffix:(NSString *)suffix {
  return [NSString stringWithFormat:@"%@.%@_%@", kTaskPrefix, suffix, [NSString uuidString]]
      .UTF8String;
}

@end