Merge pull request #4645 from priankakariatyml:ios-vision-task-runner-refactoring

PiperOrigin-RevId: 557894034
This commit is contained in:
Copybara-Service 2023-08-17 12:02:07 -07:00
commit 6866d338e0
3 changed files with 573 additions and 0 deletions

View File

@ -65,6 +65,30 @@ objc_library(
],
)
# Refactored vision task runner, built from MPPVisionTaskRunnerRefactored.{h,mm}.
# The sources are compiled as Objective-C++ with C++17 (see `copts`).
objc_library(
name = "MPPVisionTaskRunnerRefactored",
srcs = ["sources/MPPVisionTaskRunnerRefactored.mm"],
hdrs = ["sources/MPPVisionTaskRunnerRefactored.h"],
copts = [
"-ObjC++",
"-std=c++17",
],
deps = [
":MPPImage",
":MPPRunningMode",
":MPPVisionPacketCreator",
"//mediapipe/calculators/core:flow_limiter_calculator",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/tasks/ios/common:MPPCommon",
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
"//mediapipe/tasks/ios/common/utils:NSStringHelpers",
"//mediapipe/tasks/ios/core:MPPTaskInfo",
"//mediapipe/tasks/ios/core:MPPTaskRunner",
"//third_party/apple_frameworks:UIKit",
"@com_google_absl//absl/status:statusor",
],
)
objc_library(
name = "MPPMask",
srcs = ["sources/MPPMask.mm"],

View File

@ -0,0 +1,218 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import <UIKit/UIKit.h>
#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
#import "mediapipe/tasks/ios/core/sources/MPPTaskRunner.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
NS_ASSUME_NONNULL_BEGIN
/**
 * This class is used to create and call appropriate methods on the C++ Task Runner to initialize,
 * execute and terminate any MediaPipe vision task.
 */
@interface MPPVisionTaskRunner : MPPTaskRunner

/**
 * Initializes a new `MPPVisionTaskRunner` with the taskInfo, running mode, whether task supports
 * region of interest, packets callback, image and norm rect input stream names. Make sure that the
 * packets callback is set properly based on the vision task's running mode. In case of live stream
 * running mode, a C++ packets callback that is intended to deliver inference results must be
 * provided. In case of image or video running mode, packets callback must be set to nil.
 *
 * @param taskInfo A `MPPTaskInfo` initialized by the task.
 * @param runningMode MediaPipe vision task running mode.
 * @param roiAllowed A `BOOL` indicating if the task supports region of interest.
 * @param packetsCallback An optional C++ callback function that takes a list of output packets as
 * the input argument. If provided, the callback must in turn call the block provided by the user in
 * the appropriate task options. It must be provided in live stream running mode and must be nil in
 * image or video running mode; otherwise initialization fails with an error.
 * @param imageInputStreamName Name of the image input stream of the task.
 * @param normRectInputStreamName Name of the norm rect input stream of the task.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An instance of `MPPVisionTaskRunner` initialized with the given taskInfo, running mode,
 * whether task supports region of interest, packets callback, image and norm rect input stream
 * names, or `nil` if the arguments are inconsistent with the running mode.
 */
- (nullable instancetype)initWithTaskInfo:(MPPTaskInfo *)taskInfo
                              runningMode:(MPPRunningMode)runningMode
                               roiAllowed:(BOOL)roiAllowed
                          packetsCallback:(mediapipe::tasks::core::PacketsCallback)packetsCallback
                     imageInputStreamName:(NSString *)imageInputStreamName
                  normRectInputStreamName:(NSString *)normRectInputStreamName
                                    error:(NSError **)error NS_DESIGNATED_INITIALIZER;

/**
 * A synchronous method to invoke the C++ task runner to process single image inputs. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must not be factored in for inference.
 *
 * @param image An `MPPImage` input to the task.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet, or
 * `std::nullopt` on failure.
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processImage:(MPPImage *)image
                                                           error:(NSError **)error;

/**
 * A synchronous method to invoke the C++ task runner to process single image inputs. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must be factored in for inference.
 * When a task that does not support region of interest calls this method with any region of
 * interest other than `CGRectZero`, an error is returned.
 *
 * @param image An `MPPImage` input to the task.
 * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data
 * of type `MPPImage`, on which inference should be performed.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet, or
 * `std::nullopt` on failure.
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processImage:(MPPImage *)image
                                                regionOfInterest:(CGRect)regionOfInterest
                                                           error:(NSError **)error;

/**
 * A synchronous method to invoke the C++ task runner to process continuous video frames. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must not be factored in for inference.
 *
 * @param videoFrame An `MPPImage` input to the task.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet, or
 * `std::nullopt` on failure.
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                                              timestampInMilliseconds:
                                                  (NSInteger)timestampInMilliseconds
                                                                error:(NSError **)error;

/**
 * A synchronous method to invoke the C++ task runner to process continuous video frames. The call
 * blocks the current thread until a failure status or a successful result is returned.
 *
 * This method must be used by tasks when region of interest must be factored in for inference.
 * When a task that does not support region of interest calls this method with any region of
 * interest other than `CGRectZero`, an error is returned.
 *
 * @param videoFrame An `MPPImage` input to the task.
 * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data
 * of type `MPPImage`, on which inference should be performed.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An optional `PacketMap` containing pairs of output stream name and data packet, or
 * `std::nullopt` on failure.
 */
- (std::optional<mediapipe::tasks::core::PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                                                     regionOfInterest:(CGRect)regionOfInterest
                                              timestampInMilliseconds:
                                                  (NSInteger)timestampInMilliseconds
                                                                error:(NSError **)error;

/**
 * An asynchronous method to send live stream data to the C++ task runner. The return value only
 * reports whether the data was sent successfully; inference results are not returned by this
 * method. They will be available in the user-defined `packetsCallback` that was provided during
 * initialization of the `MPPVisionTaskRunner`.
 *
 * This method must be used by tasks when region of interest must not be factored in for inference.
 *
 * @param image An `MPPImage` input to the task.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return A `BOOL` indicating if the live stream data was sent to the C++ task runner successfully.
 * Please note that any errors during processing of the live stream packet map will only be
 * available in the user-defined `packetsCallback` that was provided during initialization of the
 * `MPPVisionTaskRunner`.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error;

/**
 * An asynchronous method to send live stream data to the C++ task runner. The return value only
 * reports whether the data was sent successfully; inference results are not returned by this
 * method. They will be available in the user-defined `packetsCallback` that was provided during
 * initialization of the `MPPVisionTaskRunner`.
 *
 * This method must be used by tasks when region of interest must be factored in for inference.
 * When a task that does not support region of interest calls this method with any region of
 * interest other than `CGRectZero`, an error is returned.
 *
 * @param image An `MPPImage` input to the task.
 * @param regionOfInterest A `CGRect` specifying the region of interest within the given image data
 * of type `MPPImage`, on which inference should be performed.
 * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input
 * timestamps must be monotonically increasing.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return A `BOOL` indicating if the live stream data was sent to the C++ task runner successfully.
 * Please note that any errors during processing of the live stream packet map will only be
 * available in the user-defined `packetsCallback` that was provided during initialization of the
 * `MPPVisionTaskRunner`.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
              regionOfInterest:(CGRect)regionOfInterest
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error;

/**
 * This method returns a unique dispatch queue name by adding the given suffix and a `UUID` to the
 * pre-defined queue name prefix for vision tasks. The vision tasks can use this method to get
 * unique dispatch queue names which are consistent with other vision tasks.
 * Dispatch queue names need not be unique, but for easy debugging we ensure that the queue names
 * are unique.
 *
 * @param suffix A suffix that identifies a dispatch queue's functionality.
 *
 * @return A unique dispatch queue name by adding the given suffix and a `UUID` to the pre-defined
 * queue name prefix for vision tasks.
 */
+ (const char *)uniqueDispatchQueueNameWithSuffix:(NSString *)suffix;

/**
 * Unavailable. Use
 * `initWithTaskInfo:runningMode:roiAllowed:packetsCallback:imageInputStreamName:normRectInputStreamName:error:`
 * instead, which validates the packets callback against the running mode.
 */
- (instancetype)initWithCalculatorGraphConfig:(mediapipe::CalculatorGraphConfig)graphConfig
                              packetsCallback:
                                  (mediapipe::tasks::core::PacketsCallback)packetsCallback
                                        error:(NSError **)error NS_UNAVAILABLE;

- (instancetype)init NS_UNAVAILABLE;

+ (instancetype)new NS_UNAVAILABLE;

@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,331 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunnerRefactored.h"
#import "mediapipe/tasks/ios/common/sources/MPPCommon.h"
#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include <optional>
// Short names for the MediaPipe C++ types referenced by this implementation file.
namespace {
using ::mediapipe::NormalizedRect;
using ::mediapipe::Packet;
using ::mediapipe::tasks::core::PacketMap;
using ::mediapipe::tasks::core::PacketsCallback;
} // namespace
/** Rotation degrees for a 90 degree rotation to the right. */
static const NSInteger kMPPOrientationDegreesRight = -270;
/**
 * Rotation degrees for a 180 degree rotation. Also used as the divisor when the rotation in
 * degrees is converted to radians for the normalized rect.
 */
static const NSInteger kMPPOrientationDegreesDown = -180;
/** Rotation degrees for a 90 degree rotation to the left. */
static const NSInteger kMPPOrientationDegreesLeft = -90;
/** Prefix shared by the dispatch queue names built by `+uniqueDispatchQueueNameWithSuffix:`. */
static NSString *const kTaskPrefix = @"com.mediapipe.tasks.vision";
/**
 * Expands to a brace-enclosed initializer that pairs the image and norm rect input stream names
 * with the given packets. The stream names are read directly from the `_imageInStreamName` and
 * `_normRectInStreamName` ivars, so the macro can only be used inside instance methods of
 * `MPPVisionTaskRunner`.
 */
#define InputPacketMap(imagePacket, normalizedRectPacket) \
{ \
{_imageInStreamName, imagePacket}, { _normRectInStreamName, normalizedRectPacket } \
}
/** Private state of the vision task runner, populated by the designated initializer. */
@interface MPPVisionTaskRunner () {
// Running mode the runner was created with; each process method checks it before running.
MPPRunningMode _runningMode;
// Whether a region of interest other than `CGRectZero` is accepted.
BOOL _roiAllowed;
// Name of the graph input stream that receives the image packet.
std::string _imageInStreamName;
// Name of the graph input stream that receives the normalized rect packet.
std::string _normRectInStreamName;
}
@end
@implementation MPPVisionTaskRunner
/**
 * Designated initializer. Checks that `packetsCallback` is consistent with `runningMode` (it must
 * be absent in image/video mode and present in live stream mode), then initializes the superclass
 * with the graph config generated from `taskInfo`.
 *
 * @param taskInfo Task info used to generate the calculator graph config.
 * @param runningMode Running mode of the vision task.
 * @param roiAllowed Whether a region of interest other than `CGRectZero` is accepted.
 * @param packetsCallback C++ callback for live stream results; must be empty in image/video mode.
 * @param imageInputStreamName Name of the image input stream.
 * @param normRectInputStreamName Name of the norm rect input stream.
 * @param error Populated with an invalid-argument error when validation fails, or with the error
 * reported by the superclass initializer. May be `NULL`.
 *
 * @return The initialized runner, or `nil` if validation or superclass initialization fails.
 */
- (nullable instancetype)initWithTaskInfo:(MPPTaskInfo *)taskInfo
                              runningMode:(MPPRunningMode)runningMode
                               roiAllowed:(BOOL)roiAllowed
                          packetsCallback:(PacketsCallback)packetsCallback
                     imageInputStreamName:(NSString *)imageInputStreamName
                  normRectInputStreamName:(NSString *)normRectInputStreamName
                                    error:(NSError **)error {
  // Validate the arguments first so that no instance state is touched on the failure paths.
  switch (runningMode) {
    case MPPRunningModeImage:
    case MPPRunningModeVideo: {
      if (packetsCallback) {
        [MPPCommonUtils createCustomError:error
                                 withCode:MPPTasksErrorCodeInvalidArgumentError
                              description:@"The vision task is in image or video mode. The "
                                          @"delegate must not be set in the task's options."];
        return nil;
      }
      break;
    }
    case MPPRunningModeLiveStream: {
      if (!packetsCallback) {
        [MPPCommonUtils
            createCustomError:error
                     withCode:MPPTasksErrorCodeInvalidArgumentError
                  description:
                      @"The vision task is in live stream mode. An object must be set as the "
                      @"delegate of the task in its options to ensure asynchronous delivery of "
                      @"results."];
        return nil;
      }
      break;
    }
    default: {
      [MPPCommonUtils createCustomError:error
                               withCode:MPPTasksErrorCodeInvalidArgumentError
                            description:@"Unrecognized running mode"];
      return nil;
    }
  }

  self = [super initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
                              packetsCallback:packetsCallback
                                        error:error];
  // Only write ivars once the superclass has returned a valid instance; `self` may be nil here.
  if (self) {
    _runningMode = runningMode;
    _roiAllowed = roiAllowed;
    _imageInStreamName = imageInputStreamName.cppString;
    _normRectInStreamName = normRectInputStreamName.cppString;
  }
  return self;
}
/**
 * Converts a region of interest and an image orientation into the `NormalizedRect` that is sent
 * to the graph alongside the image.
 *
 * @param roi Region of interest. `CGRectZero` means "no region of interest" and is mapped to the
 * full image, i.e. the rect (0, 0, 1, 1).
 * @param imageSize Size of the image; used to rescale width and height for 90°/270° rotations.
 * @param imageOrientation Orientation of the image. Only the non-mirrored orientations are
 * supported.
 * @param error Populated with an invalid-argument error on failure. May be `NULL`.
 *
 * @return The normalized rect, or `std::nullopt` if a region of interest was given to a task that
 * does not allow one, or if `imageOrientation` is one of the mirrored orientations.
 */
- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
                                                          imageSize:(CGSize)imageSize
                                                   imageOrientation:
                                                       (UIImageOrientation)imageOrientation
                                                              error:(NSError **)error {
  const BOOL isRoiEmpty = CGRectEqualToRect(roi, CGRectZero);
  if (!isRoiEmpty && !_roiAllowed) {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:@"This task doesn't support region-of-interest."];
    return std::nullopt;
  }

  NSInteger rotationDegrees = 0;
  switch (imageOrientation) {
    case UIImageOrientationUp:
      break;
    case UIImageOrientationRight: {
      rotationDegrees = kMPPOrientationDegreesRight;
      break;
    }
    case UIImageOrientationDown: {
      rotationDegrees = kMPPOrientationDegreesDown;
      break;
    }
    case UIImageOrientationLeft: {
      rotationDegrees = kMPPOrientationDegreesLeft;
      break;
    }
    default: {
      [MPPCommonUtils
          createCustomError:error
                   withCode:MPPTasksErrorCodeInvalidArgumentError
                description:
                    @"Unsupported UIImageOrientation. `imageOrientation` cannot be equal to "
                    @"any of the mirrored orientations "
                    @"(`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,`"
                    @"UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`)"];
      // Report the failure to the caller. Without this return the method would fall through and
      // hand back an unrotated rect even though `error` has been populated.
      return std::nullopt;
    }
  }

  const CGRect calculatedRoi = isRoiEmpty ? CGRectMake(0.0, 0.0, 1.0, 1.0) : roi;

  NormalizedRect normalizedRect;
  normalizedRect.set_x_center(CGRectGetMidX(calculatedRoi));
  normalizedRect.set_y_center(CGRectGetMidY(calculatedRoi));
  // Degrees to radians; both the rotation constants and the divisor are negative.
  normalizedRect.set_rotation(rotationDegrees * M_PI / kMPPOrientationDegreesDown);

  // For 90° and 270° rotations, we need to swap width and height.
  // This is due to the internal behavior of ImageToTensorCalculator, which:
  // - first denormalizes the provided rect by multiplying the rect width or height by the image
  //   width or height, respectively.
  // - then rotates this denormalized rect by the provided rotation, and uses this for cropping,
  // - then finally rotates this back.
  if (rotationDegrees % 180 == 0) {
    normalizedRect.set_width(CGRectGetWidth(calculatedRoi));
    normalizedRect.set_height(CGRectGetHeight(calculatedRoi));
  } else {
    const float width = CGRectGetHeight(calculatedRoi) * imageSize.height / imageSize.width;
    const float height = CGRectGetWidth(calculatedRoi) * imageSize.width / imageSize.height;
    normalizedRect.set_width(width);
    normalizedRect.set_height(height);
  }

  return normalizedRect;
}
/**
 * Builds the input packet map (image packet plus normalized rect packet, keyed by the input stream
 * names) for an image that carries no timestamp.
 *
 * @param image The input image.
 * @param roi Region of interest; `CGRectZero` when none is requested.
 * @param error Populated by the helpers that fail. May be `NULL`.
 *
 * @return The packet map, or `std::nullopt` if the normalized rect could not be computed or the
 * image packet is empty.
 */
- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
                                      regionOfInterest:(CGRect)roi
                                                 error:(NSError **)error {
  const CGSize sizeOfImage = CGSizeMake(image.width, image.height);
  std::optional<NormalizedRect> normalizedRect =
      [self normalizedRectWithRegionOfInterest:roi
                                     imageSize:sizeOfImage
                              imageOrientation:image.orientation
                                         error:error];
  if (!normalizedRect.has_value()) {
    return std::nullopt;
  }

  Packet packetForImage = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
  if (packetForImage.IsEmpty()) {
    return std::nullopt;
  }

  Packet packetForRect =
      [MPPVisionPacketCreator createPacketWithNormalizedRect:normalizedRect.value()];

  PacketMap packets = InputPacketMap(packetForImage, packetForRect);
  return packets;
}
/**
 * Builds the input packet map (image packet plus normalized rect packet, keyed by the input stream
 * names) for a timestamped frame. Both packets are created with the same timestamp.
 *
 * @param image The input frame.
 * @param roi Region of interest; `CGRectZero` when none is requested.
 * @param timestampInMilliseconds Timestamp passed to both packet creators.
 * @param error Populated by the helpers that fail. May be `NULL`.
 *
 * @return The packet map, or `std::nullopt` if the normalized rect could not be computed or the
 * image packet is empty.
 */
- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
                                      regionOfInterest:(CGRect)roi
                               timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                                 error:(NSError **)error {
  const CGSize sizeOfImage = CGSizeMake(image.width, image.height);
  std::optional<NormalizedRect> normalizedRect =
      [self normalizedRectWithRegionOfInterest:roi
                                     imageSize:sizeOfImage
                              imageOrientation:image.orientation
                                         error:error];
  if (!normalizedRect.has_value()) {
    return std::nullopt;
  }

  Packet packetForImage = [MPPVisionPacketCreator createPacketWithMPPImage:image
                                                   timestampInMilliseconds:timestampInMilliseconds
                                                                     error:error];
  if (packetForImage.IsEmpty()) {
    return std::nullopt;
  }

  Packet packetForRect =
      [MPPVisionPacketCreator createPacketWithNormalizedRect:normalizedRect.value()
                                     timestampInMilliseconds:timestampInMilliseconds];

  PacketMap packets = InputPacketMap(packetForImage, packetForRect);
  return packets;
}
/**
 * Runs the task on a single image within the given region of interest.
 * Fails with an invalid-argument error unless the runner was created in image mode.
 *
 * @param image The input image.
 * @param regionOfInterest Region of interest; `CGRectZero` when none is requested.
 * @param error Populated on failure. May be `NULL`.
 *
 * @return The result of `processPacketMap:error:`, or `std::nullopt` if the running mode is wrong
 * or the input packets could not be built.
 */
- (std::optional<PacketMap>)processImage:(MPPImage *)image
                        regionOfInterest:(CGRect)regionOfInterest
                                   error:(NSError **)error {
  if (_runningMode != MPPRunningModeImage) {
    NSString *message =
        [NSString stringWithFormat:@"The vision task is not initialized with "
                                   @"image mode. Current Running Mode: %@",
                                   MPPRunningModeDisplayName(_runningMode)];
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:message];
    return std::nullopt;
  }

  std::optional<PacketMap> packets = [self inputPacketMapWithMPPImage:image
                                                     regionOfInterest:regionOfInterest
                                                                error:error];
  if (packets.has_value()) {
    return [self processPacketMap:packets.value() error:error];
  }
  return std::nullopt;
}
/**
 * Convenience overload of `processImage:regionOfInterest:error:` that passes `CGRectZero`, i.e.
 * no region of interest.
 */
- (std::optional<PacketMap>)processImage:(MPPImage *)image error:(NSError **)error {
  const CGRect noRegionOfInterest = CGRectZero;
  return [self processImage:image regionOfInterest:noRegionOfInterest error:error];
}
/**
 * Runs the task on a timestamped video frame within the given region of interest.
 * Fails with an invalid-argument error unless the runner was created in video mode.
 *
 * @param videoFrame The input frame.
 * @param regionOfInterest Region of interest; `CGRectZero` when none is requested.
 * @param timestampInMilliseconds Timestamp attached to the input packets.
 * @param error Populated on failure. May be `NULL`.
 *
 * @return The result of `processPacketMap:error:`, or `std::nullopt` if the running mode is wrong
 * or the input packets could not be built.
 */
- (std::optional<PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                             regionOfInterest:(CGRect)regionOfInterest
                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                        error:(NSError **)error {
  if (_runningMode != MPPRunningModeVideo) {
    NSString *message =
        [NSString stringWithFormat:@"The vision task is not initialized with "
                                   @"video mode. Current Running Mode: %@",
                                   MPPRunningModeDisplayName(_runningMode)];
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:message];
    return std::nullopt;
  }

  std::optional<PacketMap> packets = [self inputPacketMapWithMPPImage:videoFrame
                                                     regionOfInterest:regionOfInterest
                                              timestampInMilliseconds:timestampInMilliseconds
                                                                error:error];
  if (packets.has_value()) {
    return [self processPacketMap:packets.value() error:error];
  }
  return std::nullopt;
}
/**
 * Convenience overload of `processVideoFrame:regionOfInterest:timestampInMilliseconds:error:` that
 * passes `CGRectZero`, i.e. no region of interest.
 */
- (std::optional<PacketMap>)processVideoFrame:(MPPImage *)videoFrame
                      timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                                        error:(NSError **)error {
  const CGRect noRegionOfInterest = CGRectZero;
  return [self processVideoFrame:videoFrame
                regionOfInterest:noRegionOfInterest
         timestampInMilliseconds:timestampInMilliseconds
                           error:error];
}
/**
 * Sends a timestamped live stream frame, within the given region of interest, to the task runner.
 * Fails with an invalid-argument error unless the runner was created in live stream mode.
 *
 * @param image The input frame.
 * @param regionOfInterest Region of interest; `CGRectZero` when none is requested.
 * @param timestampInMilliseconds Timestamp attached to the input packets.
 * @param error Populated on failure. May be `NULL`.
 *
 * @return The result of `sendPacketMap:error:`, or `NO` if the running mode is wrong or the input
 * packets could not be built.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
              regionOfInterest:(CGRect)regionOfInterest
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error {
  if (_runningMode != MPPRunningModeLiveStream) {
    NSString *message =
        [NSString stringWithFormat:@"The vision task is not initialized with "
                                   @"live stream mode. Current Running Mode: %@",
                                   MPPRunningModeDisplayName(_runningMode)];
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:message];
    return NO;
  }

  std::optional<PacketMap> packets = [self inputPacketMapWithMPPImage:image
                                                     regionOfInterest:regionOfInterest
                                              timestampInMilliseconds:timestampInMilliseconds
                                                                error:error];
  if (packets.has_value()) {
    return [self sendPacketMap:packets.value() error:error];
  }
  return NO;
}
/**
 * Convenience overload of `processLiveStreamImage:regionOfInterest:timestampInMilliseconds:error:`
 * that passes `CGRectZero`, i.e. no region of interest.
 */
- (BOOL)processLiveStreamImage:(MPPImage *)image
       timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                         error:(NSError **)error {
  const CGRect noRegionOfInterest = CGRectZero;
  return [self processLiveStreamImage:image
                     regionOfInterest:noRegionOfInterest
              timestampInMilliseconds:timestampInMilliseconds
                                error:error];
}
/**
 * Builds a queue name of the form `<kTaskPrefix>.<suffix>_<uuid>` and returns it as a C string.
 *
 * NOTE(review): the returned pointer is the `UTF8String` of a string that is not stored anywhere,
 * so callers should consume it promptly (e.g. pass it straight to `dispatch_queue_create`) rather
 * than hold on to it — confirm the lifetime expectations of existing callers.
 *
 * @param suffix A suffix that identifies the dispatch queue's functionality.
 *
 * @return The queue name as a UTF-8 encoded C string.
 */
+ (const char *)uniqueDispatchQueueNameWithSuffix:(NSString *)suffix {
  NSString *uuid = [NSString uuidString];
  NSString *queueName = [NSString stringWithFormat:@"%@.%@_%@", kTaskPrefix, suffix, uuid];
  return queueName.UTF8String;
}
@end