diff --git a/mediapipe/tasks/ios/vision/object_detector/BUILD b/mediapipe/tasks/ios/vision/object_detector/BUILD
index f1325b050..aa4cf8aeb 100644
--- a/mediapipe/tasks/ios/vision/object_detector/BUILD
+++ b/mediapipe/tasks/ios/vision/object_detector/BUILD
@@ -36,3 +36,27 @@ objc_library(
         "//mediapipe/tasks/ios/vision/core:MPPRunningMode",
     ],
 )
+
+objc_library(
+    name = "MPPObjectDetector",
+    srcs = ["sources/MPPObjectDetector.mm"],
+    hdrs = ["sources/MPPObjectDetector.h"],
+    copts = [
+        "-ObjC++",
+        "-std=c++17",
+        "-x objective-c++",
+    ],
+    deps = [
+        ":MPPObjectDetectorOptions",
+        ":MPPObjectDetectionResult",
+        "//mediapipe/tasks/cc/vision/object_detector:object_detector_graph",
+        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
+        "//mediapipe/tasks/ios/common/utils:NSStringHelpers",
+        "//mediapipe/tasks/ios/core:MPPTaskInfo",
+        "//mediapipe/tasks/ios/vision/core:MPPImage",
+        "//mediapipe/tasks/ios/vision/core:MPPVisionPacketCreator",
+        "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner",
+        "//mediapipe/tasks/ios/vision/object_detector/utils:MPPObjectDetectorOptionsHelpers",
+        "//mediapipe/tasks/ios/vision/object_detector/utils:MPPObjectDetectionResultHelpers",
+    ],
+)
diff --git a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.h b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.h
new file mode 100644
index 000000000..22ad136cd
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.h
@@ -0,0 +1,242 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+
+#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
+#import "mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetectionResult.h"
+#import "mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetectorOptions.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/**
+ * @brief Class that performs object detection on images.
+ *
+ * The API expects a TFLite model with mandatory TFLite Model Metadata.
+ *
+ * The API supports models with one image input tensor and one or more output tensors. To be more
+ * specific, here are the requirements.
+ *
+ * Input tensor
+ *  (kTfLiteUInt8/kTfLiteFloat32)
+ *  - image input of size `[batch x height x width x channels]`.
+ *  - batch inference is not supported (`batch` is required to be 1).
+ *  - only RGB inputs are supported (`channels` is required to be 3).
+ *  - if type is kTfLiteFloat32, NormalizationOptions are required to be attached to the metadata
+ *    for input normalization.
+ *
+ * Output tensors must be the 4 outputs of a `DetectionPostProcess` op, i.e.:
+ *  (kTfLiteFloat32)
+ *  - locations tensor of size `[num_results x 4]`, the inner array representing bounding boxes
+ *    in the form [top, left, right, bottom].
+ *  - BoundingBoxProperties are required to be attached to the metadata and must specify
+ *    type=BOUNDARIES and coordinate_type=RATIO.
+ *  (kTfLiteFloat32)
+ *  - classes tensor of size `[num_results]`, each value representing the integer index of a
+ *    class.
+ *  - optional (but recommended) label map(s) can be attached as AssociatedFile-s with type
+ *    TENSOR_VALUE_LABELS, containing one label per line. The first such AssociatedFile (if any) is
+ *    used to fill the `class_name` field of the results. The `display_name` field is filled from
+ *    the AssociatedFile (if any) whose locale matches the `display_names_locale` field of the
+ *    `ObjectDetectorOptions` used at creation time ("en" by default, i.e. English). If none of
+ *    these are available, only the `index` field of the results will be filled.
+ *  (kTfLiteFloat32)
+ *  - scores tensor of size `[num_results]`, each value representing the score of the detected
+ *    object.
+ *  - optional score calibration can be attached using ScoreCalibrationOptions and an
+ *    AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See metadata_schema.fbs [1] for more
+ *    details.
+ *  (kTfLiteFloat32)
+ *  - integer num_results as a tensor of size `[1]`
+ */
+NS_SWIFT_NAME(ObjectDetector)
+@interface MPPObjectDetector : NSObject
+
+/**
+ * Creates a new instance of `MPPObjectDetector` from an absolute path to a TensorFlow Lite model
+ * file stored locally on the device and the default `MPPObjectDetectorOptions`.
+ *
+ * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ * @param error An optional error parameter populated when there is an error in initializing the
+ * object detector.
+ *
+ * @return A new instance of `MPPObjectDetector` with the given model path. `nil` if there is an
+ * error in initializing the object detector.
+ */
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error;
+
+/**
+ * Creates a new instance of `MPPObjectDetector` from the given `MPPObjectDetectorOptions`.
+ *
+ * @param options The options of type `MPPObjectDetectorOptions` to use for configuring the
+ * `MPPObjectDetector`.
+ * @param error An optional error parameter populated when there is an error in initializing the
+ * object detector.
+ *
+ * @return A new instance of `MPPObjectDetector` with the given options. `nil` if there is an error
+ * in initializing the object detector.
+ */
+- (nullable instancetype)initWithOptions:(MPPObjectDetectorOptions *)options
+                                   error:(NSError **)error NS_DESIGNATED_INITIALIZER;
+
+/**
+ * Performs object detection on the provided `MPPImage` using the whole image as region of
+ * interest. Rotation will be applied according to the `orientation` property of the provided
+ * `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
+ * `MPPRunningModeImage`.
+ *
+ * @param image The `MPPImage` on which object detection is to be performed.
+ * @param error An optional error parameter populated when there is an error in performing object
+ * detection on the input image.
+ *
+ * @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
+ * has a bounding box that is expressed in the unrotated input frame of reference coordinates
+ * system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
+ * image data.
+ */
+- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image
+                                               error:(NSError **)error
+    NS_SWIFT_NAME(detect(image:));
+
+/**
+ * Performs object detection on the provided `MPPImage` cropped to the specified region of
+ * interest.
+ * Rotation will be applied on the cropped image according to the `orientation` property of the
+ * provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
+ * `MPPRunningModeImage`.
+ *
+ * @param image The `MPPImage` on which object detection is to be performed.
+ * @param roi A `CGRect` specifying the region of interest within the given `MPPImage`, on which
+ * object detection should be performed.
+ * @param error An optional error parameter populated when there is an error in performing object
+ * detection on the input image.
+ *
+ * @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
+ * has a bounding box that is expressed in the unrotated input frame of reference coordinates
+ * system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
+ * image data.
+ */
+- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image
+                                    regionOfInterest:(CGRect)roi
+                                               error:(NSError **)error
+    NS_SWIFT_NAME(detect(image:regionOfInterest:));
+
+/**
+ * Performs object detection on the provided video frame of type `MPPImage` using the whole
+ * image as region of interest. Rotation will be applied according to the `orientation` property of
+ * the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
+ * `MPPRunningModeVideo`.
+ *
+ * @param image The `MPPImage` on which object detection is to be performed.
+ * @param timestampMs The video frame's timestamp (in milliseconds). The input timestamps must be
+ * monotonically increasing.
+ * @param error An optional error parameter populated when there is an error in performing object
+ * detection on the input image.
+ *
+ * @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
+ * has a bounding box that is expressed in the unrotated input frame of reference coordinates
+ * system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
+ * image data.
+ */
+- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
+                                              timestampMs:(NSInteger)timestampMs
+                                                    error:(NSError **)error
+    NS_SWIFT_NAME(detect(videoFrame:timestampMs:));
+
+/**
+ * Performs object detection on the provided video frame of type `MPPImage` cropped to the
+ * specified region of interest. Rotation will be applied according to the `orientation` property of
+ * the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
+ * `MPPRunningModeVideo`.
+ *
+ * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must
+ * be monotonically increasing.
+ *
+ * @param image The video frame of type `MPPImage` on which object detection is to be performed.
+ * @param timestampMs The video frame's timestamp (in milliseconds). The input timestamps must be
+ * monotonically increasing.
+ * @param roi A `CGRect` specifying the region of interest within the given `MPPImage`, on which
+ * object detection should be performed.
+ * @param error An optional error parameter populated when there is an error in performing object
+ * detection on the input image.
+ *
+ * @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
+ * has a bounding box that is expressed in the unrotated input frame of reference coordinates
+ * system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
+ * image data.
+ */
+- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
+                                              timestampMs:(NSInteger)timestampMs
+                                         regionOfInterest:(CGRect)roi
+                                                    error:(NSError **)error
+    NS_SWIFT_NAME(detect(videoFrame:timestampMs:regionOfInterest:));
+
+/**
+ * Sends live stream image data of type `MPPImage` to perform object detection using the whole
+ * image as region of interest. Rotation will be applied according to the `orientation` property of
+ * the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
+ * `MPPRunningModeLiveStream`. Results are provided asynchronously via the `completion` callback
+ * provided in the `MPPObjectDetectorOptions`.
+ *
+ * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
+ * to the object detector. The input timestamps must be monotonically increasing.
+ *
+ * @param image A live stream image data of type `MPPImage` on which object detection is to be
+ * performed.
+ * @param timestampMs The timestamp (in milliseconds) which indicates when the input image is sent
+ * to the object detector. The input timestamps must be monotonically increasing.
+ * @param error An optional error parameter populated when there is an error in performing object
+ * detection on the input live stream image data.
+ *
+ * @return `YES` if the image was sent to the task successfully, otherwise `NO`.
+ */
+- (BOOL)detectAsyncInImage:(MPPImage *)image
+               timestampMs:(NSInteger)timestampMs
+                     error:(NSError **)error NS_SWIFT_NAME(detectAsync(image:timestampMs:));
+
+/**
+ * Sends live stream image data of type `MPPImage` to perform object detection, cropped to the
+ * specified region of interest. Rotation will be applied according to the `orientation` property
+ * of the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
+ * `MPPRunningModeLiveStream`. Results are provided asynchronously via the `completion` callback
+ * provided in the `MPPObjectDetectorOptions`.
+ *
+ * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
+ * to the object detector. The input timestamps must be monotonically increasing.
+ *
+ * @param image A live stream image data of type `MPPImage` on which object detection is to be
+ * performed.
+ * @param timestampMs The timestamp (in milliseconds) which indicates when the input image is sent
+ * to the object detector. The input timestamps must be monotonically increasing.
+ * @param roi A `CGRect` specifying the region of interest within the given live stream image data
+ * of type `MPPImage`, on which object detection should be performed.
+ * @param error An optional error parameter populated when there is an error in performing object
+ * detection on the input live stream image data.
+ *
+ * @return `YES` if the image was sent to the task successfully, otherwise `NO`.
+ */
+- (BOOL)detectAsyncInImage:(MPPImage *)image
+               timestampMs:(NSInteger)timestampMs
+          regionOfInterest:(CGRect)roi
+                     error:(NSError **)error
+    NS_SWIFT_NAME(detectAsync(image:timestampMs:regionOfInterest:));
+
+- (instancetype)init NS_UNAVAILABLE;
+
++ (instancetype)new NS_UNAVAILABLE;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
new file mode 100644
index 000000000..64ba96414
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.mm
@@ -0,0 +1,247 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.h"
+
+#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
+#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
+#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
+#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
+#import "mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectionResult+Helpers.h"
+#import "mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorOptions+Helpers.h"
+
+namespace {
+using ::mediapipe::NormalizedRect;
+using ::mediapipe::Packet;
+using ::mediapipe::Timestamp;
+using ::mediapipe::tasks::core::PacketMap;
+using ::mediapipe::tasks::core::PacketsCallback;
+}  // namespace
+
+static NSString *const kDetectionsStreamName = @"detections_out";
+static NSString *const kDetectionsTag = @"DETECTIONS";
+static NSString *const kImageInStreamName = @"image_in";
+static NSString *const kImageOutStreamName = @"image_out";
+static NSString *const kImageTag = @"IMAGE";
+static NSString *const kNormRectStreamName = @"norm_rect_in";
+static NSString *const kNormRectTag = @"NORM_RECT";
+
+static NSString *const kTaskGraphName = @"mediapipe.tasks.vision.ObjectDetectorGraph";
+
+// MediaPipe timestamps are in microseconds; used below to convert them to milliseconds.
+static const int kMicroSecondsPerMilliSecond = 1000;
+
+#define InputPacketMap(imagePacket, normalizedRectPacket) \
+  {                                                       \
+    {kImageInStreamName.cppString, imagePacket}, {        \
+      kNormRectStreamName.cppString, normalizedRectPacket \
+    }                                                     \
+  }
+
+@interface MPPObjectDetector () {
+  /** iOS Vision Task Runner */
+  MPPVisionTaskRunner *_visionTaskRunner;
+}
+@end
+
+@implementation MPPObjectDetector
+
+- (instancetype)initWithOptions:(MPPObjectDetectorOptions *)options error:(NSError **)error {
+  self = [super init];
+  if (self) {
+    MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc]
+        initWithTaskGraphName:kTaskGraphName
+                 inputStreams:@[
+                   [NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName],
+                   [NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectStreamName]
+                 ]
+                outputStreams:@[
+                  [NSString stringWithFormat:@"%@:%@", kDetectionsTag, kDetectionsStreamName],
+                  [NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName]
+                ]
+                  taskOptions:options
+           enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream
+                        error:error];
+
+    if (!taskInfo) {
+      return nil;
+    }
+
+    PacketsCallback packetsCallback = nullptr;
+
+    if (options.completion) {
+      packetsCallback = [=](absl::StatusOr<PacketMap> status_or_packets) {
+        NSError *callbackError = nil;
+        if (![MPPCommonUtils checkCppError:status_or_packets.status() toError:&callbackError]) {
+          options.completion(nil, Timestamp::Unset().Value(), callbackError);
+          return;
+        }
+
+        PacketMap &outputPacketMap = status_or_packets.value();
+        if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) {
+          return;
+        }
+
+        MPPObjectDetectionResult *result = [MPPObjectDetectionResult
+            objectDetectionResultWithDetectionsPacket:status_or_packets.value()
+                                                          [kDetectionsStreamName.cppString]];
+
+        // The graph timestamp is in microseconds; convert to milliseconds for the callback.
+        options.completion(result,
+                           outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() /
+                               kMicroSecondsPerMilliSecond,
+                           callbackError);
+      };
+    }
+
+    _visionTaskRunner =
+        [[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
+                                                       runningMode:options.runningMode
+                                                   packetsCallback:std::move(packetsCallback)
+                                                             error:error];
+
+    if (!_visionTaskRunner) {
+      return nil;
+    }
+  }
+  return self;
+}
+
+- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
+  MPPObjectDetectorOptions *options = [[MPPObjectDetectorOptions alloc] init];
+
+  options.baseOptions.modelAssetPath = modelPath;
+
+  return [self initWithOptions:options error:error];
+}
+
+- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image
+                                    regionOfInterest:(CGRect)roi
+                                               error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
+                                           imageOrientation:image.orientation
+                                                 ROIAllowed:YES
+                                                      error:error];
+  if (!rect.has_value()) {
+    return nil;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
+  if (imagePacket.IsEmpty()) {
+    return nil;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processImagePacketMap:inputPacketMap error:error];
+  if (!outputPacketMap.has_value()) {
+    return nil;
+  }
+
+  return [MPPObjectDetectionResult
+      objectDetectionResultWithDetectionsPacket:outputPacketMap
+                                                    .value()[kDetectionsStreamName.cppString]];
+}
+
+- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
+                                           timestampMs:(NSInteger)timestampMs
+                                      regionOfInterest:(CGRect)roi
+                                                 error:(NSError **)error {
+  std::optional<NormalizedRect> rect =
+      [_visionTaskRunner normalizedRectFromRegionOfInterest:roi
+                                           imageOrientation:image.orientation
+                                                 ROIAllowed:YES
+                                                      error:error];
+  if (!rect.has_value()) {
+    return std::nullopt;
+  }
+
+  Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
+                                                            timestampMs:timestampMs
+                                                                  error:error];
+  if (imagePacket.IsEmpty()) {
+    return std::nullopt;
+  }
+
+  Packet normalizedRectPacket =
+      [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
+                                                 timestampMs:timestampMs];
+
+  PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
+  return inputPacketMap;
+}
+
+- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image error:(NSError **)error {
+  return [self detectInImage:image regionOfInterest:CGRectZero error:error];
+}
+
+- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
+                                              timestampMs:(NSInteger)timestampMs
+                                         regionOfInterest:(CGRect)roi
+                                                    error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
+                                                                 timestampMs:timestampMs
+                                                            regionOfInterest:roi
+                                                                       error:error];
+  if (!inputPacketMap.has_value()) {
+    return nil;
+  }
+
+  std::optional<PacketMap> outputPacketMap =
+      [_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
+
+  if (!outputPacketMap.has_value()) {
+    return nil;
+  }
+
+  return [MPPObjectDetectionResult
+      objectDetectionResultWithDetectionsPacket:outputPacketMap
+                                                    .value()[kDetectionsStreamName.cppString]];
+}
+
+- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
+                                              timestampMs:(NSInteger)timestampMs
+                                                    error:(NSError **)error {
+  return [self detectInVideoFrame:image
+                      timestampMs:timestampMs
+                 regionOfInterest:CGRectZero
+                            error:error];
+}
+
+- (BOOL)detectAsyncInImage:(MPPImage *)image
+               timestampMs:(NSInteger)timestampMs
+          regionOfInterest:(CGRect)roi
+                     error:(NSError **)error {
+  std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
+                                                                 timestampMs:timestampMs
+                                                            regionOfInterest:roi
+                                                                       error:error];
+  if (!inputPacketMap.has_value()) {
+    return NO;
+  }
+
+  return [_visionTaskRunner processLiveStreamPacketMap:inputPacketMap.value() error:error];
+}
+
+- (BOOL)detectAsyncInImage:(MPPImage *)image
+               timestampMs:(NSInteger)timestampMs
+                     error:(NSError **)error {
+  return [self detectAsyncInImage:image
+                      timestampMs:timestampMs
+                 regionOfInterest:CGRectZero
+                            error:error];
+}
+
+@end
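
A minimal image-mode usage sketch for reviewers (not part of the diff). `modelPath` and `uiImage` are placeholder variables; the `MPPImage` initializer `initWithUIImage:error:` is assumed from the existing MPPImage header rather than from this change, so verify the exact signature there.

MPPObjectDetectorOptions *options = [[MPPObjectDetectorOptions alloc] init];
options.baseOptions.modelAssetPath = modelPath;
options.runningMode = MPPRunningModeImage;

NSError *error = nil;
MPPObjectDetector *detector = [[MPPObjectDetector alloc] initWithOptions:options error:&error];
if (!detector) {
  NSLog(@"Failed to create object detector: %@", error);
  return;
}

// Wrap the input UIImage; detection applies rotation per the MPPImage's `orientation`.
MPPImage *mppImage = [[MPPImage alloc] initWithUIImage:uiImage error:&error];
if (!mppImage) {
  NSLog(@"Failed to create MPPImage: %@", error);
  return;
}

// Synchronous detection over the whole image; bounding boxes come back in the
// unrotated input-image coordinate system, per the header docs above.
MPPObjectDetectionResult *result = [detector detectInImage:mppImage error:&error];
if (!result) {
  NSLog(@"Detection failed: %@", error);
}

For live-stream use, set `options.runningMode = MPPRunningModeLiveStream`, assign `options.completion`, and feed frames through `detectAsyncInImage:timestampMs:error:` with monotonically increasing timestamps, as described in the header.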