Added MPPObjectDetector

This commit is contained in:
Prianka Liz Kariat 2023-04-06 21:40:52 +05:30
parent b01b3b84c4
commit 30341024de
3 changed files with 513 additions and 0 deletions

View File

@ -36,3 +36,27 @@ objc_library(
"//mediapipe/tasks/ios/vision/core:MPPRunningMode",
],
)
objc_library(
    name = "MPPObjectDetector",
    srcs = ["sources/MPPObjectDetector.mm"],
    hdrs = ["sources/MPPObjectDetector.h"],
    copts = [
        "-ObjC++",
        "-std=c++17",
        "-x objective-c++",
    ],
    # deps sorted per buildifier convention (local ":" targets first, then
    # fully-qualified labels, each group alphabetized).
    deps = [
        ":MPPObjectDetectionResult",
        ":MPPObjectDetectorOptions",
        "//mediapipe/tasks/cc/vision/object_detector:object_detector_graph",
        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
        "//mediapipe/tasks/ios/common/utils:NSStringHelpers",
        "//mediapipe/tasks/ios/core:MPPTaskInfo",
        "//mediapipe/tasks/ios/vision/core:MPPImage",
        "//mediapipe/tasks/ios/vision/core:MPPVisionPacketCreator",
        "//mediapipe/tasks/ios/vision/core:MPPVisionTaskRunner",
        "//mediapipe/tasks/ios/vision/object_detector/utils:MPPObjectDetectionResultHelpers",
        "//mediapipe/tasks/ios/vision/object_detector/utils:MPPObjectDetectorOptionsHelpers",
    ],
)

View File

@ -0,0 +1,242 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
#import "mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetectionResult.h"
#import "mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetectorOptions.h"
NS_ASSUME_NONNULL_BEGIN
/**
* @brief Class that performs object detection on images.
*
* The API expects a TFLite model with mandatory TFLite Model Metadata.
*
* The API supports models with one image input tensor and one or more output tensors. To be more
* specific, here are the requirements.
*
* Input tensor
* (kTfLiteUInt8/kTfLiteFloat32)
* - image input of size `[batch x height x width x channels]`.
* - batch inference is not supported (`batch` is required to be 1).
* - only RGB inputs are supported (`channels` is required to be 3).
* - if type is kTfLiteFloat32, NormalizationOptions are required to be attached to the metadata
* for input normalization.
*
* Output tensors must be the 4 outputs of a `DetectionPostProcess` op, i.e:(kTfLiteFloat32)
* (kTfLiteUInt8/kTfLiteFloat32)
* - locations tensor of size `[num_results x 4]`, the inner array representing bounding boxes
* in the form [top, left, right, bottom].
* - BoundingBoxProperties are required to be attached to the metadata and must specify
* type=BOUNDARIES and coordinate_type=RATIO.
* (kTfLiteFloat32)
* - classes tensor of size `[num_results]`, each value representing the integer index of a
* class.
* - optional (but recommended) label map(s) can be attached as AssociatedFile-s with type
* TENSOR_VALUE_LABELS, containing one label per line. The first such AssociatedFile (if any) is
* used to fill the `class_name` field of the results. The `display_name` field is filled from
* the AssociatedFile (if any) whose locale matches the `display_names_locale` field of the
* `ObjectDetectorOptions` used at creation time ("en" by default, i.e. English). If none of
* these are available, only the `index` field of the results will be filled.
* (kTfLiteFloat32)
* - scores tensor of size `[num_results]`, each value representing the score of the detected
* object.
* - optional score calibration can be attached using ScoreCalibrationOptions and an
* AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION. See metadata_schema.fbs [1] for more
* details.
* (kTfLiteFloat32)
* - integer num_results as a tensor of size `[1]`
*/
NS_SWIFT_NAME(ObjectDetector)
@interface MPPObjectDetector : NSObject
/**
* Creates a new instance of `MPPObjectDetector` from an absolute path to a TensorFlow Lite model
* file stored locally on the device and the default `MPPObjectDetectorOptions`.
*
* @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
* @param error An optional error parameter populated when there is an error in initializing the
* object detector.
*
* @return A new instance of `MPPObjectDetector` with the given model path. `nil` if there is an
* error in initializing the object detector.
*/
- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error;
/**
* Creates a new instance of `MPPObjectDetector` from the given `MPPObjectDetectorOptions`.
*
* @param options The options of type `MPPObjectDetectorOptions` to use for configuring the
* `MPPObjectDetector`.
* @param error An optional error parameter populated when there is an error in initializing the
* object detector.
*
* @return A new instance of `MPPObjectDetector` with the given options. `nil` if there is an error
* in initializing the object detector.
*/
- (nullable instancetype)initWithOptions:(MPPObjectDetectorOptions *)options
error:(NSError **)error NS_DESIGNATED_INITIALIZER;
/**
* Performs object detection on the provided `MPPImage` using the whole image as region of
* interest. Rotation will be applied according to the `orientation` property of the provided
* `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
* `MPPRunningModeImage`.
*
* @param image The `MPPImage` on which object detection is to be performed.
* @param error An optional error parameter populated when there is an error in performing object
* detection on the input image.
*
* @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
* has a bounding box that is expressed in the unrotated input frame of reference coordinates
* system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
* image data.
*/
- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image
error:(NSError **)error
NS_SWIFT_NAME(detect(image:));
/**
* Performs object detection on the provided `MPPImage` cropped to the specified region of
* interest. Rotation will be applied on the cropped image according to the `orientation` property
* of the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
* `MPPRunningModeImage`.
*
* @param image The `MPPImage` on which object detection is to be performed.
* @param roi A `CGRect` specifying the region of interest within the given `MPPImage`, on which
* object detection should be performed.
* @param error An optional error parameter populated when there is an error in performing object
* detection on the input image.
*
* @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
* has a bounding box that is expressed in the unrotated input frame of reference coordinates
* system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
* image data.
*/
- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image
regionOfInterest:(CGRect)roi
error:(NSError **)error
NS_SWIFT_NAME(detect(image:regionOfInterest:));
/**
* Performs object detection on the provided video frame of type `MPPImage` using the whole
* image as region of interest. Rotation will be applied according to the `orientation` property of
* the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
* `MPPRunningModeVideo`.
*
* @param image The `MPPImage` on which object detection is to be performed.
* @param timestampMs The video frame's timestamp (in milliseconds). The input timestamps must be
* monotonically increasing.
* @param error An optional error parameter populated when there is an error in performing object
* detection on the input image.
*
* @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
* has a bounding box that is expressed in the unrotated input frame of reference coordinates
* system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
* image data.
*/
- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
error:(NSError **)error
NS_SWIFT_NAME(detect(videoFrame:timestampMs:));
/**
* Performs object detection on the provided video frame of type `MPPImage` cropped to the
* specified region of interest. Rotation will be applied according to the `orientation` property of
* the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
* `MPPRunningModeVideo`.
*
* It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must
* be monotonically increasing.
*
* @param image A live stream image data of type `MPPImage` on which object detection is to be
* performed.
* @param timestampMs The video frame's timestamp (in milliseconds). The input timestamps must be
* monotonically increasing.
* @param roi A `CGRect` specifying the region of interest within the given `MPPImage`, on which
* object detection should be performed.
*
* @param error An optional error parameter populated when there is an error in performing object
* detection on the input image.
*
* @return An `MPPObjectDetectionResult` object that contains a list of detections, each detection
* has a bounding box that is expressed in the unrotated input frame of reference coordinates
* system, i.e. in `[0,image_width) x [0,image_height)`, which are the dimensions of the underlying
* image data.
*/
- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
regionOfInterest:(CGRect)roi
error:(NSError **)error
NS_SWIFT_NAME(detect(videoFrame:timestampMs:regionOfInterest:));
/**
* Sends live stream image data of type `MPPImage` to perform object detection using the whole
* image as region of interest. Rotation will be applied according to the `orientation` property of
* the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
* `MPPRunningModeLiveStream`. Results are provided asynchronously via the `completion` callback
* provided in the `MPPObjectDetectorOptions`.
*
* It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
* to the object detector. The input timestamps must be monotonically increasing.
*
* @param image A live stream image data of type `MPPImage` on which object detection is to be
* performed.
* @param timestampMs The timestamp (in milliseconds) which indicates when the input image is sent
* to the object detector. The input timestamps must be monotonically increasing.
* @param error An optional error parameter populated when there is an error in performing object
* detection on the input live stream image data.
*
* @return `YES` if the image was sent to the task successfully, otherwise `NO`.
*/
- (BOOL)detectAsyncInImage:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
error:(NSError **)error NS_SWIFT_NAME(detectAsync(image:timestampMs:));
/**
* Sends live stream image data of type `MPPImage` to perform object detection, cropped to the
* specified region of interest. Rotation will be applied according to the `orientation` property
* of the provided `MPPImage`. Only use this method when the `MPPObjectDetector` is created with
* `MPPRunningModeLiveStream`. Results are provided asynchronously via the `completion` callback
* provided in the `MPPObjectDetectorOptions`.
*
* It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
* to the object detector. The input timestamps must be monotonically increasing.
*
* @param image A live stream image data of type `MPPImage` on which object detection is to be
* performed.
* @param timestampMs The timestamp (in milliseconds) which indicates when the input image is sent
* to the object detector. The input timestamps must be monotonically increasing.
* @param roi A `CGRect` specifying the region of interest within the given live stream image data
* of type `MPPImage`, on which object detection should be performed.
* @param error An optional error parameter populated when there is an error in performing object
* detection on the input live stream image data.
*
* @return `YES` if the image was sent to the task successfully, otherwise `NO`.
*/
- (BOOL)detectAsyncInImage:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
regionOfInterest:(CGRect)roi
error:(NSError **)error
NS_SWIFT_NAME(detectAsync(image:timestampMs:regionOfInterest:));
- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,247 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/object_detector/sources/MPPObjectDetector.h"
#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h"
#import "mediapipe/tasks/ios/core/sources/MPPTaskInfo.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
#import "mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectionResult+Helpers.h"
#import "mediapipe/tasks/ios/vision/object_detector/utils/sources/MPPObjectDetectorOptions+Helpers.h"
// C++ symbols from the MediaPipe task-runner API used by this
// Objective-C++ implementation.
namespace {
using ::mediapipe::NormalizedRect;
using ::mediapipe::Packet;
using ::mediapipe::Timestamp;
using ::mediapipe::tasks::core::PacketMap;
using ::mediapipe::tasks::core::PacketsCallback;
} // namespace

// Stream names and tags wiring this task to the
// `mediapipe.tasks.vision.ObjectDetectorGraph` calculator graph.
static NSString *const kDetectionsStreamName = @"detections_out";
static NSString *const kDetectionsTag = @"DETECTIONS";
static NSString *const kImageInStreamName = @"image_in";
static NSString *const kImageOutStreamName = @"image_out";
static NSString *const kImageTag = @"IMAGE";
static NSString *const kNormRectStreamName = @"norm_rect_in";
static NSString *const kNormRectTag = @"NORM_RECT";
static NSString *const kTaskGraphName = @"mediapipe.tasks.vision.ObjectDetectorGraph";

// Expands to the `{stream name -> packet}` map sent to the task runner for a
// single inference request (image packet + normalized-rect packet).
#define InputPacketMap(imagePacket, normalizedRectPacket) \
{ \
{kImageInStreamName.cppString, imagePacket}, { \
kNormRectStreamName.cppString, normalizedRectPacket \
} \
}

// Class extension holding private state of MPPObjectDetector.
@interface MPPObjectDetector () {
/** iOS Vision Task Runner */
MPPVisionTaskRunner *_visionTaskRunner;
}
@end
@implementation MPPObjectDetector

// Designated initializer. Describes the ObjectDetectorGraph task (its input
// and output streams in "TAG:stream_name" form), installs a C++ packets
// callback for live-stream mode, and creates the vision task runner.
- (instancetype)initWithOptions:(MPPObjectDetectorOptions *)options error:(NSError **)error {
self = [super init];
if (self) {
MPPTaskInfo *taskInfo = [[MPPTaskInfo alloc]
initWithTaskGraphName:kTaskGraphName
inputStreams:@[
[NSString stringWithFormat:@"%@:%@", kImageTag, kImageInStreamName],
[NSString stringWithFormat:@"%@:%@", kNormRectTag, kNormRectStreamName]
]
outputStreams:@[
[NSString stringWithFormat:@"%@:%@", kDetectionsTag, kDetectionsStreamName],
[NSString stringWithFormat:@"%@:%@", kImageTag, kImageOutStreamName]
]
taskOptions:options
// Flow limiting is only enabled for live-stream input.
enableFlowLimiting:options.runningMode == MPPRunningModeLiveStream
error:error];
if (!taskInfo) {
return nil;
}
// In live-stream mode, graph results arrive through this C++ callback and
// are forwarded to the user-supplied `options.completion` block.
// NOTE(review): the `[=]` capture copies (and under ARC retains) `options`,
// keeping the completion block alive as long as the task runner — confirm
// this lifetime is intended.
PacketsCallback packetsCallback = nullptr;
if (options.completion) {
packetsCallback = [=](absl::StatusOr<PacketMap> status_or_packets) {
NSError *callbackError = nil;
// On a graph error, report it with an unset timestamp and bail out.
if (![MPPCommonUtils checkCppError:status_or_packets.status() toError:&callbackError]) {
options.completion(nil, Timestamp::Unset().Value(), callbackError);
return;
}
PacketMap &outputPacketMap = status_or_packets.value();
// An empty image-out packet means no result was produced for this
// frame; the completion block is deliberately not invoked.
if (outputPacketMap[kImageOutStreamName.cppString].IsEmpty()) {
return;
}
MPPObjectDetectionResult *result = [MPPObjectDetectionResult
objectDetectionResultWithDetectionsPacket:status_or_packets.value()
[kDetectionsStreamName.cppString]];
// Convert the packet timestamp (microseconds) to milliseconds.
// NOTE(review): `kMicroSecondsPerMilliSecond` is not defined in the
// visible portion of this file — confirm it is declared elsewhere
// (typically `static const int kMicroSecondsPerMilliSecond = 1000;`).
options.completion(result,
outputPacketMap[kImageOutStreamName.cppString].Timestamp().Value() /
kMicroSecondsPerMilliSecond,
callbackError);
};
}
_visionTaskRunner =
[[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
runningMode:options.runningMode
packetsCallback:std::move(packetsCallback)
error:error];
if (!_visionTaskRunner) {
return nil;
}
}
return self;
}

// Convenience initializer: wraps `modelPath` in default options and forwards
// to the designated initializer.
- (instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error {
MPPObjectDetectorOptions *options = [[MPPObjectDetectorOptions alloc] init];
options.baseOptions.modelAssetPath = modelPath;
return [self initWithOptions:options error:error];
}

// Image-mode detection on a region of interest. Builds the (image,
// normalized-rect) input packet map, runs it synchronously through the task
// runner, and converts the detections packet into an ObjC result object.
- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image
regionOfInterest:(CGRect)roi
error:(NSError **)error {
// Convert the ROI + image orientation into the graph's NormalizedRect input.
std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:roi
imageOrientation:image.orientation
ROIAllowed:YES
error:error];
if (!rect.has_value()) {
return nil;
}
Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image error:error];
if (imagePacket.IsEmpty()) {
return nil;
}
Packet normalizedRectPacket =
[MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
std::optional<PacketMap> outputPacketMap = [_visionTaskRunner processImagePacketMap:inputPacketMap
error:error];
if (!outputPacketMap.has_value()) {
return nil;
}
return [MPPObjectDetectionResult
objectDetectionResultWithDetectionsPacket:outputPacketMap
.value()[kDetectionsStreamName.cppString]];
}

// Shared helper for the timestamped (video / live-stream) entry points:
// builds the input packet map with both packets stamped at `timestampMs`.
// Returns std::nullopt (with `*error` populated) on failure.
- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
regionOfInterest:(CGRect)roi
error:(NSError **)error {
std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:roi
imageOrientation:image.orientation
ROIAllowed:YES
error:error];
if (!rect.has_value()) {
return std::nullopt;
}
Packet imagePacket = [MPPVisionPacketCreator createPacketWithMPPImage:image
timestampMs:timestampMs
error:error];
if (imagePacket.IsEmpty()) {
return std::nullopt;
}
Packet normalizedRectPacket = [MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
timestampMs:timestampMs];
PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
return inputPacketMap;
}

// Whole-image convenience overload; CGRectZero presumably denotes "use the
// full image" in the task runner's ROI handling — TODO confirm against
// MPPVisionTaskRunner.
- (nullable MPPObjectDetectionResult *)detectInImage:(MPPImage *)image error:(NSError **)error {
return [self detectInImage:image regionOfInterest:CGRectZero error:error];
}

// Video-mode detection on a region of interest; synchronous per frame.
- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
regionOfInterest:(CGRect)roi
error:(NSError **)error {
std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
timestampMs:timestampMs
regionOfInterest:roi
error:error];
if (!inputPacketMap.has_value()) {
return nil;
}
std::optional<PacketMap> outputPacketMap =
[_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
if (!outputPacketMap.has_value()) {
return nil;
}
return [MPPObjectDetectionResult
objectDetectionResultWithDetectionsPacket:outputPacketMap
.value()[kDetectionsStreamName.cppString]];
}

// Whole-image convenience overload for video mode.
- (nullable MPPObjectDetectionResult *)detectInVideoFrame:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
error:(NSError **)error {
return [self detectInVideoFrame:image
timestampMs:timestampMs
regionOfInterest:CGRectZero
error:error];
}

// Live-stream mode: enqueues the frame asynchronously; results are delivered
// via the packets callback installed in the initializer. Returns YES iff the
// packets were accepted by the task runner.
- (BOOL)detectAsyncInImage:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
regionOfInterest:(CGRect)roi
error:(NSError **)error {
std::optional<PacketMap> inputPacketMap = [self inputPacketMapWithMPPImage:image
timestampMs:timestampMs
regionOfInterest:roi
error:error];
if (!inputPacketMap.has_value()) {
return NO;
}
return [_visionTaskRunner processLiveStreamPacketMap:inputPacketMap.value() error:error];
}

// Whole-image convenience overload for live-stream mode.
- (BOOL)detectAsyncInImage:(MPPImage *)image
timestampMs:(NSInteger)timestampMs
error:(NSError **)error {
return [self detectAsyncInImage:image
timestampMs:timestampMs
regionOfInterest:CGRectZero
error:error];
}
@end