Add FaceLandmarker iOS Live Stream API

PiperOrigin-RevId: 537434786
This commit is contained in:
Sebastian Schmidt 2023-06-02 16:20:05 -07:00 committed by Copybara-Service
parent ace56b502a
commit 549e09cace
5 changed files with 373 additions and 8 deletions

View File

@ -53,6 +53,8 @@ static NSString *const kFaceLandmarkerModelName = @"face_landmarker_v2";
// Name of the face landmarker model variant that also outputs blendshapes.
static NSString *const kFaceLandmarkerWithBlendshapesModelName =
@"face_landmarker_v2_with_blendshapes";
// Error domain of every `NSError` these tests expect the task to report.
static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks";
// Keys of the per-test dictionaries consulted by the live stream delegate callback: the first maps
// to the `MPPFaceLandmarker` under test, the second to the `XCTestExpectation` to fulfill.
static NSString *const kLiveStreamTestsDictFaceLandmarkerKey = @"face_landmarker";
static NSString *const kLiveStreamTestsDictExpectationKey = @"expectation";
// NOTE(review): presumably the maximum tolerated differences between detected and expected values
// when comparing results; their use is outside this view, confirm against the assertion helpers.
constexpr float kLandmarkErrorThreshold = 0.03f;
constexpr float kBlendshapesErrorThreshold = 0.1f;
@ -64,7 +66,9 @@ constexpr float kFacialTransformationMatrixErrorThreshold = 0.2f;
XCTAssertEqual(error.code, expectedError.code); \
XCTAssertEqualObjects(error.localizedDescription, expectedError.localizedDescription)
@interface MPPFaceLandmarkerTests : XCTestCase {
// Test case for `MPPFaceLandmarker`. It adopts `MPPFaceLandmarkerLiveStreamDelegate` so that the
// test case itself can be set as the live stream delegate of the landmarkers it creates.
@interface MPPFaceLandmarkerTests : XCTestCase <MPPFaceLandmarkerLiveStreamDelegate> {
// State of a running live stream test: maps `kLiveStreamTestsDictFaceLandmarkerKey` to the
// landmarker under test and `kLiveStreamTestsDictExpectationKey` to the expectation that the
// delegate callback fulfills when that landmarker reports a result.
NSDictionary *_liveStreamSucceedsTestDict;
// Same layout as `_liveStreamSucceedsTestDict`. The delegate callback compares the reporting
// landmarker against this dictionary first and against `_liveStreamSucceedsTestDict` second.
NSDictionary *_outOfOrderTimestampTestDict;
}
@end
@ -164,14 +168,143 @@ constexpr float kFacialTransformationMatrixErrorThreshold = 0.2f;
}
}
#pragma mark Live Stream Mode Tests
/**
 * Sends the portrait image `iterationCount` times with increasing timestamps to a face landmarker
 * in live stream mode and verifies that every frame is accepted and that the delegate is not
 * called back more often than frames were sent.
 */
- (void)testDetectWithLiveStreamModeAndPotraitSucceeds {
  NSInteger iterationCount = 100;

  // Because of flow limiting, the callback might be invoked fewer than `iterationCount` times. A
  // normal expectation fails if `fulfill` is not called `expectedFulfillmentCount` times. An
  // inverted expectation (`inverted = YES`) only succeeds if it is NOT fulfilled
  // `expectedFulfillmentCount` times. Since it is not possible to predict how many callbacks will
  // arrive, `expectedFulfillmentCount = iterationCount + 1` combined with `inverted = YES` makes
  // the test succeed whenever the expectation is fulfilled <= `iterationCount` times.
  //
  // The description names this test (it was previously copied from the out-of-order timestamp
  // test), so a failure report points at the right scenario.
  XCTestExpectation *expectation =
      [[XCTestExpectation alloc] initWithDescription:@"detectWithLiveStream"];
  expectation.expectedFulfillmentCount = iterationCount + 1;
  expectation.inverted = YES;

  MPPFaceLandmarkerOptions *options =
      [self faceLandmarkerOptionsWithModelName:kFaceLandmarkerModelName];
  options.runningMode = MPPRunningModeLiveStream;
  options.faceLandmarkerLiveStreamDelegate = self;

  MPPFaceLandmarker *faceLandmarker = [[MPPFaceLandmarker alloc] initWithOptions:options error:nil];
  MPPImage *image = [self imageWithFileInfo:kPortraitImage];

  // Lets the delegate callback map the reporting landmarker back to this test's expectation.
  _liveStreamSucceedsTestDict = @{
    kLiveStreamTestsDictFaceLandmarkerKey : faceLandmarker,
    kLiveStreamTestsDictExpectationKey : expectation
  };

  // The loop index doubles as the (strictly increasing) timestamp of each frame.
  for (NSInteger i = 0; i < iterationCount; i++) {
    XCTAssertTrue([faceLandmarker detectAsyncInImage:image timestampInMilliseconds:i error:nil]);
  }

  NSTimeInterval timeout = 0.5f;
  [self waitForExpectations:@[ expectation ] timeout:timeout];
}
/**
 * Verifies that, in live stream mode, a frame whose timestamp is lower than that of the previously
 * sent frame is rejected synchronously with an invalid argument error. The first, valid frame is
 * expected to fulfill the expectation once through the delegate callback.
 */
- (void)testDetectWithOutOfOrderTimestampsAndLiveStreamModeFails {
  MPPFaceLandmarkerOptions *options =
      [self faceLandmarkerOptionsWithModelName:kFaceLandmarkerModelName];
  options.runningMode = MPPRunningModeLiveStream;
  options.faceLandmarkerLiveStreamDelegate = self;

  XCTestExpectation *expectation = [[XCTestExpectation alloc]
      initWithDescription:@"detectWithOutOfOrderTimestampsAndLiveStream"];
  expectation.expectedFulfillmentCount = 1;

  MPPFaceLandmarker *faceLandmarker = [[MPPFaceLandmarker alloc] initWithOptions:options error:nil];

  // Store the state in `_outOfOrderTimestampTestDict`, the dictionary the delegate callback
  // associates with this test. It was previously written to `_liveStreamSucceedsTestDict`, which
  // left `_outOfOrderTimestampTestDict` unused and made the test pass only via the callback's
  // fallback branch.
  _outOfOrderTimestampTestDict = @{
    kLiveStreamTestsDictFaceLandmarkerKey : faceLandmarker,
    kLiveStreamTestsDictExpectationKey : expectation
  };

  MPPImage *image = [self imageWithFileInfo:kPortraitImage];

  // The first frame establishes timestamp 1 as the latest accepted timestamp.
  XCTAssertTrue([faceLandmarker detectAsyncInImage:image timestampInMilliseconds:1 error:nil]);

  // A frame with an earlier timestamp must be refused.
  NSError *error;
  XCTAssertFalse([faceLandmarker detectAsyncInImage:image timestampInMilliseconds:0 error:&error]);

  NSError *expectedError =
      [NSError errorWithDomain:kExpectedErrorDomain
                          code:MPPTasksErrorCodeInvalidArgumentError
                      userInfo:@{
                        NSLocalizedDescriptionKey :
                            @"INVALID_ARGUMENT: Input timestamp must be monotonically increasing."
                      }];
  AssertEqualErrors(error, expectedError);

  NSTimeInterval timeout = 0.5f;
  [self waitForExpectations:@[ expectation ] timeout:timeout];
}
#pragma mark Running Mode Tests
/**
 * Checks that creating a face landmarker fails with an invalid argument error when a live stream
 * delegate is supplied while the running mode is image or video.
 */
- (void)testCreateFaceLandmarkerFailsWithDelegateInNonLiveStreamMode {
  const MPPRunningMode nonLiveStreamModes[] = {MPPRunningModeImage, MPPRunningModeVideo};

  for (MPPRunningMode mode : nonLiveStreamModes) {
    MPPFaceLandmarkerOptions *optionsWithDelegate =
        [self faceLandmarkerOptionsWithModelName:kFaceLandmarkerModelName];
    optionsWithDelegate.runningMode = mode;
    optionsWithDelegate.faceLandmarkerLiveStreamDelegate = self;

    NSError *delegateNotAllowedError =
        [NSError errorWithDomain:kExpectedErrorDomain
                            code:MPPTasksErrorCodeInvalidArgumentError
                        userInfo:@{
                          NSLocalizedDescriptionKey :
                              @"The vision task is in image or video mode. The "
                              @"delegate must not be set in the task's options."
                        }];

    [self assertCreateFaceLandmarkerWithOptions:optionsWithDelegate
                         failsWithExpectedError:delegateNotAllowedError];
  }
}
/**
 * Checks that creating a face landmarker in live stream mode without a delegate fails with an
 * invalid argument error.
 */
- (void)testCreateFaceLandmarkerFailsWithMissingDelegateInLiveStreamMode {
  // Live stream mode is requested but no delegate is assigned.
  MPPFaceLandmarkerOptions *optionsWithoutDelegate =
      [self faceLandmarkerOptionsWithModelName:kFaceLandmarkerModelName];
  optionsWithoutDelegate.runningMode = MPPRunningModeLiveStream;

  NSError *missingDelegateError =
      [NSError errorWithDomain:kExpectedErrorDomain
                          code:MPPTasksErrorCodeInvalidArgumentError
                      userInfo:@{
                        NSLocalizedDescriptionKey :
                            @"The vision task is in live stream mode. An "
                            @"object must be set as the delegate of the task "
                            @"in its options to ensure asynchronous delivery "
                            @"of results."
                      }];

  [self assertCreateFaceLandmarkerWithOptions:optionsWithoutDelegate
                       failsWithExpectedError:missingDelegateError];
}
- (void)testDetectFailsWithCallingWrongAPIInImageMode {
MPPFaceLandmarkerOptions *options =
[self faceLandmarkerOptionsWithModelName:kFaceLandmarkerModelName];
MPPFaceLandmarker *faceLandmarker = [[MPPFaceLandmarker alloc] initWithOptions:options error:nil];
MPPImage *image = [self imageWithFileInfo:kPortraitImage];
NSError *liveStreamAPICallError;
XCTAssertFalse([faceLandmarker detectAsyncInImage:image
timestampInMilliseconds:0
error:&liveStreamAPICallError]);
NSError *expectedLiveStreamAPICallError =
[NSError errorWithDomain:kExpectedErrorDomain
code:MPPTasksErrorCodeInvalidArgumentError
userInfo:@{
NSLocalizedDescriptionKey : @"The vision task is not initialized with live "
@"stream mode. Current Running Mode: Image"
}];
AssertEqualErrors(liveStreamAPICallError, expectedLiveStreamAPICallError);
NSError *videoAPICallError;
XCTAssertFalse([faceLandmarker detectInVideoFrame:image
timestampInMilliseconds:0
@ -195,6 +328,20 @@ constexpr float kFacialTransformationMatrixErrorThreshold = 0.2f;
MPPFaceLandmarker *faceLandmarker = [[MPPFaceLandmarker alloc] initWithOptions:options error:nil];
MPPImage *image = [self imageWithFileInfo:kPortraitImage];
NSError *liveStreamAPICallError;
XCTAssertFalse([faceLandmarker detectAsyncInImage:image
timestampInMilliseconds:0
error:&liveStreamAPICallError]);
NSError *expectedLiveStreamAPICallError =
[NSError errorWithDomain:kExpectedErrorDomain
code:MPPTasksErrorCodeInvalidArgumentError
userInfo:@{
NSLocalizedDescriptionKey : @"The vision task is not initialized with live "
@"stream mode. Current Running Mode: Video"
}];
AssertEqualErrors(liveStreamAPICallError, expectedLiveStreamAPICallError);
NSError *imageAPICallError;
XCTAssertFalse([faceLandmarker detectInImage:image error:&imageAPICallError]);
@ -208,6 +355,61 @@ constexpr float kFacialTransformationMatrixErrorThreshold = 0.2f;
AssertEqualErrors(imageAPICallError, expectedImageAPICallError);
}
/**
 * Checks that a face landmarker created in live stream mode rejects both the image API and the
 * video API with an invalid argument error naming the current running mode.
 */
- (void)testDetectFailsWithCallingWrongAPIInLiveStreamMode {
  MPPFaceLandmarkerOptions *liveStreamOptions =
      [self faceLandmarkerOptionsWithModelName:kFaceLandmarkerModelName];
  liveStreamOptions.runningMode = MPPRunningModeLiveStream;
  liveStreamOptions.faceLandmarkerLiveStreamDelegate = self;

  MPPFaceLandmarker *landmarker = [[MPPFaceLandmarker alloc] initWithOptions:liveStreamOptions
                                                                       error:nil];
  MPPImage *portrait = [self imageWithFileInfo:kPortraitImage];

  // Both expected errors share domain and code and differ only in their description.
  NSError * (^invalidArgumentError)(NSString *) = ^NSError *(NSString *description) {
    return [NSError errorWithDomain:kExpectedErrorDomain
                               code:MPPTasksErrorCodeInvalidArgumentError
                           userInfo:@{NSLocalizedDescriptionKey : description}];
  };

  // Image API.
  NSError *imageModeCallError;
  XCTAssertFalse([landmarker detectInImage:portrait error:&imageModeCallError]);
  NSError *expectedImageModeCallError =
      invalidArgumentError(@"The vision task is not initialized with "
                           @"image mode. Current Running Mode: Live Stream");
  AssertEqualErrors(imageModeCallError, expectedImageModeCallError);

  // Video API.
  NSError *videoModeCallError;
  XCTAssertFalse([landmarker detectInVideoFrame:portrait
                        timestampInMilliseconds:0
                                          error:&videoModeCallError]);
  NSError *expectedVideoModeCallError =
      invalidArgumentError(@"The vision task is not initialized with "
                           @"video mode. Current Running Mode: Live Stream");
  AssertEqualErrors(videoModeCallError, expectedVideoModeCallError);
}
#pragma mark MPPFaceLandmarkerLiveStreamDelegate Methods
/**
 * Live stream delegate callback shared by the live stream tests. It compares the reported result
 * with the expected portrait landmarks and then fulfills the expectation registered for the
 * landmarker that produced the result, if any.
 */
- (void)faceLandmarker:(MPPFaceLandmarker *)faceLandmarker
    didFinishDetectionWithResult:(MPPFaceLandmarkerResult *)faceLandmarkerResult
         timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                           error:(NSError *)error {
  NSArray<MPPNormalizedLandmark *> *portraitLandmarks =
      [MPPFaceLandmarkerTests expectedLandmarksFromFileInfo:kPortraitExpectedLandmarksName];
  [self assertFaceLandmarkerResult:faceLandmarkerResult
         containsExpectedLandmarks:portraitLandmarks
               expectedBlendshapes:NULL
      expectedTransformationMatrix:NULL];

  // Find the test dictionary registered for this landmarker. The out-of-order dictionary is
  // consulted first, the live-stream-succeeds dictionary second.
  NSDictionary *owningTestDict = nil;
  if (faceLandmarker == _outOfOrderTimestampTestDict[kLiveStreamTestsDictFaceLandmarkerKey]) {
    owningTestDict = _outOfOrderTimestampTestDict;
  } else if (faceLandmarker == _liveStreamSucceedsTestDict[kLiveStreamTestsDictFaceLandmarkerKey]) {
    owningTestDict = _liveStreamSucceedsTestDict;
  }

  // Messaging `nil` is a no-op, so nothing is fulfilled when no dictionary matched.
  [owningTestDict[kLiveStreamTestsDictExpectationKey] fulfill];
}
+ (NSString *)filePathWithName:(NSString *)fileName extension:(NSString *)extension {
NSString *filePath =
[[NSBundle bundleForClass:[MPPFaceLandmarkerTests class]] pathForResource:fileName

View File

@ -107,6 +107,46 @@ NS_SWIFT_NAME(FaceLandmarker)
error:(NSError **)error
NS_SWIFT_NAME(detect(videoFrame:timestampInMilliseconds:));
/**
 * Sends live stream image data of type `MPPImage` to perform face landmark detection using the
 * whole image as region of interest. Rotation will be applied according to the `orientation`
 * property of the provided `MPPImage`. Only use this method when the `MPPFaceLandmarker` is created
 * with `MPPRunningModeLiveStream`.
 *
 * The object which needs to be continuously notified of the available results of face landmark
 * detection must conform to the `MPPFaceLandmarkerLiveStreamDelegate` protocol and implement the
 * `faceLandmarker:didFinishDetectionWithResult:timestampInMilliseconds:error:` delegate method.
 *
 * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
 * to the face landmarker. The input timestamps must be monotonically increasing.
 *
 * This method supports RGBA images. If your `MPPImage` has a source type of
 * `MPPImageSourceTypePixelBuffer` or `MPPImageSourceTypeSampleBuffer`, the underlying pixel buffer
 * must have one of the following pixel format types:
 * 1. kCVPixelFormatType_32BGRA
 * 2. kCVPixelFormatType_32RGBA
 *
 * If the input `MPPImage` has a source type of `MPPImageSourceTypeImage` ensure that the color
 * space is RGB with an Alpha channel.
 *
 * If this method is used for detecting face landmarks in live camera frames using `AVFoundation`,
 * ensure that you request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32BGRA`
 * using its `videoSettings` property.
 *
 * @param image A live stream image data of type `MPPImage` on which face landmark detection is to
 * be performed.
 * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input
 * image is sent to the face landmarker. The input timestamps must be monotonically increasing.
 * @param error An optional error parameter populated when there is an error when sending the input
 * image to the graph.
 *
 * @return `YES` if the image was sent to the task successfully, otherwise `NO`.
 */
- (BOOL)detectAsyncInImage:(MPPImage *)image
timestampInMilliseconds:(NSInteger)timestampInMilliseconds
error:(NSError **)error
NS_SWIFT_NAME(detectAsync(image:timestampInMilliseconds:));
- (instancetype)init NS_UNAVAILABLE;
+ (instancetype)new NS_UNAVAILABLE;

View File

@ -25,9 +25,12 @@
using ::mediapipe::NormalizedRect;
using ::mediapipe::Packet;
using ::mediapipe::Timestamp;
using ::mediapipe::tasks::core::PacketMap;
using ::mediapipe::tasks::core::PacketsCallback;
static constexpr int kMicrosecondsPerMillisecond = 1000;
// Constants for the underlying MP Tasks Graph. See
// https://github.com/google/mediapipe/tree/master/mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_graph.cc
static NSString *const kLandmarksOutStreamName = @"landmarks_out";
@ -55,8 +58,14 @@ static NSString *const kTaskName = @"faceLandmarker";
/** Class extension holding the private instance variables of `MPPFaceLandmarker`. */
@interface MPPFaceLandmarker () {
/** The iOS vision task runner that all detection requests are forwarded to. */
MPPVisionTaskRunner *_visionTaskRunner;
/**
 * The dispatch queue on which the live stream delegate method is invoked asynchronously. This is
 * only set if the user provides a live stream delegate.
 */
dispatch_queue_t _callbackQueue;
/**
 * The user-provided live stream delegate if set. Held weakly, so the landmarker does not keep
 * the delegate alive.
 */
__weak id<MPPFaceLandmarkerLiveStreamDelegate> _faceLandmarkerLiveStreamDelegate;
}
@end
@implementation MPPFaceLandmarker
@ -94,10 +103,30 @@ static NSString *const kTaskName = @"faceLandmarker";
return nil;
}
PacketsCallback packetsCallback = nullptr;
if (options.faceLandmarkerLiveStreamDelegate) {
_faceLandmarkerLiveStreamDelegate = options.faceLandmarkerLiveStreamDelegate;
// Create a private serial dispatch queue in which the delegate method will be called
// asynchronously. This is to ensure that if the client performs a long running operation in
// the delegate method, the queue on which the C++ callbacks is invoked is not blocked and is
// freed up to continue with its operations.
_callbackQueue = dispatch_queue_create(
[MPPVisionTaskRunner uniqueDispatchQueueNameWithSuffix:kTaskName], NULL);
// Capturing `self` as weak in order to avoid `self` being kept in memory
// and cause a retain cycle, after self is set to `nil`.
MPPFaceLandmarker *__weak weakSelf = self;
packetsCallback = [weakSelf](absl::StatusOr<PacketMap> liveStreamResult) {
[weakSelf processLiveStreamResult:liveStreamResult];
};
}
_visionTaskRunner =
[[MPPVisionTaskRunner alloc] initWithCalculatorGraphConfig:[taskInfo generateGraphConfig]
runningMode:options.runningMode
packetsCallback:nullptr
packetsCallback:std::move(packetsCallback)
error:error];
if (!_visionTaskRunner) {
@ -133,7 +162,7 @@ static NSString *const kTaskName = @"faceLandmarker";
}
Packet normalizedRectPacket =
[MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()
[MPPVisionPacketCreator createPacketWithNormalizedRect:*rect
timestampInMilliseconds:timestampInMilliseconds];
PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
@ -154,8 +183,7 @@ static NSString *const kTaskName = @"faceLandmarker";
return nil;
}
Packet normalizedRectPacket =
[MPPVisionPacketCreator createPacketWithNormalizedRect:rect.value()];
Packet normalizedRectPacket = [MPPVisionPacketCreator createPacketWithNormalizedRect:*rect];
PacketMap inputPacketMap = InputPacketMap(imagePacket, normalizedRectPacket);
@ -185,8 +213,7 @@ static NSString *const kTaskName = @"faceLandmarker";
}
std::optional<PacketMap> outputPacketMap =
[_visionTaskRunner processVideoFramePacketMap:inputPacketMap.value() error:error];
[_visionTaskRunner processVideoFramePacketMap:*inputPacketMap error:error];
if (!outputPacketMap.has_value()) {
return nil;
}
@ -200,4 +227,54 @@ static NSString *const kTaskName = @"faceLandmarker";
.value()[kFaceGeometryOutStreamName.cppString]];
}
/**
 * Builds the input packets for `image` at `timestampInMilliseconds` and hands them to the vision
 * task runner's live stream entry point.
 *
 * @return `NO` if the input packets could not be created, otherwise the value returned by the
 * task runner. `error` is passed through to both steps.
 */
- (BOOL)detectAsyncInImage:(MPPImage *)image
    timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                      error:(NSError **)error {
  std::optional<PacketMap> livePackets = [self inputPacketMapWithMPPImage:image
                                                  timestampInMilliseconds:timestampInMilliseconds
                                                                    error:error];

  // Without input packets there is nothing to send to the graph.
  return livePackets.has_value()
             ? [_visionTaskRunner processLiveStreamPacketMap:*livePackets error:error]
             : NO;
}
/**
 * Converts a live stream result of the graph into a delegate callback dispatched asynchronously
 * on `_callbackQueue`.
 *
 * - A failed status is reported to the delegate with a `nil` result, the unset timestamp value
 *   and the converted error.
 * - An empty image output packet produces no callback at all.
 * - Otherwise the delegate receives the result built from the output packets, together with the
 *   image packet's timestamp converted from microseconds to milliseconds.
 */
- (void)processLiveStreamResult:(absl::StatusOr<PacketMap>)liveStreamResult {
  NSError *statusError;
  BOOL graphSucceeded = [MPPCommonUtils checkCppError:liveStreamResult.status()
                                              toError:&statusError];
  if (!graphSucceeded) {
    NSInteger unsetTimestamp = Timestamp::Unset().Value();
    dispatch_async(_callbackQueue, ^{
      [self->_faceLandmarkerLiveStreamDelegate faceLandmarker:self
                                 didFinishDetectionWithResult:nil
                                      timestampInMilliseconds:unsetTimestamp
                                                        error:statusError];
    });
    return;
  }

  PacketMap &packets = *liveStreamResult;

  const Packet &imagePacket = packets[kImageOutStreamName.cppString];
  if (imagePacket.IsEmpty()) {
    // The graph did not return a result, so the user callback is not raised. This mirrors
    // returning `nil` in the other methods and is acceptable for the live stream delegate since
    // frames are expected to be dropped and not every input yields a result.
    return;
  }

  // Read the timestamp before any further lookups in the packet map.
  NSInteger resultTimestampInMilliseconds =
      imagePacket.Timestamp().Value() / kMicrosecondsPerMillisecond;

  MPPFaceLandmarkerResult *landmarkerResult = [MPPFaceLandmarkerResult
      faceLandmarkerResultWithLandmarksPacket:packets[kLandmarksOutStreamName.cppString]
                            blendshapesPacket:packets[kBlendshapesOutStreamName.cppString]
                 transformationMatrixesPacket:packets[kFaceGeometryOutStreamName.cppString]];

  dispatch_async(_callbackQueue, ^{
    // `statusError` is `nil` here because the status check above succeeded.
    [self->_faceLandmarkerLiveStreamDelegate faceLandmarker:self
                               didFinishDetectionWithResult:landmarkerResult
                                    timestampInMilliseconds:resultTimestampInMilliseconds
                                                      error:statusError];
  });
}
@end

View File

@ -20,6 +20,41 @@
NS_ASSUME_NONNULL_BEGIN
@class MPPFaceLandmarker;
/**
 * This protocol defines an interface for the delegates of `MPPFaceLandmarker` to receive the
 * results of performing asynchronous face landmark detection on images (i.e, when `runningMode` =
 * `MPPRunningModeLiveStream`).
 *
 * The delegate of `MPPFaceLandmarker` must adopt the `MPPFaceLandmarkerLiveStreamDelegate`
 * protocol and implement its method; no method of this protocol is declared `@optional`.
 *
 * The Swift name is `FaceLandmarkerLiveStreamDelegate`. It was previously declared as
 * `FaceDetectorLiveStreamDelegate`, which does not match this class.
 */
NS_SWIFT_NAME(FaceLandmarkerLiveStreamDelegate)
@protocol MPPFaceLandmarkerLiveStreamDelegate <NSObject>

/**
 * This method notifies a delegate that the results of asynchronous face landmark detection of
 * an image submitted to the `MPPFaceLandmarker` are available.
 *
 * This method is called on a private serial dispatch queue created by the `MPPFaceLandmarker`
 * for performing the asynchronous delegate calls.
 *
 * @param faceLandmarker The face landmarker which performed the face landmark detection.
 * This is useful to test equality when there are multiple instances of `MPPFaceLandmarker`.
 * @param result The `MPPFaceLandmarkerResult` object that contains a list of landmarks, or `nil`
 * if none is available.
 * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input
 * image was sent to the face landmarker.
 * @param error An optional error parameter populated when there is an error in performing face
 * landmark detection on the input live stream image data.
 */
- (void)faceLandmarker:(MPPFaceLandmarker *)faceLandmarker
    didFinishDetectionWithResult:(nullable MPPFaceLandmarkerResult *)result
         timestampInMilliseconds:(NSInteger)timestampInMilliseconds
                           error:(nullable NSError *)error
    NS_SWIFT_NAME(faceLandmarker(_:didFinishDetection:timestampInMilliseconds:error:));

@end
/** Options for setting up a `MPPFaceLandmarker`. */
NS_SWIFT_NAME(FaceLandmarkerOptions)
@interface MPPFaceLandmarkerOptions : MPPTaskOptions <NSCopying>
@ -35,6 +70,15 @@ NS_SWIFT_NAME(FaceLandmarkerOptions)
*/
@property(nonatomic) MPPRunningMode runningMode;
/**
* An object that conforms to `MPPFaceLandmarkerLiveStreamDelegate` protocol. This object must
* implement `faceLandmarker:didFinishDetectionWithResult:timestampInMilliseconds:error:` to receive
* the results of performing asynchronous face landmark detection on images (i.e, when `runningMode`
* = `MPPRunningModeLiveStream`).
*/
@property(nonatomic, weak, nullable) id<MPPFaceLandmarkerLiveStreamDelegate>
faceLandmarkerLiveStreamDelegate;
/** The maximum number of faces that can be detected by the FaceLandmarker. Defaults to 1. */
@property(nonatomic) NSInteger numFaces;

View File

@ -25,6 +25,7 @@
_minTrackingConfidence = 0.5f;
_outputFaceBlendshapes = NO;
_outputFacialTransformationMatrixes = NO;
_outputFacialTransformationMatrixes = NO;
}
return self;
}
@ -39,6 +40,7 @@
faceLandmarkerOptions.outputFaceBlendshapes = self.outputFaceBlendshapes;
faceLandmarkerOptions.outputFacialTransformationMatrixes =
self.outputFacialTransformationMatrixes;
faceLandmarkerOptions.faceLandmarkerLiveStreamDelegate = self.faceLandmarkerLiveStreamDelegate;
return faceLandmarkerOptions;
}