Merge pull request #4464 from priankakariatyml:ios-gesture-recognizer-updates

PiperOrigin-RevId: 536421680
This commit is contained in:
Copybara-Service 2023-05-30 09:24:06 -07:00
commit 21eeac9fd7
9 changed files with 124 additions and 53 deletions

View File

@ -19,8 +19,30 @@ NS_ASSUME_NONNULL_BEGIN
@interface MPPCategory (Helpers) @interface MPPCategory (Helpers)
/**
* Creates an `MPPCategory` with the given MediaPipe `Classification` proto.
*
* @param classificationProto A MediaPipe `Classification` proto.
 * @return An `MPPCategory` object created from the given MediaPipe `Classification` proto.
*/
+ (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto; + (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto;
/**
* Creates an `MPPCategory` with the given MediaPipe `Classification` proto and the given category
* index. The resulting `MPPCategory` is created with the given category index instead of the
* category index specified in the `Classification` proto. This method is useful for tasks like
* gesture recognizer which always returns a default index for the recognized gestures.
*
* @param classificationProto A MediaPipe `Classification` proto.
* @param index The index to be used for creating the `MPPCategory` instead of the category index
* specified in the `Classification` proto.
*
 * @return An `MPPCategory` object created from the given MediaPipe `Classification` proto with
 * the given category index.
*/
+ (MPPCategory *)categoryWithProto:(const ::mediapipe::Classification &)classificationProto
index:(NSInteger)index;
@end @end
NS_ASSUME_NONNULL_END NS_ASSUME_NONNULL_END

View File

@ -21,7 +21,8 @@ using ClassificationProto = ::mediapipe::Classification;
@implementation MPPCategory (Helpers) @implementation MPPCategory (Helpers)
+ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto { + (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto
index:(NSInteger)index {
NSString *categoryName; NSString *categoryName;
NSString *displayName; NSString *displayName;
@ -33,10 +34,14 @@ using ClassificationProto = ::mediapipe::Classification;
displayName = [NSString stringWithCppString:classificationProto.display_name()]; displayName = [NSString stringWithCppString:classificationProto.display_name()];
} }
return [[MPPCategory alloc] initWithIndex:classificationProto.index() return [[MPPCategory alloc] initWithIndex:index
score:classificationProto.score() score:classificationProto.score()
categoryName:categoryName categoryName:categoryName
displayName:displayName]; displayName:displayName];
} }
+ (MPPCategory *)categoryWithProto:(const ClassificationProto &)classificationProto {
return [MPPCategory categoryWithProto:classificationProto index:classificationProto.index()];
}
@end @end

View File

@ -58,38 +58,57 @@ NS_ASSUME_NONNULL_BEGIN
error:(NSError **)error NS_DESIGNATED_INITIALIZER; error:(NSError **)error NS_DESIGNATED_INITIALIZER;
/** /**
* Creates a `NormalizedRect` from a region of interest and an image orientation, performing * Creates a `NormalizedRect` from image orientation for a task which does not support roi,
* sanity checks on-the-fly. * performing sanity checks on-the-fly. Mirrored orientations
* If the input region of interest equals `CGRectZero`, returns a default `NormalizedRect` covering * (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
* the whole image with rotation set according `imageOrientation`. If `ROIAllowed` is NO, an error
* will be returned if the input region of interest is not equal to `CGRectZero`. Mirrored
* orientations (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
* `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error * `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error
* will be returned if `imageOrientation` is equal to any one of them. * will be returned if `imageOrientation` is equal to any one of them.
* *
* @param roi A `CGRect` specifying the region of interest. If the input region of interest equals
* `CGRectZero`, the returned `NormalizedRect` covers the whole image. Make sure that `roi` equals
* `CGRectZero` if `ROIAllowed` is NO. Otherwise, an error will be returned.
* @param imageSize A `CGSize` specifying the size of the image within which normalized rect is
* calculated.
* @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the * @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the
* image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise. * image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise.
* Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`, * Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`,
* `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error * `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error
* will be returned if `imageOrientation` is equal to any one of them. * will be returned if `imageOrientation` is equal to any one of them.
* @param ROIAllowed Indicates if the `roi` field is allowed to be a value other than `CGRectZero`. * @param imageSize A `CGSize` specifying the size of the image within which normalized rect is
* calculated.
* @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
* error will be saved.
*
* @return An optional `NormalizedRect` from the given region of interest and image orientation.
*/
- (std::optional<mediapipe::NormalizedRect>)normalizedRectWithImageOrientation:
(UIImageOrientation)imageOrientation
imageSize:(CGSize)imageSize
error:(NSError **)error;
/**
* Creates a `NormalizedRect` from roi and image orientation for a task which supports roi,
* performing sanity checks on-the-fly. If the input region of interest equals `CGRectZero`, returns
 * a default `NormalizedRect` covering the whole image with rotation set according to
* `imageOrientation`. Mirrored orientations
* (`UIImageOrientationUpMirrored`,`UIImageOrientationDownMirrored`,
* `UIImageOrientationLeftMirrored`,`UIImageOrientationRightMirrored`) are not supported. An error
* will be returned if `imageOrientation` is equal to any one of them.
*
* @param roi A `CGRect` specifying the region of interest. If the input region of interest equals
* `CGRectZero`, the returned `NormalizedRect` covers the whole image.
* @param imageOrientation A `UIImageOrientation` indicating the rotation to be applied to the
* image. The resulting `NormalizedRect` will convert the `imageOrientation` to degrees clockwise.
* Mirrored orientations (`UIImageOrientationUpMirrored`, `UIImageOrientationDownMirrored`,
* `UIImageOrientationLeftMirrored`, `UIImageOrientationRightMirrored`) are not supported. An error
* will be returned if `imageOrientation` is equal to any one of them.
* @param imageSize A `CGSize` specifying the size of the image within which normalized rect is
* calculated.
* @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
* error will be saved. * error will be saved.
* *
* @return An optional `NormalizedRect` from the given region of interest and image orientation. * @return An optional `NormalizedRect` from the given region of interest and image orientation.
*/ */
- (std::optional<mediapipe::NormalizedRect>) - (std::optional<mediapipe::NormalizedRect>)
normalizedRectFromRegionOfInterest:(CGRect)roi normalizedRectWithRegionOfInterest:(CGRect)roi
imageSize:(CGSize)imageSize
imageOrientation:(UIImageOrientation)imageOrientation imageOrientation:(UIImageOrientation)imageOrientation
ROIAllowed:(BOOL)ROIAllowed imageSize:(CGSize)imageSize
error:(NSError **)error; error:(NSError **)error;
/** /**
* A synchronous method to invoke the C++ task runner to process single image inputs. The call * A synchronous method to invoke the C++ task runner to process single image inputs. The call
* blocks the current thread until a failure status or a successful result is returned. * blocks the current thread until a failure status or a successful result is returned.

View File

@ -91,7 +91,30 @@ static NSString *const kTaskPrefix = @"com.mediapipe.tasks.vision";
return self; return self;
} }
- (std::optional<NormalizedRect>)normalizedRectFromRegionOfInterest:(CGRect)roi - (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
imageOrientation:
(UIImageOrientation)imageOrientation
imageSize:(CGSize)imageSize
error:(NSError **)error {
return [self normalizedRectWithRegionOfInterest:roi
imageSize:imageSize
imageOrientation:imageOrientation
ROIAllowed:YES
error:error];
}
- (std::optional<NormalizedRect>)normalizedRectWithImageOrientation:
(UIImageOrientation)imageOrientation
imageSize:(CGSize)imageSize
error:(NSError **)error {
return [self normalizedRectWithRegionOfInterest:CGRectZero
imageSize:imageSize
imageOrientation:imageOrientation
ROIAllowed:NO
error:error];
}
- (std::optional<NormalizedRect>)normalizedRectWithRegionOfInterest:(CGRect)roi
imageSize:(CGSize)imageSize imageSize:(CGSize)imageSize
imageOrientation: imageOrientation:
(UIImageOrientation)imageOrientation (UIImageOrientation)imageOrientation

View File

@ -128,10 +128,8 @@ static NSString *const kTaskName = @"faceDetector";
timestampInMilliseconds:(NSInteger)timestampInMilliseconds timestampInMilliseconds:(NSInteger)timestampInMilliseconds
error:(NSError **)error { error:(NSError **)error {
std::optional<NormalizedRect> rect = std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
imageSize:CGSizeMake(image.width, image.height) imageSize:CGSizeMake(image.width, image.height)
imageOrientation:image.orientation
ROIAllowed:NO
error:error]; error:error];
if (!rect.has_value()) { if (!rect.has_value()) {
return std::nullopt; return std::nullopt;
@ -154,10 +152,8 @@ static NSString *const kTaskName = @"faceDetector";
- (nullable MPPFaceDetectorResult *)detectInImage:(MPPImage *)image error:(NSError **)error { - (nullable MPPFaceDetectorResult *)detectInImage:(MPPImage *)image error:(NSError **)error {
std::optional<NormalizedRect> rect = std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
imageSize:CGSizeMake(image.width, image.height) imageSize:CGSizeMake(image.width, image.height)
imageOrientation:image.orientation
ROIAllowed:NO
error:error]; error:error];
if (!rect.has_value()) { if (!rect.has_value()) {
return nil; return nil;

View File

@ -14,6 +14,8 @@
#import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h" #import "mediapipe/tasks/ios/vision/gesture_recognizer/sources/MPPGestureRecognizerResult.h"
#include "mediapipe/framework/formats/classification.pb.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/packet.h" #include "mediapipe/framework/packet.h"
NS_ASSUME_NONNULL_BEGIN NS_ASSUME_NONNULL_BEGIN
@ -23,14 +25,14 @@ static const int kMicroSecondsPerMilliSecond = 1000;
@interface MPPGestureRecognizerResult (Helpers) @interface MPPGestureRecognizerResult (Helpers)
/** /**
* Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world * Creates an `MPPGestureRecognizerResult` from hand gestures, handedness, hand landmarks and world
* landmarks packets. * landmarks packets.
* *
* @param handGesturesPacket a MediaPipe packet wrapping a`std::vector<ClassificationListProto>`. * @param handGesturesPacket A MediaPipe packet wrapping a`std::vector<ClassificationListProto>`.
* @param handednessPacket a MediaPipe packet wrapping a`std::vector<ClassificationListProto>`. * @param handednessPacket A MediaPipe packet wrapping a`std::vector<ClassificationListProto>`.
* @param handLandmarksPacket a MediaPipe packet wrapping * @param handLandmarksPacket A MediaPipe packet wrapping
 * a `std::vector<NormalizedLandmarkListProto>`. * a `std::vector<NormalizedLandmarkListProto>`.
* @param handLandmarksPacket a MediaPipe packet wrapping a`std::vector<LandmarkListProto>`. * @param worldLandmarksPacket A MediaPipe packet wrapping a`std::vector<LandmarkListProto>`.
* *
* @return An `MPPGestureRecognizerResult` object that contains the hand gesture recognition * @return An `MPPGestureRecognizerResult` object that contains the hand gesture recognition
* results. * results.

View File

@ -21,6 +21,8 @@
#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/packet.h" #include "mediapipe/framework/packet.h"
static const NSInteger kDefaultGestureIndex = -1;
namespace { namespace {
using ClassificationListProto = ::mediapipe::ClassificationList; using ClassificationListProto = ::mediapipe::ClassificationList;
using LandmarkListProto = ::mediapipe::LandmarkList; using LandmarkListProto = ::mediapipe::LandmarkList;
@ -30,6 +32,15 @@ using ::mediapipe::Packet;
@implementation MPPGestureRecognizerResult (Helpers) @implementation MPPGestureRecognizerResult (Helpers)
+ (MPPGestureRecognizerResult *)emptyGestureRecognizerResultWithTimestampInMilliseconds:
(NSInteger)timestampInMilliseconds {
return [[MPPGestureRecognizerResult alloc] initWithGestures:@[]
handedness:@[]
landmarks:@[]
worldLandmarks:@[]
timestampInMilliseconds:timestampInMilliseconds];
}
+ (MPPGestureRecognizerResult *) + (MPPGestureRecognizerResult *)
gestureRecognizerResultWithHandGesturesPacket:(const Packet &)handGesturesPacket gestureRecognizerResultWithHandGesturesPacket:(const Packet &)handGesturesPacket
handednessPacket:(const Packet &)handednessPacket handednessPacket:(const Packet &)handednessPacket
@ -39,18 +50,16 @@ using ::mediapipe::Packet;
(NSInteger)(handGesturesPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond); (NSInteger)(handGesturesPacket.Timestamp().Value() / kMicroSecondsPerMilliSecond);
if (handGesturesPacket.IsEmpty()) { if (handGesturesPacket.IsEmpty()) {
return [[MPPGestureRecognizerResult alloc] initWithGestures:@[] return [MPPGestureRecognizerResult
handedness:@[] emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds];
landmarks:@[]
worldLandmarks:@[]
timestampInMilliseconds:timestampInMilliseconds];
} }
if (!handGesturesPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() || if (!handGesturesPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
!handednessPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() || !handednessPacket.ValidateAsType<std::vector<ClassificationListProto>>().ok() ||
!handLandmarksPacket.ValidateAsType<std::vector<NormalizedLandmarkListProto>>().ok() || !handLandmarksPacket.ValidateAsType<std::vector<NormalizedLandmarkListProto>>().ok() ||
!worldLandmarksPacket.ValidateAsType<std::vector<LandmarkListProto>>().ok()) { !worldLandmarksPacket.ValidateAsType<std::vector<LandmarkListProto>>().ok()) {
return nil; return [MPPGestureRecognizerResult
emptyGestureRecognizerResultWithTimestampInMilliseconds:timestampInMilliseconds];
} }
const std::vector<ClassificationListProto> &handGesturesClassificationListProtos = const std::vector<ClassificationListProto> &handGesturesClassificationListProtos =
@ -62,7 +71,8 @@ using ::mediapipe::Packet;
NSMutableArray<MPPCategory *> *gestures = [NSMutableArray NSMutableArray<MPPCategory *> *gestures = [NSMutableArray
arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()]; arrayWithCapacity:(NSUInteger)classificationListProto.classification().size()];
for (const auto &classificationProto : classificationListProto.classification()) { for (const auto &classificationProto : classificationListProto.classification()) {
MPPCategory *category = [MPPCategory categoryWithProto:classificationProto]; MPPCategory *category = [MPPCategory categoryWithProto:classificationProto
index:kDefaultGestureIndex];
[gestures addObject:category]; [gestures addObject:category];
} }
[multiHandGestures addObject:gestures]; [multiHandGestures addObject:gestures];

View File

@ -166,10 +166,9 @@ static const int kMicroSecondsPerMilliSecond = 1000;
regionOfInterest:(CGRect)roi regionOfInterest:(CGRect)roi
error:(NSError **)error { error:(NSError **)error {
std::optional<NormalizedRect> rect = std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:roi [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
imageSize:CGSizeMake(image.width, image.height)
imageOrientation:image.orientation imageOrientation:image.orientation
ROIAllowed:YES imageSize:CGSizeMake(image.width, image.height)
error:error]; error:error];
if (!rect.has_value()) { if (!rect.has_value()) {
return nil; return nil;
@ -196,15 +195,18 @@ static const int kMicroSecondsPerMilliSecond = 1000;
outputPacketMap.value()[kClassificationsStreamName.cppString]]; outputPacketMap.value()[kClassificationsStreamName.cppString]];
} }
- (nullable MPPImageClassifierResult *)classifyImage:(MPPImage *)image error:(NSError **)error {
return [self classifyImage:image regionOfInterest:CGRectZero error:error];
}
- (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image - (std::optional<PacketMap>)inputPacketMapWithMPPImage:(MPPImage *)image
timestampInMilliseconds:(NSInteger)timestampInMilliseconds timestampInMilliseconds:(NSInteger)timestampInMilliseconds
regionOfInterest:(CGRect)roi regionOfInterest:(CGRect)roi
error:(NSError **)error { error:(NSError **)error {
std::optional<NormalizedRect> rect = std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:roi [_visionTaskRunner normalizedRectWithRegionOfInterest:roi
imageSize:CGSizeMake(image.width, image.height)
imageOrientation:image.orientation imageOrientation:image.orientation
ROIAllowed:YES imageSize:CGSizeMake(image.width, image.height)
error:error]; error:error];
if (!rect.has_value()) { if (!rect.has_value()) {
return std::nullopt; return std::nullopt;
@ -225,10 +227,6 @@ static const int kMicroSecondsPerMilliSecond = 1000;
return inputPacketMap; return inputPacketMap;
} }
- (nullable MPPImageClassifierResult *)classifyImage:(MPPImage *)image error:(NSError **)error {
return [self classifyImage:image regionOfInterest:CGRectZero error:error];
}
- (nullable MPPImageClassifierResult *)classifyVideoFrame:(MPPImage *)image - (nullable MPPImageClassifierResult *)classifyVideoFrame:(MPPImage *)image
timestampInMilliseconds:(NSInteger)timestampInMilliseconds timestampInMilliseconds:(NSInteger)timestampInMilliseconds
regionOfInterest:(CGRect)roi regionOfInterest:(CGRect)roi

View File

@ -160,10 +160,8 @@ static NSString *const kTaskName = @"objectDetector";
timestampInMilliseconds:(NSInteger)timestampInMilliseconds timestampInMilliseconds:(NSInteger)timestampInMilliseconds
error:(NSError **)error { error:(NSError **)error {
std::optional<NormalizedRect> rect = std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:CGRectZero [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
imageSize:CGSizeMake(image.width, image.height) imageSize:CGSizeMake(image.width, image.height)
imageOrientation:image.orientation
ROIAllowed:NO
error:error]; error:error];
if (!rect.has_value()) { if (!rect.has_value()) {
return std::nullopt; return std::nullopt;
@ -188,10 +186,8 @@ static NSString *const kTaskName = @"objectDetector";
regionOfInterest:(CGRect)roi regionOfInterest:(CGRect)roi
error:(NSError **)error { error:(NSError **)error {
std::optional<NormalizedRect> rect = std::optional<NormalizedRect> rect =
[_visionTaskRunner normalizedRectFromRegionOfInterest:roi [_visionTaskRunner normalizedRectWithImageOrientation:image.orientation
imageSize:CGSizeMake(image.width, image.height) imageSize:CGSizeMake(image.width, image.height)
imageOrientation:image.orientation
ROIAllowed:NO
error:error]; error:error];
if (!rect.has_value()) { if (!rect.has_value()) {
return nil; return nil;