Merge pull request #4093 from priankakariatyml:ios-ml-image-utils

PiperOrigin-RevId: 509602397
This commit is contained in:
Copybara-Service 2023-02-14 12:17:41 -08:00
commit d6fd2c52a7
7 changed files with 598 additions and 0 deletions

View File

@ -24,3 +24,25 @@ objc_library(
"//third_party/apple_frameworks:UIKit", "//third_party/apple_frameworks:UIKit",
], ],
) )
# Header-only library exposing the running-mode enum (image / video / live stream)
# shared by all iOS vision tasks.
objc_library(
    name = "MPPRunningMode",
    hdrs = ["sources/MPPRunningMode.h"],
    module_name = "MPPRunningMode",
)
# Objective-C++ wrapper around the core MPPTaskRunner for vision tasks. Validates
# that the packets callback is consistent with the configured running mode.
# Compiled as Objective-C++ (C++17) because it passes MediaPipe C++ types through
# its interface.
objc_library(
    name = "MPPVisionTaskRunner",
    srcs = ["sources/MPPVisionTaskRunner.mm"],
    hdrs = ["sources/MPPVisionTaskRunner.h"],
    copts = [
        "-ObjC++",
        "-std=c++17",
    ],
    deps = [
        ":MPPRunningMode",
        "//mediapipe/tasks/ios/common:MPPCommon",
        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
        "//mediapipe/tasks/ios/core:MPPTaskRunner",
    ],
)

View File

@ -0,0 +1,41 @@
// Copyright 2023 The MediaPipe Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/**
 * MediaPipe vision task running mode. A MediaPipe vision task can be run with three different
 * modes: image, video and live stream.
 */
typedef NS_ENUM(NSUInteger, MPPRunningMode) {

  /** The mode for running a mediapipe vision task on single image inputs. */
  MPPRunningModeImage,

  /** The mode for running a mediapipe vision task on the decoded frames of a video. */
  MPPRunningModeVideo,

  /**
   * The mode for running a mediapipe vision task on a live stream of input data, such as from the
   * camera.
   */
  MPPRunningModeLiveStream,

} NS_SWIFT_NAME(RunningMode);
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,68 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import "mediapipe/tasks/ios/core/sources/MPPTaskRunner.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
NS_ASSUME_NONNULL_BEGIN
/**
 * This class is used to create and call appropriate methods on the C++ Task Runner to initialize,
 * execute and terminate any MediaPipe vision task.
 */
@interface MPPVisionTaskRunner : MPPTaskRunner

/**
 * Initializes a new `MPPVisionTaskRunner` with the given MediaPipe calculator config proto,
 * running mode and packets callback.
 *
 * @param graphConfig A MediaPipe calculator config proto.
 * @param runningMode MediaPipe vision task running mode.
 * @param packetsCallback An optional C++ callback function that takes a list of output packets as
 * the input argument. If provided, the callback must in turn call the block provided by the user in
 * the appropriate task options. Make sure that the packets callback is set properly based on the
 * vision task's running mode. In case of live stream running mode, a C++ packets callback that is
 * intended to deliver inference results must be provided. In case of image or video running mode,
 * packets callback must be set to nil.
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return An instance of `MPPVisionTaskRunner` initialized to the given MediaPipe calculator config
 * proto, running mode and packets callback.
 */
- (nullable instancetype)initWithCalculatorGraphConfig:(mediapipe::CalculatorGraphConfig)graphConfig
                                           runningMode:(MPPRunningMode)runningMode
                                       packetsCallback:
                                           (mediapipe::tasks::core::PacketsCallback)packetsCallback
                                                 error:(NSError **)error NS_DESIGNATED_INITIALIZER;

/** The superclass initializer is unavailable; a running mode must always be supplied. */
- (instancetype)initWithCalculatorGraphConfig:(mediapipe::CalculatorGraphConfig)graphConfig
                              packetsCallback:
                                  (mediapipe::tasks::core::PacketsCallback)packetsCallback
                                        error:(NSError **)error NS_UNAVAILABLE;

- (instancetype)init NS_UNAVAILABLE;

+ (instancetype)new NS_UNAVAILABLE;

@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,73 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h"
#import "mediapipe/tasks/ios/common/sources/MPPCommon.h"
#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
namespace {
using ::mediapipe::CalculatorGraphConfig;
using ::mediapipe::tasks::core::PacketsCallback;
}  // namespace

/** Class extension storing the running mode the task runner was validated against at init. */
@interface MPPVisionTaskRunner () {
  // Running mode (image / video / live stream) captured by the designated initializer.
  MPPRunningMode _runningMode;
}
@end
@implementation MPPVisionTaskRunner

/**
 * Designated initializer. Validates that the packets callback is consistent with the requested
 * running mode (required for live stream, forbidden for image/video) before delegating graph
 * initialization to the superclass.
 */
- (nullable instancetype)initWithCalculatorGraphConfig:(CalculatorGraphConfig)graphConfig
                                           runningMode:(MPPRunningMode)runningMode
                                       packetsCallback:(PacketsCallback)packetsCallback
                                                 error:(NSError **)error {
  switch (runningMode) {
    case MPPRunningModeImage:
    case MPPRunningModeVideo: {
      // Image and video modes are synchronous; results are returned to the caller directly, so
      // an async packets callback indicates caller error.
      if (packetsCallback) {
        [MPPCommonUtils createCustomError:error
                                 withCode:MPPTasksErrorCodeInvalidArgumentError
                              description:@"The vision task is in image or video mode, a "
                                          @"user-defined result callback should not be provided."];
        return nil;
      }
      break;
    }
    case MPPRunningModeLiveStream: {
      // Live stream mode is asynchronous; results can only be delivered via the callback.
      if (!packetsCallback) {
        [MPPCommonUtils createCustomError:error
                                 withCode:MPPTasksErrorCodeInvalidArgumentError
                              description:@"The vision task is in live stream mode, a user-defined "
                                          @"result callback must be provided."];
        return nil;
      }
      break;
    }
    default: {
      [MPPCommonUtils createCustomError:error
                               withCode:MPPTasksErrorCodeInvalidArgumentError
                            description:@"Unrecognized running mode"];
      return nil;
    }
  }

  self = [super initWithCalculatorGraphConfig:graphConfig
                              packetsCallback:packetsCallback
                                        error:error];
  if (self) {
    // Assign the ivar only after [super init...] has succeeded. The original code assigned
    // _runningMode before calling super, which loses the value if the superclass initializer
    // returns nil or substitutes a different object.
    _runningMode = runningMode;
  }
  return self;
}

@end

View File

@ -0,0 +1,26 @@
package(default_visibility = ["//mediapipe/tasks:internal"])

licenses(["notice"])

# Pixel-data extraction/conversion utilities for MPPImage. Built as Objective-C++
# (C++17) per the copts below, and links the imaging system frameworks needed for
# CVPixelBuffer, CGImage and vImage-based conversion.
objc_library(
    name = "MPPImageUtils",
    srcs = ["sources/MPPImage+Utils.m"],
    hdrs = ["sources/MPPImage+Utils.h"],
    copts = [
        "-ObjC++",
        "-std=c++17",
    ],
    module_name = "MPPImageUtils",
    sdk_frameworks = [
        "Accelerate",
        "CoreGraphics",
        "CoreImage",
        "CoreVideo",
        "UIKit",
    ],
    deps = [
        "//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
        "//mediapipe/tasks/ios/vision/core:MPPImage",
        "//third_party/apple_frameworks:UIKit",
    ],
)

View File

@ -0,0 +1,44 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
NS_ASSUME_NONNULL_BEGIN
/**
 * Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library.
 */
@interface MPPImage (Utils)

/** Bitmap size (width and height, in pixels) of the image's underlying source. */
@property(nonatomic, readonly) CGSize bitmapSize;

/**
 * Returns the underlying uint8 pixel buffer of an `MPPImage`.
 * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel
 * data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated
 * in the R, G, B channels.
 *
 * NOTE(review): the returned buffer appears to be heap-allocated by the implementation and owned
 * by the caller — confirm and document the required free() on the caller side.
 *
 * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no
 * error will be saved.
 *
 * @return The underlying pixel buffer of the `MPPImage` or nil in case of errors.
 */
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error;

@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,324 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h"
#import "mediapipe/tasks/ios/common/sources/MPPCommon.h"
#import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h"
#import <Accelerate/Accelerate.h>
#import <CoreGraphics/CoreGraphics.h>
#import <CoreImage/CoreImage.h>
#import <CoreVideo/CoreVideo.h>
/** Helpers for converting raw interleaved 8-bit pixel data between formats using vImage. */
@interface MPPPixelDataUtils : NSObject

+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
                             withWidth:(size_t)width
                                height:(size_t)height
                                stride:(size_t)stride
                     pixelBufferFormat:(OSType)pixelBufferFormatType
                                 error:(NSError **)error;

@end

/** Helpers for extracting RGB pixel data from a `CVPixelBuffer`. */
@interface MPPCVPixelBufferUtils : NSObject

+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error;

@end

/** Helpers for extracting RGB pixel data from a `CGImage`. */
@interface MPPCGImageUtils : NSObject

+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error;

@end

/** Private category to read raw pixel data and dimensions from a `UIImage`'s backing store. */
@interface UIImage (RawPixelDataUtils)

@property(nonatomic, readonly) CGSize bitmapSize;

- (uint8_t *)pixelDataWithError:(NSError **)error;

@end
// Fixed: the superclass is declared on the @interface; re-specifying it on the
// @implementation (`@implementation MPPPixelDataUtils : NSObject`) is redundant.
@implementation MPPPixelDataUtils

/**
 * Converts interleaved 8-bit 4-channel pixel data (RGBA or BGRA) into a newly allocated
 * RGB888 buffer using vImage.
 *
 * @param pixelData Pointer to the first source pixel row.
 * @param width Image width in pixels.
 * @param height Image height in pixels.
 * @param stride Bytes per source row (may include row padding).
 * @param pixelBufferFormatType One of kCVPixelFormatType_32RGBA or kCVPixelFormatType_32BGRA.
 * @param error Populated on failure if non-NULL.
 *
 * @return A malloc'd buffer of width * height * 3 bytes which the caller must free, or NULL on
 * failure.
 */
+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
                             withWidth:(size_t)width
                                height:(size_t)height
                                stride:(size_t)stride
                     pixelBufferFormat:(OSType)pixelBufferFormatType
                                 error:(NSError **)error {
  NSInteger destinationChannelCount = 3;
  // Destination rows are tightly packed (no padding), unlike the source which may be padded.
  size_t destinationBytesPerRow = width * destinationChannelCount;

  uint8_t *destPixelBufferAddress =
      (uint8_t *)[MPPCommonUtils mallocWithSize:sizeof(uint8_t) * height * destinationBytesPerRow
                                          error:error];

  if (!destPixelBufferAddress) {
    return NULL;
  }

  vImage_Buffer srcBuffer = {.data = pixelData,
                             .height = (vImagePixelCount)height,
                             .width = (vImagePixelCount)width,
                             .rowBytes = stride};

  vImage_Buffer destBuffer = {.data = destPixelBufferAddress,
                              .height = (vImagePixelCount)height,
                              .width = (vImagePixelCount)width,
                              .rowBytes = destinationBytesPerRow};

  vImage_Error convertError = kvImageNoError;

  switch (pixelBufferFormatType) {
    case kCVPixelFormatType_32RGBA: {
      convertError = vImageConvert_RGBA8888toRGB888(&srcBuffer, &destBuffer, kvImageNoFlags);
      break;
    }
    case kCVPixelFormatType_32BGRA: {
      convertError = vImageConvert_BGRA8888toRGB888(&srcBuffer, &destBuffer, kvImageNoFlags);
      break;
    }
    default: {
      [MPPCommonUtils createCustomError:error
                               withCode:MPPTasksErrorCodeInvalidArgumentError
                            description:@"Invalid source pixel buffer format. Expecting one of "
                                        @"kCVPixelFormatType_32RGBA, kCVPixelFormatType_32BGRA"];
      // Every failure path frees the destination buffer to avoid leaking it.
      free(destPixelBufferAddress);
      return NULL;
    }
  }

  if (convertError != kvImageNoError) {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInternalError
                          description:@"Image format conversion failed."];
    free(destPixelBufferAddress);
    return NULL;
  }

  return destPixelBufferAddress;
}

@end
@implementation MPPCVPixelBufferUtils

/**
 * Locks `pixelBuffer`, converts its contents to a newly allocated RGB888 buffer, and unlocks it.
 * The caller must free the returned buffer.
 */
+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error {
  // The buffer is only read here, so take a read-only lock (the original passed 0, i.e. a
  // read/write lock, which can force CoreVideo to invalidate existing caches). The same flag
  // must be passed to both the lock and the matching unlock.
  CVPixelBufferLockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);

  uint8_t *rgbPixelData = [MPPPixelDataUtils
      rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
                      withWidth:CVPixelBufferGetWidth(pixelBuffer)
                         height:CVPixelBufferGetHeight(pixelBuffer)
                         stride:CVPixelBufferGetBytesPerRow(pixelBuffer)
              pixelBufferFormat:CVPixelBufferGetPixelFormatType(pixelBuffer)
                          error:error];

  CVPixelBufferUnlockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);

  return rgbPixelData;
}

/**
 * Dispatches on the pixel buffer's format and returns its contents as RGB888 pixel data.
 * Currently only kCVPixelFormatType_32BGRA buffers are supported; other formats set an
 * invalid-argument error and return NULL.
 */
+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
                                           error:(NSError **)error {
  uint8_t *pixelData = NULL;

  OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);

  switch (pixelBufferFormat) {
    case kCVPixelFormatType_32BGRA: {
      pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error];
      break;
    }
    default: {
      [MPPCommonUtils createCustomError:error
                               withCode:MPPTasksErrorCodeInvalidArgumentError
                            description:@"Unsupported pixel format for CVPixelBuffer. Supported "
                                        @"pixel format types are kCVPixelFormatType_32BGRA"];
    }
  }

  return pixelData;
}

@end
@implementation MPPCGImageUtils

/**
 * Draws `cgImage` into an RGBA8888 bitmap context and converts the result to a newly allocated
 * RGB888 buffer that the caller must free.
 *
 * @param cgImage The source image.
 * @param error Populated on conversion failure if non-NULL.
 *
 * @return A malloc'd RGB buffer, or NULL on failure.
 */
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
  size_t width = CGImageGetWidth(cgImage);
  size_t height = CGImageGetHeight(cgImage);

  NSInteger bitsPerComponent = 8;
  NSInteger channelCount = 4;
  // Renamed from snake_case `pixel_data_to_return` to follow Objective-C lowerCamelCase.
  UInt8 *pixelDataToReturn = NULL;

  CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
  // iOS infers bytesPerRow if it is set to 0.
  // See https://developer.apple.com/documentation/coregraphics/1455939-cgbitmapcontextcreate
  // But for segmentation test image, this was not the case.
  // Hence setting it to the value of channelCount*width.
  size_t bytesPerRow = channelCount * width;

  // kCGImageAlphaNoneSkipLast specifies that Alpha will always be next to B.
  // kCGBitmapByteOrder32Big specifies that R will be stored before B.
  // In combination they signify a pixelFormat of kCVPixelFormatType_32RGBA.
  CGBitmapInfo bitMapinfoFor32RGBA = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrder32Big;
  CGContextRef context = CGBitmapContextCreate(nil, width, height, bitsPerComponent, bytesPerRow,
                                               colorSpace, bitMapinfoFor32RGBA);

  if (context) {
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), cgImage);
    uint8_t *srcData = (uint8_t *)CGBitmapContextGetData(context);

    if (srcData) {
      // We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely
      // input a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
      pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
                                                             withWidth:width
                                                                height:height
                                                                stride:bytesPerRow
                                                     pixelBufferFormat:kCVPixelFormatType_32RGBA
                                                                 error:error];
    }

    CGContextRelease(context);
  }
  // NOTE(review): if context creation or CGBitmapContextGetData fails, NULL is returned without
  // populating *error; callers that rely on *error for diagnostics should be aware.

  CGColorSpaceRelease(colorSpace);
  return pixelDataToReturn;
}

@end
@implementation UIImage (RawPixelDataUtils)

/**
 * Extracts RGB pixel data from the `CIImage` backing this `UIImage`. A CIImage may be backed by
 * either a CVPixelBuffer or a CGImage; both are tried in turn.
 */
- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error {
  uint8_t *pixelData = NULL;

  if (self.CIImage.pixelBuffer) {
    pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer
                                                            error:error];

  } else if (self.CIImage.CGImage) {
    pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error];
  } else {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:@"CIImage should have CGImage or CVPixelBuffer info."];
  }

  return pixelData;
}

/**
 * Extracts RGB pixel data from whichever backing store (CGImage preferred, then CIImage) is
 * available, or sets an invalid-argument error if neither exists.
 */
- (uint8_t *)pixelDataWithError:(NSError **)error {
  // Fixed: initialize a plain C pointer with NULL rather than nil; nil is conventionally
  // reserved for Objective-C object pointers.
  uint8_t *pixelData = NULL;

  if (self.CGImage) {
    pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error];
  } else if (self.CIImage) {
    pixelData = [self pixelDataFromCIImageWithError:error];
  } else {
    [MPPCommonUtils createCustomError:error
                             withCode:MPPTasksErrorCodeInvalidArgumentError
                          description:@"UIImage should be initialized from"
                                       " CIImage or CGImage."];
  }

  return pixelData;
}

/** Returns pixel dimensions of the backing store; (0, 0) when no backing store is available. */
- (CGSize)bitmapSize {
  CGFloat width = 0;
  CGFloat height = 0;

  if (self.CGImage) {
    width = CGImageGetWidth(self.CGImage);
    height = CGImageGetHeight(self.CGImage);
  } else if (self.CIImage.pixelBuffer) {
    width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer);
    height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer);
  } else if (self.CIImage.CGImage) {
    width = CGImageGetWidth(self.CIImage.CGImage);
    height = CGImageGetHeight(self.CIImage.CGImage);
  }

  return CGSizeMake(width, height);
}

@end
@implementation MPPImage (Utils)

/**
 * Returns RGB pixel data extracted from whichever source type backs this image (sample buffer,
 * pixel buffer, or UIImage). Sets an invalid-argument error and returns NULL for an
 * unrecognized source type.
 */
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error {
  switch (self.imageSourceType) {
    case MPPImageSourceTypeSampleBuffer:
      // Borrow the image buffer backing the sample buffer and convert it directly.
      return [MPPCVPixelBufferUtils
          pixelDataFromCVPixelBuffer:CMSampleBufferGetImageBuffer(self.sampleBuffer)
                               error:error];
    case MPPImageSourceTypePixelBuffer:
      return [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error];
    case MPPImageSourceTypeImage:
      return [self.image pixelDataWithError:error];
    default:
      [MPPCommonUtils createCustomError:error
                               withCode:MPPTasksErrorCodeInvalidArgumentError
                            description:@"Invalid source type for MPPImage."];
      return NULL;
  }
}

/**
 * Returns the pixel dimensions of the underlying source, or (0, 0) for an unrecognized source
 * type.
 */
- (CGSize)bitmapSize {
  switch (self.imageSourceType) {
    case MPPImageSourceTypeSampleBuffer: {
      CVPixelBufferRef imageBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
      return CGSizeMake(CVPixelBufferGetWidth(imageBuffer), CVPixelBufferGetHeight(imageBuffer));
    }
    case MPPImageSourceTypePixelBuffer:
      return CGSizeMake(CVPixelBufferGetWidth(self.pixelBuffer),
                        CVPixelBufferGetHeight(self.pixelBuffer));
    case MPPImageSourceTypeImage:
      // Delegate to the UIImage category's bitmapSize.
      return self.image.bitmapSize;
    default:
      return CGSizeMake(0, 0);
  }
}

@end