Merge pull request #4104 from priankakariatyml:ios-image-classifier

PiperOrigin-RevId: 512991238
Copybara-Service 2023-02-28 11:29:19 -08:00
commit f1b20b0c52
10 changed files with 366 additions and 113 deletions

View File

@@ -0,0 +1,27 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#include "mediapipe/framework/packet.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
/**
* This class helps create various kinds of packets for MediaPipe Vision Tasks.
*/
@interface MPPVisionPacketCreator : NSObject
+ (mediapipe::Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error;
@end
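
A minimal usage sketch of the packet creator in hypothetical Objective-C++ (.mm) caller code, assuming a valid MPPImage named `image`; none of this is part of the commit:

// Hypothetical caller; createPacketWithMPPImage: returns an empty packet on failure.
NSError *error = nil;
mediapipe::Packet packet = [MPPVisionPacketCreator createPacketWithMPPImage:image error:&error];
if (packet.IsEmpty()) {
  NSLog(@"Packet creation failed: %@", error);
}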

View File

@@ -0,0 +1,43 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h"
#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h"
#include "mediapipe/framework/formats/image.h"
namespace {
using ::mediapipe::Image;
using ::mediapipe::ImageFrame;
using ::mediapipe::MakePacket;
using ::mediapipe::Packet;
} // namespace
struct freeDeleter {
void operator()(void *ptr) { free(ptr); }
};
@implementation MPPVisionPacketCreator
+ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error {
std::unique_ptr<ImageFrame> imageFrame = [image imageFrameWithError:error];
if (!imageFrame) {
return Packet();
}
return MakePacket<Image>(std::move(imageFrame));
}
@end

View File

@@ -4,23 +4,22 @@ licenses(["notice"])
objc_library(
name = "MPPImageUtils",
srcs = ["sources/MPPImage+Utils.m"],
srcs = ["sources/MPPImage+Utils.mm"],
hdrs = ["sources/MPPImage+Utils.h"],
copts = [
"-ObjC++",
"-std=c++17",
],
module_name = "MPPImageUtils",
sdk_frameworks = [
"Accelerate",
"CoreGraphics",
"CoreImage",
"CoreVideo",
"UIKit",
],
deps = [
"//mediapipe/framework/formats:image_format_cc_proto",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/tasks/ios/common:MPPCommon",
"//mediapipe/tasks/ios/common/utils:MPPCommonUtils",
"//mediapipe/tasks/ios/vision/core:MPPImage",
"//third_party/apple_frameworks:UIKit",
"//third_party/apple_frameworks:Accelerate",
"//third_party/apple_frameworks:CoreGraphics",
"//third_party/apple_frameworks:CoreImage",
"//third_party/apple_frameworks:CoreVideo",
],
)

View File

@@ -14,30 +14,27 @@
#import <Foundation/Foundation.h>
#include "mediapipe/framework/formats/image_frame.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h"
NS_ASSUME_NONNULL_BEGIN
/**
* Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library.
* Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`.
*/
@interface MPPImage (Utils)
/** Bitmap size of the image. */
@property(nonatomic, readonly) CGSize bitmapSize;
/**
* Returns the underlying uint8 pixel buffer of an `MPPImage`.
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel
* data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated
* in the R, G, B channels.
* Converts the `MPPImage` into a `mediapipe::ImageFrame`.
* Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is
* converted to an RGB format. In case of grayscale images, the mono channel is duplicated in the R,
* G, B channels.
*
* @param error Pointer to the memory location where errors, if any, should be saved. If @c NULL, no
* error will be saved.
*
* @return The underlying pixel buffer of the `MPPImage` or nil in case of errors.
* @return An std::unique_ptr<mediapipe::ImageFrame> or `nullptr` in case of errors.
*/
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error;
- (std::unique_ptr<mediapipe::ImageFrame>)imageFrameWithError:(NSError **)error;
@end
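
A hedged usage sketch of this category (hypothetical caller code; the MPPImage initializer shown and the `uiImage` variable are assumptions, not part of this file):

// Hypothetical Objective-C++ caller.
NSError *error = nil;
MPPImage *mppImage = [[MPPImage alloc] initWithUIImage:uiImage error:&error];
std::unique_ptr<mediapipe::ImageFrame> frame = [mppImage imageFrameWithError:&error];
if (frame) {
  // The returned frame is SRGB regardless of the source pixel format.
  NSLog(@"Converted frame: %dx%d", frame->Width(), frame->Height());
}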

View File

@@ -22,6 +22,12 @@
#import <CoreImage/CoreImage.h>
#import <CoreVideo/CoreVideo.h>
#include "mediapipe/framework/formats/image_format.pb.h"
namespace {
using ::mediapipe::ImageFrame;
}
@interface MPPPixelDataUtils : NSObject
+ (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData
@@ -35,21 +41,20 @@
@interface MPPCVPixelBufferUtils : NSObject
+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error;
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error;
@end
@interface MPPCGImageUtils : NSObject
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error;
@end
@interface UIImage (RawPixelDataUtils)
@interface UIImage (ImageFrameUtils)
@property(nonatomic, readonly) CGSize bitmapSize;
- (uint8_t *)pixelDataWithError:(NSError **)error;
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error;
@end
@@ -120,9 +125,14 @@
@implementation MPPCVPixelBufferUtils
+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error {
+ (std::unique_ptr<ImageFrame>)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error {
CVPixelBufferLockBaseAddress(pixelBuffer, 0);
size_t width = CVPixelBufferGetWidth(pixelBuffer);
size_t height = CVPixelBufferGetHeight(pixelBuffer);
size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer);
uint8_t *rgbPixelData = [MPPPixelDataUtils
rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer)
withWidth:CVPixelBufferGetWidth(pixelBuffer)
@@ -133,19 +143,24 @@
CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
return rgbPixelData;
if (!rgbPixelData) {
return nullptr;
}
std::unique_ptr<ImageFrame> imageFrame = absl::make_unique<ImageFrame>(
::mediapipe::ImageFormat::SRGB, width, height, stride, static_cast<uint8 *>(rgbPixelData),
/*deleter=*/free);
return imageFrame;
}
+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error {
uint8_t *pixelData = NULL;
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error {
OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer);
switch (pixelBufferFormat) {
case kCVPixelFormatType_32BGRA: {
pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error];
break;
return [MPPCVPixelBufferUtils rgbImageFrameFromCVPixelBuffer:pixelBuffer error:error];
}
default: {
[MPPCommonUtils createCustomError:error
@@ -155,20 +170,20 @@
}
}
return pixelData;
return nullptr;
}
@end
@implementation MPPCGImageUtils
+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
+ (std::unique_ptr<ImageFrame>)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error {
size_t width = CGImageGetWidth(cgImage);
size_t height = CGImageGetHeight(cgImage);
NSInteger bitsPerComponent = 8;
NSInteger channelCount = 4;
UInt8 *pixel_data_to_return = NULL;
UInt8 *pixelDataToReturn = NULL;
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
size_t bytesPerRow = channelCount * width;
@@ -191,12 +206,12 @@
if (srcData) {
// We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input
// a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage.
pixel_data_to_return = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
withWidth:width
height:height
stride:bytesPerRow
pixelBufferFormat:kCVPixelFormatType_32RGBA
error:error];
pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData
withWidth:width
height:height
stride:bytesPerRow
pixelBufferFormat:kCVPixelFormatType_32RGBA
error:error];
}
CGContextRelease(context);
@@ -204,38 +219,38 @@
CGColorSpaceRelease(colorSpace);
return pixel_data_to_return;
std::unique_ptr<ImageFrame> imageFrame =
absl::make_unique<ImageFrame>(mediapipe::ImageFormat::SRGB, (int)width, (int)height,
(int)bytesPerRow, static_cast<uint8 *>(pixelDataToReturn),
/*deleter=*/free);
return imageFrame;
}
@end
@implementation UIImage (RawPixelDataUtils)
- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error {
uint8_t *pixelData = NULL;
@implementation UIImage (ImageFrameUtils)
- (std::unique_ptr<ImageFrame>)imageFrameFromCIImageWithError:(NSError **)error {
if (self.CIImage.pixelBuffer) {
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer
error:error];
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error];
} else if (self.CIImage.CGImage) {
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error];
return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error];
} else {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError
description:@"CIImage should have CGImage or CVPixelBuffer info."];
}
return pixelData;
return nullptr;
}
- (uint8_t *)pixelDataWithError:(NSError **)error {
uint8_t *pixelData = nil;
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
if (self.CGImage) {
pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error];
return [MPPCGImageUtils imageFrameFromCGImage:self.CGImage error:error];
} else if (self.CIImage) {
pixelData = [self pixelDataFromCIImageWithError:error];
return [self imageFrameFromCIImageWithError:error];
} else {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError
@@ -243,46 +258,24 @@
" CIImage or CGImage."];
}
return pixelData;
return nullptr;
}
- (CGSize)bitmapSize {
CGFloat width = 0;
CGFloat height = 0;
if (self.CGImage) {
width = CGImageGetWidth(self.CGImage);
height = CGImageGetHeight(self.CGImage);
} else if (self.CIImage.pixelBuffer) {
width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer);
height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer);
} else if (self.CIImage.CGImage) {
width = CGImageGetWidth(self.CIImage.CGImage);
height = CGImageGetHeight(self.CIImage.CGImage);
}
return CGSizeMake(width, height);
}
@end
@implementation MPPImage (Utils)
- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error {
uint8_t *pixelData = NULL;
- (std::unique_ptr<ImageFrame>)imageFrameWithError:(NSError **)error {
switch (self.imageSourceType) {
case MPPImageSourceTypeSampleBuffer: {
CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:sampleImagePixelBuffer
error:error];
break;
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error];
}
case MPPImageSourceTypePixelBuffer: {
pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error];
break;
return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error];
}
case MPPImageSourceTypeImage: {
pixelData = [self.image pixelDataWithError:error];
break;
return [self.image imageFrameWithError:error];
}
default:
[MPPCommonUtils createCustomError:error
@@ -290,35 +283,7 @@
description:@"Invalid source type for MPPImage."];
}
return pixelData;
}
- (CGSize)bitmapSize {
CGFloat width = 0;
CGFloat height = 0;
switch (self.imageSourceType) {
case MPPImageSourceTypeSampleBuffer: {
CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer);
width = CVPixelBufferGetWidth(pixelBuffer);
height = CVPixelBufferGetHeight(pixelBuffer);
break;
}
case MPPImageSourceTypePixelBuffer: {
width = CVPixelBufferGetWidth(self.pixelBuffer);
height = CVPixelBufferGetHeight(self.pixelBuffer);
break;
}
case MPPImageSourceTypeImage: {
width = self.image.bitmapSize.width;
height = self.image.bitmapSize.height;
break;
}
default:
break;
}
return CGSizeMake(width, height);
return nullptr;
}
@end
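
The conversions above hand a heap-allocated RGB buffer to ImageFrame with `free` as the deleter, so the frame takes ownership of the pixels. A standalone sketch of that ownership pattern with illustrative values (not part of this commit):

// Illustrative only: wrap an existing malloc'ed buffer in an ImageFrame that frees it.
int width = 2;
int height = 2;
int stride = width * 3;  // SRGB, 3 bytes per pixel, no row padding
uint8_t *pixels = static_cast<uint8_t *>(malloc(stride * height));
memset(pixels, 255, stride * height);  // a tiny all-white image
std::unique_ptr<mediapipe::ImageFrame> frame = absl::make_unique<mediapipe::ImageFrame>(
    mediapipe::ImageFormat::SRGB, width, height, stride, pixels,
    /*deleter=*/free);  // `frame` now owns `pixels` and frees them on destruction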

View File

@@ -0,0 +1,38 @@
# Copyright 2023 The MediaPipe Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
package(default_visibility = ["//mediapipe/tasks:internal"])
licenses(["notice"])
objc_library(
name = "MPPImageClassifierResult",
srcs = ["sources/MPPImageClassifierResult.m"],
hdrs = ["sources/MPPImageClassifierResult.h"],
deps = [
"//mediapipe/tasks/ios/components/containers:MPPClassificationResult",
"//mediapipe/tasks/ios/core:MPPTaskResult",
],
)
objc_library(
name = "MPPImageClassifierOptions",
srcs = ["sources/MPPImageClassifierOptions.m"],
hdrs = ["sources/MPPImageClassifierOptions.h"],
deps = [
":MPPImageClassifierResult",
"//mediapipe/tasks/ios/core:MPPTaskOptions",
"//mediapipe/tasks/ios/vision/core:MPPRunningMode",
],
)

View File

@@ -0,0 +1,71 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h"
#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h"
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h"
NS_ASSUME_NONNULL_BEGIN
/**
* Options for setting up a `MPPImageClassifier`.
*/
NS_SWIFT_NAME(ImageClassifierOptions)
@interface MPPImageClassifierOptions : MPPTaskOptions <NSCopying>
@property(nonatomic) MPPRunningMode runningMode;
/**
* The user-defined result callback for processing live stream data. The result callback should only
* be specified when the running mode is set to the live stream mode.
*/
@property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error);
/**
* The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults
* to English.
*/
@property(nonatomic, copy) NSString *displayNamesLocale;
/**
* The maximum number of top-scored classification results to return. If < 0, all available results
* will be returned. If 0, an invalid argument error is returned.
*/
@property(nonatomic) NSInteger maxResults;
/**
* Score threshold to override the one provided in the model metadata (if any). Results below this
* value are rejected.
*/
@property(nonatomic) float scoreThreshold;
/**
* The allowlist of category names. If non-empty, classification results whose category name is not in
* this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
* exclusive with categoryDenylist.
*/
@property(nonatomic, copy) NSArray<NSString *> *categoryAllowlist;
/**
* The denylist of category names. If non-empty, classification results whose category name is in this
* set will be filtered out. Duplicate or unknown category names are ignored. Mutually exclusive
* with categoryAllowlist.
*/
@property(nonatomic, copy) NSArray<NSString *> *categoryDenylist;
@end
NS_ASSUME_NONNULL_END
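
A hedged configuration sketch for these options (hypothetical caller code; the property names come from the header above, while the callback body and chosen values are purely illustrative):

// Hypothetical caller.
MPPImageClassifierOptions *options = [[MPPImageClassifierOptions alloc] init];
options.maxResults = 3;                           // keep only the top 3 categories
options.scoreThreshold = 0.5f;                    // reject results scoring below 0.5
options.displayNamesLocale = @"en";
options.categoryAllowlist = @[ @"cat", @"dog" ];  // mutually exclusive with categoryDenylist
options.completion = ^(MPPImageClassifierResult *result, NSError *error) {
  // Only used in live stream mode; result delivery details are outside this header.
};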

View File

@@ -0,0 +1,41 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h"
@implementation MPPImageClassifierOptions
- (instancetype)init {
self = [super init];
if (self) {
_maxResults = -1;
_scoreThreshold = 0;
}
return self;
}
- (id)copyWithZone:(NSZone *)zone {
MPPImageClassifierOptions *imageClassifierOptions = [super copyWithZone:zone];
imageClassifierOptions.scoreThreshold = self.scoreThreshold;
imageClassifierOptions.maxResults = self.maxResults;
imageClassifierOptions.categoryDenylist = self.categoryDenylist;
imageClassifierOptions.categoryAllowlist = self.categoryAllowlist;
imageClassifierOptions.displayNamesLocale = self.displayNamesLocale;
imageClassifierOptions.completion = self.completion;
return imageClassifierOptions;
}
@end
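
Because the options conform to NSCopying, a caller can snapshot a configuration before varying it; a minimal sketch (hypothetical caller code):

MPPImageClassifierOptions *base = [[MPPImageClassifierOptions alloc] init];
base.maxResults = 5;
MPPImageClassifierOptions *variant = [base copy];  // property-by-property copy via copyWithZone:
variant.scoreThreshold = 0.8f;                     // leaves `base` unchanged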

View File

@@ -0,0 +1,44 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import <Foundation/Foundation.h>
#import "mediapipe/tasks/ios/components/containers/sources/MPPClassificationResult.h"
#import "mediapipe/tasks/ios/core/sources/MPPTaskResult.h"
NS_ASSUME_NONNULL_BEGIN
/** Represents the classification results generated by `MPPImageClassifier`. **/
NS_SWIFT_NAME(ImageClassifierResult)
@interface MPPImageClassifierResult : MPPTaskResult
/** The `MPPClassificationResult` instance containing one set of results per classifier head. **/
@property(nonatomic, readonly) MPPClassificationResult *classificationResult;
/**
* Initializes a new `MPPImageClassifierResult` with the given `MPPClassificationResult` and
* timestamp (in milliseconds).
*
* @param classificationResult The `MPPClassificationResult` instance containing one set of results
* per classifier head.
* @param timestampMs The timestamp for this result.
*
* @return An instance of `MPPImageClassifierResult` initialized with the given
* `MPPClassificationResult` and timestamp (in milliseconds).
*/
- (instancetype)initWithClassificationResult:(MPPClassificationResult *)classificationResult
timestampMs:(NSInteger)timestampMs;
@end
NS_ASSUME_NONNULL_END
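
A brief construction sketch (hypothetical; `classificationResult` is assumed to be an already-built MPPClassificationResult):

MPPImageClassifierResult *result =
    [[MPPImageClassifierResult alloc] initWithClassificationResult:classificationResult
                                                        timestampMs:0];
// result.classificationResult holds one set of classifications per classifier head.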

View File

@@ -0,0 +1,28 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h"
@implementation MPPImageClassifierResult
- (instancetype)initWithClassificationResult:(MPPClassificationResult *)classificationResult
timestampMs:(NSInteger)timestampMs {
self = [super initWithTimestampMs:timestampMs];
if (self) {
_classificationResult = classificationResult;
}
return self;
}
@end