Merge pull request #4892 from priankakariatyml:ios-mpp-image-utils-pixel-buffer-format-fix

PiperOrigin-RevId: 590770032
This commit is contained in:
Copybara-Service 2023-12-13 18:03:58 -08:00
commit 04bcb0d2ee
2 changed files with 207 additions and 79 deletions

View File

@ -30,6 +30,20 @@ namespace {
using ::mediapipe::ImageFormat; using ::mediapipe::ImageFormat;
using ::mediapipe::ImageFrame; using ::mediapipe::ImageFrame;
vImage_Buffer CreateEmptyVImageBufferFromImageFrame(ImageFrame &imageFrame, bool shouldAllocate) {
UInt8 *data = shouldAllocate ? new UInt8[imageFrame.Height() * imageFrame.WidthStep()] : nullptr;
return {.data = data,
.height = static_cast<vImagePixelCount>(imageFrame.Height()),
.width = static_cast<vImagePixelCount>(imageFrame.Width()),
.rowBytes = static_cast<size_t>(imageFrame.WidthStep())};
}
vImage_Buffer CreateVImageBufferFromImageFrame(ImageFrame &imageFrame) {
vImage_Buffer imageBuffer = CreateEmptyVImageBufferFromImageFrame(imageFrame, false);
imageBuffer.data = imageFrame.MutablePixelData();
return imageBuffer;
}
vImage_Buffer allocatedVImageBuffer(vImagePixelCount width, vImagePixelCount height, vImage_Buffer allocatedVImageBuffer(vImagePixelCount width, vImagePixelCount height,
size_t rowBytes) { size_t rowBytes) {
UInt8 *data = new UInt8[height * rowBytes]; UInt8 *data = new UInt8[height * rowBytes];
@ -40,6 +54,8 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
delete[] (vImage_Buffer *)buffer; delete[] (vImage_Buffer *)buffer;
} }
static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { free(refCon); }
} // namespace } // namespace
@interface MPPPixelDataUtils : NSObject @interface MPPPixelDataUtils : NSObject
@ -51,6 +67,10 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
pixelBufferFormat:(OSType)pixelBufferFormatType pixelBufferFormat:(OSType)pixelBufferFormatType
error:(NSError **)error; error:(NSError **)error;
+ (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame
shouldCopy:(BOOL)shouldCopy
error:(NSError **)error;
@end @end
@interface MPPCVPixelBufferUtils : NSObject @interface MPPCVPixelBufferUtils : NSObject
@ -58,6 +78,24 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
+ (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer + (std::unique_ptr<ImageFrame>)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer
error:(NSError **)error; error:(NSError **)error;
// This method is used to create CVPixelBuffer from output images of tasks like `FaceStylizer` only
// when the input `MPImage` source type is `pixelBuffer`.
// Always copies the pixel data of the image frame to the created `CVPixelBuffer`.
//
// The only possible 32 RGBA pixel format of input `CVPixelBuffer` is `kCVPixelFormatType_32BGRA`.
// But Mediapipe does not support inference on images of format `BGRA`. Hence the channels of the
// underlying pixel data of `CVPixelBuffer` are permuted to the supported RGBA format before passing
// them to the task for inference. The pixel format of the output images of any MediaPipe task will
// be the same as the pixel format of the input image. (RGBA in this case).
//
// Since creation of `CVPixelBuffer` from the output image pixels with a format of
// `kCVPixelFormatType_32RGBA` is not possible, the channels of the output C++ image `RGBA` have to
// be permuted to the format `BGRA`. When the pixels are copied to create `CVPixelBuffer` this does
// not pose a challenge.
//
// TODO: Investigate if permuting channels of output `mediapipe::Image` in place is possible for
// creating `CVPixelBuffer`s without copying the underlying pixels.
+ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error;
@end @end
@interface MPPCGImageUtils : NSObject @interface MPPCGImageUtils : NSObject
@ -99,6 +137,9 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
// Convert the raw pixel data to RGBA format and un-premultiply the alpha from the R, G, B values // Convert the raw pixel data to RGBA format and un-premultiply the alpha from the R, G, B values
// since MediaPipe C++ APIs only accept un pre-multiplied channels. // since MediaPipe C++ APIs only accept un pre-multiplied channels.
//
// This method is commonly used for `MPImage`s of all source types. Hence supporting BGRA and RGBA
// formats. Only `pixelBuffer` source type is restricted to `BGRA` format.
switch (pixelBufferFormatType) { switch (pixelBufferFormatType) {
case kCVPixelFormatType_32RGBA: { case kCVPixelFormatType_32RGBA: {
destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height, destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height,
@ -107,6 +148,8 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
break; break;
} }
case kCVPixelFormatType_32BGRA: { case kCVPixelFormatType_32BGRA: {
// Permute channels to `RGBA` since MediaPipe tasks don't support inference on images of
// format `BGRA`.
const uint8_t permute_map[4] = {2, 1, 0, 3}; const uint8_t permute_map[4] = {2, 1, 0, 3};
destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height, destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height,
destinationBytesPerRow); destinationBytesPerRow);
@ -120,8 +163,7 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
default: { default: {
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError withCode:MPPTasksErrorCodeInvalidArgumentError
description:@"Invalid source pixel buffer format. Expecting one of " description:@"Some internal error occured."];
@"kCVPixelFormatType_32RGBA, kCVPixelFormatType_32BGRA"];
return nullptr; return nullptr;
} }
} }
@ -136,7 +178,47 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
// Uses default deleter // Uses default deleter
return std::make_unique<ImageFrame>(imageFormat, width, height, destinationBytesPerRow, return std::make_unique<ImageFrame>(imageFormat, width, height, destinationBytesPerRow,
static_cast<uint8 *>(destBuffer.data)); static_cast<uint8 *>(destBuffer.data));
}
+ (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame
shouldCopy:(BOOL)shouldCopy
error:(NSError **)error {
vImage_Buffer sourceBuffer = CreateVImageBufferFromImageFrame(imageFrame);
// Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure
// that pixels are displayed correctly irrespective of their alpha values.
vImage_Error premultiplyError;
vImage_Buffer destinationBuffer;
switch (imageFrame.Format()) {
case ImageFormat::SRGBA: {
destinationBuffer =
shouldCopy ? CreateEmptyVImageBufferFromImageFrame(imageFrame, true) : sourceBuffer;
premultiplyError =
vImagePremultiplyData_RGBA8888(&sourceBuffer, &destinationBuffer, kvImageNoFlags);
break;
}
default: {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInternalError
description:@"An error occured while processing the output image "
@"pixels of the vision task."];
return nullptr;
}
}
if (premultiplyError != kvImageNoError) {
[MPPCommonUtils
createCustomError:error
withCode:MPPTasksErrorCodeInternalError
description:
@"An error occured while processing the output image pixels of the vision task."];
return nullptr;
}
return (UInt8 *)destinationBuffer.data;
} }
@end @end
@ -149,7 +231,8 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
std::unique_ptr<ImageFrame> imageFrame = nullptr; std::unique_ptr<ImageFrame> imageFrame = nullptr;
switch (pixelBufferFormat) { switch (pixelBufferFormat) {
case kCVPixelFormatType_32RGBA: // Core Video only supports pixel data of order BGRA for 32 bit RGBA images.
// Thus other formats like `kCVPixelFormatType_32BGRA` don't need to be accounted for.
case kCVPixelFormatType_32BGRA: { case kCVPixelFormatType_32BGRA: {
CVPixelBufferLockBaseAddress(pixelBuffer, 0); CVPixelBufferLockBaseAddress(pixelBuffer, 0);
imageFrame = [MPPPixelDataUtils imageFrame = [MPPPixelDataUtils
@ -165,15 +248,58 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
default: { default: {
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError withCode:MPPTasksErrorCodeInvalidArgumentError
description:@"Unsupported pixel format for CVPixelBuffer. Supported " description:@"Unsupported pixel format for CVPixelBuffer. Expected "
@"pixel format types are kCVPixelFormatType_32BGRA and " @"kCVPixelFormatType_32BGRA"];
@"kCVPixelFormatType_32RGBA"];
} }
} }
return imageFrame; return imageFrame;
} }
+ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error {
if (imageFrame.Format() != ImageFormat::SRGBA) {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInternalError
description:@"An error occured while creating a CVPixelBuffer from the "
@"output image of the vision task."];
return nullptr;
}
UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:imageFrame
shouldCopy:YES
error:error];
if (!pixelData) {
return nullptr;
}
const uint8_t permute_map[4] = {2, 1, 0, 3};
vImage_Buffer sourceBuffer = CreateEmptyVImageBufferFromImageFrame(imageFrame, NO);
sourceBuffer.data = pixelData;
if (vImagePermuteChannels_ARGB8888(&sourceBuffer, &sourceBuffer, permute_map, kvImageNoFlags) ==
kvImageNoError) {
CVPixelBufferRef outputBuffer;
OSType pixelBufferFormatType = kCVPixelFormatType_32BGRA;
// Since data is copied, pass in a release callback that will be invoked when the pixel buffer
// is destroyed.
if (CVPixelBufferCreateWithBytes(kCFAllocatorDefault, imageFrame.Width(), imageFrame.Height(),
pixelBufferFormatType, pixelData, imageFrame.WidthStep(),
FreeRefConReleaseCallback, pixelData, nullptr,
&outputBuffer) == kCVReturnSuccess) {
return outputBuffer;
}
}
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInternalError
description:@"An error occured while creating a CVPixelBuffer from the "
@"output image of the vision task."];
return nullptr;
}
@end @end
@implementation MPPCGImageUtils @implementation MPPCGImageUtils
@ -232,7 +358,14 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault; CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault;
ImageFrame *internalImageFrame = imageFrame.get(); ImageFrame *internalImageFrame = imageFrame.get();
size_t channelCount = 4;
UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:*internalImageFrame
shouldCopy:shouldCopyPixelData
error:error];
if (!pixelData) {
return nullptr;
}
switch (internalImageFrame->Format()) { switch (internalImageFrame->Format()) {
case ImageFormat::SRGBA: { case ImageFormat::SRGBA: {
@ -242,56 +375,41 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
default: default:
[MPPCommonUtils createCustomError:error [MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInternalError withCode:MPPTasksErrorCodeInternalError
description:@"An internal error occured."]; description:@"An error occured while creating a CGImage from the "
@"output image of the vision task."];
return nullptr; return nullptr;
} }
size_t bitsPerComponent = 8;
vImage_Buffer sourceBuffer = {
.data = (void *)internalImageFrame->MutablePixelData(),
.height = static_cast<vImagePixelCount>(internalImageFrame->Height()),
.width = static_cast<vImagePixelCount>(internalImageFrame->Width()),
.rowBytes = static_cast<size_t>(internalImageFrame->WidthStep())};
vImage_Buffer destBuffer;
CGDataProviderReleaseDataCallback callback = nullptr; CGDataProviderReleaseDataCallback callback = nullptr;
if (shouldCopyPixelData) {
destBuffer = allocatedVImageBuffer(static_cast<vImagePixelCount>(internalImageFrame->Width()),
static_cast<vImagePixelCount>(internalImageFrame->Height()),
static_cast<size_t>(internalImageFrame->WidthStep()));
callback = FreeDataProviderReleaseCallback;
} else {
destBuffer = sourceBuffer;
}
// Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure
// that pixels are displayed correctly irrespective of their alpha values.
vImage_Error premultiplyError =
vImagePremultiplyData_RGBA8888(&sourceBuffer, &destBuffer, kvImageNoFlags);
if (premultiplyError != kvImageNoError) {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInternalError
description:@"An internal error occured."];
return nullptr;
}
CGDataProviderRef provider = CGDataProviderCreateWithData( CGDataProviderRef provider = CGDataProviderCreateWithData(
destBuffer.data, destBuffer.data, pixelData, pixelData, internalImageFrame->WidthStep() * internalImageFrame->Height(),
internalImageFrame->WidthStep() * internalImageFrame->Height(), callback); callback);
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGImageRef cgImageRef =
CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent,
bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace,
bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault);
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGImageRef cgImageRef = nullptr;
if (provider && colorSpace) {
size_t bitsPerComponent = 8;
size_t channelCount = 4;
cgImageRef =
CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent,
bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace,
bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault);
}
// Can safely pass `NULL` to these functions according to iOS docs.
CGDataProviderRelease(provider); CGDataProviderRelease(provider);
CGColorSpaceRelease(colorSpace); CGColorSpaceRelease(colorSpace);
if (!cgImageRef) {
[MPPCommonUtils createCustomError:error
withCode:MPPTasksErrorCodeInternalError
description:@"An error occured while converting the output image of the "
@"vision task to a CGImage."];
}
return cgImageRef; return cgImageRef;
} }
@ -347,8 +465,30 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size
return [self initWithUIImage:image orientation:sourceImage.orientation error:nil]; return [self initWithUIImage:image orientation:sourceImage.orientation error:nil];
} }
case MPPImageSourceTypePixelBuffer: {
if (!shouldCopyPixelData) {
// TODO: Investigate possibility of permuting channels of `mediapipe::Image` returned by
// vision tasks in place to ensure that we can support creating `CVPixelBuffer`s without
// copying the pixel data.
[MPPCommonUtils
createCustomError:error
withCode:MPPTasksErrorCodeInvalidArgumentError
description:
@"When the source type is pixel buffer, you cannot request uncopied data"];
return nil;
}
CVPixelBufferRef pixelBuffer =
[MPPCVPixelBufferUtils cvPixelBufferFromImageFrame:*(image.GetImageFrameSharedPtr())
error:error];
MPPImage *image = [self initWithPixelBuffer:pixelBuffer
orientation:sourceImage.orientation
error:nil];
CVPixelBufferRelease(pixelBuffer);
return image;
}
default: default:
// TODO Implement Other Source Types. // TODO Implement CMSampleBuffer.
return nil; return nil;
} }
} }

View File

@ -82,10 +82,8 @@ NS_SWIFT_NAME(ImageClassifier)
* `.image`. * `.image`.
* *
* This method supports classification of RGBA images. If your `MPImage` has a source type * This method supports classification of RGBA images. If your `MPImage` has a source type
* ofm`.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use
* pixel format types: * `kCVPixelFormatType_32BGRA` as its pixel format.
* 1. kCVPixelFormatType_32BGRA
* 2. kCVPixelFormatType_32RGBA
* *
* If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha
* channel. * channel.
@ -104,11 +102,9 @@ NS_SWIFT_NAME(ImageClassifier)
* of the provided `MPImage`. Only use this method when the `ImageClassifier` is created with * of the provided `MPImage`. Only use this method when the `ImageClassifier` is created with
* running mode, `.image`. * running mode, `.image`.
* *
* This method supports classification of RGBA images. If your `MPImage` has a source type of * This method supports classification of RGBA images. If your `MPImage` has a source type
* `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use
* pixel format types: * `kCVPixelFormatType_32BGRA` as its pixel format.
* 1. kCVPixelFormatType_32BGRA
* 2. kCVPixelFormatType_32RGBA
* *
* If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha
* channel. * channel.
@ -133,11 +129,9 @@ NS_SWIFT_NAME(ImageClassifier)
* It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must
* be monotonically increasing. * be monotonically increasing.
* *
* This method supports classification of RGBA images. If your `MPImage` has a source type of * This method supports classification of RGBA images. If your `MPImage` has a source type
* `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use
* pixel format types: * `kCVPixelFormatType_32BGRA` as its pixel format.
* 1. kCVPixelFormatType_32BGRA
* 2. kCVPixelFormatType_32RGBA
* *
* If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha
* channel. * channel.
@ -161,11 +155,9 @@ NS_SWIFT_NAME(ImageClassifier)
* It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must
* be monotonically increasing. * be monotonically increasing.
* *
* This method supports classification of RGBA images. If your `MPImage` has a source type of * This method supports classification of RGBA images. If your `MPImage` has a source type
* `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use
* pixel format types: * `kCVPixelFormatType_32BGRA` as its pixel format.
* 1. kCVPixelFormatType_32BGRA
* 2. kCVPixelFormatType_32RGBA
* *
* If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha
* channel. * channel.
@ -199,11 +191,9 @@ NS_SWIFT_NAME(ImageClassifier)
* It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
* to the image classifier. The input timestamps must be monotonically increasing. * to the image classifier. The input timestamps must be monotonically increasing.
* *
* This method supports classification of RGBA images. If your `MPImage` has a source type of * This method supports classification of RGBA images. If your `MPImage` has a source type
* .pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use
* pixel format types: * `kCVPixelFormatType_32BGRA` as its pixel format.
* 1. kCVPixelFormatType_32BGRA
* 2. kCVPixelFormatType_32RGBA
* *
* If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an
* Alpha channel. * Alpha channel.
@ -238,17 +228,15 @@ NS_SWIFT_NAME(ImageClassifier)
* It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent
* to the image classifier. The input timestamps must be monotonically increasing. * to the image classifier. The input timestamps must be monotonically increasing.
* *
* This method supports classification of RGBA images. If your `MPImage` has a source type of * This method supports classification of RGBA images. If your `MPImage` has a source type
* `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use
* pixel format types: * `kCVPixelFormatType_32BGRA` as its pixel format.
* 1. kCVPixelFormatType_32BGRA
* 2. kCVPixelFormatType_32RGBA
* *
* If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an
* Alpha channel. * Alpha channel.
* *
* If this method is used for classifying live camera frames using `AVFoundation`, ensure that you * If this method is used for classifying live camera frames using `AVFoundation`, ensure that you
* request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32RGBA` using its * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32BGRA` using its
* `videoSettings` property. * `videoSettings` property.
* *
* @param image A live stream image data of type `MPImage` on which image classification is to be * @param image A live stream image data of type `MPImage` on which image classification is to be