From 0fe677b78f3f775c6a7c866b174858ca66380102 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Oct 2023 19:24:40 +0530 Subject: [PATCH 01/39] Updated supported pixel formats in iOS image classifier Documentation --- .../image_classifier/sources/MPPImageClassifier.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h index 5b9b24fb6..8245dcbdf 100644 --- a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h @@ -82,10 +82,9 @@ NS_SWIFT_NAME(ImageClassifier) * `.image`. * * This method supports classification of RGBA images. If your `MPImage` has a source type - * ofm`.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * pixel format types: * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -108,7 +107,6 @@ NS_SWIFT_NAME(ImageClassifier) * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * pixel format types: * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -137,7 +135,6 @@ NS_SWIFT_NAME(ImageClassifier) * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * pixel format types: * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. 
@@ -165,7 +162,6 @@ NS_SWIFT_NAME(ImageClassifier) * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * pixel format types: * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -203,7 +199,6 @@ NS_SWIFT_NAME(ImageClassifier) * .pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * pixel format types: * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA * * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * Alpha channel. @@ -242,13 +237,12 @@ NS_SWIFT_NAME(ImageClassifier) * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following * pixel format types: * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA * * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * Alpha channel. * * If this method is used for classifying live camera frames using `AVFoundation`, ensure that you - * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32RGBA` using its + * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32BGRA` using its * `videoSettings` property. 
* * @param image A live stream image data of type `MPImage` on which image classification is to be From 032d7a5d22988b0f6b15f45dc587f15a9c387e3a Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Oct 2023 19:56:44 +0530 Subject: [PATCH 02/39] Removed support for CVPixelBuffer of type 32RGBA --- .../tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index 440b321b9..df61d4c32 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -120,8 +120,7 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError - description:@"Invalid source pixel buffer format. Expecting one of " - @"kCVPixelFormatType_32RGBA, kCVPixelFormatType_32BGRA"]; + description:@"Some internal error occured."]; return nullptr; } } @@ -149,7 +148,6 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size std::unique_ptr imageFrame = nullptr; switch (pixelBufferFormat) { - case kCVPixelFormatType_32RGBA: case kCVPixelFormatType_32BGRA: { CVPixelBufferLockBaseAddress(pixelBuffer, 0); imageFrame = [MPPPixelDataUtils @@ -165,9 +163,7 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError - description:@"Unsupported pixel format for CVPixelBuffer. Supported " - @"pixel format types are kCVPixelFormatType_32BGRA and " - @"kCVPixelFormatType_32RGBA"]; + description:@"Unsupported pixel format for CVPixelBuffer. 
Supported pixel format is kCVPixelFormatType_32BGRA"]; } } From ad6812206919a01b24aef45cc80e73d7fc2745d0 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Oct 2023 19:58:40 +0530 Subject: [PATCH 03/39] Added support for creating CVPixelBuffer from C++ Images to iOS MPPImage Utils --- .../core/utils/sources/MPPImage+Utils.mm | 139 +++++++++++++++++- 1 file changed, 137 insertions(+), 2 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index df61d4c32..45915f019 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -30,6 +30,20 @@ namespace { using ::mediapipe::ImageFormat; using ::mediapipe::ImageFrame; +vImage_Buffer EmptyVImageBufferFromImageFrame(ImageFrame &imageFrame, bool shouldAllocate) { + UInt8 *data = shouldAllocate ? new UInt8[imageFrame.Height() * imageFrame.WidthStep()] : NULL; + return {.data = data, + .height = static_cast(imageFrame.Height()), + .width = static_cast(imageFrame.Width()), + .rowBytes = static_cast(imageFrame.WidthStep())}; +} + +vImage_Buffer VImageBufferFromImageFrame(ImageFrame &imageFrame) { + vImage_Buffer imageBuffer = EmptyVImageBufferFromImageFrame(imageFrame, false); + imageBuffer.data = imageFrame.MutablePixelData(); + return imageBuffer; +} + vImage_Buffer allocatedVImageBuffer(vImagePixelCount width, vImagePixelCount height, size_t rowBytes) { UInt8 *data = new UInt8[height * rowBytes]; @@ -37,7 +51,11 @@ vImage_Buffer allocatedVImageBuffer(vImagePixelCount width, vImagePixelCount hei } static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size_t size) { - delete (vImage_Buffer *)buffer; + delete[] buffer; +} + +static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { + delete[] refCon; } } // namespace @@ -51,6 +69,10 @@ static void FreeDataProviderReleaseCallback(void 
*buffer, const void *data, size pixelBufferFormat:(OSType)pixelBufferFormatType error:(NSError **)error; ++ (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame + shouldCopy:(BOOL)shouldCopy + error:(NSError **)error; + @end @interface MPPCVPixelBufferUtils : NSObject @@ -58,6 +80,9 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size + (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; + ++ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame + error:(NSError **)error; @end @interface MPPCGImageUtils : NSObject @@ -138,6 +163,42 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size static_cast(destBuffer.data)); } ++ (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame + shouldCopy:(BOOL)shouldCopy + error:(NSError **)error { + vImage_Buffer sourceBuffer = VImageBufferFromImageFrame(imageFrame); + + // Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure + // that pixels are displayed correctly irrespective of their alpha values. + vImage_Error premultiplyError; + vImage_Buffer destinationBuffer; + + switch (imageFrame.Format()) { + case ImageFormat::SRGBA: { + destinationBuffer = + shouldCopy ? 
EmptyVImageBufferFromImageFrame(imageFrame, true) : sourceBuffer; + premultiplyError = vImagePremultiplyData_RGBA8888(&sourceBuffer, &destinationBuffer, kvImageNoFlags); + break; + } + default: { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured"]; + return NULL; + } + } + + if (premultiplyError != kvImageNoError) { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + + return NULL; + } + + return (UInt8 *)destinationBuffer.data; +} + @end @implementation MPPCVPixelBufferUtils @@ -170,6 +231,64 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size return imageFrame; } ++ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame + error:(NSError **)error { + + // Supporting only RGBA and BGRA since creation of CVPixelBuffers with RGB format + // is restrictred in iOS. Thus, the APIs will never receive an input pixel buffer in RGB format + // and in turn the resulting image frame will never be of the RGB format. Moreover, writing unit + // tests for RGB images will also be not possible. 
+ switch (imageFrame.Format()) { + case ImageFormat::SRGBA: + break; + default: { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + return NULL; + } + } + + UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:imageFrame + shouldCopy:YES + error:error]; + + if (!pixelData) { + return NULL; + } + + const uint8_t permute_map[4] = {2, 1, 0, 3}; + vImage_Buffer sourceBuffer = EmptyVImageBufferFromImageFrame(imageFrame, NO); + sourceBuffer.data = pixelData; + + if (vImagePermuteChannels_ARGB8888(&sourceBuffer, &sourceBuffer, permute_map, kvImageNoFlags) != kvImageNoError) { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + return NULL; + } + + CVPixelBufferRef outputBuffer; + + OSType pixelBufferFormatType = kCVPixelFormatType_32BGRA; + + + // If pixel data is copied, then pass in a release callback that will be invoked when the + // pixel buffer is destroyed. If data is not copied, the responsibility of deletion is on the + // owner of the data (a.k.a C++ Image Frame). 
+ if(CVPixelBufferCreateWithBytes(kCFAllocatorDefault, imageFrame.Width(), imageFrame.Height(), + pixelBufferFormatType, pixelData, imageFrame.WidthStep(), + FreeRefConReleaseCallback, + pixelData, NULL, &outputBuffer) == kCVReturnSuccess) { + return outputBuffer; + } + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + return NULL; +} + + @end @implementation MPPCGImageUtils @@ -343,8 +462,24 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size return [self initWithUIImage:image orientation:sourceImage.orientation error:nil]; } + case MPPImageSourceTypePixelBuffer: { + if (!shouldCopyPixelData) { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInvalidArgumentError + description:@"When the source type is pixel buffer, you cannot request uncopied data"]; + return nil; + } + CVPixelBufferRef pixelBuffer = + [MPPCVPixelBufferUtils cvPixelBufferFromImageFrame:*(image.GetImageFrameSharedPtr()) + error:error]; + MPPImage *image = [self initWithPixelBuffer:pixelBuffer + orientation:sourceImage.orientation + error:nil]; + CVPixelBufferRelease(pixelBuffer); + return image; + } default: - // TODO Implement Other Source Types. + // TODO Implement CMSampleBuffer. 
return nil; } } From 4668d683d566983dcdff11130b9d324510a4b819 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Oct 2023 19:59:09 +0530 Subject: [PATCH 04/39] Updated implementation of MPPImage Utils to reduce lines of code --- .../core/utils/sources/MPPImage+Utils.mm | 63 ++++++++----------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index 45915f019..eb7d02c05 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -347,7 +347,14 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault; ImageFrame *internalImageFrame = imageFrame.get(); - size_t channelCount = 4; + + UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:*internalImageFrame + shouldCopy:shouldCopyPixelData + error:error]; + + if (!pixelData) { + return NULL; + } switch (internalImageFrame->Format()) { case ImageFormat::SRGBA: { @@ -358,54 +365,38 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError description:@"An internal error occured."]; - return nullptr; + return NULL; } - size_t bitsPerComponent = 8; - - vImage_Buffer sourceBuffer = { - .data = (void *)internalImageFrame->MutablePixelData(), - .height = static_cast(internalImageFrame->Height()), - .width = static_cast(internalImageFrame->Width()), - .rowBytes = static_cast(internalImageFrame->WidthStep())}; - - vImage_Buffer destBuffer; - CGDataProviderReleaseDataCallback callback = nullptr; - if (shouldCopyPixelData) { - destBuffer = allocatedVImageBuffer(static_cast(internalImageFrame->Width()), - static_cast(internalImageFrame->Height()), - 
static_cast(internalImageFrame->WidthStep())); - callback = FreeDataProviderReleaseCallback; - } else { - destBuffer = sourceBuffer; - } - - // Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure - // that pixels are displayed correctly irrespective of their alpha values. - vImage_Error premultiplyError = - vImagePremultiplyData_RGBA8888(&sourceBuffer, &destBuffer, kvImageNoFlags); - - if (premultiplyError != kvImageNoError) { - [MPPCommonUtils createCustomError:error - withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; - - return nullptr; - } - CGDataProviderRef provider = CGDataProviderCreateWithData( - destBuffer.data, destBuffer.data, + pixelData, pixelData, internalImageFrame->WidthStep() * internalImageFrame->Height(), callback); + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); - CGImageRef cgImageRef = + + CGImageRef cgImageRef = NULL; + + if (provider && colorSpace) { + size_t bitsPerComponent = 8; + size_t channelCount = 4; + + cgImageRef = CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent, bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace, bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault); + } + // Can safely pass `NULL` to these functions according to iOS docs. 
CGDataProviderRelease(provider); CGColorSpaceRelease(colorSpace); + + if (!cgImageRef) { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + } return cgImageRef; } From b9c869494d6b80fd2f4e21b466e0f6f2d4492347 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Oct 2023 19:59:59 +0530 Subject: [PATCH 05/39] Fixed formatting of MPPImage+Utils.mm --- .../core/utils/sources/MPPImage+Utils.mm | 119 +++++++++--------- 1 file changed, 58 insertions(+), 61 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index eb7d02c05..34cd50973 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -54,9 +54,7 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size delete[] buffer; } -static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { - delete[] refCon; -} +static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { delete[] refCon; } } // namespace @@ -70,8 +68,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { error:(NSError **)error; + (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame - shouldCopy:(BOOL)shouldCopy - error:(NSError **)error; + shouldCopy:(BOOL)shouldCopy + error:(NSError **)error; @end @@ -80,9 +78,7 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { + (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; - -+ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame - error:(NSError **)error; ++ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error; @end @interface MPPCGImageUtils : NSObject @@ -160,12 +156,12 @@ static void 
FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { // Uses default deleter return std::make_unique(imageFormat, width, height, destinationBytesPerRow, - static_cast(destBuffer.data)); + static_cast(destBuffer.data)); } + (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame shouldCopy:(BOOL)shouldCopy - error:(NSError **)error { + error:(NSError **)error { vImage_Buffer sourceBuffer = VImageBufferFromImageFrame(imageFrame); // Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure @@ -177,7 +173,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { case ImageFormat::SRGBA: { destinationBuffer = shouldCopy ? EmptyVImageBufferFromImageFrame(imageFrame, true) : sourceBuffer; - premultiplyError = vImagePremultiplyData_RGBA8888(&sourceBuffer, &destinationBuffer, kvImageNoFlags); + premultiplyError = + vImagePremultiplyData_RGBA8888(&sourceBuffer, &destinationBuffer, kvImageNoFlags); break; } default: { @@ -224,16 +221,15 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError - description:@"Unsupported pixel format for CVPixelBuffer. Supported pixel format is kCVPixelFormatType_32BGRA"]; + description:@"Unsupported pixel format for CVPixelBuffer. Supported " + @"pixel format is kCVPixelFormatType_32BGRA"]; } } return imageFrame; } -+ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame - error:(NSError **)error { - ++ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error { // Supporting only RGBA and BGRA since creation of CVPixelBuffers with RGB format // is restrictred in iOS. Thus, the APIs will never receive an input pixel buffer in RGB format // and in turn the resulting image frame will never be of the RGB format. 
Moreover, writing unit @@ -249,46 +245,45 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { } } - UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:imageFrame - shouldCopy:YES - error:error]; + UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:imageFrame + shouldCopy:YES + error:error]; - if (!pixelData) { - return NULL; - } + if (!pixelData) { + return NULL; + } - const uint8_t permute_map[4] = {2, 1, 0, 3}; - vImage_Buffer sourceBuffer = EmptyVImageBufferFromImageFrame(imageFrame, NO); - sourceBuffer.data = pixelData; + const uint8_t permute_map[4] = {2, 1, 0, 3}; + vImage_Buffer sourceBuffer = EmptyVImageBufferFromImageFrame(imageFrame, NO); + sourceBuffer.data = pixelData; - if (vImagePermuteChannels_ARGB8888(&sourceBuffer, &sourceBuffer, permute_map, kvImageNoFlags) != kvImageNoError) { - [MPPCommonUtils createCustomError:error - withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; - return NULL; - } - - CVPixelBufferRef outputBuffer; + if (vImagePermuteChannels_ARGB8888(&sourceBuffer, &sourceBuffer, permute_map, kvImageNoFlags) != + kvImageNoError) { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + return NULL; + } - OSType pixelBufferFormatType = kCVPixelFormatType_32BGRA; + CVPixelBufferRef outputBuffer; + OSType pixelBufferFormatType = kCVPixelFormatType_32BGRA; - // If pixel data is copied, then pass in a release callback that will be invoked when the - // pixel buffer is destroyed. If data is not copied, the responsibility of deletion is on the - // owner of the data (a.k.a C++ Image Frame). - if(CVPixelBufferCreateWithBytes(kCFAllocatorDefault, imageFrame.Width(), imageFrame.Height(), + // If pixel data is copied, then pass in a release callback that will be invoked when the + // pixel buffer is destroyed. 
If data is not copied, the responsibility of deletion is on the + // owner of the data (a.k.a C++ Image Frame). + if (CVPixelBufferCreateWithBytes(kCFAllocatorDefault, imageFrame.Width(), imageFrame.Height(), pixelBufferFormatType, pixelData, imageFrame.WidthStep(), - FreeRefConReleaseCallback, - pixelData, NULL, &outputBuffer) == kCVReturnSuccess) { - return outputBuffer; - } - [MPPCommonUtils createCustomError:error - withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; - return NULL; + FreeRefConReleaseCallback, pixelData, NULL, + &outputBuffer) == kCVReturnSuccess) { + return outputBuffer; + } + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; + return NULL; } - @end @implementation MPPCGImageUtils @@ -371,31 +366,31 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { CGDataProviderReleaseDataCallback callback = nullptr; CGDataProviderRef provider = CGDataProviderCreateWithData( - pixelData, pixelData, - internalImageFrame->WidthStep() * internalImageFrame->Height(), callback); + pixelData, pixelData, internalImageFrame->WidthStep() * internalImageFrame->Height(), + callback); CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); CGImageRef cgImageRef = NULL; if (provider && colorSpace) { - size_t bitsPerComponent = 8; - size_t channelCount = 4; + size_t bitsPerComponent = 8; + size_t channelCount = 4; - cgImageRef = - CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent, - bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace, - bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault); + cgImageRef = + CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent, + bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace, + bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault); } // Can safely 
pass `NULL` to these functions according to iOS docs. CGDataProviderRelease(provider); CGColorSpaceRelease(colorSpace); - + if (!cgImageRef) { [MPPCommonUtils createCustomError:error - withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + withCode:MPPTasksErrorCodeInternalError + description:@"An internal error occured."]; } return cgImageRef; @@ -455,10 +450,12 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { } case MPPImageSourceTypePixelBuffer: { if (!shouldCopyPixelData) { - [MPPCommonUtils createCustomError:error - withCode:MPPTasksErrorCodeInvalidArgumentError - description:@"When the source type is pixel buffer, you cannot request uncopied data"]; - return nil; + [MPPCommonUtils + createCustomError:error + withCode:MPPTasksErrorCodeInvalidArgumentError + description: + @"When the source type is pixel buffer, you cannot request uncopied data"]; + return nil; } CVPixelBufferRef pixelBuffer = [MPPCVPixelBufferUtils cvPixelBufferFromImageFrame:*(image.GetImageFrameSharedPtr()) From af9a7e7e404dad4c745dcd3454b86d1acf7af5b5 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Oct 2023 20:27:51 +0530 Subject: [PATCH 06/39] Added documentation --- .../core/utils/sources/MPPImage+Utils.mm | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index 34cd50973..d8156a671 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -78,6 +78,24 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d + (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; +// Always copies the pixel data of the image frame to the created `CVPixelBuffer`. 
+ +// This method is used to create CVPixelBuffer from output images of tasks like `FaceStylizer` only +// when the input `MPImage` source type is `pixelBuffer`. +// +// The only possible 32 RGBA pixel format of input `CVPixelBuffer` is `kCVPixelFormatType_32BGRA`. +// But Mediapipe does not support inference on images of format `BGRA`. Hence the channels of the +// underlying pixel data of `CVPixelBuffer` are permuted to the supported RGBA format before passing +// them to the task for inference. The pixel format of the output images of any MediaPipe task will +// be the same as the pixel format of the input image. (RGBA in this case). +// +// Since creation of `CVPixelBuffer` from the output image pixels with a format of +// `kCVPixelFormatType_32RGBA` is not possible, the channels of the output C++ image `RGBA` have to +// be permuted to the format `BGRA`. When the pixels are copied to create `CVPixelBuffer` this does +// not pose a challenge. +// +// TODO: Investigate if permuting channels of output `mediapipe::Image` in place is possible for +// creating `CVPixelBuffer`s without copying the underlying pixels. + (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error; @end @@ -120,6 +138,9 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d // Convert the raw pixel data to RGBA format and un-premultiply the alpha from the R, G, B values // since MediaPipe C++ APIs only accept un pre-multiplied channels. + // + // This method is commonly used for `MPImage`s of all source types. Hence supporting BGRA and RGBA + // formats. Only `pixelBuffer` source type is restricted to `BGRA` format. 
switch (pixelBufferFormatType) { case kCVPixelFormatType_32RGBA: { destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height, @@ -128,6 +149,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d break; } case kCVPixelFormatType_32BGRA: { + // Permute channels to `RGBA` since MediaPipe tasks don't support inference on images of + // format `BGRA`. const uint8_t permute_map[4] = {2, 1, 0, 3}; destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height, destinationBytesPerRow); @@ -206,6 +229,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d std::unique_ptr imageFrame = nullptr; switch (pixelBufferFormat) { + // Core Video only supports pixel data of order BGRA for 32 bit RGBA images. + // Thus other formats like `kCVPixelFormatType_32BGRA` don't need to be accounted for. case kCVPixelFormatType_32BGRA: { CVPixelBufferLockBaseAddress(pixelBuffer, 0); imageFrame = [MPPPixelDataUtils @@ -230,10 +255,6 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d } + (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error { - // Supporting only RGBA and BGRA since creation of CVPixelBuffers with RGB format - // is restrictred in iOS. Thus, the APIs will never receive an input pixel buffer in RGB format - // and in turn the resulting image frame will never be of the RGB format. Moreover, writing unit - // tests for RGB images will also be not possible. switch (imageFrame.Format()) { case ImageFormat::SRGBA: break; @@ -269,9 +290,8 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d OSType pixelBufferFormatType = kCVPixelFormatType_32BGRA; - // If pixel data is copied, then pass in a release callback that will be invoked when the - // pixel buffer is destroyed. 
If data is not copied, the responsibility of deletion is on the - // owner of the data (a.k.a C++ Image Frame). + // Since data is copied, pass in a release callback that will be invoked when the + // pixel buffer is destroyed. if (CVPixelBufferCreateWithBytes(kCFAllocatorDefault, imageFrame.Width(), imageFrame.Height(), pixelBufferFormatType, pixelData, imageFrame.WidthStep(), FreeRefConReleaseCallback, pixelData, NULL, @@ -450,6 +470,9 @@ static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { d } case MPPImageSourceTypePixelBuffer: { if (!shouldCopyPixelData) { + // TODO: Investigate possibility of permuting channels of `mediapipe::Image` returned by + // vision tasks in place to ensure that we can support creating `CVPixelBuffer`s without + // copying the pixel data. [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError From b91ec031a2e2ad3e525ed3d703fb85b887b9d05a Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 4 Dec 2023 10:27:19 -0800 Subject: [PATCH 07/39] Adding version.bzl for tracking version PiperOrigin-RevId: 587767961 --- version.bzl | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 version.bzl diff --git a/version.bzl b/version.bzl new file mode 100644 index 000000000..c5d7d4d83 --- /dev/null +++ b/version.bzl @@ -0,0 +1,3 @@ +"""Version number for MediaPipe""" + +MEDIAPIPE_FULL_VERSION = "0.10.8" From dad2626f91fd927b6a6958cfc66a8ee8b05e6df5 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 4 Dec 2023 14:55:48 -0800 Subject: [PATCH 08/39] No public description PiperOrigin-RevId: 587850211 --- platform_mappings | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/platform_mappings b/platform_mappings index debf1e4b8..e6ada28df 100644 --- a/platform_mappings +++ b/platform_mappings @@ -24,35 +24,35 @@ flags: --cpu=darwin_x86_64 --apple_platform_type=macos - @@mediapipe//mediapipe:macos_x86_64_platform + 
@mediapipe//mediapipe:macos_x86_64_platform --cpu=darwin_arm64 --apple_platform_type=macos - @@mediapipe//mediapipe:macos_arm64_platform + @mediapipe//mediapipe:macos_arm64_platform --cpu=ios_i386 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_i386_platform + @mediapipe//mediapipe:ios_i386_platform --cpu=ios_x86_64 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_x86_64_platform + @mediapipe//mediapipe:ios_x86_64_platform --cpu=ios_sim_arm64 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_sim_arm64_platform + @mediapipe//mediapipe:ios_sim_arm64_platform --cpu=ios_armv7 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_armv7_platform + @mediapipe//mediapipe:ios_armv7_platform --cpu=ios_arm64 --apple_platform_type=ios - @@mediapipe//mediapipe:ios_arm64_platform + @mediapipe//mediapipe:ios_arm64_platform --cpu=ios_arm64e --apple_platform_type=ios - @@mediapipe//mediapipe:ios_arm64e_platform + @mediapipe//mediapipe:ios_arm64e_platform --cpu=x64_windows @mediapipe//mediapipe:windows_platform From 0f90ba17dc589ff4ed22e8638a67d01f6f1c563b Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 5 Dec 2023 13:24:31 -0800 Subject: [PATCH 09/39] Use Java Proto Lite Target for Hand ROI Refinement proto PiperOrigin-RevId: 588170664 --- .../java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl index f2e4d485f..ae167a1bc 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl @@ -85,7 +85,7 @@ _VISION_TASKS_IMAGE_GENERATOR_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", 
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_proto", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_java_proto_lite", ] _TEXT_TASKS_JAVA_PROTO_LITE_TARGETS = [ From b5c1c11f6a64f216da5b2bd9ebaecfd43d3811ba Mon Sep 17 00:00:00 2001 From: Kinar Date: Wed, 6 Dec 2023 02:59:39 -0800 Subject: [PATCH 10/39] Added Hand Landmarker C Tasks API and tests --- .../gesture_recognizer/gesture_recognizer.cc | 2 +- ...esture_recognizer_result_converter_test.cc | 16 + .../tasks/c/vision/hand_landmarker/BUILD | 141 +++++++++ .../vision/hand_landmarker/hand_landmarker.cc | 283 ++++++++++++++++++ .../vision/hand_landmarker/hand_landmarker.h | 144 +++++++++ .../hand_landmarker/hand_landmarker_result.h | 58 ++++ .../hand_landmarker_result_converter.cc | 103 +++++++ .../hand_landmarker_result_converter.h | 32 ++ .../hand_landmarker_result_converter_test.cc | 125 ++++++++ .../hand_landmarker/hand_landmarker_test.cc | 261 ++++++++++++++++ 10 files changed, 1164 insertions(+), 1 deletion(-) create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/BUILD create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc create mode 100644 mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc diff --git 
a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc index d3b0868f8..f05b9a122 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.cc @@ -228,7 +228,7 @@ int CppGestureRecognizerRecognizeAsync(void* recognizer, const MpImage& image, auto cpp_recognizer = static_cast(recognizer); auto cpp_result = cpp_recognizer->RecognizeAsync(*img, timestamp_ms); if (!cpp_result.ok()) { - ABSL_LOG(ERROR) << "Data preparation for the image classification failed: " + ABSL_LOG(ERROR) << "Data preparation for the gesture recognition failed: " << cpp_result; return CppProcessError(cpp_result, error_msg); } diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc index 603e5ed7d..f37dbf8e4 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_result_converter_test.cc @@ -95,6 +95,22 @@ TEST(GestureRecognizerResultConverterTest, ConvertsCustomResult) { } } + // Verify conversion of handedness + EXPECT_NE(c_result.handedness, nullptr); + EXPECT_EQ(c_result.handedness_count, cpp_result.handedness.size()); + + for (uint32_t i = 0; i < c_result.handedness_count; ++i) { + EXPECT_EQ(c_result.handedness[i].categories_count, + cpp_result.handedness[i].classification_size()); + for (uint32_t j = 0; j < c_result.handedness[i].categories_count; ++j) { + auto handedness = cpp_result.handedness[i].classification(j); + EXPECT_EQ(std::string(c_result.handedness[i].categories[j].category_name), + handedness.label()); + EXPECT_FLOAT_EQ(c_result.handedness[i].categories[j].score, + handedness.score()); + } + } + // Verify conversion of hand_landmarks 
EXPECT_NE(c_result.hand_landmarks, nullptr); EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size()); diff --git a/mediapipe/tasks/c/vision/hand_landmarker/BUILD b/mediapipe/tasks/c/vision/hand_landmarker/BUILD new file mode 100644 index 000000000..a7ac13935 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/BUILD @@ -0,0 +1,141 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "hand_landmarker_result", + hdrs = ["hand_landmarker_result.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:landmark", + ], +) + +cc_library( + name = "hand_landmarker_result_converter", + srcs = ["hand_landmarker_result_converter.cc"], + hdrs = ["hand_landmarker_result_converter.h"], + deps = [ + ":hand_landmarker_result", + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:category_converter", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/components/containers:landmark_converter", + "//mediapipe/tasks/cc/components/containers:category", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_result", + ], +) + +cc_test( + name = 
"hand_landmarker_result_converter_test", + srcs = ["hand_landmarker_result_converter_test.cc"], + linkstatic = 1, + deps = [ + ":hand_landmarker_result", + ":hand_landmarker_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_result", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "hand_landmarker_lib", + srcs = ["hand_landmarker.cc"], + hdrs = ["hand_landmarker.h"], + visibility = ["//visibility:public"], + deps = [ + ":hand_landmarker_result", + ":hand_landmarker_result_converter", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/hand_landmarker", + "//mediapipe/tasks/cc/vision/hand_landmarker:hand_landmarker_result", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "hand_landmarker_test", + srcs = ["hand_landmarker_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":hand_landmarker_lib", + ":hand_landmarker_result", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + 
"@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/hand_landmarker:libhand_landmarker.so +cc_binary( + name = "libhand_landmarker.so", + linkopts = [ + "-Wl,-soname=libhand_landmarker.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":hand_landmarker_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/hand_landmarker:libhand_landmarker.dylib +cc_binary( + name = "libhand_landmarker.dylib", + linkopts = [ + "-Wl,-install_name,libhand_landmarker.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":hand_landmarker_lib"], +) diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc new file mode 100644 index 000000000..f6df09f96 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc @@ -0,0 +1,283 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h" + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::hand_landmarker { + +namespace { + +using ::mediapipe::tasks::c::components::containers:: + CppCloseHandLandmarkerResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToHandLandmarkerResult; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::hand_landmarker::HandLandmarker; +typedef ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult + CppHandLandmarkerResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToHandLandmarkerOptions( + const HandLandmarkerOptions& in, + mediapipe::tasks::vision::hand_landmarker::HandLandmarkerOptions* out) { + out->num_hands = in.num_hands; + out->min_hand_detection_confidence = in.min_hand_detection_confidence; + 
out->min_hand_presence_confidence = in.min_hand_presence_confidence; + out->min_tracking_confidence = in.min_tracking_confidence; +} + +HandLandmarker* CppHandLandmarkerCreate(const HandLandmarkerOptions& options, + char** error_msg) { + auto cpp_options = std::make_unique< + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToHandLandmarkerOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. + if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + HandLandmarkerOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. 
+ HandLandmarkerResult result; + CppConvertToHandLandmarkerResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseHandLandmarkerResult(&result); + }; + } + + auto detector = HandLandmarker::Create(std::move(cpp_options)); + if (!detector.ok()) { + ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " << detector.status(); + CppProcessError(detector.status(), error_msg); + return nullptr; + } + return detector->release(); +} + +int CppHandLandmarkerDetect(void* detector, const MpImage& image, + HandLandmarkerResult* result, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast(detector); + auto cpp_result = cpp_detector->Detect(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToHandLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg) { + if (image.type == 
MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast(detector); + auto cpp_result = cpp_detector->DetectForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToHandLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppHandLandmarkerDetectAsync(void* detector, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_detector = static_cast(detector); + auto cpp_result = cpp_detector->DetectAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the landmark detection failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppHandLandmarkerCloseResult(HandLandmarkerResult* result) { + CppCloseHandLandmarkerResult(result); +} + +int 
CppHandLandmarkerClose(void* detector, char** error_msg) { + auto cpp_detector = static_cast(detector); + auto result = cpp_detector->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close HandLandmarker: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_detector; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::hand_landmarker + +extern "C" { + +void* hand_landmarker_create(struct HandLandmarkerOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerCreate( + *options, error_msg); +} + +int hand_landmarker_detect_image(void* detector, const MpImage& image, + HandLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker:: + CppHandLandmarkerDetect(detector, image, result, error_msg); +} + +int hand_landmarker_detect_for_video(void* detector, const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker:: + CppHandLandmarkerDetectForVideo(detector, image, timestamp_ms, result, + error_msg); +} + +int hand_landmarker_detect_async(void* detector, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::hand_landmarker:: + CppHandLandmarkerDetectAsync(detector, image, timestamp_ms, error_msg); +} + +void hand_landmarker_close_result(HandLandmarkerResult* result) { + mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerCloseResult( + result); +} + +int hand_landmarker_close(void* detector, char** error_ms) { + return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerClose( + detector, error_ms); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h new file mode 100644 index 000000000..e813f07e5 --- /dev/null +++ 
b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h @@ -0,0 +1,144 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_H_ +#define MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_H_ + +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The options for configuring a MediaPipe hand landmarker task. +struct HandLandmarkerOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // HandLandmarker has three running modes: + // 1) The image mode for recognizing hand landmarks on single image inputs. + // 2) The video mode for recognizing hand landmarks on the decoded frames of a + // video. + // 3) The live stream mode for recognizing hand landmarks on the live stream + // of input data, such as from camera. In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. 
+ RunningMode running_mode; + + // The maximum number of hands that can be detected by the HandLandmarker. + int num_hands = 1; + + // The minimum confidence score for the hand detection to be considered + // successful. + float min_hand_detection_confidence = 0.5; + + // The minimum confidence score of hand presence score in the hand landmark + // detection. + float min_hand_presence_confidence = 0.5; + + // The minimum confidence score for the hand tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. Arguments of the callback function include: + // the pointer to the detection result, the image that result was obtained + // on, the timestamp relevant to detection results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. + // + // A caller is responsible for closing hand landmarker result. + typedef void (*result_callback_fn)(HandLandmarkerResult* result, + const MpImage& image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates a HandLandmarker from the provided `options`. +// Returns a pointer to the hand landmarker on success. +// If an error occurs, returns `nullptr` and sets the error parameter to +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT void* hand_landmarker_create(struct HandLandmarkerOptions* options, + char** error_msg); + +// Performs hand landmark detection on the input `image`. Returns `0` on success. +// If an error occurs, returns an error code and sets the error parameter to +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. 
+MP_EXPORT int hand_landmarker_detect_image(void* detector, const MpImage& image, + HandLandmarkerResult* result, + char** error_msg); + +// Performs hand landmark detection on the provided video frame. +// Only use this method when the HandLandmarker is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int hand_landmarker_detect_for_video(void* detector, + const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg); + +// Sends live image data to hand landmark detection, and the results will be +// available via the `result_callback` provided in the HandLandmarkerOptions. +// Only use this method when the HandLandmarker is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the hand landmarker. The input timestamps must be monotonically +// increasing. +// The `result_callback` provides: +// - The detection results as a HandLandmarkerResult object. +// - The const reference to the corresponding input image that the hand +// landmarker runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. 
+MP_EXPORT int hand_landmarker_detect_async(void* detector, const MpImage& image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a HandLandmarkerResult result. +// Does not free the result pointer itself. +MP_EXPORT void hand_landmarker_close_result(HandLandmarkerResult* result); + +// Frees the hand landmarker. +// If an error occurs, returns an error code and sets the error parameter to +// an error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int hand_landmarker_close(void* detector, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_H_ diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h new file mode 100644 index 000000000..da5e4c5aa --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h @@ -0,0 +1,58 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_RESULT_HAND_LANDMARKER_RESULT_H_ +#define MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_RESULT_HAND_LANDMARKER_RESULT_H_ + +#include + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The hand landmarker result from HandLandmarker, where each vector +// element represents a single hand detected in the image. +struct HandLandmarkerResult { + // Classification of handedness. + struct Categories* handedness; + + // The number of elements in the handedness array. + uint32_t handedness_count; + + // Detected hand landmarks in normalized image coordinates. + struct NormalizedLandmarks* hand_landmarks; + + // The number of elements in the hand_landmarks array. + uint32_t hand_landmarks_count; + + // Detected hand landmarks in world coordinates. + struct Landmarks* hand_world_landmarks; + + // The number of elements in the hand_world_landmarks array. + uint32_t hand_world_landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_RESULT_HAND_LANDMARKER_RESULT_H_ diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc new file mode 100644 index 000000000..3ce32ee63 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.cc @@ -0,0 +1,103 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h" + +#include <cstdint> +#include <vector> + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/category_converter.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/components/containers/category.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +using CppCategory = ::mediapipe::tasks::components::containers::Category; +using CppLandmark = ::mediapipe::tasks::components::containers::Landmark; +using CppNormalizedLandmark = + ::mediapipe::tasks::components::containers::NormalizedLandmark; + +void CppConvertToHandLandmarkerResult( + const mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult& in, + HandLandmarkerResult* out) { + out->handedness_count = in.handedness.size(); + out->handedness = new Categories[out->handedness_count]; + + for (uint32_t i = 0; i < out->handedness_count; ++i) { + uint32_t categories_count = in.handedness[i].categories.size(); + out->handedness[i].categories_count = categories_count; + out->handedness[i].categories = new Category[categories_count]; + + for (uint32_t j = 0; j < 
categories_count; ++j) { + const auto& cpp_category = in.handedness[i].categories[j]; + CppConvertToCategory(cpp_category, &out->handedness[i].categories[j]); + } + } + + out->hand_landmarks_count = in.hand_landmarks.size(); + out->hand_landmarks = new NormalizedLandmarks[out->hand_landmarks_count]; + for (uint32_t i = 0; i < out->hand_landmarks_count; ++i) { + std::vector<CppNormalizedLandmark> cpp_normalized_landmarks; + for (uint32_t j = 0; j < in.hand_landmarks[i].landmarks.size(); ++j) { + const auto& cpp_landmark = in.hand_landmarks[i].landmarks[j]; + cpp_normalized_landmarks.push_back(cpp_landmark); + } + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &out->hand_landmarks[i]); + } + + out->hand_world_landmarks_count = in.hand_world_landmarks.size(); + out->hand_world_landmarks = new Landmarks[out->hand_world_landmarks_count]; + for (uint32_t i = 0; i < out->hand_world_landmarks_count; ++i) { + std::vector<CppLandmark> cpp_landmarks; + for (uint32_t j = 0; j < in.hand_world_landmarks[i].landmarks.size(); ++j) { + const auto& cpp_landmark = in.hand_world_landmarks[i].landmarks[j]; + cpp_landmarks.push_back(cpp_landmark); + } + CppConvertToLandmarks(cpp_landmarks, &out->hand_world_landmarks[i]); + } +} + +void CppCloseHandLandmarkerResult(HandLandmarkerResult* result) { + for (uint32_t i = 0; i < result->handedness_count; ++i) { + CppCloseCategories(&result->handedness[i]); + } + delete[] result->handedness; + + for (uint32_t i = 0; i < result->hand_landmarks_count; ++i) { + CppCloseNormalizedLandmarks(&result->hand_landmarks[i]); + } + delete[] result->hand_landmarks; + + for (uint32_t i = 0; i < result->hand_world_landmarks_count; ++i) { + CppCloseLandmarks(&result->hand_world_landmarks[i]); + } + delete[] result->hand_world_landmarks; + + result->handedness = nullptr; + result->hand_landmarks = nullptr; + result->hand_world_landmarks = nullptr; + + result->handedness_count = 0; + result->hand_landmarks_count = 0; + result->hand_world_landmarks_count = 0; +} + +} // namespace 
mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h new file mode 100644 index 000000000..9fcd8e470 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h @@ -0,0 +1,37 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_RESULT_CONVERTER_H_ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +// Converts the C++ HandLandmarkerResult `in` into the C struct `out`. The +// arrays stored in `out` are allocated by this call; release them with +// CppCloseHandLandmarkerResult. +void CppConvertToHandLandmarkerResult( + const mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult& in, + HandLandmarkerResult* out); + +// Frees the arrays owned by `result`, then resets its pointers to nullptr and +// its counts to 0. +void CppCloseHandLandmarkerResult(HandLandmarkerResult* result); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_VISION_HAND_LANDMARKER_HAND_LANDMARKER_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc new file mode 100644 index 
000000000..c38f5ea06 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter_test.cc @@ -0,0 +1,125 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result_converter.h" + +#include <cstdint> +#include <string> + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void InitHandLandmarkerResult( + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult* + cpp_result) { + // Initialize handedness + mediapipe::tasks::components::containers::Category cpp_category = { + /* index= */ 1, + /* score= */ 0.8f, + /* category_name= */ "handeness_label_1", + /* display_name= */ "handeness_display_name_1"}; + mediapipe::tasks::components::containers::Classifications + classifications_for_handedness; + classifications_for_handedness.categories.push_back(cpp_category); + cpp_result->handedness.push_back(classifications_for_handedness); + + // Initialize hand_landmarks + mediapipe::tasks::components::containers::NormalizedLandmark + cpp_normalized_landmark = {/* x= */ 0.1f, + /* y= */ 0.2f, + /* z= */ 0.3f}; + 
mediapipe::tasks::components::containers::NormalizedLandmarks + cpp_normalized_landmarks; + cpp_normalized_landmarks.landmarks.push_back(cpp_normalized_landmark); + cpp_result->hand_landmarks.push_back(cpp_normalized_landmarks); + + // Initialize hand_world_landmarks + mediapipe::tasks::components::containers::Landmark cpp_landmark = { + /* x= */ 1.0f, + /* y= */ 1.1f, + /* z= */ 1.2f}; + mediapipe::tasks::components::containers::Landmarks cpp_landmarks; + cpp_landmarks.landmarks.push_back(cpp_landmark); + cpp_result->hand_world_landmarks.push_back(cpp_landmarks); +} + +TEST(HandLandmarkerResultConverterTest, ConvertsCustomResult) { + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult cpp_result; + InitHandLandmarkerResult(&cpp_result); + + HandLandmarkerResult c_result; + CppConvertToHandLandmarkerResult(cpp_result, &c_result); + + // Verify conversion of hand_landmarks + EXPECT_NE(c_result.hand_landmarks, nullptr); + EXPECT_EQ(c_result.hand_landmarks_count, cpp_result.hand_landmarks.size()); + + for (uint32_t i = 0; i < c_result.hand_landmarks_count; ++i) { + EXPECT_EQ(c_result.hand_landmarks[i].landmarks_count, + cpp_result.hand_landmarks[i].landmarks.size()); + for (uint32_t j = 0; j < c_result.hand_landmarks[i].landmarks_count; ++j) { + const auto& landmark = cpp_result.hand_landmarks[i].landmarks[j]; + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].x, landmark.x); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].y, landmark.y); + EXPECT_FLOAT_EQ(c_result.hand_landmarks[i].landmarks[j].z, landmark.z); + } + } + + // Verify conversion of hand_world_landmarks + EXPECT_NE(c_result.hand_world_landmarks, nullptr); + EXPECT_EQ(c_result.hand_world_landmarks_count, + cpp_result.hand_world_landmarks.size()); + + for (uint32_t i = 0; i < c_result.hand_world_landmarks_count; ++i) { + EXPECT_EQ(c_result.hand_world_landmarks[i].landmarks_count, + cpp_result.hand_world_landmarks[i].landmarks.size()); + for (uint32_t j = 0; j < 
c_result.hand_world_landmarks[i].landmarks_count; + ++j) { + const auto& landmark = cpp_result.hand_world_landmarks[i].landmarks[j]; + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].x, + landmark.x); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].y, + landmark.y); + EXPECT_FLOAT_EQ(c_result.hand_world_landmarks[i].landmarks[j].z, + landmark.z); + } + } + + CppCloseHandLandmarkerResult(&c_result); +} + +TEST(HandLandmarkerResultConverterTest, FreesMemory) { + ::mediapipe::tasks::vision::hand_landmarker::HandLandmarkerResult cpp_result; + InitHandLandmarkerResult(&cpp_result); + + HandLandmarkerResult c_result; + CppConvertToHandLandmarkerResult(cpp_result, &c_result); + + EXPECT_NE(c_result.handedness, nullptr); + EXPECT_NE(c_result.hand_landmarks, nullptr); + EXPECT_NE(c_result.hand_world_landmarks, nullptr); + + CppCloseHandLandmarkerResult(&c_result); + + EXPECT_EQ(c_result.handedness, nullptr); + EXPECT_EQ(c_result.hand_landmarks, nullptr); + EXPECT_EQ(c_result.hand_world_landmarks, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc new file mode 100644 index 000000000..c418657e5 --- /dev/null +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc @@ -0,0 +1,261 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h" + +#include <cstdint> +#include <cstdlib> +#include <string> + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kModelName[] = "hand_landmarker.task"; +constexpr char kImageFile[] = "fist.jpg"; +constexpr float kScorePrecision = 1e-2; +constexpr float kLandmarkPrecision = 1e-1; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, file_name); +} + +void MatchesHandLandmarkerResult(HandLandmarkerResult* result, + const float score_precision, + const float landmark_precision) { + // Expects to have the same number of hands detected. + EXPECT_EQ(result->handedness_count, 1); + + // Actual handedness matches expected handedness. + EXPECT_EQ(std::string{result->handedness[0].categories[0].category_name}, + "Right"); + EXPECT_NEAR(result->handedness[0].categories[0].score, 0.9893f, + score_precision); + + // Actual landmarks match expected landmarks. 
+ EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].x, 0.477f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].y, 0.661f, + landmark_precision); + EXPECT_NEAR(result->hand_landmarks[0].landmarks[0].z, 0.0f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].x, -0.009f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].y, 0.082f, + landmark_precision); + EXPECT_NEAR(result->hand_world_landmarks[0].landmarks[0].z, 0.006f, + landmark_precision); +} + +TEST(HandLandmarkerTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); + ASSERT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + HandLandmarkerResult result; + hand_landmarker_detect_image(detector, mp_image, &result, + /* error_msg */ nullptr); + MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); + hand_landmarker_close_result(&result); + hand_landmarker_close(detector, /* error_msg */ nullptr); +} + +TEST(HandLandmarkerTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string 
model_path = GetFullPath(kModelName); + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); + ASSERT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + HandLandmarkerResult result; + hand_landmarker_detect_for_video(detector, mp_image, i, &result, + /* error_msg */ nullptr); + + MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); + hand_landmarker_close_result(&result); + } + hand_landmarker_close(detector, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. 
+struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(HandLandmarkerResult* detector_result, const MpImage& image, + int64_t timestamp, char* error_msg) { + ASSERT_NE(detector_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + MatchesHandLandmarkerResult(detector_result, kScorePrecision, + kLandmarkPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + last_timestamp++; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(HandLandmarkerTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); + ASSERT_NE(detector, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(hand_landmarker_detect_async(detector, mp_image, i, + /* error_msg */ nullptr), + 0); + } + hand_landmarker_close(detector, /* error_msg */ nullptr); + + // Due to the flow limiter, the total of outputs might be smaller than the + // number of iterations. 
+ EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(HandLandmarkerTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + char* error_msg = nullptr; + void* detector = hand_landmarker_create(&options, &error_msg); + EXPECT_EQ(detector, nullptr); + + EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); + + free(error_msg); +} + +TEST(HandLandmarkerTest, FailedRecognitionHandling) { + const std::string model_path = GetFullPath(kModelName); + HandLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_hands= */ 1, + /* min_hand_detection_confidence= */ 0.5, + /* min_hand_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + }; + + void* detector = hand_landmarker_create(&options, /* error_msg */ + nullptr); + ASSERT_NE(detector, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + HandLandmarkerResult result; + char* error_msg = nullptr; + hand_landmarker_detect_image(detector, mp_image, &result, &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); + free(error_msg); + hand_landmarker_close(detector, /* error_msg */ nullptr); +} + +} // namespace From e4a6ea3079e5e68b3392513459dca96bbbe311da Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 6 Dec 2023 04:25:37 -0800 Subject: [PATCH 11/39] No public description PiperOrigin-RevId: 588376739 --- 
mediapipe/framework/BUILD | 2 +- mediapipe/framework/calculator_graph.cc | 15 ++++++++++----- mediapipe/framework/calculator_graph.h | 7 ++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index e5e72cfbe..38812b39e 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") @@ -368,6 +367,7 @@ cc_library( "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", diff --git a/mediapipe/framework/calculator_graph.cc b/mediapipe/framework/calculator_graph.cc index 1890d799c..1bd356eac 100644 --- a/mediapipe/framework/calculator_graph.cc +++ b/mediapipe/framework/calculator_graph.cc @@ -28,6 +28,7 @@ #include "absl/container/flat_hash_set.h" #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" +#include "absl/log/check.h" #include "absl/memory/memory.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" @@ -890,12 +891,12 @@ absl::Status CalculatorGraph::WaitForObservedOutput() { } absl::Status CalculatorGraph::AddPacketToInputStream( - const std::string& stream_name, const Packet& packet) { + absl::string_view stream_name, const Packet& packet) { return AddPacketToInputStreamInternal(stream_name, packet); } absl::Status CalculatorGraph::AddPacketToInputStream( - const std::string& stream_name, Packet&& packet) { + absl::string_view stream_name, Packet&& packet) { return AddPacketToInputStreamInternal(stream_name, 
std::move(packet)); } @@ -918,14 +919,18 @@ absl::Status CalculatorGraph::SetInputStreamTimestampBound( // std::forward will deduce the correct type as we pass along packet. template absl::Status CalculatorGraph::AddPacketToInputStreamInternal( - const std::string& stream_name, T&& packet) { + absl::string_view stream_name, T&& packet) { + auto stream_it = graph_input_streams_.find(stream_name); std::unique_ptr* stream = - mediapipe::FindOrNull(graph_input_streams_, stream_name); + stream_it == graph_input_streams_.end() ? nullptr : &stream_it->second; RET_CHECK(stream).SetNoLogging() << absl::Substitute( "AddPacketToInputStream called on input stream \"$0\" which is not a " "graph input stream.", stream_name); - int node_id = mediapipe::FindOrDie(graph_input_stream_node_ids_, stream_name); + auto node_id_it = graph_input_stream_node_ids_.find(stream_name); + ABSL_CHECK(node_id_it != graph_input_stream_node_ids_.end()) + << "Map key not found: " << stream_name; + int node_id = node_id_it->second; ABSL_CHECK_GE(node_id, validated_graph_->CalculatorInfos().size()); { absl::MutexLock lock(&full_input_streams_mutex_); diff --git a/mediapipe/framework/calculator_graph.h b/mediapipe/framework/calculator_graph.h index 4284beb7c..80af72650 100644 --- a/mediapipe/framework/calculator_graph.h +++ b/mediapipe/framework/calculator_graph.h @@ -32,6 +32,7 @@ #include "absl/container/flat_hash_set.h" #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" #include "mediapipe/framework/calculator.pb.h" #include "mediapipe/framework/calculator_base.h" @@ -255,7 +256,7 @@ class CalculatorGraph { // sizes of the queues in the graph. The input stream must have been specified // in the configuration as a graph level input_stream. On error, nothing is // added. 
- absl::Status AddPacketToInputStream(const std::string& stream_name, + absl::Status AddPacketToInputStream(absl::string_view stream_name, const Packet& packet); // Same as the l-value version of this function by the same name, but moves @@ -265,7 +266,7 @@ class CalculatorGraph { // packet may remain valid. In particular, when using the ADD_IF_NOT_FULL // mode with a full queue, this will return StatusUnavailable and the caller // may try adding the packet again later. - absl::Status AddPacketToInputStream(const std::string& stream_name, + absl::Status AddPacketToInputStream(absl::string_view stream_name, Packet&& packet); // Indicates that input will arrive no earlier than a certain timestamp. @@ -509,7 +510,7 @@ class CalculatorGraph { // AddPacketToInputStream(Packet&& packet) or // AddPacketToInputStream(const Packet& packet). template - absl::Status AddPacketToInputStreamInternal(const std::string& stream_name, + absl::Status AddPacketToInputStreamInternal(absl::string_view stream_name, T&& packet); // Sets the executor that will run the nodes assigned to the executor From 0a3f27d1cef454675d02679e840e16674ce434ca Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Wed, 6 Dec 2023 09:01:40 -0800 Subject: [PATCH 12/39] Move hand_roi_refinement_graph_options_java_proto_lite to vision lib PiperOrigin-RevId: 588444225 --- .../java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl index ae167a1bc..9d32f05de 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/mediapipe_tasks_aar.bzl @@ -50,6 +50,7 @@ _VISION_TASKS_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", 
"//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", + "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/holistic_landmarker/proto:holistic_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_classifier/proto:image_classifier_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_java_proto_lite", @@ -85,7 +86,6 @@ _VISION_TASKS_IMAGE_GENERATOR_JAVA_PROTO_LITE_TARGETS = [ "//mediapipe/tasks/cc/vision/hand_detector/proto:hand_detector_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_java_proto_lite", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_java_proto_lite", - "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_roi_refinement_graph_options_java_proto_lite", ] _TEXT_TASKS_JAVA_PROTO_LITE_TARGETS = [ From 78af80027aff3205f2382f0c92bc7c8b95f45ae9 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 6 Dec 2023 12:01:55 -0800 Subject: [PATCH 13/39] No user facing changes PiperOrigin-RevId: 588501289 --- mediapipe/framework/encode_binary_proto.bzl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/mediapipe/framework/encode_binary_proto.bzl b/mediapipe/framework/encode_binary_proto.bzl index bf7f0583d..4f987780d 100644 --- a/mediapipe/framework/encode_binary_proto.bzl +++ b/mediapipe/framework/encode_binary_proto.bzl @@ -76,10 +76,12 @@ def _get_proto_provider(dep): """ if ProtoInfo in dep: return dep[ProtoInfo] + elif hasattr(dep, "proto"): return dep.proto else: - fail("cannot happen, rule definition requires .proto or ProtoInfo") + fail("cannot happen, rule definition requires .proto" + + " or ProtoInfo") 
def _encode_binary_proto_impl(ctx): """Implementation of the encode_binary_proto rule.""" @@ -142,7 +144,10 @@ _encode_binary_proto = rule( cfg = "exec", ), "deps": attr.label_list( - providers = [[ProtoInfo], ["proto"]], + providers = [ + [ProtoInfo], + ["proto"], + ], ), "input": attr.label( mandatory = True, @@ -182,7 +187,10 @@ def _generate_proto_descriptor_set_impl(ctx): all_protos = depset(transitive = [ _get_proto_provider(dep).transitive_sources for dep in ctx.attr.deps - if ProtoInfo in dep or hasattr(dep, "proto") + if ( + ProtoInfo in dep or + hasattr(dep, "proto") + ) ]) descriptor = ctx.outputs.output @@ -213,7 +221,10 @@ generate_proto_descriptor_set = rule( cfg = "exec", ), "deps": attr.label_list( - providers = [[ProtoInfo], ["proto"]], + providers = [ + [ProtoInfo], + ["proto"], + ], ), }, outputs = {"output": "%{name}.proto.bin"}, From fad3785721a1c050fb193e4cfa04f666f98f32ed Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 6 Dec 2023 14:05:47 -0800 Subject: [PATCH 14/39] Export java package for hand_roi_refinement_graph_options. 
PiperOrigin-RevId: 588537174 --- .../proto/hand_roi_refinement_graph_options.proto | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto index 0f979c5aa..f72bd62b5 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto +++ b/mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options.proto @@ -19,6 +19,9 @@ package mediapipe.tasks.vision.hand_landmarker.proto; import "mediapipe/tasks/cc/core/proto/base_options.proto"; +option java_package = "com.google.mediapipe.tasks.vision.handlandmarker.proto"; +option java_outer_classname = "HandRoiRefinementGraphOptionsProto"; + message HandRoiRefinementGraphOptions { core.proto.BaseOptions base_options = 1; } From 6909504ca9ad8dfff3bca2f12b17649f1bd8e4ca Mon Sep 17 00:00:00 2001 From: Kinar Date: Thu, 7 Dec 2023 00:25:54 -0800 Subject: [PATCH 15/39] Fix naming in different files --- .../vision/hand_landmarker/hand_landmarker.cc | 67 ++++++++++--------- .../vision/hand_landmarker/hand_landmarker.h | 24 +++---- .../hand_landmarker/hand_landmarker_test.cc | 46 ++++++------- 3 files changed, 69 insertions(+), 68 deletions(-) diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc index f6df09f96..56ac960f1 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.cc @@ -122,17 +122,18 @@ HandLandmarker* CppHandLandmarkerCreate(const HandLandmarkerOptions& options, }; } - auto detector = HandLandmarker::Create(std::move(cpp_options)); - if (!detector.ok()) { - ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " << detector.status(); - CppProcessError(detector.status(), error_msg); + auto landmarker = 
HandLandmarker::Create(std::move(cpp_options)); + if (!landmarker.ok()) { + ABSL_LOG(ERROR) << "Failed to create HandLandmarker: " + << landmarker.status(); + CppProcessError(landmarker.status(), error_msg); return nullptr; } - return detector->release(); + return landmarker->release(); } -int CppHandLandmarkerDetect(void* detector, const MpImage& image, - HandLandmarkerResult* result, char** error_msg) { +int CppHandLandmarkerDetect(void* landmarker, const MpImage& image, + HandLandmarkerResult* result, char** error_msg) { if (image.type == MpImage::GPU_BUFFER) { const absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet."); @@ -151,8 +152,8 @@ int CppHandLandmarkerDetect(void* detector, const MpImage& image, return CppProcessError(img.status(), error_msg); } - auto cpp_detector = static_cast(detector); - auto cpp_result = cpp_detector->Detect(*img); + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->Detect(*img); if (!cpp_result.ok()) { ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); @@ -161,10 +162,10 @@ int CppHandLandmarkerDetect(void* detector, const MpImage& image, return 0; } -int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, - int64_t timestamp_ms, - HandLandmarkerResult* result, - char** error_msg) { +int CppHandLandmarkerDetectForVideo(void* landmarker, const MpImage& image, + int64_t timestamp_ms, + HandLandmarkerResult* result, + char** error_msg) { if (image.type == MpImage::GPU_BUFFER) { absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); @@ -183,8 +184,8 @@ int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, return CppProcessError(img.status(), error_msg); } - auto cpp_detector = static_cast(detector); - auto cpp_result = cpp_detector->DetectForVideo(*img, timestamp_ms); + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = 
cpp_landmarker->DetectForVideo(*img, timestamp_ms); if (!cpp_result.ok()) { ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); @@ -193,8 +194,8 @@ int CppHandLandmarkerDetectForVideo(void* detector, const MpImage& image, return 0; } -int CppHandLandmarkerDetectAsync(void* detector, const MpImage& image, - int64_t timestamp_ms, char** error_msg) { +int CppHandLandmarkerDetectAsync(void* landmarker, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { if (image.type == MpImage::GPU_BUFFER) { absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); @@ -213,8 +214,8 @@ int CppHandLandmarkerDetectAsync(void* detector, const MpImage& image, return CppProcessError(img.status(), error_msg); } - auto cpp_detector = static_cast(detector); - auto cpp_result = cpp_detector->DetectAsync(*img, timestamp_ms); + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->DetectAsync(*img, timestamp_ms); if (!cpp_result.ok()) { ABSL_LOG(ERROR) << "Data preparation for the landmark detection failed: " << cpp_result; @@ -227,14 +228,14 @@ void CppHandLandmarkerCloseResult(HandLandmarkerResult* result) { CppCloseHandLandmarkerResult(result); } -int CppHandLandmarkerClose(void* detector, char** error_msg) { - auto cpp_detector = static_cast(detector); - auto result = cpp_detector->Close(); +int CppHandLandmarkerClose(void* landmarker, char** error_msg) { + auto cpp_landmarker = static_cast(landmarker); + auto result = cpp_landmarker->Close(); if (!result.ok()) { ABSL_LOG(ERROR) << "Failed to close HandLandmarker: " << result; return CppProcessError(result, error_msg); } - delete cpp_detector; + delete cpp_landmarker; return 0; } @@ -248,26 +249,26 @@ void* hand_landmarker_create(struct HandLandmarkerOptions* options, *options, error_msg); } -int hand_landmarker_detect_image(void* detector, const MpImage& image, +int hand_landmarker_detect_image(void* 
landmarker, const MpImage& image, HandLandmarkerResult* result, char** error_msg) { - return mediapipe::tasks::c::vision::hand_landmarker:: - CppHandLandmarkerDetect(detector, image, result, error_msg); + return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerDetect( + landmarker, image, result, error_msg); } -int hand_landmarker_detect_for_video(void* detector, const MpImage& image, +int hand_landmarker_detect_for_video(void* landmarker, const MpImage& image, int64_t timestamp_ms, HandLandmarkerResult* result, char** error_msg) { return mediapipe::tasks::c::vision::hand_landmarker:: - CppHandLandmarkerDetectForVideo(detector, image, timestamp_ms, result, - error_msg); + CppHandLandmarkerDetectForVideo(landmarker, image, timestamp_ms, result, + error_msg); } -int hand_landmarker_detect_async(void* detector, const MpImage& image, +int hand_landmarker_detect_async(void* landmarker, const MpImage& image, int64_t timestamp_ms, char** error_msg) { return mediapipe::tasks::c::vision::hand_landmarker:: - CppHandLandmarkerDetectAsync(detector, image, timestamp_ms, error_msg); + CppHandLandmarkerDetectAsync(landmarker, image, timestamp_ms, error_msg); } void hand_landmarker_close_result(HandLandmarkerResult* result) { @@ -275,9 +276,9 @@ void hand_landmarker_close_result(HandLandmarkerResult* result) { result); } -int hand_landmarker_close(void* detector, char** error_ms) { +int hand_landmarker_close(void* landmarker, char** error_ms) { return mediapipe::tasks::c::vision::hand_landmarker::CppHandLandmarkerClose( - detector, error_ms); + landmarker, error_ms); } } // extern "C" diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h index e813f07e5..0b22db206 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker.h @@ -82,15 +82,15 @@ struct HandLandmarkerOptions { MP_EXPORT void* 
hand_landmarker_create(struct HandLandmarkerOptions* options, char** error_msg); -// Performs gesture recognition on the input `image`. Returns `0` on success. -// If an error occurs, returns an error code and sets the error parameter to an -// an error message (if `error_msg` is not `nullptr`). You must free the memory -// allocated for the error message. -MP_EXPORT int hand_landmarker_detect_image(void* detector, const MpImage& image, +// Performs hand landmark detection on the input `image`. Returns `0` on +// success. If an error occurs, returns an error code and sets the error +// parameter to an an error message (if `error_msg` is not `nullptr`). You must +// free the memory allocated for the error message. +MP_EXPORT int hand_landmarker_detect_image(void* landmarker, const MpImage& image, HandLandmarkerResult* result, char** error_msg); -// Performs gesture recognition on the provided video frame. +// Performs hand landmark detection on the provided video frame. // Only use this method when the HandLandmarker is created with the video // running mode. // The image can be of any size with format RGB or RGBA. It's required to @@ -99,13 +99,13 @@ MP_EXPORT int hand_landmarker_detect_image(void* detector, const MpImage& image, // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int hand_landmarker_detect_for_video(void* detector, +MP_EXPORT int hand_landmarker_detect_for_video(void* landmarker, const MpImage& image, int64_t timestamp_ms, HandLandmarkerResult* result, char** error_msg); -// Sends live image data to gesture recognition, and the results will be +// Sends live image data to hand landmark detection, and the results will be // available via the `result_callback` provided in the HandLandmarkerOptions. // Only use this method when the HandLandmarker is created with the live // stream running mode. 
@@ -115,15 +115,15 @@ MP_EXPORT int hand_landmarker_detect_for_video(void* detector, // increasing. // The `result_callback` provides: // - The recognition results as an HandLandmarkerResult object. -// - The const reference to the corresponding input image that the gesture -// detector runs on. Note that the const reference to the image will no +// - The const reference to the corresponding input image that the hand +// landmarker runs on. Note that the const reference to the image will no // longer be valid when the callback returns. To access the image data // outside of the callback, callers need to make a copy of the image. // - The input timestamp in milliseconds. // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. -MP_EXPORT int hand_landmarker_detect_async(void* detector, const MpImage& image, +MP_EXPORT int hand_landmarker_detect_async(void* landmarker, const MpImage& image, int64_t timestamp_ms, char** error_msg); @@ -135,7 +135,7 @@ MP_EXPORT void hand_landmarker_close_result(HandLandmarkerResult* result); // If an error occurs, returns an error code and sets the error parameter to an // an error message (if `error_msg` is not `nullptr`). You must free the memory // allocated for the error message. 
-MP_EXPORT int hand_landmarker_close(void* detector, char** error_msg); +MP_EXPORT int hand_landmarker_close(void* landmarker, char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc index c418657e5..ed7b4646f 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc @@ -90,8 +90,8 @@ TEST(HandLandmarkerTest, ImageModeTest) { /* min_tracking_confidence= */ 0.5, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { @@ -102,11 +102,11 @@ TEST(HandLandmarkerTest, ImageModeTest) { .height = image_frame->Height()}}; HandLandmarkerResult result; - hand_landmarker_detect_image(detector, mp_image, &result, + hand_landmarker_detect_image(landmarker, mp_image, &result, /* error_msg */ nullptr); MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); hand_landmarker_close_result(&result); - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); } TEST(HandLandmarkerTest, VideoModeTest) { @@ -125,8 +125,8 @@ TEST(HandLandmarkerTest, VideoModeTest) { /* min_tracking_confidence= */ 0.5, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { @@ -138,13 +138,13 @@ TEST(HandLandmarkerTest, VideoModeTest) { for (int i = 0; i < kIterations; ++i) { 
HandLandmarkerResult result; - hand_landmarker_detect_for_video(detector, mp_image, i, &result, + hand_landmarker_detect_for_video(landmarker, mp_image, i, &result, /* error_msg */ nullptr); MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); hand_landmarker_close_result(&result); } - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); } // A structure to support LiveStreamModeTest below. This structure holds a @@ -154,16 +154,16 @@ TEST(HandLandmarkerTest, VideoModeTest) { // timestamp is greater than the previous one. struct LiveStreamModeCallback { static int64_t last_timestamp; - static void Fn(HandLandmarkerResult* detector_result, const MpImage& image, + static void Fn(HandLandmarkerResult* landmarker_result, const MpImage& image, int64_t timestamp, char* error_msg) { - ASSERT_NE(detector_result, nullptr); + ASSERT_NE(landmarker_result, nullptr); ASSERT_EQ(error_msg, nullptr); - MatchesHandLandmarkerResult(detector_result, kScorePrecision, + MatchesHandLandmarkerResult(landmarker_result, kScorePrecision, kLandmarkPrecision); EXPECT_GT(image.image_frame.width, 0); EXPECT_GT(image.image_frame.height, 0); EXPECT_GT(timestamp, last_timestamp); - last_timestamp++; + ++last_timestamp; } }; int64_t LiveStreamModeCallback::last_timestamp = -1; @@ -186,8 +186,8 @@ TEST(HandLandmarkerTest, LiveStreamModeTest) { /* result_callback= */ LiveStreamModeCallback::Fn, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); const auto& image_frame = image->GetImageFrameSharedPtr(); const MpImage mp_image = { @@ -198,11 +198,11 @@ TEST(HandLandmarkerTest, LiveStreamModeTest) { .height = image_frame->Height()}}; for (int i = 0; i < kIterations; ++i) { - EXPECT_GE(hand_landmarker_detect_async(detector, mp_image, i, + 
EXPECT_GE(hand_landmarker_detect_async(landmarker, mp_image, i, /* error_msg */ nullptr), 0); } - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); // Due to the flow limiter, the total of outputs might be smaller than the // number of iterations. @@ -224,8 +224,8 @@ TEST(HandLandmarkerTest, InvalidArgumentHandling) { }; char* error_msg; - void* detector = hand_landmarker_create(&options, &error_msg); - EXPECT_EQ(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, &error_msg); + EXPECT_EQ(landmarker, nullptr); EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify")); @@ -245,17 +245,17 @@ TEST(HandLandmarkerTest, FailedRecognitionHandling) { /* min_tracking_confidence= */ 0.5, }; - void* detector = hand_landmarker_create(&options, /* error_msg */ - nullptr); - EXPECT_NE(detector, nullptr); + void* landmarker = hand_landmarker_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(landmarker, nullptr); const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; HandLandmarkerResult result; char* error_msg; - hand_landmarker_detect_image(detector, mp_image, &result, &error_msg); + hand_landmarker_detect_image(landmarker, mp_image, &result, &error_msg); EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); free(error_msg); - hand_landmarker_close(detector, /* error_msg */ nullptr); + hand_landmarker_close(landmarker, /* error_msg */ nullptr); } } // namespace From 66655a15b21d1076a85491189fe03b89f57bee24 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 7 Dec 2023 08:17:56 -0800 Subject: [PATCH 16/39] API 2: Do not redirect from MEDIAPIPE_REGISTER_NODE to REGISTER_CALCULATOR The problem with redirecting is that the calling code has to include API 1 code (often third_party/mediapipe/framework/calculator_framework.h), even though they are only using API 2 functionality (albeit deprecated). This can lead to weird issues, e.g. 
MakePacket confusingly does not return a Packet, but a Packet, because it's using the API 1 version. PiperOrigin-RevId: 588798455 --- mediapipe/framework/api2/BUILD | 6 ++---- mediapipe/framework/api2/node.h | 15 ++++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/mediapipe/framework/api2/BUILD b/mediapipe/framework/api2/BUILD index 5c5ec04ea..f7e5b3325 100644 --- a/mediapipe/framework/api2/BUILD +++ b/mediapipe/framework/api2/BUILD @@ -81,15 +81,13 @@ cc_library( srcs = ["node.cc"], hdrs = ["node.h"], deps = [ - ":const_str", ":contract", - ":packet", - ":port", "//mediapipe/framework:calculator_base", "//mediapipe/framework:calculator_context", "//mediapipe/framework:calculator_contract", "//mediapipe/framework:subgraph", - "//mediapipe/framework/deps:no_destructor", + "//mediapipe/framework/deps:registration", + "@com_google_absl//absl/status", ], ) diff --git a/mediapipe/framework/api2/node.h b/mediapipe/framework/api2/node.h index 58cebf1ea..6d5e3da71 100644 --- a/mediapipe/framework/api2/node.h +++ b/mediapipe/framework/api2/node.h @@ -1,17 +1,15 @@ #ifndef MEDIAPIPE_FRAMEWORK_API2_NODE_H_ #define MEDIAPIPE_FRAMEWORK_API2_NODE_H_ -#include -#include +#include +#include -#include "mediapipe/framework/api2/const_str.h" +#include "absl/status/status.h" #include "mediapipe/framework/api2/contract.h" -#include "mediapipe/framework/api2/packet.h" -#include "mediapipe/framework/api2/port.h" #include "mediapipe/framework/calculator_base.h" #include "mediapipe/framework/calculator_context.h" #include "mediapipe/framework/calculator_contract.h" -#include "mediapipe/framework/deps/no_destructor.h" +#include "mediapipe/framework/deps/registration.h" #include "mediapipe/framework/subgraph.h" namespace mediapipe { @@ -178,7 +176,10 @@ class SubgraphImpl : public Subgraph, absl::make_unique>) // This macro is used to register a non-split-contract calculator. Deprecated. 
-#define MEDIAPIPE_REGISTER_NODE(name) REGISTER_CALCULATOR(name) +#define MEDIAPIPE_REGISTER_NODE(name) \ + MEDIAPIPE_REGISTER_FACTORY_FUNCTION_QUALIFIED( \ + mediapipe::CalculatorBaseRegistry, calculator_registration, #name, \ + absl::make_unique>) // This macro is used to define a subgraph that does not use automatic // registration. Deprecated. From 0a77b8c57bdfe91b4f194ab730548cc2f9bfd290 Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Thu, 7 Dec 2023 10:02:18 -0800 Subject: [PATCH 17/39] No public description PiperOrigin-RevId: 588827865 --- .../java/com/google/mediapipe/solutioncore/SolutionBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java b/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java index 6dbc11fd5..cdf11d85f 100644 --- a/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java +++ b/mediapipe/java/com/google/mediapipe/solutioncore/SolutionBase.java @@ -27,8 +27,8 @@ import com.google.mediapipe.framework.Graph; import com.google.mediapipe.framework.MediaPipeException; import com.google.mediapipe.framework.Packet; import com.google.mediapipe.framework.PacketGetter; -import com.google.mediapipe.solutioncore.logging.SolutionStatsLogger; import com.google.mediapipe.solutioncore.logging.SolutionStatsDummyLogger; +import com.google.mediapipe.solutioncore.logging.SolutionStatsLogger; import com.google.protobuf.Parser; import java.io.File; import java.util.List; From 20743b811051ee99303ef7c098d08cbbaaee74e4 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Thu, 7 Dec 2023 13:25:03 -0800 Subject: [PATCH 18/39] Update MediaPipe development version to 0.10.9 PiperOrigin-RevId: 588890763 --- version.bzl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/version.bzl b/version.bzl index c5d7d4d83..24048e1a2 100644 --- a/version.bzl +++ b/version.bzl @@ -1,3 +1,5 @@ """Version number for MediaPipe""" 
-MEDIAPIPE_FULL_VERSION = "0.10.8" +# The next version of MediaPipe (e.g. the version that is currently in development). +# This version should be bumped after every release. +MEDIAPIPE_FULL_VERSION = "0.10.9" From 4e78e645d060abd194e87ec495cf4c7134aba7e0 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Fri, 8 Dec 2023 16:58:17 -0800 Subject: [PATCH 19/39] No public description PiperOrigin-RevId: 589279414 --- mediapipe/framework/deps/strong_int.h | 33 ++++++++++++++------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/mediapipe/framework/deps/strong_int.h b/mediapipe/framework/deps/strong_int.h index b4bfef770..d3a0f77e2 100644 --- a/mediapipe/framework/deps/strong_int.h +++ b/mediapipe/framework/deps/strong_int.h @@ -31,14 +31,14 @@ // A StrongInt with a NullStrongIntValidator should compile away to a raw T // in optimized mode. What this means is that the generated assembly for: // -// int64 foo = 123; -// int64 bar = 456; -// int64 baz = foo + bar; -// constexpr int64 fubar = 789; +// int64_t foo = 123; +// int64_t bar = 456; +// int64_t baz = foo + bar; +// constexpr int64_t fubar = 789; // // ...should be identical to the generated assembly for: // -// DEFINE_STRONG_INT_TYPE(MyStrongInt, int64); +// DEFINE_STRONG_INT_TYPE(MyStrongInt, int64_t); // MyStrongInt foo(123); // MyStrongInt bar(456); // MyStrongInt baz = foo + bar; @@ -97,6 +97,7 @@ #ifndef MEDIAPIPE_DEPS_STRONG_INT_H_ #define MEDIAPIPE_DEPS_STRONG_INT_H_ +#include #include #include #include @@ -179,11 +180,11 @@ struct NullStrongIntValidator { } // Verify lhs << rhs. template - static void ValidateLeftShift(T lhs, int64 rhs) { /* do nothing */ + static void ValidateLeftShift(T lhs, int64_t rhs) { /* do nothing */ } // Verify lhs >> rhs. template - static void ValidateRightShift(T lhs, int64 rhs) { /* do nothing */ + static void ValidateRightShift(T lhs, int64_t rhs) { /* do nothing */ } // Verify lhs & rhs. 
template @@ -224,8 +225,8 @@ class StrongInt { // // Example: Assume you have two StrongInt types. // - // DEFINE_STRONG_INT_TYPE(Bytes, int64); - // DEFINE_STRONG_INT_TYPE(Megabytes, int64); + // DEFINE_STRONG_INT_TYPE(Bytes, int64_t); + // DEFINE_STRONG_INT_TYPE(Megabytes, int64_t); // // If you want to be able to (explicitly) construct an instance of Bytes from // an instance of Megabytes, simply define a converter function in the same @@ -337,12 +338,12 @@ class StrongInt { value_ %= arg; return *this; } - StrongInt &operator<<=(int64 arg) { // NOLINT(whitespace/operators) + StrongInt &operator<<=(int64_t arg) { // NOLINT(whitespace/operators) ValidatorType::template ValidateLeftShift(value_, arg); value_ <<= arg; return *this; } - StrongInt &operator>>=(int64 arg) { // NOLINT(whitespace/operators) + StrongInt &operator>>=(int64_t arg) { // NOLINT(whitespace/operators) ValidatorType::template ValidateRightShift(value_, arg); value_ >>= arg; return *this; @@ -378,19 +379,19 @@ std::ostream &operator<<(std::ostream &os, return os << arg.value(); } -// Provide the << operator, primarily for logging purposes. Specialized for int8 -// so that an integer and not a character is printed. +// Provide the << operator, primarily for logging purposes. Specialized for +// int8_t so that an integer and not a character is printed. template std::ostream &operator<<(std::ostream &os, - StrongInt arg) { + StrongInt arg) { return os << static_cast(arg.value()); } // Provide the << operator, primarily for logging purposes. Specialized for -// uint8 so that an integer and not a character is printed. +// uint8_t so that an integer and not a character is printed. 
template std::ostream &operator<<(std::ostream &os, - StrongInt arg) { + StrongInt arg) { return os << static_cast(arg.value()); } From 61efcf5a11aa6eeb571084fe8e206d1e4ce747c2 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 8 Dec 2023 17:21:02 -0800 Subject: [PATCH 20/39] internal-only change PiperOrigin-RevId: 589284167 --- mediapipe/framework/tool/BUILD | 2 +- mediapipe/util/BUILD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD index 7a4b5a112..2376aebb7 100644 --- a/mediapipe/framework/tool/BUILD +++ b/mediapipe/framework/tool/BUILD @@ -13,13 +13,13 @@ # limitations under the License. # +load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load( "//mediapipe/framework/tool:mediapipe_graph.bzl", "data_as_c_string", "mediapipe_binary_graph", ) -load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") licenses(["notice"]) diff --git a/mediapipe/util/BUILD b/mediapipe/util/BUILD index 0316224f7..fdeefb49e 100644 --- a/mediapipe/util/BUILD +++ b/mediapipe/util/BUILD @@ -13,8 +13,8 @@ # limitations under the License. 
# Placeholder: load py_library -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") load("//mediapipe/framework:mediapipe_cc_test.bzl", "mediapipe_cc_test") +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") licenses(["notice"]) From fec4dff0d66f1700ba31c948808b7b129a822a26 Mon Sep 17 00:00:00 2001 From: Kinar Date: Mon, 11 Dec 2023 04:58:15 -0800 Subject: [PATCH 21/39] Added Face Landmarker C Tasks API and tests --- mediapipe/tasks/c/components/containers/BUILD | 26 ++ .../tasks/c/components/containers/matrix.h | 41 +++ .../components/containers/matrix_converter.cc | 45 +++ .../components/containers/matrix_converter.h | 30 ++ .../containers/matrix_converter_test.cc | 52 ++++ .../tasks/c/vision/face_landmarker/BUILD | 148 +++++++++ .../vision/face_landmarker/face_landmarker.cc | 287 +++++++++++++++++ .../vision/face_landmarker/face_landmarker.h | 156 ++++++++++ .../face_landmarker/face_landmarker_result.h | 59 ++++ .../face_landmarker_result_converter.cc | 116 +++++++ .../face_landmarker_result_converter.h | 32 ++ .../face_landmarker_result_converter_test.cc | 157 ++++++++++ .../face_landmarker/face_landmarker_test.cc | 291 ++++++++++++++++++ mediapipe/tasks/testdata/vision/BUILD | 2 + 14 files changed, 1442 insertions(+) create mode 100644 mediapipe/tasks/c/components/containers/matrix.h create mode 100644 mediapipe/tasks/c/components/containers/matrix_converter.cc create mode 100644 mediapipe/tasks/c/components/containers/matrix_converter.h create mode 100644 mediapipe/tasks/c/components/containers/matrix_converter_test.cc create mode 100644 mediapipe/tasks/c/vision/face_landmarker/BUILD create mode 100644 mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc create mode 100644 mediapipe/tasks/c/vision/face_landmarker/face_landmarker.h create mode 100644 mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h create mode 100644 
mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.cc create mode 100644 mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h create mode 100644 mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc create mode 100644 mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc diff --git a/mediapipe/tasks/c/components/containers/BUILD b/mediapipe/tasks/c/components/containers/BUILD index 3c4b557b3..510bdcd81 100644 --- a/mediapipe/tasks/c/components/containers/BUILD +++ b/mediapipe/tasks/c/components/containers/BUILD @@ -43,6 +43,32 @@ cc_test( ], ) +cc_library( + name = "matrix", + hdrs = ["matrix.h"], +) + +cc_library( + name = "matrix_converter", + srcs = ["matrix_converter.cc"], + hdrs = ["matrix_converter.h"], + deps = [ + ":matrix", + "@eigen_archive//:eigen3", + ], +) + +cc_test( + name = "matrix_converter_test", + srcs = ["matrix_converter_test.cc"], + deps = [ + ":matrix", + ":matrix_converter", + "//mediapipe/framework/port:gtest", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "landmark", hdrs = ["landmark.h"], diff --git a/mediapipe/tasks/c/components/containers/matrix.h b/mediapipe/tasks/c/components/containers/matrix.h new file mode 100644 index 000000000..71ec7474c --- /dev/null +++ b/mediapipe/tasks/c/components/containers/matrix.h @@ -0,0 +1,41 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_MATRIX_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_MATRIX_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Data are stored in column-major order by default. +struct Matrix { + // The number of rows in the matrix. + long rows; + + // The number of columns in the matrix. + long cols; + + // The matrix data. + float* data; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_MATRIX_H_ diff --git a/mediapipe/tasks/c/components/containers/matrix_converter.cc b/mediapipe/tasks/c/components/containers/matrix_converter.cc new file mode 100644 index 000000000..6d823a424 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/matrix_converter.cc @@ -0,0 +1,45 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/matrix_converter.h" + +#include + +#include "Eigen/Core" +#include "mediapipe/tasks/c/components/containers/matrix.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToMatrix(const Eigen::MatrixXf& in, ::Matrix* out) { + out->rows = in.rows(); + out->cols = in.cols(); + out->data = new float[out->rows * out->cols]; + + // Copy data from Eigen matrix to C matrix in column-major order + for (int col = 0; col < out->cols; ++col) { + for (int row = 0; row < out->rows; ++row) { + out->data[col * out->rows + row] = in(row, col); + } + } +} + +void CppCloseMatrix(::Matrix* in) { + if (in->data) { + delete[] in->data; + in->data = nullptr; + } +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/components/containers/matrix_converter.h b/mediapipe/tasks/c/components/containers/matrix_converter.h new file mode 100644 index 000000000..cc92f5c98 --- /dev/null +++ b/mediapipe/tasks/c/components/containers/matrix_converter.h @@ -0,0 +1,30 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_MATRIX_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_MATRIX_CONVERTER_H_ + +#include "Eigen/Core" +#include "mediapipe/tasks/c/components/containers/matrix.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToMatrix(const Eigen::MatrixXf& in, ::Matrix* out); + +void CppCloseMatrix(::Matrix* data); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_MATRIX_CONVERTER_H_ diff --git a/mediapipe/tasks/c/components/containers/matrix_converter_test.cc b/mediapipe/tasks/c/components/containers/matrix_converter_test.cc new file mode 100644 index 000000000..01248203e --- /dev/null +++ b/mediapipe/tasks/c/components/containers/matrix_converter_test.cc @@ -0,0 +1,52 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/components/containers/matrix_converter.h" + +#include +#include +#include + +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/matrix.h" + +namespace mediapipe::tasks::c::components::containers { + +TEST(MatrixConversionTest, ConvertsEigenMatrixToCMatrixAndFreesMemory) { + // Initialize an Eigen::MatrixXf + Eigen::MatrixXf cpp_matrix(2, 2); + cpp_matrix << 1.0f, 2.0f, 3.0f, 4.0f; + + // Convert this Eigen matrix to C-style Matrix + ::Matrix c_matrix; + CppConvertToMatrix(cpp_matrix, &c_matrix); + + // Verify the conversion + EXPECT_EQ(c_matrix.rows, 2); + EXPECT_EQ(c_matrix.cols, 2); + ASSERT_NE(c_matrix.data, nullptr); + EXPECT_FLOAT_EQ(c_matrix.data[0], 1.0f); + EXPECT_FLOAT_EQ(c_matrix.data[1], 2.0f); + EXPECT_FLOAT_EQ(c_matrix.data[2], 3.0f); + EXPECT_FLOAT_EQ(c_matrix.data[3], 4.0f); + + // Close the C-style Matrix + CppCloseMatrix(&c_matrix); + + // Verify that memory is freed + EXPECT_EQ(c_matrix.data, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/face_landmarker/BUILD b/mediapipe/tasks/c/vision/face_landmarker/BUILD new file mode 100644 index 000000000..083bfa592 --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/BUILD @@ -0,0 +1,148 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "face_landmarker_result", + hdrs = ["face_landmarker_result.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/components/containers:matrix", + ], +) + +cc_library( + name = "face_landmarker_result_converter", + srcs = ["face_landmarker_result_converter.cc"], + hdrs = ["face_landmarker_result_converter.h"], + deps = [ + ":face_landmarker_result", + "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/components/containers:category_converter", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/components/containers:landmark_converter", + "//mediapipe/tasks/c/components/containers:matrix", + "//mediapipe/tasks/c/components/containers:matrix_converter", + "//mediapipe/tasks/cc/components/containers:category", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_result", + ], +) + +cc_test( + name = "face_landmarker_result_converter_test", + srcs = ["face_landmarker_result_converter_test.cc"], + linkstatic = 1, + deps = [ + ":face_landmarker_result", + ":face_landmarker_result_converter", + "//mediapipe/framework/port:gtest", + "//mediapipe/framework/formats:matrix", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/cc/components/containers:category", + "//mediapipe/tasks/cc/components/containers:classification_result", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_result", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "face_landmarker_lib", + srcs = ["face_landmarker.cc"], + hdrs = ["face_landmarker.h"], + visibility = ["//visibility:public"], + deps = [ + 
":face_landmarker_result", + ":face_landmarker_result_converter", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/face_landmarker", + "//mediapipe/tasks/cc/vision/face_landmarker:face_landmarker_result", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "face_landmarker_test", + srcs = ["face_landmarker_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":face_landmarker_lib", + ":face_landmarker_result", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/face_landmarker:libface_landmarker.so +cc_binary( + name = "libface_landmarker.so", + linkopts = [ + "-Wl,-soname=libface_landmarker.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":face_landmarker_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# 
//mediapipe/tasks/c/vision/face_landmarker:libface_landmarker.dylib +cc_binary( + name = "libface_landmarker.dylib", + linkopts = [ + "-Wl,-install_name,libface_landmarker.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":face_landmarker_lib"], +) diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc new file mode 100644 index 000000000..47f26a120 --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc @@ -0,0 +1,287 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker.h" + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::face_landmarker { + +namespace { + +using ::mediapipe::tasks::c::components::containers:: + CppCloseFaceLandmarkerResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToFaceLandmarkerResult; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::face_landmarker::FaceLandmarker; +typedef ::mediapipe::tasks::vision::face_landmarker::FaceLandmarkerResult + CppFaceLandmarkerResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToFaceLandmarkerOptions( + const FaceLandmarkerOptions& in, + mediapipe::tasks::vision::face_landmarker::FaceLandmarkerOptions* out) { + out->num_faces = in.num_faces; + out->min_face_detection_confidence = in.min_face_detection_confidence; + 
out->min_face_presence_confidence = in.min_face_presence_confidence; + out->min_tracking_confidence = in.min_tracking_confidence; + out->output_face_blendshapes = in.output_face_blendshapes; + out->output_facial_transformation_matrixes = + in.output_facial_transformation_matrixes; +} + +FaceLandmarker* CppFaceLandmarkerCreate(const FaceLandmarkerOptions& options, + char** error_msg) { + auto cpp_options = std::make_unique< + ::mediapipe::tasks::vision::face_landmarker::FaceLandmarkerOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToFaceLandmarkerOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. + if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create FaceLandmarker: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + FaceLandmarkerOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. 
+ FaceLandmarkerResult result; + CppConvertToFaceLandmarkerResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppCloseFaceLandmarkerResult(&result); + }; + } + + auto landmarker = FaceLandmarker::Create(std::move(cpp_options)); + if (!landmarker.ok()) { + ABSL_LOG(ERROR) << "Failed to create FaceLandmarker: " + << landmarker.status(); + CppProcessError(landmarker.status(), error_msg); + return nullptr; + } + return landmarker->release(); +} + +int CppFaceLandmarkerDetect(void* landmarker, const MpImage& image, + FaceLandmarkerResult* result, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->Detect(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToFaceLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppFaceLandmarkerDetectForVideo(void* landmarker, const MpImage& image, + int64_t timestamp_ms, + FaceLandmarkerResult* result, + char** error_msg) 
{ + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->DetectForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToFaceLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppFaceLandmarkerDetectAsync(void* landmarker, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->DetectAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the landmark detection failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppFaceLandmarkerCloseResult(FaceLandmarkerResult* result) { + 
CppCloseFaceLandmarkerResult(result); +} + +int CppFaceLandmarkerClose(void* landmarker, char** error_msg) { + auto cpp_landmarker = static_cast(landmarker); + auto result = cpp_landmarker->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close FaceLandmarker: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_landmarker; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::face_landmarker + +extern "C" { + +void* face_landmarker_create(struct FaceLandmarkerOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::face_landmarker::CppFaceLandmarkerCreate( + *options, error_msg); +} + +int face_landmarker_detect_image(void* landmarker, const MpImage& image, + FaceLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::face_landmarker::CppFaceLandmarkerDetect( + landmarker, image, result, error_msg); +} + +int face_landmarker_detect_for_video(void* landmarker, const MpImage& image, + int64_t timestamp_ms, + FaceLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::face_landmarker:: + CppFaceLandmarkerDetectForVideo(landmarker, image, timestamp_ms, result, + error_msg); +} + +int face_landmarker_detect_async(void* landmarker, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::face_landmarker:: + CppFaceLandmarkerDetectAsync(landmarker, image, timestamp_ms, error_msg); +} + +void face_landmarker_close_result(FaceLandmarkerResult* result) { + mediapipe::tasks::c::vision::face_landmarker::CppFaceLandmarkerCloseResult( + result); +} + +int face_landmarker_close(void* landmarker, char** error_ms) { + return mediapipe::tasks::c::vision::face_landmarker::CppFaceLandmarkerClose( + landmarker, error_ms); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.h b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.h new file mode 100644 index 
000000000..6256e9ce7 --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.h @@ -0,0 +1,156 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_ +#define MEDIAPIPE_TASKS_C_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_ + +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The options for configuring a MediaPipe face landmarker task. +struct FaceLandmarkerOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Default to the image mode. + // FaceLandmarker has three running modes: + // 1) The image mode for recognizing face landmarks on single image inputs. + // 2) The video mode for recognizing face landmarks on the decoded frames of a + // video. + // 3) The live stream mode for recognizing face landmarks on the live stream + // of input data, such as from camera. 
In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. + RunningMode running_mode; + + // The maximum number of faces that can be detected by the FaceLandmarker. + int num_faces = 1; + + // The minimum confidence score for the face detection to be considered + // successful. + float min_face_detection_confidence = 0.5; + + // The minimum confidence score of face presence score in the face landmark + // detection. + float min_face_presence_confidence = 0.5; + + // The minimum confidence score for the face tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // Whether FaceLandmarker outputs face blendshapes classification. Face + // blendshapes are used for rendering the 3D face model. + bool output_face_blendshapes = false; + + // Whether FaceLandmarker outputs facial transformation_matrix. Facial + // transformation matrix is used to transform the face landmarks in canonical + // face to the detected face, so that users can apply face effects on the + // detected landmarks. + bool output_facial_transformation_matrixes = false; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. Arguments of the callback function include: + // the pointer to recognition result, the image that result was obtained + // on, the timestamp relevant to recognition results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. + // + // A caller is responsible for closing face landmarker result. + typedef void (*result_callback_fn)(FaceLandmarkerResult* result, + const MpImage& image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates a FaceLandmarker from the provided `options`. 
+// Returns a pointer to the face landmarker on success. +// If an error occurs, returns `nullptr` and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT void* face_landmarker_create(struct FaceLandmarkerOptions* options, + char** error_msg); + +// Performs face landmark detection on the input `image`. Returns `0` on +// success. If an error occurs, returns an error code and sets the error +// parameter to an error message (if `error_msg` is not `nullptr`). You must +// free the memory allocated for the error message. +MP_EXPORT int face_landmarker_detect_image(void* landmarker, + const MpImage& image, + FaceLandmarkerResult* result, + char** error_msg); + +// Performs face landmark detection on the provided video frame. +// Only use this method when the FaceLandmarker is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int face_landmarker_detect_for_video(void* landmarker, + const MpImage& image, + int64_t timestamp_ms, + FaceLandmarkerResult* result, + char** error_msg); + +// Sends live image data to face landmark detection, and the results will be +// available via the `result_callback` provided in the FaceLandmarkerOptions. +// Only use this method when the FaceLandmarker is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the face landmarker. The input timestamps must be monotonically +// increasing. 
+// The `result_callback` provides: +// - The recognition results as a FaceLandmarkerResult object. +// - The const reference to the corresponding input image that the face +// landmarker runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int face_landmarker_detect_async(void* landmarker, + const MpImage& image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a FaceLandmarkerResult result. +// Does not free the result pointer itself. +MP_EXPORT void face_landmarker_close_result(FaceLandmarkerResult* result); + +// Frees face landmarker. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int face_landmarker_close(void* landmarker, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_FACE_LANDMARKER_FACE_LANDMARKER_H_ diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h new file mode 100644 index 000000000..27d698d13 --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h @@ -0,0 +1,59 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_FACE_LANDMARKER_RESULT_FACE_LANDMARKER_RESULT_H_ +#define MEDIAPIPE_TASKS_C_VISION_FACE_LANDMARKER_RESULT_FACE_LANDMARKER_RESULT_H_ + +#include + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/components/containers/matrix.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The face landmarker result from FaceLandmarker, where each vector +// element represents a single face detected in the image. +struct FaceLandmarkerResult { + // Optional face blendshapes results. + struct Categories* face_blendshapes; + + // The number of elements in the face_blendshapes array. + uint32_t face_blendshapes_count; + + // Detected face landmarks in normalized image coordinates. + struct NormalizedLandmarks* face_landmarks; + + // The number of elements in the face_landmarks array. + uint32_t face_landmarks_count; + + // Optional facial transformation matrix. + struct Matrix* facial_transformation_matrixes; + + // The number of elements in the facial_transformation_matrixes array. 
+ uint32_t facial_transformation_matrixes_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_FACE_LANDMARKER_RESULT_FACE_LANDMARKER_RESULT_H_ diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.cc new file mode 100644 index 000000000..026719153 --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.cc @@ -0,0 +1,116 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h" + +#include + +#include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/components/containers/category_converter.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/components/containers/landmark_converter.h" +#include "mediapipe/tasks/c/components/containers/matrix.h" +#include "mediapipe/tasks/c/components/containers/matrix_converter.h" +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/cc/components/containers/category.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +using CppCategory = ::mediapipe::tasks::components::containers::Category; +using CppNormalizedLandmark = + ::mediapipe::tasks::components::containers::NormalizedLandmark; + +void CppConvertToFaceLandmarkerResult( + const ::mediapipe::tasks::vision::face_landmarker::FaceLandmarkerResult& in, + FaceLandmarkerResult* out) { + out->face_landmarks_count = in.face_landmarks.size(); + out->face_landmarks = new NormalizedLandmarks[out->face_landmarks_count]; + for (uint32_t i = 0; i < out->face_landmarks_count; ++i) { + std::vector cpp_normalized_landmarks; + for (uint32_t j = 0; j < in.face_landmarks[i].landmarks.size(); ++j) { + const auto& cpp_landmark = in.face_landmarks[i].landmarks[j]; + cpp_normalized_landmarks.push_back(cpp_landmark); + } + CppConvertToNormalizedLandmarks(cpp_normalized_landmarks, + &out->face_landmarks[i]); + } + + if (in.face_blendshapes.has_value()) { + out->face_blendshapes_count = in.face_blendshapes->size(); + out->face_blendshapes = new Categories[out->face_blendshapes_count]; + + for (uint32_t i = 0; i < 
out->face_blendshapes_count; ++i) { + uint32_t categories_count = + in.face_blendshapes.value()[i].categories.size(); + out->face_blendshapes[i].categories_count = categories_count; + out->face_blendshapes[i].categories = new Category[categories_count]; + + for (uint32_t j = 0; j < categories_count; ++j) { + const auto& cpp_category = in.face_blendshapes.value()[i].categories[j]; + CppConvertToCategory(cpp_category, + &out->face_blendshapes[i].categories[j]); + } + } + } else { + out->face_blendshapes_count = 0; + out->face_blendshapes = nullptr; + } + + if (in.facial_transformation_matrixes.has_value()) { + out->facial_transformation_matrixes_count = + in.facial_transformation_matrixes.value().size(); + out->facial_transformation_matrixes = + new ::Matrix[out->facial_transformation_matrixes_count]; + for (uint32_t i = 0; i < out->facial_transformation_matrixes_count; ++i) { + CppConvertToMatrix(in.facial_transformation_matrixes.value()[i], + &out->facial_transformation_matrixes[i]); + } + } else { + out->facial_transformation_matrixes_count = 0; + out->facial_transformation_matrixes = nullptr; + } +} + +void CppCloseFaceLandmarkerResult(FaceLandmarkerResult* result) { + for (uint32_t i = 0; i < result->face_blendshapes_count; ++i) { + for (uint32_t j = 0; j < result->face_blendshapes[i].categories_count; + ++j) { + CppCloseCategory(&result->face_blendshapes[i].categories[j]); + } + delete[] result->face_blendshapes[i].categories; + } + delete[] result->face_blendshapes; + + for (uint32_t i = 0; i < result->face_landmarks_count; ++i) { + CppCloseNormalizedLandmarks(&result->face_landmarks[i]); + } + delete[] result->face_landmarks; + + for (uint32_t i = 0; i < result->facial_transformation_matrixes_count; ++i) { + CppCloseMatrix(&result->facial_transformation_matrixes[i]); + } + delete[] result->facial_transformation_matrixes; + + result->face_blendshapes_count = 0; + result->face_landmarks_count = 0; + result->facial_transformation_matrixes_count = 0; + 
result->face_blendshapes = nullptr; + result->face_landmarks = nullptr; + result->facial_transformation_matrixes = nullptr; +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h new file mode 100644 index 000000000..dfdfa57af --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h @@ -0,0 +1,32 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_FACE_LANDMARKER_RESULT_CONVERTER_H_ +#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_FACE_LANDMARKER_RESULT_CONVERTER_H_ + +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void CppConvertToFaceLandmarkerResult( + const mediapipe::tasks::vision::face_landmarker::FaceLandmarkerResult& in, + FaceLandmarkerResult* out); + +void CppCloseFaceLandmarkerResult(FaceLandmarkerResult* result); + +} // namespace mediapipe::tasks::c::components::containers + +#endif // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_FACE_LANDMARKER_RESULT_CONVERTER_H_ diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc new file mode 100644 index 000000000..59d3023c1 --- /dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc @@ -0,0 +1,157 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter.h" + +#include + +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/cc/components/containers/category.h" +#include "mediapipe/tasks/cc/components/containers/classification_result.h" +#include "mediapipe/tasks/cc/components/containers/landmark.h" +#include "mediapipe/tasks/cc/vision/face_landmarker/face_landmarker_result.h" + +namespace mediapipe::tasks::c::components::containers { + +void InitFaceLandmarkerResult( + ::mediapipe::tasks::vision::face_landmarker::FaceLandmarkerResult* + cpp_result) { + // Initialize face_landmarks + mediapipe::tasks::components::containers::NormalizedLandmark + cpp_normalized_landmark = {/* x= */ 0.1f, /* y= */ 0.2f, /* z= */ 0.3f}; + mediapipe::tasks::components::containers::NormalizedLandmarks + cpp_normalized_landmarks; + cpp_normalized_landmarks.landmarks.push_back(cpp_normalized_landmark); + cpp_result->face_landmarks.push_back(cpp_normalized_landmarks); + + // Initialize face_blendshapes + mediapipe::tasks::components::containers::Category cpp_category = { + /* index= */ 1, + /* score= */ 0.8f, + /* category_name= */ "blendshape_label_1", + /* display_name= */ "blendshape_display_name_1"}; + mediapipe::tasks::components::containers::Classifications + classifications_for_blendshapes; + classifications_for_blendshapes.categories.push_back(cpp_category); + + cpp_result->face_blendshapes = + std::vector{ + classifications_for_blendshapes}; + // cpp_result->face_blendshapes->push_back( + // classifications_for_blendshapes); + + // Initialize facial_transformation_matrixes + Eigen::MatrixXf cpp_matrix(2, 2); + cpp_matrix << 1.0f, 2.0f, 3.0f, 4.0f; + 
cpp_result->facial_transformation_matrixes = std::vector{cpp_matrix}; +} + +TEST(FaceLandmarkerResultConverterTest, ConvertsCustomResult) { + // Initialize a C++ FaceLandmarkerResult + ::mediapipe::tasks::vision::face_landmarker::FaceLandmarkerResult cpp_result; + InitFaceLandmarkerResult(&cpp_result); + + FaceLandmarkerResult c_result; + CppConvertToFaceLandmarkerResult(cpp_result, &c_result); + + // Verify conversion of face_landmarks + EXPECT_EQ(c_result.face_landmarks_count, cpp_result.face_landmarks.size()); + for (uint32_t i = 0; i < c_result.face_landmarks_count; ++i) { + EXPECT_EQ(c_result.face_landmarks[i].landmarks_count, + cpp_result.face_landmarks[i].landmarks.size()); + for (uint32_t j = 0; j < c_result.face_landmarks[i].landmarks_count; ++j) { + const auto& cpp_landmark = cpp_result.face_landmarks[i].landmarks[j]; + EXPECT_FLOAT_EQ(c_result.face_landmarks[i].landmarks[j].x, + cpp_landmark.x); + EXPECT_FLOAT_EQ(c_result.face_landmarks[i].landmarks[j].y, + cpp_landmark.y); + EXPECT_FLOAT_EQ(c_result.face_landmarks[i].landmarks[j].z, + cpp_landmark.z); + } + } + + // Verify conversion of face_blendshapes + if (cpp_result.face_blendshapes.has_value()) { + EXPECT_EQ(c_result.face_blendshapes_count, + cpp_result.face_blendshapes.value().size()); + for (uint32_t i = 0; i < c_result.face_blendshapes_count; ++i) { + const auto& cpp_face_blendshapes = cpp_result.face_blendshapes.value(); + EXPECT_EQ(c_result.face_blendshapes[i].categories_count, + cpp_face_blendshapes[i].categories.size()); + for (uint32_t j = 0; j < c_result.face_blendshapes[i].categories_count; + ++j) { + const auto& cpp_category = cpp_face_blendshapes[i].categories[j]; + EXPECT_EQ(c_result.face_blendshapes[i].categories[j].index, + cpp_category.index); + EXPECT_FLOAT_EQ(c_result.face_blendshapes[i].categories[j].score, + cpp_category.score); + EXPECT_EQ(std::string( + c_result.face_blendshapes[i].categories[j].category_name), + cpp_category.category_name); + } + } + } + + // Verify 
conversion of facial_transformation_matrixes + if (cpp_result.facial_transformation_matrixes.has_value()) { + EXPECT_EQ(c_result.facial_transformation_matrixes_count, + cpp_result.facial_transformation_matrixes.value().size()); + for (uint32_t i = 0; i < c_result.facial_transformation_matrixes_count; + ++i) { + const auto& cpp_facial_transformation_matrixes = + cpp_result.facial_transformation_matrixes.value(); + // Assuming Matrix struct contains data array and dimensions + const auto& cpp_matrix = cpp_facial_transformation_matrixes[i]; + EXPECT_EQ(c_result.facial_transformation_matrixes[i].rows, + cpp_matrix.rows()); + EXPECT_EQ(c_result.facial_transformation_matrixes[i].cols, + cpp_matrix.cols()); + // Check each element of the matrix + for (long row = 0; row < cpp_matrix.rows(); ++row) { + for (long col = 0; col < cpp_matrix.cols(); ++col) { + size_t index = col * cpp_matrix.rows() + row; // Column-major index + EXPECT_FLOAT_EQ( + c_result.facial_transformation_matrixes[i].data[index], + cpp_matrix(row, col)); + } + } + } + } + + CppCloseFaceLandmarkerResult(&c_result); +} + +TEST(FaceLandmarkerResultConverterTest, FreesMemory) { + ::mediapipe::tasks::vision::face_landmarker::FaceLandmarkerResult cpp_result; + InitFaceLandmarkerResult(&cpp_result); + + FaceLandmarkerResult c_result; + CppConvertToFaceLandmarkerResult(cpp_result, &c_result); + + EXPECT_NE(c_result.face_blendshapes, nullptr); + EXPECT_NE(c_result.face_landmarks, nullptr); + EXPECT_NE(c_result.facial_transformation_matrixes, nullptr); + + CppCloseFaceLandmarkerResult(&c_result); + + EXPECT_EQ(c_result.face_blendshapes, nullptr); + EXPECT_EQ(c_result.face_landmarks, nullptr); + EXPECT_EQ(c_result.facial_transformation_matrixes, nullptr); +} + +} // namespace mediapipe::tasks::c::components::containers diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc new file mode 100644 index 000000000..d96d7f32a --- 
/dev/null +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc @@ -0,0 +1,291 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker.h" + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace { + +using ::mediapipe::file::JoinPath; +using ::mediapipe::tasks::vision::DecodeImageFromFile; +using testing::HasSubstr; + +constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/"; +constexpr char kModelName[] = "face_landmarker_v2_with_blendshapes.task"; +constexpr char kImageFile[] = "portrait.jpg"; +constexpr float kLandmarksPrecision = 0.03; +constexpr float kBlendshapesPrecision = 0.12; +constexpr float kFacialTransformationMatrixPrecision = 0.05; +constexpr int kIterations = 100; + +std::string GetFullPath(absl::string_view file_name) { + return JoinPath("./", kTestDataDirectory, 
file_name); +} + +void MatchesFaceLandmarkerResult(FaceLandmarkerResult* result, + const float blendshapes_precision, + const float landmark_precision, + const float matrix_precison) { + // Expects to have the same number of faces detected. + EXPECT_EQ(result->face_blendshapes_count, 1); + + // Actual blendshapes matches expected blendshapes. + EXPECT_EQ( + std::string{result->face_blendshapes[0].categories[0].category_name}, + "_neutral"); + EXPECT_NEAR(result->face_blendshapes[0].categories[0].score, 0.0f, + blendshapes_precision); + + // Actual landmarks match expected landmarks. + EXPECT_NEAR(result->face_landmarks[0].landmarks[0].x, 0.4977f, + landmark_precision); + EXPECT_NEAR(result->face_landmarks[0].landmarks[0].y, 0.2485f, + landmark_precision); + EXPECT_NEAR(result->face_landmarks[0].landmarks[0].z, -0.0305f, + landmark_precision); + + // Expects to have at least one facial transformation matrix. + EXPECT_GE(result->facial_transformation_matrixes_count, 1); + + // Actual matrix matches expected matrix. + // Assuming the expected matrix is 2x2 for demonstration. 
+ const float expected_matrix[4] = {0.9991f, 0.0166f, -0.0374f, 0.0f}; + for (int i = 0; i < 4; ++i) { + printf(">> %f <<", result->facial_transformation_matrixes[0].data[i]); + EXPECT_NEAR(result->facial_transformation_matrixes[0].data[i], + expected_matrix[i], matrix_precison); + } +} + +TEST(FaceLandmarkerTest, ImageModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + FaceLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_faces= */ 1, + /* min_face_detection_confidence= */ 0.5, + /* min_face_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* output_face_blendshapes = */ true, + /* output_facial_transformation_matrixes = */ true, + }; + + void* landmarker = face_landmarker_create(&options, /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + FaceLandmarkerResult result; + face_landmarker_detect_image(landmarker, mp_image, &result, + /* error_msg */ nullptr); + MatchesFaceLandmarkerResult(&result, kBlendshapesPrecision, + kLandmarksPrecision, + kFacialTransformationMatrixPrecision); + face_landmarker_close_result(&result); + face_landmarker_close(landmarker, /* error_msg */ nullptr); +} + +TEST(FaceLandmarkerTest, VideoModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + FaceLandmarkerOptions options = { + /* base_options= */ {/* 
model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::VIDEO, + /* num_faces= */ 1, + /* min_face_detection_confidence= */ 0.5, + /* min_face_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* output_face_blendshapes = */ true, + /* output_facial_transformation_matrixes = */ true, + }; + + void* landmarker = face_landmarker_create(&options, + /* error_msg */ nullptr); + EXPECT_NE(landmarker, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + FaceLandmarkerResult result; + face_landmarker_detect_for_video(landmarker, mp_image, i, &result, + /* error_msg */ nullptr); + + MatchesFaceLandmarkerResult(&result, kBlendshapesPrecision, + kLandmarksPrecision, + kFacialTransformationMatrixPrecision); + face_landmarker_close_result(&result); + } + face_landmarker_close(landmarker, /* error_msg */ nullptr); +} + +// A structure to support LiveStreamModeTest below. This structure holds a +// static method `Fn` for a callback function of C API. A `static` qualifier +// allows to take an address of the method to follow API style. Another static +// struct member is `last_timestamp` that is used to verify that current +// timestamp is greater than the previous one. 
+struct LiveStreamModeCallback { + static int64_t last_timestamp; + static void Fn(FaceLandmarkerResult* landmarker_result, const MpImage& image, + int64_t timestamp, char* error_msg) { + ASSERT_NE(landmarker_result, nullptr); + ASSERT_EQ(error_msg, nullptr); + MatchesFaceLandmarkerResult(landmarker_result, kBlendshapesPrecision, + kLandmarksPrecision, + kFacialTransformationMatrixPrecision); + EXPECT_GT(image.image_frame.width, 0); + EXPECT_GT(image.image_frame.height, 0); + EXPECT_GT(timestamp, last_timestamp); + ++last_timestamp; + } +}; +int64_t LiveStreamModeCallback::last_timestamp = -1; + +TEST(FaceLandmarkerTest, LiveStreamModeTest) { + const auto image = DecodeImageFromFile(GetFullPath(kImageFile)); + ASSERT_TRUE(image.ok()); + + const std::string model_path = GetFullPath(kModelName); + + FaceLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::LIVE_STREAM, + /* num_faces= */ 1, + /* min_face_detection_confidence= */ 0.5, + /* min_face_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* output_face_blendshapes = */ true, + /* output_facial_transformation_matrixes = */ true, + /* result_callback= */ LiveStreamModeCallback::Fn, + }; + + void* landmarker = face_landmarker_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(landmarker, nullptr); + + const auto& image_frame = image->GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = {.format = static_cast(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + for (int i = 0; i < kIterations; ++i) { + EXPECT_GE(face_landmarker_detect_async(landmarker, mp_image, i, + /* error_msg */ nullptr), + 0); + } + face_landmarker_close(landmarker, /* error_msg */ nullptr); + + // Due to the flow limiter, the 
total of outputs might be smaller than the + // number of iterations. + EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations); + EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0); +} + +TEST(FaceLandmarkerTest, InvalidArgumentHandling) { + // It is an error to set neither the asset buffer nor the path. + FaceLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ nullptr}, + /* running_mode= */ RunningMode::IMAGE, + /* num_faces= */ 1, + /* min_face_detection_confidence= */ 0.5, + /* min_face_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* output_face_blendshapes = */ true, + /* output_facial_transformation_matrixes = */ true, + }; + + char* error_msg; + void* landmarker = face_landmarker_create(&options, &error_msg); + EXPECT_EQ(landmarker, nullptr); + + EXPECT_THAT( + error_msg, + HasSubstr("INVALID_ARGUMENT: BLENDSHAPES Tag and blendshapes model must " + "be both set. 
Get BLENDSHAPES is set: true, blendshapes model " + "is set: false [MediaPipeTasksStatus='601']")); + + free(error_msg); +} + +TEST(FaceLandmarkerTest, FailedRecognitionHandling) { + const std::string model_path = GetFullPath(kModelName); + FaceLandmarkerOptions options = { + /* base_options= */ {/* model_asset_buffer= */ nullptr, + /* model_asset_buffer_count= */ 0, + /* model_asset_path= */ model_path.c_str()}, + /* running_mode= */ RunningMode::IMAGE, + /* num_faces= */ 1, + /* min_face_detection_confidence= */ 0.5, + /* min_face_presence_confidence= */ 0.5, + /* min_tracking_confidence= */ 0.5, + /* output_face_blendshapes = */ true, + /* output_facial_transformation_matrixes = */ true, + }; + + void* landmarker = face_landmarker_create(&options, /* error_msg */ + nullptr); + EXPECT_NE(landmarker, nullptr); + + const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}}; + FaceLandmarkerResult result; + char* error_msg; + face_landmarker_detect_image(landmarker, mp_image, &result, &error_msg); + EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet")); + free(error_msg); + face_landmarker_close(landmarker, /* error_msg */ nullptr); +} + +} // namespace diff --git a/mediapipe/tasks/testdata/vision/BUILD b/mediapipe/tasks/testdata/vision/BUILD index 2f5157309..616183c9b 100644 --- a/mediapipe/tasks/testdata/vision/BUILD +++ b/mediapipe/tasks/testdata/vision/BUILD @@ -48,6 +48,7 @@ mediapipe_files(srcs = [ "face_landmark.tflite", "face_landmarker.task", "face_landmarker_v2.task", + "face_landmarker_v2_with_blendshapes.task", "face_stylizer_color_ink.task", "fist.jpg", "fist.png", @@ -185,6 +186,7 @@ filegroup( "face_detection_short_range.tflite", "face_landmarker.task", "face_landmarker_v2.task", + "face_landmarker_v2_with_blendshapes.task", "face_stylizer_color_ink.task", "gesture_recognizer.task", "hair_segmentation.tflite", From 0200d32285731889f7a0f92fe83859a00f8d56ab Mon Sep 17 00:00:00 2001 From: Kinar Date: Mon, 11 Dec 2023 05:20:48 
-0800 Subject: [PATCH 22/39] Changed Recognition to Detection --- .../c/vision/face_landmarker/face_landmarker.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc index 47f26a120..033642c7f 100644 --- a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker.cc @@ -98,7 +98,7 @@ FaceLandmarker* CppFaceLandmarkerCreate(const FaceLandmarkerOptions& options, char* error_msg = nullptr; if (!cpp_result.ok()) { - ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); CppProcessError(cpp_result.status(), &error_msg); result_callback(nullptr, MpImage(), timestamp, error_msg); free(error_msg); @@ -141,7 +141,7 @@ int CppFaceLandmarkerDetect(void* landmarker, const MpImage& image, const absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet."); - ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); return CppProcessError(status, error_msg); } @@ -158,7 +158,7 @@ int CppFaceLandmarkerDetect(void* landmarker, const MpImage& image, auto cpp_landmarker = static_cast(landmarker); auto cpp_result = cpp_landmarker->Detect(*img); if (!cpp_result.ok()) { - ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); } CppConvertToFaceLandmarkerResult(*cpp_result, result); @@ -173,7 +173,7 @@ int CppFaceLandmarkerDetectForVideo(void* landmarker, const MpImage& image, absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); - ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); 
return CppProcessError(status, error_msg); } @@ -190,7 +190,7 @@ int CppFaceLandmarkerDetectForVideo(void* landmarker, const MpImage& image, auto cpp_landmarker = static_cast(landmarker); auto cpp_result = cpp_landmarker->DetectForVideo(*img, timestamp_ms); if (!cpp_result.ok()) { - ABSL_LOG(ERROR) << "Recognition failed: " << cpp_result.status(); + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); return CppProcessError(cpp_result.status(), error_msg); } CppConvertToFaceLandmarkerResult(*cpp_result, result); @@ -203,7 +203,7 @@ int CppFaceLandmarkerDetectAsync(void* landmarker, const MpImage& image, absl::Status status = absl::InvalidArgumentError("GPU Buffer not supported yet"); - ABSL_LOG(ERROR) << "Recognition failed: " << status.message(); + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); return CppProcessError(status, error_msg); } From bd946db5a641e7e2db266181606d8814cd77da82 Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Mon, 11 Dec 2023 11:38:12 -0800 Subject: [PATCH 23/39] No public description PiperOrigin-RevId: 589890130 --- .../tasks/cc/vision/image_generator/diffuser/diffuser_gpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mediapipe/tasks/cc/vision/image_generator/diffuser/diffuser_gpu.h b/mediapipe/tasks/cc/vision/image_generator/diffuser/diffuser_gpu.h index 85738b80b..613c8e0f2 100644 --- a/mediapipe/tasks/cc/vision/image_generator/diffuser/diffuser_gpu.h +++ b/mediapipe/tasks/cc/vision/image_generator/diffuser/diffuser_gpu.h @@ -32,6 +32,7 @@ enum DiffuserModelType { kDiffuserModelTypeDistilledGldm, kDiffuserModelTypeSd2Base, kDiffuserModelTypeTigo, + kDiffuserModelTypeTigoUfo, }; enum DiffuserPriorityHint { From 42b251cb8d4808746465f9f3e3305a833eb890db Mon Sep 17 00:00:00 2001 From: Kinar Date: Mon, 11 Dec 2023 16:11:39 -0800 Subject: [PATCH 24/39] Updated tests in face_landmarker_result_converter_test --- .../face_landmarker_result_converter_test.cc | 75 +++++++++---------- 1 file changed, 34 insertions(+), 
41 deletions(-) diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc index 59d3023c1..69f8b6f47 100644 --- a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result_converter_test.cc @@ -52,8 +52,7 @@ void InitFaceLandmarkerResult( cpp_result->face_blendshapes = std::vector{ classifications_for_blendshapes}; - // cpp_result->face_blendshapes->push_back( - // classifications_for_blendshapes); + cpp_result->face_blendshapes->push_back(classifications_for_blendshapes); // Initialize facial_transformation_matrixes Eigen::MatrixXf cpp_matrix(2, 2); @@ -86,49 +85,43 @@ TEST(FaceLandmarkerResultConverterTest, ConvertsCustomResult) { } // Verify conversion of face_blendshapes - if (cpp_result.face_blendshapes.has_value()) { - EXPECT_EQ(c_result.face_blendshapes_count, - cpp_result.face_blendshapes.value().size()); - for (uint32_t i = 0; i < c_result.face_blendshapes_count; ++i) { - const auto& cpp_face_blendshapes = cpp_result.face_blendshapes.value(); - EXPECT_EQ(c_result.face_blendshapes[i].categories_count, - cpp_face_blendshapes[i].categories.size()); - for (uint32_t j = 0; j < c_result.face_blendshapes[i].categories_count; - ++j) { - const auto& cpp_category = cpp_face_blendshapes[i].categories[j]; - EXPECT_EQ(c_result.face_blendshapes[i].categories[j].index, - cpp_category.index); - EXPECT_FLOAT_EQ(c_result.face_blendshapes[i].categories[j].score, - cpp_category.score); - EXPECT_EQ(std::string( - c_result.face_blendshapes[i].categories[j].category_name), - cpp_category.category_name); - } + EXPECT_EQ(c_result.face_blendshapes_count, + cpp_result.face_blendshapes.value().size()); + for (uint32_t i = 0; i < c_result.face_blendshapes_count; ++i) { + const auto& cpp_face_blendshapes = cpp_result.face_blendshapes.value(); + 
EXPECT_EQ(c_result.face_blendshapes[i].categories_count, + cpp_face_blendshapes[i].categories.size()); + for (uint32_t j = 0; j < c_result.face_blendshapes[i].categories_count; + ++j) { + const auto& cpp_category = cpp_face_blendshapes[i].categories[j]; + EXPECT_EQ(c_result.face_blendshapes[i].categories[j].index, + cpp_category.index); + EXPECT_FLOAT_EQ(c_result.face_blendshapes[i].categories[j].score, + cpp_category.score); + EXPECT_EQ( + std::string(c_result.face_blendshapes[i].categories[j].category_name), + cpp_category.category_name); } } // Verify conversion of facial_transformation_matrixes - if (cpp_result.facial_transformation_matrixes.has_value()) { - EXPECT_EQ(c_result.facial_transformation_matrixes_count, - cpp_result.facial_transformation_matrixes.value().size()); - for (uint32_t i = 0; i < c_result.facial_transformation_matrixes_count; - ++i) { - const auto& cpp_facial_transformation_matrixes = - cpp_result.facial_transformation_matrixes.value(); - // Assuming Matrix struct contains data array and dimensions - const auto& cpp_matrix = cpp_facial_transformation_matrixes[i]; - EXPECT_EQ(c_result.facial_transformation_matrixes[i].rows, - cpp_matrix.rows()); - EXPECT_EQ(c_result.facial_transformation_matrixes[i].cols, - cpp_matrix.cols()); - // Check each element of the matrix - for (long row = 0; row < cpp_matrix.rows(); ++row) { - for (long col = 0; col < cpp_matrix.cols(); ++col) { - size_t index = col * cpp_matrix.rows() + row; // Column-major index - EXPECT_FLOAT_EQ( - c_result.facial_transformation_matrixes[i].data[index], - cpp_matrix(row, col)); - } + EXPECT_EQ(c_result.facial_transformation_matrixes_count, + cpp_result.facial_transformation_matrixes.value().size()); + for (uint32_t i = 0; i < c_result.facial_transformation_matrixes_count; ++i) { + const auto& cpp_facial_transformation_matrixes = + cpp_result.facial_transformation_matrixes.value(); + // Assuming Matrix struct contains data array and dimensions + const auto& cpp_matrix = 
cpp_facial_transformation_matrixes[i]; + EXPECT_EQ(c_result.facial_transformation_matrixes[i].rows, + cpp_matrix.rows()); + EXPECT_EQ(c_result.facial_transformation_matrixes[i].cols, + cpp_matrix.cols()); + // Check each element of the matrix + for (long row = 0; row < cpp_matrix.rows(); ++row) { + for (long col = 0; col < cpp_matrix.cols(); ++col) { + size_t index = col * cpp_matrix.rows() + row; // Column-major index + EXPECT_FLOAT_EQ(c_result.facial_transformation_matrixes[i].data[index], + cpp_matrix(row, col)); } } } From 9a20d6b3e4622460fc7fa0b4f14343fc34b380e9 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 11 Dec 2023 19:56:17 -0800 Subject: [PATCH 25/39] Create an explicit GlRuntimeException class PiperOrigin-RevId: 590035213 --- .../com/google/mediapipe/glutil/GlThread.java | 3 +- .../google/mediapipe/glutil/ShaderUtil.java | 49 ++++++++++++------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/mediapipe/java/com/google/mediapipe/glutil/GlThread.java b/mediapipe/java/com/google/mediapipe/glutil/GlThread.java index b8d4fa636..b7b59163a 100644 --- a/mediapipe/java/com/google/mediapipe/glutil/GlThread.java +++ b/mediapipe/java/com/google/mediapipe/glutil/GlThread.java @@ -18,6 +18,7 @@ import android.opengl.GLES20; import android.os.Handler; import android.os.Looper; import android.util.Log; +import com.google.mediapipe.glutil.ShaderUtil.GlRuntimeException; import javax.annotation.Nullable; import javax.microedition.khronos.egl.EGLContext; import javax.microedition.khronos.egl.EGLSurface; @@ -111,7 +112,7 @@ public class GlThread extends Thread { 0); int status = GLES20.glCheckFramebufferStatus(GLES20.GL_FRAMEBUFFER); if (status != GLES20.GL_FRAMEBUFFER_COMPLETE) { - throw new RuntimeException("Framebuffer not complete, status=" + status); + throw new GlRuntimeException("Framebuffer not complete, status=" + status); } GLES20.glViewport(0, 0, width, height); ShaderUtil.checkGlError("glViewport"); diff --git 
a/mediapipe/java/com/google/mediapipe/glutil/ShaderUtil.java b/mediapipe/java/com/google/mediapipe/glutil/ShaderUtil.java index 1cdaed041..1efc86846 100644 --- a/mediapipe/java/com/google/mediapipe/glutil/ShaderUtil.java +++ b/mediapipe/java/com/google/mediapipe/glutil/ShaderUtil.java @@ -24,16 +24,15 @@ import java.nio.FloatBuffer; import java.util.Map; import javax.annotation.Nullable; -/** - * Utility class for managing GLSL shaders. - */ +/** Utility class for managing GLSL shaders. */ public class ShaderUtil { private static final FluentLogger logger = FluentLogger.forEnclosingClass(); /** * Loads a shader from source. - * @param shaderType a valid GL shader type, e.g. {@link GLES20#GL_VERTEX_SHADER} or - * {@link GLES20#GL_FRAGMENT_SHADER}. + * + * @param shaderType a valid GL shader type, e.g. {@link GLES20#GL_VERTEX_SHADER} or {@link + * GLES20#GL_FRAGMENT_SHADER}. * @param source the shader's source in text form. * @return a handle to the created shader, or 0 in case of error. */ @@ -44,8 +43,8 @@ public class ShaderUtil { int[] compiled = new int[1]; GLES20.glGetShaderiv(shader, GLES20.GL_COMPILE_STATUS, compiled, 0); if (compiled[0] == 0) { - logger.atSevere().log("Could not compile shader %d: %s", shaderType, - GLES20.glGetShaderInfoLog(shader)); + logger.atSevere().log( + "Could not compile shader %d: %s", shaderType, GLES20.glGetShaderInfoLog(shader)); GLES20.glDeleteShader(shader); shader = 0; } @@ -99,6 +98,7 @@ public class ShaderUtil { /** * Creates a texture. Binds it to texture unit 0 to perform setup. + * * @return the name of the new texture. */ public static int createRgbaTexture(int width, int height) { @@ -111,7 +111,8 @@ public class ShaderUtil { GLES20.GL_TEXTURE_2D, 0, GLES20.GL_RGBA, - width, height, + width, + height, 0, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, @@ -147,13 +148,11 @@ public class ShaderUtil { } /** - * Creates a {@link FloatBuffer} with the given arguments as contents. 
- * The buffer is created in native format for efficient use with OpenGL. + * Creates a {@link FloatBuffer} with the given arguments as contents. The buffer is created in + * native format for efficient use with OpenGL. */ public static FloatBuffer floatBuffer(float... values) { - ByteBuffer byteBuffer = - ByteBuffer.allocateDirect( - values.length * 4 /* sizeof(float) */); + ByteBuffer byteBuffer = ByteBuffer.allocateDirect(values.length * 4 /* sizeof(float) */); // use the device hardware's native byte order byteBuffer.order(ByteOrder.nativeOrder()); @@ -166,13 +165,29 @@ public class ShaderUtil { return floatBuffer; } - /** - * Calls {@link GLES20#glGetError} and raises an exception if there was an error. - */ + /** Calls {@link GLES20#glGetError} and raises an exception if there was an error. */ public static void checkGlError(String msg) { int error = GLES20.glGetError(); if (error != GLES20.GL_NO_ERROR) { - throw new RuntimeException(msg + ": GL error: 0x" + Integer.toHexString(error)); + throw new GlRuntimeException(msg + ": GL error: 0x" + Integer.toHexString(error), error); + } + } + + /** A custom {@link RuntimeException} indicating an OpenGl error. 
*/ + public static class GlRuntimeException extends RuntimeException { + private final int errorCode; + + public GlRuntimeException(String message, int errorCode) { + super(message); + this.errorCode = errorCode; + } + + public GlRuntimeException(String message) { + this(message, GLES20.GL_NO_ERROR); + } + + public int getErrorCode() { + return errorCode; } } } From 4237b765ce95af0813de4094ed1e21a67bad2a5f Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 12 Dec 2023 08:53:36 -0800 Subject: [PATCH 26/39] Internal PiperOrigin-RevId: 590220032 --- third_party/wasm_files.bzl | 48 +++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/third_party/wasm_files.bzl b/third_party/wasm_files.bzl index 58c0570e9..035e3d30b 100644 --- a/third_party/wasm_files.bzl +++ b/third_party/wasm_files.bzl @@ -12,72 +12,72 @@ def wasm_files(): http_file( name = "com_google_mediapipe_wasm_audio_wasm_internal_js", - sha256 = "8722e1047a54dcd08206d018a4bc348dd820f479cb10218c5cbcd411dd9e1c0c", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_internal.js?generation=1698954798232640"], + sha256 = "b03c28f6f65bfb4e0463d22d28831cb052042cac2955b75d5cf3ab196e80fd15", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_internal.js?generation=1702396665023163"], ) http_file( name = "com_google_mediapipe_wasm_audio_wasm_internal_wasm", - sha256 = "bcd230238dbabdf09eab58dbbe7e36deacf7e3fc57c2d67af679188d37731883", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_internal.wasm?generation=1698954800502145"], + sha256 = "4d1eecc9f77a3cc9f8b948e56e10b7e7bb20b3b5e36737efd1dbfa75fed1a1be", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_internal.wasm?generation=1702396667340558"], ) http_file( name = "com_google_mediapipe_wasm_audio_wasm_nosimd_internal_js", - sha256 = "a5d80eefde268611ed385b90fab9defc37df50a124a15282961dbaa30b62c14d", - urls = 
["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_nosimd_internal.js?generation=1698954802474619"], + sha256 = "5a9350fddee447423d1480f3fe8fef30a0d3db510ec337cf2a050976e95df3cc", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_nosimd_internal.js?generation=1702396669287559"], ) http_file( name = "com_google_mediapipe_wasm_audio_wasm_nosimd_internal_wasm", - sha256 = "2fc431cc62330332c0c1e730d44b933a79e4572be0dc5c5a82635bd5dc330b94", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_nosimd_internal.wasm?generation=1698954804758365"], + sha256 = "c178ac0e7de77c2b377370a1b33543b1701e5c415990ec6ebe09a1225dde8614", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/audio_wasm_nosimd_internal.wasm?generation=1702396671552140"], ) http_file( name = "com_google_mediapipe_wasm_text_wasm_internal_js", - sha256 = "b100d299cb06c0fd7cf40099653e8d4a3ac953937402a5d7c3a3a02fa59d8105", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_internal.js?generation=1698954806886809"], + sha256 = "c718819f63962c094e5922970ecba0c34721aae10697df539fdb98c89922a600", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_internal.js?generation=1702396673600705"], ) http_file( name = "com_google_mediapipe_wasm_text_wasm_internal_wasm", - sha256 = "ae1b8f9684b9afa989b1144f25a2ae1bda809c811367475567c823d65d4fef0a", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_internal.wasm?generation=1698954809121561"], + sha256 = "5a102965ea9daa73db46f8c3bbb63a777a5ca9b3b2ae740db75d1c1bae453d8d", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_internal.wasm?generation=1702396675922697"], ) http_file( name = "com_google_mediapipe_wasm_text_wasm_nosimd_internal_js", - sha256 = "d8db720214acfa1b758099daeb07c02e04b7221805523e9b6926a1f11ec00183", - urls = 
["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_nosimd_internal.js?generation=1698954811167986"], + sha256 = "ab97bfb473cc1da5167a0ea331dd103cce7c734b1f331c7668bf8c183f691685", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_nosimd_internal.js?generation=1702396677878961"], ) http_file( name = "com_google_mediapipe_wasm_text_wasm_nosimd_internal_wasm", - sha256 = "44b8e5be980e6fe79fa9a8b02551ef50e1d74682dd8f3e6cf92435cf43e8ef91", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_nosimd_internal.wasm?generation=1698954813498288"], + sha256 = "7a77b71c52c3b30e6f262448ef49311af6a0456daef914373e48669b7cca620b", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/text_wasm_nosimd_internal.wasm?generation=1702396680089065"], ) http_file( name = "com_google_mediapipe_wasm_vision_wasm_internal_js", - sha256 = "f5ba7b1d0adad63c581a80113567913a7106b20f8d26982f82c56998c7d44465", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_internal.js?generation=1698954815469471"], + sha256 = "85dbd2eda56023ba3862b38d0e2299d96206be55a92b0fa160ff1742f564d476", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_internal.js?generation=1702396682088028"], ) http_file( name = "com_google_mediapipe_wasm_vision_wasm_internal_wasm", - sha256 = "d502a753b40626a36734806599bf0e765cf3a611653d980b39a5474998f1d6fe", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_internal.wasm?generation=1698954817976682"], + sha256 = "e3945f005e918f1176335b045fb7747eac1856579b077e7c3da5167b42d8a008", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_internal.wasm?generation=1702396684272221"], ) http_file( name = "com_google_mediapipe_wasm_vision_wasm_nosimd_internal_js", - sha256 = "731786df74b19150eecc8fe69ddf16040bbbba8cf2d22c964ef38ecef25d1e1f", - urls = 
["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_nosimd_internal.js?generation=1698954819912485"], + sha256 = "a9b94a848af720b2e58156da31d7dd123fd3422c93f2a5ecf70f9ba7ebc99335", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_nosimd_internal.js?generation=1702396686268551"], ) http_file( name = "com_google_mediapipe_wasm_vision_wasm_nosimd_internal_wasm", - sha256 = "2c16ecc52398857c5ce45d58c98fe16e795b6a6eda6a2a8aa00f519a4bd15f2a", - urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_nosimd_internal.wasm?generation=1698954822497945"], + sha256 = "d832736bedf673d506454a54162bb9330daf149194691997485d8e46aed48f45", + urls = ["https://storage.googleapis.com/mediapipe-assets/wasm/vision_wasm_nosimd_internal.wasm?generation=1702396688641867"], ) From 15f2b32006341da2816ad8ad2121b4f806d3d6d2 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Tue, 12 Dec 2023 13:38:51 -0800 Subject: [PATCH 27/39] Expose MediaPipe's ABSL and Sentencepiece as shared dependencies PiperOrigin-RevId: 590320902 --- WORKSPACE | 30 +++----------------------- third_party/BUILD | 5 ++++- third_party/shared_dependencies.bzl | 33 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 28 deletions(-) create mode 100644 third_party/shared_dependencies.bzl diff --git a/WORKSPACE b/WORKSPACE index 3a539569f..922b2c102 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -21,21 +21,9 @@ bazel_skylib_workspace() load("@bazel_skylib//lib:versions.bzl", "versions") versions.check(minimum_bazel_version = "3.7.2") -# ABSL cpp library lts_2023_01_25. 
-http_archive( - name = "com_google_absl", - urls = [ - "https://github.com/abseil/abseil-cpp/archive/refs/tags/20230125.0.tar.gz", - ], - patches = [ - "@//third_party:com_google_absl_windows_patch.diff" - ], - patch_args = [ - "-p1", - ], - strip_prefix = "abseil-cpp-20230125.0", - sha256 = "3ea49a7d97421b88a8c48a0de16c16048e17725c7ec0f1d3ea2683a2a75adc21" -) +load("@//third_party:shared_dependencies.bzl", "mediapipe_absl", "mediapipe_sentencepiece") +mediapipe_absl() +mediapipe_sentencepiece() http_archive( name = "rules_cc", @@ -241,18 +229,6 @@ http_archive( build_file = "@//third_party:pffft.BUILD", ) -# sentencepiece -http_archive( - name = "com_google_sentencepiece", - strip_prefix = "sentencepiece-0.1.96", - sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754", - urls = [ - "https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip" - ], - build_file = "@//third_party:sentencepiece.BUILD", - patches = ["@//third_party:com_google_sentencepiece.diff"], - patch_args = ["-p1"], -) http_archive( name = "darts_clone", diff --git a/third_party/BUILD b/third_party/BUILD index d784fc3cc..3b8608cff 100644 --- a/third_party/BUILD +++ b/third_party/BUILD @@ -20,7 +20,10 @@ licenses(["notice"]) # Apache License 2.0 package(default_visibility = ["//visibility:public"]) -exports_files(["LICENSE"]) +exports_files([ + "LICENSE", + "shared_dependencies.bzl", +]) cc_library( name = "glog", diff --git a/third_party/shared_dependencies.bzl b/third_party/shared_dependencies.bzl new file mode 100644 index 000000000..6cd211a38 --- /dev/null +++ b/third_party/shared_dependencies.bzl @@ -0,0 +1,33 @@ +"""MediaPipe's shared dependencies that can be used by dependent projects. Includes build patches.""" + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +# ABSL cpp library lts_2023_01_25. 
+def mediapipe_absl(): + http_archive( + name = "com_google_absl", + urls = [ + "https://github.com/abseil/abseil-cpp/archive/refs/tags/20230125.0.tar.gz", + ], + patches = [ + "@//third_party:com_google_absl_windows_patch.diff", + ], + patch_args = [ + "-p1", + ], + strip_prefix = "abseil-cpp-20230125.0", + sha256 = "3ea49a7d97421b88a8c48a0de16c16048e17725c7ec0f1d3ea2683a2a75adc21", + ) + +def mediapipe_sentencepiece(): + http_archive( + name = "com_google_sentencepiece", + strip_prefix = "sentencepiece-0.1.96", + sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754", + urls = [ + "https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip", + ], + build_file = "@//third_party:sentencepiece.BUILD", + patches = ["@//third_party:com_google_sentencepiece.diff"], + patch_args = ["-p1"], + ) From 4892209da99158a3aa3c0388d6bfe9e947c12ee4 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 13 Dec 2023 09:47:13 -0800 Subject: [PATCH 28/39] No public description PiperOrigin-RevId: 590629265 --- mediapipe/framework/formats/BUILD | 64 ++++--- mediapipe/framework/formats/hardware_buffer.h | 167 ++++++++++++++++++ .../formats/hardware_buffer_android.cc | 152 ++++++++++++++++ .../framework/formats/hardware_buffer_test.cc | 131 ++++++++++++++ mediapipe/framework/formats/tensor.cc | 16 +- mediapipe/framework/formats/tensor.h | 17 +- mediapipe/framework/formats/tensor_ahwb.cc | 166 ++++++++--------- .../framework/formats/tensor_ahwb_test.cc | 2 + 8 files changed, 591 insertions(+), 124 deletions(-) create mode 100644 mediapipe/framework/formats/hardware_buffer.h create mode 100644 mediapipe/framework/formats/hardware_buffer_android.cc create mode 100644 mediapipe/framework/formats/hardware_buffer_test.cc diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD index 047b95d32..9f3874f11 100644 --- a/mediapipe/framework/formats/BUILD +++ b/mediapipe/framework/formats/BUILD @@ -155,6 +155,27 @@ cc_library( ], ) 
+cc_library( + name = "hardware_buffer", + srcs = ["hardware_buffer_android.cc"], + hdrs = ["hardware_buffer.h"], + linkopts = select({ + "//conditions:default": [], + # Option for vendor binaries to avoid linking libandroid.so. + "//mediapipe/framework:android_no_jni": [], + "//mediapipe:android": ["-landroid"], + ":android_link_native_window": [ + "-lnativewindow", # Provides to vendor binaries on Android API >= 26. + ], + }), + visibility = ["//visibility:private"], + deps = [ + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:statusor", + "@com_google_absl//absl/log:absl_check", + ], +) + cc_library( name = "image_frame", srcs = ["image_frame.cc"], @@ -493,28 +514,31 @@ cc_library( "//conditions:default": [], # Option for vendor binaries to avoid linking libandroid.so. "//mediapipe/framework:android_no_jni": [], - "//mediapipe:android": ["-landroid"], - ":android_link_native_window": [ - "-lnativewindow", # Provides to vendor binaries on Android API >= 26. - ], }), deps = [ - "//mediapipe/framework:port", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/log:absl_check", - "@com_google_absl//absl/log:absl_log", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/synchronization", - ] + select({ - "//mediapipe/gpu:disable_gpu": [], - "//conditions:default": [ - "//mediapipe/gpu:gl_base", - "//mediapipe/gpu:gl_context", - ], - }) + - select({ - "//conditions:default": [], - }), + "//mediapipe/framework:port", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/log:absl_check", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/synchronization", + ] + select({ + "//mediapipe/gpu:disable_gpu": [], + "//conditions:default": [ + "//mediapipe/gpu:gl_base", + "//mediapipe/gpu:gl_context", + ], + "//mediapipe:android": [ + ":hardware_buffer", + "//mediapipe/gpu:gl_base", + "//mediapipe/gpu:gl_context", + ], + 
":android_link_native_window": [ + ":hardware_buffer", + "//mediapipe/gpu:gl_base", + "//mediapipe/gpu:gl_context", + ], + }), ) cc_test( diff --git a/mediapipe/framework/formats/hardware_buffer.h b/mediapipe/framework/formats/hardware_buffer.h new file mode 100644 index 000000000..52180eaa9 --- /dev/null +++ b/mediapipe/framework/formats/hardware_buffer.h @@ -0,0 +1,167 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_FRAMEWORK_FORMATS_HARDWARE_BUFFER_H_ +#define MEDIAPIPE_FRAMEWORK_FORMATS_HARDWARE_BUFFER_H_ + +#include +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" + +typedef struct AHardwareBuffer AHardwareBuffer; + +namespace mediapipe { + +struct HardwareBufferSpec { + // Buffer pixel formats. See NDK's hardware_buffer.h for descriptions. + enum { + // This must be kept in sync with NDK's hardware_buffer.h + AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM = 0x01, + AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM = 0x03, + AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT = 0x16, + AHARDWAREBUFFER_FORMAT_BLOB = 0x21, + AHARDWAREBUFFER_FORMAT_R8_UNORM = 0x38, + }; + + // Buffer usage descriptions. See NDK's hardware_buffer.h for descriptions. 
+ enum { + // This must be kept in sync with NDK's hardware_buffer.h + AHARDWAREBUFFER_USAGE_CPU_READ_NEVER = 0x0UL, + AHARDWAREBUFFER_USAGE_CPU_READ_RARELY = 0x2UL, + AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN = 0x3UL, + AHARDWAREBUFFER_USAGE_CPU_WRITE_NEVER = UINT64_C(0) << 4, + AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY = UINT64_C(2) << 4, + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN = UINT64_C(3) << 4, + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE = UINT64_C(1) << 8, + AHARDWAREBUFFER_USAGE_GPU_FRAMEBUFFER = UINT64_C(1) << 9, + AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER = UINT64_C(1) << 24, + }; + + // Hashing required to use HardwareBufferSpec as key in buffer pools. See + // absl::Hash for details. + template + friend H AbslHashValue(H h, const HardwareBufferSpec& spec) { + return H::combine(std::move(h), spec.width, spec.height, spec.layers, + spec.format, spec.usage); + } + + uint32_t width = 0; + uint32_t height = 0; + uint32_t layers = 0; + uint32_t format = 0; + uint64_t usage = 0; +}; + +// Equality operators +inline bool operator==(const HardwareBufferSpec& lhs, + const HardwareBufferSpec& rhs) { + return lhs.width == rhs.width && lhs.height == rhs.height && + lhs.layers == rhs.layers && lhs.format == rhs.format && + lhs.usage == rhs.usage; +} +inline bool operator!=(const HardwareBufferSpec& lhs, + const HardwareBufferSpec& rhs) { + return !operator==(lhs, rhs); +} + +// For internal use only. Thinly wraps the Android NDK AHardwareBuffer. +class HardwareBuffer { + public: + // Constructs a HardwareBuffer instance from a newly allocated Android NDK + // AHardwareBuffer. + static absl::StatusOr Create(const HardwareBufferSpec& spec); + + // Destructs the HardwareBuffer, releasing the AHardwareBuffer. + ~HardwareBuffer(); + + // Support HardwareBuffer moves. + HardwareBuffer(HardwareBuffer&& other); + + // Delete assignment and copy constructors. 
+ HardwareBuffer(HardwareBuffer& other) = delete; + HardwareBuffer(const HardwareBuffer& other) = delete; + HardwareBuffer& operator=(const HardwareBuffer&) = delete; + + // Returns true if AHWB is supported. + static bool IsSupported(); + + // Lock the hardware buffer for the given usage flags. fence_file_descriptor + // specifies a fence file descriptor on which to wait before locking the + // buffer. Returns raw memory address if lock is successful, an error + // status otherwise. + ABSL_MUST_USE_RESULT absl::StatusOr Lock( + uint64_t usage, std::optional fence_file_descriptor = std::nullopt); + + // Unlocks the hardware buffer synchronously. This method blocks until + // unlocking is complete. + absl::Status Unlock(); + + // Unlocks the hardware buffer asynchronously. It returns a file_descriptor + // which can be used as a fence that is signaled once unlocking is complete. + absl::StatusOr UnlockAsync(); + + // Returns the underlying raw AHardwareBuffer pointer to be used directly with + // AHardwareBuffer APIs. + AHardwareBuffer* GetAHardwareBuffer() const { return ahw_buffer_; } + + // Returns whether this HardwareBuffer contains a valid AHardwareBuffer. + bool IsValid() const { return ahw_buffer_ != nullptr; } + + // Returns whether this HardwareBuffer is locked. + bool IsLocked() const { return is_locked_; } + + // Releases the AHardwareBuffer. + void Reset(); + + // Ahwb's are aligned to an implementation specific cacheline size. + uint32_t GetAlignedWidth() const; + + // Returns buffer spec. + const HardwareBufferSpec& spec() const { return spec_; } + + private: + // Allocates an AHardwareBuffer instance. + static absl::StatusOr AllocateAHardwareBuffer( + const HardwareBufferSpec& spec); + + // Constructs a HardwareBuffer instance from an already acquired + // AHardwareBuffer instance and its spec. + HardwareBuffer(const HardwareBufferSpec& spec, AHardwareBuffer* ahwb); + + // Unlocks the hardware buffer.
If fence_file_descriptor_ptr is not nullptr, + // the function won't block and instead fence_file_descriptor_ptr will be set + // to a file descriptor to become signaled once unlocking is complete. + absl::Status UnlockInternal(int* fence_file_descriptor_ptr); + + // Releases ahw_buffer_ AHardwareBuffer instance. + absl::Status ReleaseAHardwareBuffer(); + + // Buffer spec. + HardwareBufferSpec spec_ = {}; + + // Android NDK AHardwareBuffer. + AHardwareBuffer* ahw_buffer_ = nullptr; + + // Indicates if AHardwareBuffer is locked for reading or writing. + bool is_locked_ = false; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_FRAMEWORK_FORMATS_HARDWARE_BUFFER_H_ diff --git a/mediapipe/framework/formats/hardware_buffer_android.cc b/mediapipe/framework/formats/hardware_buffer_android.cc new file mode 100644 index 000000000..1df6ad841 --- /dev/null +++ b/mediapipe/framework/formats/hardware_buffer_android.cc @@ -0,0 +1,152 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#if !defined(MEDIAPIPE_NO_JNI) && \ + (__ANDROID_API__ >= 26 || \ + defined(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)) + +#include + +#include + +#include "absl/log/absl_check.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/hardware_buffer.h" +#include "mediapipe/framework/port/ret_check.h" + +namespace mediapipe { + +HardwareBuffer::HardwareBuffer(HardwareBuffer &&other) { + spec_ = std::exchange(other.spec_, {}); + ahw_buffer_ = std::exchange(other.ahw_buffer_, nullptr); + is_locked_ = std::exchange(other.is_locked_, false); +} + +HardwareBuffer::HardwareBuffer(const HardwareBufferSpec &spec, + AHardwareBuffer *ahwb) + : spec_(spec), ahw_buffer_(ahwb), is_locked_(false) {} + +HardwareBuffer::~HardwareBuffer() { Reset(); } + +absl::StatusOr HardwareBuffer::Create( + const HardwareBufferSpec &spec) { + MP_ASSIGN_OR_RETURN(AHardwareBuffer * ahwb, AllocateAHardwareBuffer(spec)); + return HardwareBuffer(spec, ahwb); +} + +bool HardwareBuffer::IsSupported() { + if (__builtin_available(android 26, *)) { + return true; + } + return false; +} + +absl::StatusOr HardwareBuffer::AllocateAHardwareBuffer( + const HardwareBufferSpec &spec) { + RET_CHECK(IsSupported()) << "AndroidHWBuffers not supported"; + + AHardwareBuffer *output = nullptr; + int error = 0; + if (__builtin_available(android 26, *)) { + AHardwareBuffer_Desc desc = { + .width = spec.width, + .height = spec.height, + .layers = spec.layers, + .format = spec.format, + .usage = spec.usage, + }; + error = AHardwareBuffer_allocate(&desc, &output); + } + RET_CHECK(!error && output != nullptr) << "AHardwareBuffer_allocate failed"; + return output; +} + +absl::Status HardwareBuffer::ReleaseAHardwareBuffer() { + if (ahw_buffer_ == nullptr) { + return absl::OkStatus(); + } + if (is_locked_) { + MP_RETURN_IF_ERROR(Unlock()); + } + if (__builtin_available(android 26, *)) { + AHardwareBuffer_release(ahw_buffer_); + } + spec_ = {}; + ahw_buffer_ = nullptr; + 
return absl::OkStatus(); +} + +absl::StatusOr HardwareBuffer::Lock( + uint64_t usage, std::optional fence_file_descriptor) { + RET_CHECK(ahw_buffer_ != nullptr) << "Hardware Buffer not allocated"; + RET_CHECK(!is_locked_) << "Hardware Buffer already locked"; + void *mem = nullptr; + if (__builtin_available(android 26, *)) { + const int error = AHardwareBuffer_lock( + ahw_buffer_, usage, + fence_file_descriptor.has_value() ? *fence_file_descriptor : -1, + nullptr, &mem); + RET_CHECK(error == 0) << "Hardware Buffer lock failed. Error: " << error; + } + is_locked_ = true; + return mem; +} + +absl::Status HardwareBuffer::Unlock() { + return UnlockInternal(/*fence_file_descriptor=*/nullptr); +} + +absl::StatusOr HardwareBuffer::UnlockAsync() { + int fence_file_descriptor = -1; + MP_RETURN_IF_ERROR(UnlockInternal(&fence_file_descriptor)); + return fence_file_descriptor; +} + +absl::Status HardwareBuffer::UnlockInternal(int *fence_file_descriptor) { + RET_CHECK(ahw_buffer_ != nullptr) << "Hardware Buffer not allocated"; + if (!is_locked_) { + return absl::OkStatus(); + } + if (__builtin_available(android 26, *)) { + const int error = + AHardwareBuffer_unlock(ahw_buffer_, fence_file_descriptor); + RET_CHECK(error == 0) << "Hardware Buffer unlock failed. 
error: " << error; + } + is_locked_ = false; + return absl::OkStatus(); +} + +uint32_t HardwareBuffer::GetAlignedWidth() const { + if (__builtin_available(android 26, *)) { + ABSL_CHECK(ahw_buffer_ != nullptr) << "Hardware Buffer not allocated"; + AHardwareBuffer_Desc desc = {}; + AHardwareBuffer_describe(ahw_buffer_, &desc); + ABSL_CHECK_GT(desc.stride, 0); + return desc.stride; + } + return 0; +} + +void HardwareBuffer::Reset() { + const auto success = ReleaseAHardwareBuffer(); + if (!success.ok()) { + ABSL_LOG(DFATAL) << "Failed to release AHardwareBuffer: " << success; + } +} + +} // namespace mediapipe + +#endif // !defined(MEDIAPIPE_NO_JNI) && (__ANDROID_API__>= 26 || + // defined(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)) diff --git a/mediapipe/framework/formats/hardware_buffer_test.cc b/mediapipe/framework/formats/hardware_buffer_test.cc new file mode 100644 index 000000000..9518fbc08 --- /dev/null +++ b/mediapipe/framework/formats/hardware_buffer_test.cc @@ -0,0 +1,131 @@ +#include "mediapipe/framework/formats/hardware_buffer.h" + +#include + +#include + +#include "base/logging.h" +#include "mediapipe/framework/port/status_macros.h" +#include "testing/base/public/gmock.h" +#include "testing/base/public/gunit.h" + +namespace mediapipe { + +namespace { + +HardwareBufferSpec GetTestHardwareBufferSpec(uint32_t size_bytes) { + return {.width = size_bytes, + .height = 1, + .layers = 1, + .format = HardwareBufferSpec::AHARDWAREBUFFER_FORMAT_BLOB, + .usage = HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY | + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN | + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER}; +} + +TEST(HardwareBufferTest, ShouldConstructValidAHardwareBuffer) { + MP_ASSERT_OK_AND_ASSIGN( + HardwareBuffer hardware_buffer, + HardwareBuffer::Create(GetTestHardwareBufferSpec(/*size_bytes=*/123))); + EXPECT_NE(hardware_buffer.GetAHardwareBuffer(), nullptr); + 
EXPECT_TRUE(hardware_buffer.IsValid()); +} + +TEST(HardwareBufferTest, ShouldResetValidAHardwareBuffer) { + MP_ASSERT_OK_AND_ASSIGN( + HardwareBuffer hardware_buffer, + HardwareBuffer::Create(GetTestHardwareBufferSpec(/*size_bytes=*/123))); + EXPECT_TRUE(hardware_buffer.IsValid()); + EXPECT_NE(*hardware_buffer.Lock( + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY), + nullptr); + EXPECT_TRUE(hardware_buffer.IsLocked()); + + hardware_buffer.Reset(); + + EXPECT_FALSE(hardware_buffer.IsValid()); + EXPECT_FALSE(hardware_buffer.IsLocked()); +} + +TEST(HardwareBufferTest, ShouldAllocateRequestedBufferSize) { + constexpr int kBufferSize = 123; + const HardwareBufferSpec spec = GetTestHardwareBufferSpec(kBufferSize); + MP_ASSERT_OK_AND_ASSIGN(HardwareBuffer hardware_buffer, + HardwareBuffer::Create(spec)); + + EXPECT_TRUE(hardware_buffer.IsValid()); + if (__builtin_available(android 26, *)) { + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(hardware_buffer.GetAHardwareBuffer(), &desc); + EXPECT_EQ(desc.width, spec.width); + EXPECT_EQ(desc.height, spec.height); + EXPECT_EQ(desc.layers, spec.layers); + EXPECT_EQ(desc.format, spec.format); + EXPECT_EQ(desc.usage, spec.usage); + } + EXPECT_EQ(hardware_buffer.spec().width, spec.width); + EXPECT_EQ(hardware_buffer.spec().height, spec.height); + EXPECT_EQ(hardware_buffer.spec().layers, spec.layers); + EXPECT_EQ(hardware_buffer.spec().format, spec.format); + EXPECT_EQ(hardware_buffer.spec().usage, spec.usage); +} + +TEST(HardwareBufferTest, ShouldSupportMoveConstructor) { + constexpr int kBufferSize = 123; + const auto spec = GetTestHardwareBufferSpec(kBufferSize); + MP_ASSERT_OK_AND_ASSIGN(HardwareBuffer hardware_buffer_a, + HardwareBuffer::Create(spec)); + EXPECT_TRUE(hardware_buffer_a.IsValid()); + void* const ahardware_buffer_ptr_a = hardware_buffer_a.GetAHardwareBuffer(); + EXPECT_NE(ahardware_buffer_ptr_a, nullptr); + EXPECT_FALSE(hardware_buffer_a.IsLocked()); + MP_ASSERT_OK_AND_ASSIGN( + void* const 
hardware_buffer_a_locked_ptr, + hardware_buffer_a.Lock( + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY)); + EXPECT_NE(hardware_buffer_a_locked_ptr, nullptr); + EXPECT_TRUE(hardware_buffer_a.IsLocked()); + + HardwareBuffer hardware_buffer_b(std::move(hardware_buffer_a)); + + EXPECT_FALSE(hardware_buffer_a.IsValid()); + EXPECT_FALSE(hardware_buffer_a.IsLocked()); + void* const ahardware_buffer_ptr_b = hardware_buffer_b.GetAHardwareBuffer(); + EXPECT_EQ(ahardware_buffer_ptr_a, ahardware_buffer_ptr_b); + EXPECT_TRUE(hardware_buffer_b.IsValid()); + EXPECT_TRUE(hardware_buffer_b.IsLocked()); + + EXPECT_EQ(hardware_buffer_a.spec(), HardwareBufferSpec()); + EXPECT_EQ(hardware_buffer_b.spec(), spec); + + MP_ASSERT_OK(hardware_buffer_b.Unlock()); +} + +TEST(HardwareBufferTest, ShouldSupportReadWrite) { + constexpr std::string_view kTestString = "TestString"; + constexpr int kBufferSize = kTestString.size(); + MP_ASSERT_OK_AND_ASSIGN( + HardwareBuffer hardware_buffer, + HardwareBuffer::Create(GetTestHardwareBufferSpec(kBufferSize))); + + // Write test string. + MP_ASSERT_OK_AND_ASSIGN( + void* const write_ptr, + hardware_buffer.Lock( + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY)); + memcpy(write_ptr, kTestString.data(), kBufferSize); + MP_ASSERT_OK(hardware_buffer.Unlock()); + + // Read test string. 
+ MP_ASSERT_OK_AND_ASSIGN( + void* const read_ptr, + hardware_buffer.Lock( + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_READ_RARELY)); + EXPECT_EQ(memcmp(read_ptr, kTestString.data(), kBufferSize), 0); + MP_ASSERT_OK(hardware_buffer.Unlock()); +} + +} // namespace + +} // namespace mediapipe diff --git a/mediapipe/framework/formats/tensor.cc b/mediapipe/framework/formats/tensor.cc index 2f2bfaae4..49987791a 100644 --- a/mediapipe/framework/formats/tensor.cc +++ b/mediapipe/framework/formats/tensor.cc @@ -24,6 +24,9 @@ #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 #include "mediapipe/gpu/gl_base.h" #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 +#ifdef MEDIAPIPE_TENSOR_USE_AHWB +#include "mediapipe/framework/formats/hardware_buffer.h" +#endif // MEDIAPIPE_TENSOR_USE_AHWB #if MEDIAPIPE_METAL_ENABLED #import @@ -536,9 +539,8 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const { void* ptr = MapAhwbToCpuRead(); if (ptr) { valid_ |= kValidCpu; - return {ptr, std::move(lock), [ahwb = ahwb_] { - auto error = AHardwareBuffer_unlock(ahwb, nullptr); - ABSL_CHECK(error == 0) << "AHardwareBuffer_unlock " << error; + return {ptr, std::move(lock), [ahwb = ahwb_.get()] { + ABSL_CHECK_OK(ahwb->Unlock()) << "Unlock failed."; }}; } } @@ -620,9 +622,11 @@ Tensor::CpuWriteView Tensor::GetCpuWriteView( if (__builtin_available(android 26, *)) { void* ptr = MapAhwbToCpuWrite(); if (ptr) { - return {ptr, std::move(lock), [ahwb = ahwb_, fence_fd = &fence_fd_] { - auto error = AHardwareBuffer_unlock(ahwb, fence_fd); - ABSL_CHECK(error == 0) << "AHardwareBuffer_unlock " << error; + return {ptr, std::move(lock), + [ahwb = ahwb_.get(), fence_fd = &fence_fd_] { + auto fence_fd_status = ahwb->UnlockAsync(); + ABSL_CHECK_OK(fence_fd_status) << "Unlock failed."; + *fence_fd = fence_fd_status.value(); }}; } } diff --git a/mediapipe/framework/formats/tensor.h b/mediapipe/framework/formats/tensor.h index 701707ded..361883a67 100644 --- 
a/mediapipe/framework/formats/tensor.h +++ b/mediapipe/framework/formats/tensor.h @@ -44,7 +44,8 @@ #ifdef MEDIAPIPE_TENSOR_USE_AHWB #include #include -#include + +#include "mediapipe/framework/formats/hardware_buffer.h" #endif // MEDIAPIPE_TENSOR_USE_AHWB #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 #include "mediapipe/gpu/gl_base.h" @@ -195,9 +196,11 @@ class Tensor { using FinishingFunc = std::function; class AHardwareBufferView : public View { public: - AHardwareBuffer* handle() const { return handle_; } + AHardwareBuffer* handle() const { + return hardware_buffer_->GetAHardwareBuffer(); + } AHardwareBufferView(AHardwareBufferView&& src) : View(std::move(src)) { - handle_ = std::exchange(src.handle_, nullptr); + hardware_buffer_ = std::move(src.hardware_buffer_); file_descriptor_ = src.file_descriptor_; fence_fd_ = std::exchange(src.fence_fd_, nullptr); ahwb_written_ = std::exchange(src.ahwb_written_, nullptr); @@ -222,17 +225,17 @@ class Tensor { protected: friend class Tensor; - AHardwareBufferView(AHardwareBuffer* handle, int file_descriptor, + AHardwareBufferView(HardwareBuffer* hardware_buffer, int file_descriptor, int* fence_fd, FinishingFunc* ahwb_written, std::function* release_callback, std::unique_ptr&& lock) : View(std::move(lock)), - handle_(handle), + hardware_buffer_(hardware_buffer), file_descriptor_(file_descriptor), fence_fd_(fence_fd), ahwb_written_(ahwb_written), release_callback_(release_callback) {} - AHardwareBuffer* handle_; + HardwareBuffer* hardware_buffer_; int file_descriptor_; // The view sets some Tensor's fields. The view is released prior to tensor. int* fence_fd_; @@ -384,7 +387,7 @@ class Tensor { mutable std::unique_ptr mtl_resources_; #ifdef MEDIAPIPE_TENSOR_USE_AHWB - mutable AHardwareBuffer* ahwb_ = nullptr; + mutable std::unique_ptr ahwb_; // Signals when GPU finished writing into SSBO so AHWB can be used then. Or // signals when writing into AHWB has been finished so GPU can read from SSBO. 
// Sync and FD are bound together. diff --git a/mediapipe/framework/formats/tensor_ahwb.cc b/mediapipe/framework/formats/tensor_ahwb.cc index 339148e94..05c1e4b6e 100644 --- a/mediapipe/framework/formats/tensor_ahwb.cc +++ b/mediapipe/framework/formats/tensor_ahwb.cc @@ -10,7 +10,7 @@ #include "absl/log/absl_check.h" #include "absl/log/absl_log.h" #include "absl/synchronization/mutex.h" -#include "mediapipe/framework/port.h" +#include "mediapipe/framework/formats/hardware_buffer.h" #include "mediapipe/gpu/gl_base.h" #endif // MEDIAPIPE_TENSOR_USE_AHWB @@ -97,7 +97,7 @@ class DelayedReleaser { DelayedReleaser(DelayedReleaser&&) = delete; DelayedReleaser& operator=(DelayedReleaser&&) = delete; - static void Add(AHardwareBuffer* ahwb, GLuint opengl_buffer, + static void Add(std::unique_ptr ahwb, GLuint opengl_buffer, EGLSyncKHR ssbo_sync, GLsync ssbo_read, Tensor::FinishingFunc&& ahwb_written, std::shared_ptr gl_context, @@ -115,8 +115,8 @@ class DelayedReleaser { // Using `new` to access a non-public constructor. to_release_local.emplace_back(absl::WrapUnique(new DelayedReleaser( - ahwb, opengl_buffer, ssbo_sync, ssbo_read, std::move(ahwb_written), - gl_context, std::move(callback)))); + std::move(ahwb), opengl_buffer, ssbo_sync, ssbo_read, + std::move(ahwb_written), gl_context, std::move(callback)))); for (auto it = to_release_local.begin(); it != to_release_local.end();) { if ((*it)->IsSignaled()) { it = to_release_local.erase(it); @@ -136,9 +136,6 @@ class DelayedReleaser { ~DelayedReleaser() { if (release_callback_) release_callback_(); - if (__builtin_available(android 26, *)) { - AHardwareBuffer_release(ahwb_); - } } bool IsSignaled() { @@ -181,7 +178,7 @@ class DelayedReleaser { } protected: - AHardwareBuffer* ahwb_; + std::unique_ptr ahwb_; GLuint opengl_buffer_; // TODO: use wrapper instead. 
EGLSyncKHR fence_sync_; @@ -192,12 +189,12 @@ class DelayedReleaser { std::function release_callback_; static inline std::deque> to_release_; - DelayedReleaser(AHardwareBuffer* ahwb, GLuint opengl_buffer, + DelayedReleaser(std::unique_ptr ahwb, GLuint opengl_buffer, EGLSyncKHR fence_sync, GLsync ssbo_read, Tensor::FinishingFunc&& ahwb_written, std::shared_ptr gl_context, std::function&& callback) - : ahwb_(ahwb), + : ahwb_(std::move(ahwb)), opengl_buffer_(opengl_buffer), fence_sync_(fence_sync), ssbo_read_(ssbo_read), @@ -214,7 +211,7 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferReadView() const { ABSL_CHECK(!(valid_ & kValidOpenGlTexture2d)) << "Tensor conversion between OpenGL texture and AHardwareBuffer is not " "supported."; - bool transfer = !ahwb_; + bool transfer = ahwb_ == nullptr; ABSL_CHECK(AllocateAHardwareBuffer()) << "AHardwareBuffer is not supported on the target system."; valid_ |= kValidAHardwareBuffer; @@ -223,12 +220,10 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferReadView() const { } else { if (valid_ & kValidOpenGlBuffer) CreateEglSyncAndFd(); } - return {ahwb_, - ssbo_written_, + return {ahwb_.get(), ssbo_written_, &fence_fd_, // The FD is created for SSBO -> AHWB synchronization. &ahwb_written_, // Filled by SetReadingFinishedFunc. - &release_callback_, - std::move(lock)}; + &release_callback_, std::move(lock)}; } void Tensor::CreateEglSyncAndFd() const { @@ -258,12 +253,11 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView( ABSL_CHECK(AllocateAHardwareBuffer(size_alignment)) << "AHardwareBuffer is not supported on the target system."; valid_ = kValidAHardwareBuffer; - return {ahwb_, + return {ahwb_.get(), /*ssbo_written=*/-1, - &fence_fd_, // For SetWritingFinishedFD. - &ahwb_written_, - &release_callback_, - std::move(lock)}; + &fence_fd_, // For SetWritingFinishedFD. + &ahwb_written_, // Filled by SetReadingFinishedFunc. 
+ &release_callback_, std::move(lock)}; } bool Tensor::AllocateAHardwareBuffer(int size_alignment) const { @@ -276,40 +270,43 @@ bool Tensor::AllocateAHardwareBuffer(int size_alignment) const { } use_ahwb_ = true; - if (__builtin_available(android 26, *)) { - if (ahwb_ == nullptr) { - AHardwareBuffer_Desc desc = {}; - if (size_alignment == 0) { - desc.width = bytes(); - } else { - // We expect allocations to be page-aligned, implicitly satisfying any - // requirements from Edge TPU. No need to add a check for this, - // since Edge TPU will check for us. - desc.width = AlignedToPowerOf2(bytes(), size_alignment); - } - desc.height = 1; - desc.layers = 1; - desc.format = AHARDWAREBUFFER_FORMAT_BLOB; - desc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN | - AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | - AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; - return AHardwareBuffer_allocate(&desc, &ahwb_) == 0; + if (ahwb_ == nullptr) { + HardwareBufferSpec spec = {}; + if (size_alignment == 0) { + spec.width = bytes(); + } else { + // We expect allocations to be page-aligned, implicitly satisfying any + // requirements from Edge TPU. No need to add a check for this, + // since Edge TPU will check for us. 
+ spec.width = AlignedToPowerOf2(bytes(), size_alignment); } - return true; + spec.height = 1; + spec.layers = 1; + spec.format = HardwareBufferSpec::AHARDWAREBUFFER_FORMAT_BLOB; + spec.usage = HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN | + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + HardwareBufferSpec::AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + auto new_ahwb = HardwareBuffer::Create(spec); + if (!new_ahwb.ok()) { + ABSL_LOG(ERROR) << "Allocation of NDK Hardware Buffer failed: " + << new_ahwb.status(); + return false; + } + ahwb_ = std::make_unique(std::move(*new_ahwb)); } - return false; + return true; } bool Tensor::AllocateAhwbMapToSsbo() const { if (__builtin_available(android 26, *)) { if (AllocateAHardwareBuffer()) { - if (MapAHardwareBufferToGlBuffer(ahwb_, bytes()).ok()) { + if (MapAHardwareBufferToGlBuffer(ahwb_->GetAHardwareBuffer(), bytes()) + .ok()) { glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); return true; } // Unable to make OpenGL <-> AHWB binding. Use regular SSBO instead. - AHardwareBuffer_release(ahwb_); - ahwb_ = nullptr; + ahwb_.reset(); } } return false; @@ -317,14 +314,11 @@ bool Tensor::AllocateAhwbMapToSsbo() const { // Moves Cpu/Ssbo resource under the Ahwb backed memory. void Tensor::MoveCpuOrSsboToAhwb() const { - void* dest = nullptr; - if (__builtin_available(android 26, *)) { - auto error = AHardwareBuffer_lock( - ahwb_, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1, nullptr, &dest); - ABSL_CHECK(error == 0) << "AHardwareBuffer_lock " << error; - } + auto dest = + ahwb_->Lock(HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY); + ABSL_CHECK_OK(dest) << "Lock of AHWB failed"; if (valid_ & kValidCpu) { - std::memcpy(dest, cpu_buffer_, bytes()); + std::memcpy(*dest, cpu_buffer_, bytes()); // Free CPU memory because next time AHWB is mapped instead. 
free(cpu_buffer_); cpu_buffer_ = nullptr; @@ -334,7 +328,7 @@ void Tensor::MoveCpuOrSsboToAhwb() const { glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_); const void* src = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, bytes(), GL_MAP_READ_BIT); - std::memcpy(dest, src, bytes()); + std::memcpy(*dest, src, bytes()); glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); glDeleteBuffers(1, &opengl_buffer_); }); @@ -347,10 +341,7 @@ void Tensor::MoveCpuOrSsboToAhwb() const { ABSL_LOG(FATAL) << "Can't convert tensor with mask " << valid_ << " into AHWB."; } - if (__builtin_available(android 26, *)) { - auto error = AHardwareBuffer_unlock(ahwb_, nullptr); - ABSL_CHECK(error == 0) << "AHardwareBuffer_unlock " << error; - } + ABSL_CHECK_OK(ahwb_->Unlock()) << "Unlock of AHWB failed"; } // SSBO is created on top of AHWB. A fence is inserted into the GPU queue before @@ -403,59 +394,52 @@ void Tensor::ReleaseAhwbStuff() { if (ahwb_) { if (ssbo_read_ != 0 || fence_sync_ != EGL_NO_SYNC_KHR || ahwb_written_) { if (ssbo_written_ != -1) close(ssbo_written_); - DelayedReleaser::Add(ahwb_, opengl_buffer_, fence_sync_, ssbo_read_, - std::move(ahwb_written_), gl_context_, + DelayedReleaser::Add(std::move(ahwb_), opengl_buffer_, fence_sync_, + ssbo_read_, std::move(ahwb_written_), gl_context_, std::move(release_callback_)); opengl_buffer_ = GL_INVALID_INDEX; } else { if (release_callback_) release_callback_(); - AHardwareBuffer_release(ahwb_); + ahwb_.reset(); } } } } void* Tensor::MapAhwbToCpuRead() const { - if (__builtin_available(android 26, *)) { - if (ahwb_) { - if (!(valid_ & kValidCpu)) { - if ((valid_ & kValidOpenGlBuffer) && ssbo_written_ == -1) { - // EGLSync is failed. Use another synchronization method. - // TODO: Use tflite::gpu::GlBufferSync and GlActiveSync. 
- gl_context_->Run([]() { glFinish(); }); - } else if (valid_ & kValidAHardwareBuffer) { - ABSL_CHECK(ahwb_written_) - << "Ahwb-to-Cpu synchronization requires the " - "completion function to be set"; - ABSL_CHECK(ahwb_written_(true)) - << "An error oqcured while waiting for the buffer to be written"; - } + if (ahwb_ != nullptr) { + if (!(valid_ & kValidCpu)) { + if ((valid_ & kValidOpenGlBuffer) && ssbo_written_ == -1) { + // EGLSync is failed. Use another synchronization method. + // TODO: Use tflite::gpu::GlBufferSync and GlActiveSync. + gl_context_->Run([]() { glFinish(); }); + } else if (valid_ & kValidAHardwareBuffer) { + ABSL_CHECK(ahwb_written_) << "Ahwb-to-Cpu synchronization requires the " + "completion function to be set"; + ABSL_CHECK(ahwb_written_(true)) + << "An error oqcured while waiting for the buffer to be written"; } - void* ptr; - auto error = - AHardwareBuffer_lock(ahwb_, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, - ssbo_written_, nullptr, &ptr); - ABSL_CHECK(error == 0) << "AHardwareBuffer_lock " << error; - close(ssbo_written_); - ssbo_written_ = -1; - return ptr; } + auto ptr = + ahwb_->Lock(HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, + ssbo_written_); + ABSL_CHECK_OK(ptr) << "Lock of AHWB failed"; + close(ssbo_written_); + ssbo_written_ = -1; + return *ptr; } return nullptr; } void* Tensor::MapAhwbToCpuWrite() const { - if (__builtin_available(android 26, *)) { - if (ahwb_) { - // TODO: If previously acquired view is GPU write view then need - // to be sure that writing is finished. That's a warning: two consequent - // write views should be interleaved with read view. - void* ptr; - auto error = AHardwareBuffer_lock( - ahwb_, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, nullptr, &ptr); - ABSL_CHECK(error == 0) << "AHardwareBuffer_lock " << error; - return ptr; - } + if (ahwb_ != nullptr) { + // TODO: If previously acquired view is GPU write view then need + // to be sure that writing is finished. 
That's a warning: two consequent + // write views should be interleaved with read view. + auto locked_ptr = + ahwb_->Lock(HardwareBufferSpec::AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN); + ABSL_CHECK_OK(locked_ptr) << "Lock of AHWB failed"; + return *locked_ptr; } return nullptr; } diff --git a/mediapipe/framework/formats/tensor_ahwb_test.cc b/mediapipe/framework/formats/tensor_ahwb_test.cc index 69e49dd58..d23ee7071 100644 --- a/mediapipe/framework/formats/tensor_ahwb_test.cc +++ b/mediapipe/framework/formats/tensor_ahwb_test.cc @@ -1,3 +1,5 @@ +#include + #include "mediapipe/framework/formats/tensor.h" #include "testing/base/public/gmock.h" #include "testing/base/public/gunit.h" From 4e89de69a6f008c619fab3b7dd120e679ff5894c Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Wed, 13 Dec 2023 11:15:25 -0800 Subject: [PATCH 29/39] Version increment in version.bzl PiperOrigin-RevId: 590658960 --- version.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.bzl b/version.bzl index 24048e1a2..acd810505 100644 --- a/version.bzl +++ b/version.bzl @@ -2,4 +2,4 @@ # The next version of MediaPipe (e.g. the version that is currently in development). # This version should be bumped after every release. 
-MEDIAPIPE_FULL_VERSION = "0.10.9" +MEDIAPIPE_FULL_VERSION = "0.10.10" From 6fab3a8b85c66dfa44723116dca27414c47d6ed3 Mon Sep 17 00:00:00 2001 From: Kinar Date: Wed, 13 Dec 2023 19:59:36 -0800 Subject: [PATCH 30/39] Simplified copying data from Eigen matrix to C-style matrix and addressed some issues --- .../components/containers/matrix_converter.cc | 12 +++++---- .../face_landmarker/face_landmarker_result.h | 14 +++++----- .../face_landmarker/face_landmarker_test.cc | 26 +++++++++---------- .../hand_landmarker/hand_landmarker_test.cc | 16 +++++++----- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/mediapipe/tasks/c/components/containers/matrix_converter.cc b/mediapipe/tasks/c/components/containers/matrix_converter.cc index 6d823a424..9320178cb 100644 --- a/mediapipe/tasks/c/components/containers/matrix_converter.cc +++ b/mediapipe/tasks/c/components/containers/matrix_converter.cc @@ -27,11 +27,13 @@ void CppConvertToMatrix(const Eigen::MatrixXf& in, ::Matrix* out) { out->cols = in.cols(); out->data = new float[out->rows * out->cols]; - // Copy data from Eigen matrix to C matrix in column-major order - for (int col = 0; col < out->cols; ++col) { - for (int row = 0; row < out->rows; ++row) { - out->data[col * out->rows + row] = in(row, col); - } + // Copy data from Eigen matrix to C-style matrix. + // This operation copies the elements sequentially as they appear in the Eigen + // matrix's internal storage, regardless of whether it's stored in row-major + // or column-major order and ensures the integrity of data during the + // transfer. 
+ for (int i = 0; i < out->rows * out->cols; ++i) { + out->data[i] = in.data()[i]; } } diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h index 27d698d13..0d86b7956 100644 --- a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_result.h @@ -33,19 +33,19 @@ extern "C" { // The hand landmarker result from HandLandmarker, where each vector // element represents a single hand detected in the image. struct FaceLandmarkerResult { - // Optional face blendshapes results. - struct Categories* face_blendshapes; - - // The number of elements in the face_blendshapes array. - uint32_t face_blendshapes_count; - // Detected face landmarks in normalized image coordinates. struct NormalizedLandmarks* face_landmarks; // The number of elements in the face_landmarks array. uint32_t face_landmarks_count; - // Optional facial transformation matrix. + // Optional face blendshapes results. + struct Categories* face_blendshapes; + + // The number of elements in the face_blendshapes array. + uint32_t face_blendshapes_count; + + // Optional facial transformation matrixes. struct Matrix* facial_transformation_matrixes; // The number of elements in the facial_transformation_matrixes array. 
diff --git a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc index d96d7f32a..fb2e1edff 100644 --- a/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc +++ b/mediapipe/tasks/c/vision/face_landmarker/face_landmarker_test.cc @@ -48,10 +48,10 @@ std::string GetFullPath(absl::string_view file_name) { return JoinPath("./", kTestDataDirectory, file_name); } -void MatchesFaceLandmarkerResult(FaceLandmarkerResult* result, - const float blendshapes_precision, - const float landmark_precision, - const float matrix_precison) { +void ExpectFaceLandmarkerResultCorrect(FaceLandmarkerResult* result, + const float blendshapes_precision, + const float landmark_precision, + const float matrix_precison) { // Expects to have the same number of faces detected. EXPECT_EQ(result->face_blendshapes_count, 1); @@ -115,9 +115,9 @@ TEST(FaceLandmarkerTest, ImageModeTest) { FaceLandmarkerResult result; face_landmarker_detect_image(landmarker, mp_image, &result, /* error_msg */ nullptr); - MatchesFaceLandmarkerResult(&result, kBlendshapesPrecision, - kLandmarksPrecision, - kFacialTransformationMatrixPrecision); + ExpectFaceLandmarkerResultCorrect(&result, kBlendshapesPrecision, + kLandmarksPrecision, + kFacialTransformationMatrixPrecision); face_landmarker_close_result(&result); face_landmarker_close(landmarker, /* error_msg */ nullptr); } @@ -157,9 +157,9 @@ TEST(FaceLandmarkerTest, VideoModeTest) { face_landmarker_detect_for_video(landmarker, mp_image, i, &result, /* error_msg */ nullptr); - MatchesFaceLandmarkerResult(&result, kBlendshapesPrecision, - kLandmarksPrecision, - kFacialTransformationMatrixPrecision); + ExpectFaceLandmarkerResultCorrect(&result, kBlendshapesPrecision, + kLandmarksPrecision, + kFacialTransformationMatrixPrecision); face_landmarker_close_result(&result); } face_landmarker_close(landmarker, /* error_msg */ nullptr); @@ -176,9 +176,9 @@ struct LiveStreamModeCallback { 
int64_t timestamp, char* error_msg) { ASSERT_NE(landmarker_result, nullptr); ASSERT_EQ(error_msg, nullptr); - MatchesFaceLandmarkerResult(landmarker_result, kBlendshapesPrecision, - kLandmarksPrecision, - kFacialTransformationMatrixPrecision); + ExpectFaceLandmarkerResultCorrect(landmarker_result, kBlendshapesPrecision, + kLandmarksPrecision, + kFacialTransformationMatrixPrecision); EXPECT_GT(image.image_frame.width, 0); EXPECT_GT(image.image_frame.height, 0); EXPECT_GT(timestamp, last_timestamp); diff --git a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc index ed7b4646f..7cd8ec164 100644 --- a/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc +++ b/mediapipe/tasks/c/vision/hand_landmarker/hand_landmarker_test.cc @@ -47,9 +47,9 @@ std::string GetFullPath(absl::string_view file_name) { return JoinPath("./", kTestDataDirectory, file_name); } -void MatchesHandLandmarkerResult(HandLandmarkerResult* result, - const float score_precision, - const float landmark_precision) { +void ExpectHandLandmarkerResultCorrect(HandLandmarkerResult* result, + const float score_precision, + const float landmark_precision) { // Expects to have the same number of hands detected. 
EXPECT_EQ(result->handedness_count, 1); @@ -104,7 +104,8 @@ TEST(HandLandmarkerTest, ImageModeTest) { HandLandmarkerResult result; hand_landmarker_detect_image(landmarker, mp_image, &result, /* error_msg */ nullptr); - MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); + ExpectHandLandmarkerResultCorrect(&result, kScorePrecision, + kLandmarkPrecision); hand_landmarker_close_result(&result); hand_landmarker_close(landmarker, /* error_msg */ nullptr); } @@ -141,7 +142,8 @@ TEST(HandLandmarkerTest, VideoModeTest) { hand_landmarker_detect_for_video(landmarker, mp_image, i, &result, /* error_msg */ nullptr); - MatchesHandLandmarkerResult(&result, kScorePrecision, kLandmarkPrecision); + ExpectHandLandmarkerResultCorrect(&result, kScorePrecision, + kLandmarkPrecision); hand_landmarker_close_result(&result); } hand_landmarker_close(landmarker, /* error_msg */ nullptr); @@ -158,8 +160,8 @@ struct LiveStreamModeCallback { int64_t timestamp, char* error_msg) { ASSERT_NE(landmarker_result, nullptr); ASSERT_EQ(error_msg, nullptr); - MatchesHandLandmarkerResult(landmarker_result, kScorePrecision, - kLandmarkPrecision); + ExpectHandLandmarkerResultCorrect(landmarker_result, kScorePrecision, + kLandmarkPrecision); EXPECT_GT(image.image_frame.width, 0); EXPECT_GT(image.image_frame.height, 0); EXPECT_GT(timestamp, last_timestamp); From 4004c2dfaa64b085d30167a89f663ae613a04a11 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 14 Dec 2023 06:03:06 -0800 Subject: [PATCH 31/39] No public description PiperOrigin-RevId: 590913209 --- mediapipe/gpu/gl_context.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mediapipe/gpu/gl_context.cc b/mediapipe/gpu/gl_context.cc index 5eff88b92..4dc90b52a 100644 --- a/mediapipe/gpu/gl_context.cc +++ b/mediapipe/gpu/gl_context.cc @@ -697,10 +697,13 @@ class GlFenceSyncPoint : public GlSyncPoint { void Wait() override { if (!sync_) return; - gl_context_->Run([this] { - // TODO: must this run on 
the original context?? + if (GlContext::IsAnyContextCurrent()) { sync_.Wait(); - }); + return; + } + // In case a current GL context is not available, we fall back using the + // captured gl_context_. + gl_context_->Run([this] { sync_.Wait(); }); } void WaitOnGpu() override { From df7feadaf7ecc9e075d2ba42d2d8cdbc54b42c2c Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Thu, 14 Dec 2023 08:41:13 -0800 Subject: [PATCH 32/39] No public description PiperOrigin-RevId: 590950821 --- WORKSPACE | 43 ++------------------------ third_party/shared_dependencies.bzl | 47 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 922b2c102..fe85e7596 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,6 +1,7 @@ workspace(name = "mediapipe") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@//third_party:shared_dependencies.bzl", "mediapipe_absl", "mediapipe_sentencepiece", "mediapipe_flatbuffers", "mediapipe_tensorflow") # Protobuf expects an //external:python_headers target bind( @@ -21,7 +22,6 @@ bazel_skylib_workspace() load("@bazel_skylib//lib:versions.bzl", "versions") versions.check(minimum_bazel_version = "3.7.2") -load("@//third_party:shared_dependencies.bzl", "mediapipe_absl", "mediapipe_sentencepiece") mediapipe_absl() mediapipe_sentencepiece() @@ -208,8 +208,7 @@ http_archive( urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.19.1.tar.gz"], ) -load("@//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") -flatbuffers() +mediapipe_flatbuffers() http_archive( name = "com_google_audio_tools", @@ -229,7 +228,6 @@ http_archive( build_file = "@//third_party:pffft.BUILD", ) - http_archive( name = "darts_clone", build_file = "@//third_party:darts_clone.BUILD", @@ -463,42 +461,7 @@ maven_install( version_conflict_policy = "pinned", ) -# Needed by TensorFlow -http_archive( - name = "io_bazel_rules_closure", - sha256 = 
"e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9", - strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df", - urls = [ - "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", - "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04 - ], -) - -# TensorFlow repo should always go after the other external dependencies. -# TF on 2023-07-26. -_TENSORFLOW_GIT_COMMIT = "e92261fd4cec0b726692081c4d2966b75abf31dd" -# curl -L https://github.com/tensorflow/tensorflow/archive/.tar.gz | shasum -a 256 -_TENSORFLOW_SHA256 = "478a229bd4ec70a5b568ac23b5ea013d9fca46a47d6c43e30365a0412b9febf4" -http_archive( - name = "org_tensorflow", - urls = [ - "https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, - ], - patches = [ - "@//third_party:org_tensorflow_compatibility_fixes.diff", - "@//third_party:org_tensorflow_system_python.diff", - # Diff is generated with a script, don't update it manually. - "@//third_party:org_tensorflow_custom_ops.diff", - # Works around Bazel issue with objc_library. - # See https://github.com/bazelbuild/bazel/issues/19912 - "@//third_party:org_tensorflow_objc_build_fixes.diff", - ], - patch_args = [ - "-p1", - ], - strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, - sha256 = _TENSORFLOW_SHA256, -) +mediapipe_tensorflow() load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3") tf_workspace3() diff --git a/third_party/shared_dependencies.bzl b/third_party/shared_dependencies.bzl index 6cd211a38..08a21bb7a 100644 --- a/third_party/shared_dependencies.bzl +++ b/third_party/shared_dependencies.bzl @@ -1,9 +1,18 @@ """MediaPipe's shared dependencies that can be used by dependent projects. 
Includes build patches.""" +load("@//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +# TensorFlow repo should always go after the other external dependencies. +# TF on 2023-07-26. +_TENSORFLOW_GIT_COMMIT = "e92261fd4cec0b726692081c4d2966b75abf31dd" + +# curl -L https://github.com/tensorflow/tensorflow/archive/.tar.gz | shasum -a 256 +_TENSORFLOW_SHA256 = "478a229bd4ec70a5b568ac23b5ea013d9fca46a47d6c43e30365a0412b9febf4" + # ABSL cpp library lts_2023_01_25. def mediapipe_absl(): + """Exports the ABSL depedency on TensorFlow.""" http_archive( name = "com_google_absl", urls = [ @@ -20,6 +29,7 @@ def mediapipe_absl(): ) def mediapipe_sentencepiece(): + """Exports the Semtencepiece depedency on TensorFlow.""" http_archive( name = "com_google_sentencepiece", strip_prefix = "sentencepiece-0.1.96", @@ -31,3 +41,40 @@ def mediapipe_sentencepiece(): patches = ["@//third_party:com_google_sentencepiece.diff"], patch_args = ["-p1"], ) + +def mediapipe_flatbuffers(): + """Exports the FlatBuffers depedency on TensorFlow.""" + flatbuffers() + +def mediapipe_tensorflow(): + """Exports the MediaPipe depedency on TensorFlow.""" + + # Needed by TensorFlow + http_archive( + name = "io_bazel_rules_closure", + sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9", + strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df", + urls = [ + "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04 + ], + ) + + http_archive( + name = "org_tensorflow", + urls = ["https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT], + patches = [ + "@//third_party:org_tensorflow_compatibility_fixes.diff", + "@//third_party:org_tensorflow_system_python.diff", + # Diff is generated 
with a script, don't update it manually. + "@//third_party:org_tensorflow_custom_ops.diff", + # Works around Bazel issue with objc_library. + # See https://github.com/bazelbuild/bazel/issues/19912 + "@//third_party:org_tensorflow_objc_build_fixes.diff", + ], + patch_args = [ + "-p1", + ], + strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, + sha256 = _TENSORFLOW_SHA256, + ) From f4bbfef67445f700399e91535435c799080f6377 Mon Sep 17 00:00:00 2001 From: Kinar Date: Thu, 14 Dec 2023 08:56:38 -0800 Subject: [PATCH 33/39] Use memcpy now for copying data and indicate how the data is stored --- .../c/components/containers/matrix_converter.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mediapipe/tasks/c/components/containers/matrix_converter.cc b/mediapipe/tasks/c/components/containers/matrix_converter.cc index 9320178cb..e3e3b7e30 100644 --- a/mediapipe/tasks/c/components/containers/matrix_converter.cc +++ b/mediapipe/tasks/c/components/containers/matrix_converter.cc @@ -27,13 +27,13 @@ void CppConvertToMatrix(const Eigen::MatrixXf& in, ::Matrix* out) { out->cols = in.cols(); out->data = new float[out->rows * out->cols]; - // Copy data from Eigen matrix to C-style matrix. - // This operation copies the elements sequentially as they appear in the Eigen - // matrix's internal storage, regardless of whether it's stored in row-major - // or column-major order and ensures the integrity of data during the - // transfer. - for (int i = 0; i < out->rows * out->cols; ++i) { - out->data[i] = in.data()[i]; + // Copies data from an Eigen matrix (default column-major) to a C-style + // matrix, preserving the sequence of elements as per the Eigen matrix's + // internal storage (column-major order by default). + if (!in.IsRowMajor) { + // Safe to use memcpy when the Eigen matrix is in its default column-major + // order. 
+ memcpy(out->data, in.data(), sizeof(float) * out->rows * out->cols); } } From 5e75a169d3fc3d32325ab9514c8312bd92f96420 Mon Sep 17 00:00:00 2001 From: Kinar Date: Thu, 14 Dec 2023 09:02:05 -0800 Subject: [PATCH 34/39] Fix rows to columns for the field cols in struct Matrix --- mediapipe/tasks/c/components/containers/matrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/c/components/containers/matrix.h b/mediapipe/tasks/c/components/containers/matrix.h index 71ec7474c..7d1de67f1 100644 --- a/mediapipe/tasks/c/components/containers/matrix.h +++ b/mediapipe/tasks/c/components/containers/matrix.h @@ -27,7 +27,7 @@ struct Matrix { // The number of rows in the matrix. long rows; - // The number of rows in the matrix. + // The number of columns in the matrix. long cols; // The matrix data. From 746d775933a351bf925a206e1fe21ddfc6c4c5fd Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 14 Dec 2023 14:08:56 -0800 Subject: [PATCH 35/39] Extend verifyGraph to be compatible with proto3. PiperOrigin-RevId: 591047275 --- .../tasks/web/core/task_runner_test_utils.ts | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mediapipe/tasks/web/core/task_runner_test_utils.ts b/mediapipe/tasks/web/core/task_runner_test_utils.ts index 777cb8704..69d00b944 100644 --- a/mediapipe/tasks/web/core/task_runner_test_utils.ts +++ b/mediapipe/tasks/web/core/task_runner_test_utils.ts @@ -71,14 +71,23 @@ export interface MediapipeTasksFake { /** An map of field paths to values */ export type FieldPathToValue = [string[] | string, unknown]; +type JsonObject = Record; + +type Deserializer = (binaryProto: string | Uint8Array) => JsonObject; + /** * Verifies that the graph has been initialized and that it contains the * provided options. + * + * @param deserializer - the function to convert a binary proto to a JsonObject. 
+ * For example, the deserializer of HolisticLandmarkerOptions's binary proto is + * HolisticLandmarkerOptions.deserializeBinary(binaryProto).toObject(). */ export function verifyGraph( tasksFake: MediapipeTasksFake, expectedCalculatorOptions?: FieldPathToValue, expectedBaseOptions?: FieldPathToValue, + deserializer?: Deserializer, ): void { expect(tasksFake.graph).toBeDefined(); // Our graphs should have at least one node in them for processing, and @@ -89,22 +98,30 @@ export function verifyGraph( expect(node).toEqual( jasmine.objectContaining({calculator: tasksFake.calculatorName})); + let proto; + if (deserializer) { + const binaryProto = + tasksFake.graph!.getNodeList()[0].getNodeOptionsList()[0].getValue(); + proto = deserializer(binaryProto); + } else { + proto = (node.options as {ext: unknown}).ext; + } + if (expectedBaseOptions) { const [fieldPath, value] = expectedBaseOptions; - let proto = (node.options as {ext: {baseOptions: unknown}}).ext.baseOptions; + let baseOptions = (proto as {baseOptions: unknown}).baseOptions; for (const fieldName of ( Array.isArray(fieldPath) ? fieldPath : [fieldPath])) { - proto = ((proto ?? {}) as Record)[fieldName]; + baseOptions = ((baseOptions ?? {}) as JsonObject)[fieldName]; } - expect(proto).toEqual(value); + expect(baseOptions).toEqual(value); } if (expectedCalculatorOptions) { const [fieldPath, value] = expectedCalculatorOptions; - let proto = (node.options as {ext: unknown}).ext; for (const fieldName of ( Array.isArray(fieldPath) ? fieldPath : [fieldPath])) { - proto = ((proto ?? {}) as Record)[fieldName]; + proto = ((proto ?? 
{}) as JsonObject)[fieldName]; } expect(proto).toEqual(value); } From 28d5546d9dec9814e6b8ab4dcd6895b4c111170f Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 14 Dec 2023 19:46:08 -0800 Subject: [PATCH 36/39] Internal change PiperOrigin-RevId: 591121815 --- .../diffuser/stable_diffusion_iterate_calculator.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator.cc b/mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator.cc index f7eb7c1b6..e0a4d18b3 100644 --- a/mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator.cc +++ b/mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator.cc @@ -121,6 +121,8 @@ class StableDiffusionIterateCalculator : public Node { if (handle_) dlclose(handle_); } + static absl::Status UpdateContract(CalculatorContract* cc); + absl::Status Open(CalculatorContext* cc) override; absl::Status Process(CalculatorContext* cc) override; @@ -188,6 +190,11 @@ class StableDiffusionIterateCalculator : public Node { bool emit_empty_packet_; }; +absl::Status StableDiffusionIterateCalculator::UpdateContract( + CalculatorContract* cc) { + return absl::OkStatus(); +} + absl::Status StableDiffusionIterateCalculator::Open(CalculatorContext* cc) { StableDiffusionIterateCalculatorOptions options; if (kOptionsIn(cc).IsEmpty()) { @@ -205,7 +212,11 @@ absl::Status StableDiffusionIterateCalculator::Open(CalculatorContext* cc) { if (options.file_folder().empty()) { std::strcpy(config.model_dir, "bins/"); // NOLINT } else { - std::strcpy(config.model_dir, options.file_folder().c_str()); // NOLINT + std::string file_folder = options.file_folder(); + if (!file_folder.empty() && file_folder.back() != '/') { + file_folder.push_back('/'); + } + std::strcpy(config.model_dir, file_folder.c_str()); // NOLINT } 
MP_RETURN_IF_ERROR(mediapipe::file::Exists(config.model_dir)) << config.model_dir; From e55caa234cf5769fd7704199957b134dc0fd38c1 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 14 Dec 2023 22:22:57 -0800 Subject: [PATCH 37/39] No public description PiperOrigin-RevId: 591148449 --- mediapipe/framework/formats/tensor.cc | 8 +- mediapipe/framework/formats/tensor.h | 4 + .../framework/formats/tensor_ahwb_gpu_test.cc | 133 +++++++++++++++--- 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/mediapipe/framework/formats/tensor.cc b/mediapipe/framework/formats/tensor.cc index 49987791a..bf856f5a9 100644 --- a/mediapipe/framework/formats/tensor.cc +++ b/mediapipe/framework/formats/tensor.cc @@ -359,7 +359,13 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const { } return {opengl_buffer_, std::move(lock), #ifdef MEDIAPIPE_TENSOR_USE_AHWB - &ssbo_read_ + // ssbo_read_ is passed to be populated on OpenGlBufferView + // destruction in order to perform delayed resources releasing (see + // tensor_ahwb.cc/DelayedReleaser) only when AHWB is in use. + // + // Not passing for the case when AHWB is not in use to avoid creation + // of unnecessary sync object and memory leak. + use_ahwb_ ? &ssbo_read_ : nullptr #else nullptr #endif // MEDIAPIPE_TENSOR_USE_AHWB diff --git a/mediapipe/framework/formats/tensor.h b/mediapipe/framework/formats/tensor.h index 361883a67..863e5fdd3 100644 --- a/mediapipe/framework/formats/tensor.h +++ b/mediapipe/framework/formats/tensor.h @@ -288,18 +288,22 @@ class Tensor { class OpenGlBufferView : public View { public: GLuint name() const { return name_; } + OpenGlBufferView(OpenGlBufferView&& src) : View(std::move(src)) { name_ = std::exchange(src.name_, GL_INVALID_INDEX); ssbo_read_ = std::exchange(src.ssbo_read_, nullptr); } ~OpenGlBufferView() { if (ssbo_read_) { + // TODO: update tensor to properly handle cases when + // multiple views were requested multiple sync fence may be needed. 
*ssbo_read_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } } protected: friend class Tensor; + OpenGlBufferView(GLuint name, std::unique_ptr&& lock, GLsync* ssbo_read) : View(std::move(lock)), name_(name), ssbo_read_(ssbo_read) {} diff --git a/mediapipe/framework/formats/tensor_ahwb_gpu_test.cc b/mediapipe/framework/formats/tensor_ahwb_gpu_test.cc index b06bd3ef2..bfafc44aa 100644 --- a/mediapipe/framework/formats/tensor_ahwb_gpu_test.cc +++ b/mediapipe/framework/formats/tensor_ahwb_gpu_test.cc @@ -6,6 +6,7 @@ #include +#include "absl/algorithm/container.h" #include "mediapipe/framework/formats/tensor.h" #include "mediapipe/framework/formats/tensor/views/data_types.h" #include "mediapipe/gpu/gpu_test_base.h" @@ -18,7 +19,7 @@ // Then the test requests the CPU view and compares the values. // Float32 and Float16 tests are there. -namespace { +namespace mediapipe { using mediapipe::Float16; using mediapipe::Tensor; @@ -27,6 +28,16 @@ MATCHER_P(NearWithPrecision, precision, "") { return std::abs(std::get<0>(arg) - std::get<1>(arg)) < precision; } +template +std::vector CreateReferenceData(int num_elements) { + std::vector reference; + reference.resize(num_elements); + for (int i = 0; i < num_elements; i++) { + reference[i] = static_cast(i) / 10.0f; + } + return reference; +} + #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 // Utility function to fill the GPU buffer. 
@@ -110,11 +121,7 @@ TEST_F(TensorAhwbGpuTest, TestGpuToCpuFloat32) { }); auto ptr = tensor.GetCpuReadView().buffer(); ASSERT_NE(ptr, nullptr); - std::vector reference; - reference.resize(num_elements); - for (int i = 0; i < num_elements; i++) { - reference[i] = static_cast(i) / 10.0f; - } + std::vector reference = CreateReferenceData(num_elements); EXPECT_THAT(absl::Span(ptr, num_elements), testing::Pointwise(testing::FloatEq(), reference)); } @@ -137,11 +144,7 @@ TEST_F(TensorAhwbGpuTest, TestGpuToCpuFloat16) { }); auto ptr = tensor.GetCpuReadView().buffer(); ASSERT_NE(ptr, nullptr); - std::vector reference; - reference.resize(num_elements); - for (int i = 0; i < num_elements; i++) { - reference[i] = static_cast(i) / 10.0f; - } + std::vector reference = CreateReferenceData(num_elements); // Precision is set to a reasonable value for Float16. EXPECT_THAT(absl::Span(ptr, num_elements), testing::Pointwise(NearWithPrecision(0.001), reference)); @@ -166,11 +169,7 @@ TEST_F(TensorAhwbGpuTest, TestReplacingCpuByAhwb) { } auto ptr = tensor.GetCpuReadView().buffer(); ASSERT_NE(ptr, nullptr); - std::vector reference; - reference.resize(num_elements); - for (int i = 0; i < num_elements; i++) { - reference[i] = static_cast(i) / 10.0f; - } + std::vector reference = CreateReferenceData(num_elements); EXPECT_THAT(absl::Span(ptr, num_elements), testing::Pointwise(testing::FloatEq(), reference)); } @@ -194,17 +193,107 @@ TEST_F(TensorAhwbGpuTest, TestReplacingGpuByAhwb) { } auto ptr = tensor.GetCpuReadView().buffer(); ASSERT_NE(ptr, nullptr); - std::vector reference; - reference.resize(num_elements); - for (int i = 0; i < num_elements; i++) { - reference[i] = static_cast(i) / 10.0f; - } + std::vector reference = CreateReferenceData(num_elements); EXPECT_THAT(absl::Span(ptr, num_elements), testing::Pointwise(testing::FloatEq(), reference)); } +std::vector ReadGlBufferView(const Tensor::OpenGlBufferView& view, + int num_elements) { + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 
view.name()); + int bytes = num_elements * sizeof(float); + void* ptr = + glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, bytes, GL_MAP_READ_BIT); + ABSL_CHECK(ptr) << "glMapBufferRange failed: " << glGetError(); + + std::vector data; + data.resize(num_elements); + std::memcpy(data.data(), ptr, bytes); + glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); + return data; +} + +TEST_F(TensorAhwbGpuTest, TestGetOpenGlBufferReadViewNoAhwb) { + constexpr size_t kNumElements = 20; + std::vector reference = CreateReferenceData(kNumElements); + + Tensor tensor(Tensor::ElementType::kFloat32, Tensor::Shape({kNumElements})); + { + // Populate tensor on CPU and make sure view is destroyed + absl::c_copy(reference, tensor.GetCpuWriteView().buffer()); + } + + RunInGlContext([&] { + // Triggers conversion to GL buffer. + auto ssbo_view = tensor.GetOpenGlBufferReadView(); + ASSERT_NE(ssbo_view.name(), 0); + // ssbo_read_ must NOT be populated, as there's no AHWB associated with + // GL buffer + ASSERT_EQ(ssbo_view.ssbo_read_, nullptr); + + std::vector output = ReadGlBufferView(ssbo_view, kNumElements); + EXPECT_THAT(output, testing::Pointwise(testing::FloatEq(), reference)); + }); +} + +TEST_F(TensorAhwbGpuTest, TestGetOpenGlBufferReadViewAhwbFromCpu) { + constexpr size_t kNumElements = 20; + std::vector reference = CreateReferenceData(kNumElements); + + Tensor tensor(Tensor::ElementType::kFloat32, Tensor::Shape({kNumElements})); + { + // Populate tensor on CPU and make sure view is destroyed + absl::c_copy(reference, tensor.GetCpuWriteView().buffer()); + } + + { + // Make tensor to allocate ahwb and make sure view is destroyed. + ASSERT_NE(tensor.GetAHardwareBufferReadView().handle(), nullptr); + } + + RunInGlContext([&] { + // Triggers conversion to GL buffer. 
+ auto ssbo_view = tensor.GetOpenGlBufferReadView(); + ASSERT_NE(ssbo_view.name(), 0); + // ssbo_read_ must be populated, so during view destruction it's set + // properly for further AHWB destruction + ASSERT_NE(ssbo_view.ssbo_read_, nullptr); + + std::vector output = ReadGlBufferView(ssbo_view, kNumElements); + EXPECT_THAT(output, testing::Pointwise(testing::FloatEq(), reference)); + }); +} + +TEST_F(TensorAhwbGpuTest, TestGetOpenGlBufferReadViewAhwbFromGpu) { + constexpr size_t kNumElements = 20; + std::vector reference = CreateReferenceData(kNumElements); + + Tensor tensor(Tensor::ElementType::kFloat32, Tensor::Shape({kNumElements})); + { + // Make tensor to allocate ahwb and make sure view is destroyed. + ASSERT_NE(tensor.GetAHardwareBufferWriteView().handle(), nullptr); + } + + RunInGlContext([&] { + FillGpuBuffer(tensor.GetOpenGlBufferWriteView().name(), + tensor.shape().num_elements(), tensor.element_type()); + }); + + RunInGlContext([&] { + // Triggers conversion to GL buffer. + auto ssbo_view = tensor.GetOpenGlBufferReadView(); + ASSERT_NE(ssbo_view.name(), 0); + // ssbo_read_ must be populated, so during view destruction it's set + // properly for further AHWB destruction + ASSERT_NE(ssbo_view.ssbo_read_, nullptr); + + std::vector output = ReadGlBufferView(ssbo_view, kNumElements); + EXPECT_THAT(output, testing::Pointwise(testing::FloatEq(), reference)); + }); +} + #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 -} // namespace +} // namespace mediapipe #endif // !defined(MEDIAPIPE_NO_JNI) && (__ANDROID_API__ >= 26 || // defined(__ANDROID_UNAVAILABLE_SYMBOLS_ARE_WEAK__)) From 41db137d3736f7a74c45ed37b615033ed9f8d9ff Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 15 Dec 2023 13:55:52 -0800 Subject: [PATCH 38/39] No public description PiperOrigin-RevId: 591351708 --- mediapipe/gpu/gl_context.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mediapipe/gpu/gl_context.h b/mediapipe/gpu/gl_context.h index 
bb3e6a597..51faffa0c 100644 --- a/mediapipe/gpu/gl_context.h +++ b/mediapipe/gpu/gl_context.h @@ -190,8 +190,7 @@ class GlContext : public std::enable_shared_from_this { // Like Run, but does not wait. void RunWithoutWaiting(GlVoidFunction gl_func); - // Returns a synchronization token. - // This should not be called outside of the GlContext thread. + // Returns a synchronization token for this GlContext. std::shared_ptr CreateSyncToken(); // If another part of the framework calls glFinish, it should call this From 4b471266b8374cd9923551a4174360d161929c12 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Fri, 15 Dec 2023 15:08:19 -0800 Subject: [PATCH 39/39] No public description PiperOrigin-RevId: 591370734 --- WORKSPACE | 43 ++++++++++++++++++++++++-- third_party/shared_dependencies.bzl | 47 ----------------------------- 2 files changed, 40 insertions(+), 50 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index fe85e7596..922b2c102 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,7 +1,6 @@ workspace(name = "mediapipe") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -load("@//third_party:shared_dependencies.bzl", "mediapipe_absl", "mediapipe_sentencepiece", "mediapipe_flatbuffers", "mediapipe_tensorflow") # Protobuf expects an //external:python_headers target bind( @@ -22,6 +21,7 @@ bazel_skylib_workspace() load("@bazel_skylib//lib:versions.bzl", "versions") versions.check(minimum_bazel_version = "3.7.2") +load("@//third_party:shared_dependencies.bzl", "mediapipe_absl", "mediapipe_sentencepiece") mediapipe_absl() mediapipe_sentencepiece() @@ -208,7 +208,8 @@ http_archive( urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.19.1.tar.gz"], ) -mediapipe_flatbuffers() +load("@//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") +flatbuffers() http_archive( name = "com_google_audio_tools", @@ -228,6 +229,7 @@ http_archive( build_file = "@//third_party:pffft.BUILD", ) + http_archive( name = "darts_clone", build_file = 
"@//third_party:darts_clone.BUILD", @@ -461,7 +463,42 @@ maven_install( version_conflict_policy = "pinned", ) -mediapipe_tensorflow() +# Needed by TensorFlow +http_archive( + name = "io_bazel_rules_closure", + sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9", + strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df", + urls = [ + "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", + "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04 + ], +) + +# TensorFlow repo should always go after the other external dependencies. +# TF on 2023-07-26. +_TENSORFLOW_GIT_COMMIT = "e92261fd4cec0b726692081c4d2966b75abf31dd" +# curl -L https://github.com/tensorflow/tensorflow/archive/.tar.gz | shasum -a 256 +_TENSORFLOW_SHA256 = "478a229bd4ec70a5b568ac23b5ea013d9fca46a47d6c43e30365a0412b9febf4" +http_archive( + name = "org_tensorflow", + urls = [ + "https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, + ], + patches = [ + "@//third_party:org_tensorflow_compatibility_fixes.diff", + "@//third_party:org_tensorflow_system_python.diff", + # Diff is generated with a script, don't update it manually. + "@//third_party:org_tensorflow_custom_ops.diff", + # Works around Bazel issue with objc_library. 
+ # See https://github.com/bazelbuild/bazel/issues/19912 + "@//third_party:org_tensorflow_objc_build_fixes.diff", + ], + patch_args = [ + "-p1", + ], + strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, + sha256 = _TENSORFLOW_SHA256, +) load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3") tf_workspace3() diff --git a/third_party/shared_dependencies.bzl b/third_party/shared_dependencies.bzl index 08a21bb7a..6cd211a38 100644 --- a/third_party/shared_dependencies.bzl +++ b/third_party/shared_dependencies.bzl @@ -1,18 +1,9 @@ """MediaPipe's shared dependencies that can be used by dependent projects. Includes build patches.""" -load("@//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -# TensorFlow repo should always go after the other external dependencies. -# TF on 2023-07-26. -_TENSORFLOW_GIT_COMMIT = "e92261fd4cec0b726692081c4d2966b75abf31dd" - -# curl -L https://github.com/tensorflow/tensorflow/archive/.tar.gz | shasum -a 256 -_TENSORFLOW_SHA256 = "478a229bd4ec70a5b568ac23b5ea013d9fca46a47d6c43e30365a0412b9febf4" - # ABSL cpp library lts_2023_01_25. 
def mediapipe_absl(): - """Exports the ABSL depedency on TensorFlow.""" http_archive( name = "com_google_absl", urls = [ @@ -29,7 +20,6 @@ def mediapipe_absl(): ) def mediapipe_sentencepiece(): - """Exports the Semtencepiece depedency on TensorFlow.""" http_archive( name = "com_google_sentencepiece", strip_prefix = "sentencepiece-0.1.96", @@ -41,40 +31,3 @@ def mediapipe_sentencepiece(): patches = ["@//third_party:com_google_sentencepiece.diff"], patch_args = ["-p1"], ) - -def mediapipe_flatbuffers(): - """Exports the FlatBuffers depedency on TensorFlow.""" - flatbuffers() - -def mediapipe_tensorflow(): - """Exports the MediaPipe depedency on TensorFlow.""" - - # Needed by TensorFlow - http_archive( - name = "io_bazel_rules_closure", - sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9", - strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df", - urls = [ - "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", - "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04 - ], - ) - - http_archive( - name = "org_tensorflow", - urls = ["https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT], - patches = [ - "@//third_party:org_tensorflow_compatibility_fixes.diff", - "@//third_party:org_tensorflow_system_python.diff", - # Diff is generated with a script, don't update it manually. - "@//third_party:org_tensorflow_custom_ops.diff", - # Works around Bazel issue with objc_library. - # See https://github.com/bazelbuild/bazel/issues/19912 - "@//third_party:org_tensorflow_objc_build_fixes.diff", - ], - patch_args = [ - "-p1", - ], - strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, - sha256 = _TENSORFLOW_SHA256, - )