From a128810564d27ee126a5bded65571018fc1825ee Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:25:16 +0530 Subject: [PATCH 01/47] Updated MPPImageUtils with methods to create image frame --- mediapipe/tasks/ios/vision/core/utils/BUILD | 14 +- .../core/utils/sources/MPPImage+Utils.h | 22 ++- .../{MPPImage+Utils.m => MPPImage+Utils.mm} | 133 +++++++----------- 3 files changed, 70 insertions(+), 99 deletions(-) rename mediapipe/tasks/ios/vision/core/utils/sources/{MPPImage+Utils.m => MPPImage+Utils.mm} (73%) diff --git a/mediapipe/tasks/ios/vision/core/utils/BUILD b/mediapipe/tasks/ios/vision/core/utils/BUILD index 540c2753b..c99055c75 100644 --- a/mediapipe/tasks/ios/vision/core/utils/BUILD +++ b/mediapipe/tasks/ios/vision/core/utils/BUILD @@ -4,23 +4,23 @@ licenses(["notice"]) objc_library( name = "MPPImageUtils", - srcs = ["sources/MPPImage+Utils.m"], + srcs = ["sources/MPPImage+Utils.mm"], hdrs = ["sources/MPPImage+Utils.h"], - copts = [ - "-ObjC++", - "-std=c++17", - ], module_name = "MPPImageUtils", sdk_frameworks = [ "Accelerate", "CoreGraphics", "CoreImage", "CoreVideo", - "UIKit", ], deps = [ "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/vision/core:MPPImage", - "//third_party/apple_frameworks:UIKit", + "//mediapipe/framework/formats:image_format_cc_proto", + "//mediapipe/framework/formats:image_frame", + ], + copts = [ + "-ObjC++", + "-std=c++17", ], ) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h index a9c371d5b..724bccdad 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h @@ -15,29 +15,25 @@ #import #import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" +#include "mediapipe/framework/formats/image_frame.h" NS_ASSUME_NONNULL_BEGIN -/** - * Helper utility for performing operations on MPPImage specific to the MediaPipe Vision library. +/** + * Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`. */ @interface MPPImage (Utils) - -/** Bitmap size of the image. */ -@property(nonatomic, readonly) CGSize bitmapSize; - /** - * Returns the underlying uint8 pixel buffer of an `MPPImage`. - * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the pixel - * data is converted to an RGB format. In case of grayscale images, the mono channel is duplicated + * Converts the `MPPImage` into a `mediapipe::ImageFrame`. + * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is converted to an RGB format. In case of grayscale images, the mono channel is duplicated * in the R, G, B channels. * - * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no - * error will be saved. + * @param error Pointer to the memory location where errors if any should be + * saved. If @c NULL, no error will be saved. * - * @return The underlying pixel buffer of the `MPPImage` or nil in case of errors. + * @return An std::unique_ptr or `nullptr` in case of errors. 
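+ *
+ * A minimal usage sketch (illustrative only; assumes an already constructed
+ * `MPPImage *image` and an Objective-C++ call site):
+ *
+ *   NSError *error = nil;
+ *   std::unique_ptr<mediapipe::ImageFrame> imageFrame = [image imageFrameWithError:&error];
+ *   if (!imageFrame) {
+ *     NSLog(@"Conversion to ImageFrame failed: %@", error);
+ *   }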
*/ -- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error; +- (std::unique_ptr)imageFrameWithError:(NSError **)error; @end diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.m b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm similarity index 73% rename from mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.m rename to mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index 01ac9912a..8d6efe91c 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.m +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h" +#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+ImageFrameUtils.h" #import "mediapipe/tasks/ios/common/sources/MPPCommon.h" #import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h" @@ -22,6 +22,12 @@ #import #import +#include "mediapipe/framework/formats/image_format.pb.h" + +namespace { + using ::mediapipe::ImageFrame; +} + @interface MPPPixelDataUtils : NSObject + (uint8_t *)rgbPixelDataFromPixelData:(uint8_t *)pixelData @@ -35,21 +41,19 @@ @interface MPPCVPixelBufferUtils : NSObject -+ (uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; ++ (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; @end @interface MPPCGImageUtils : NSObject -+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error; ++ (std::unique_ptr)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error; @end -@interface UIImage (RawPixelDataUtils) +@interface UIImage (ImageFrameUtils) -@property(nonatomic, readonly) CGSize bitmapSize; - -- (uint8_t *)pixelDataWithError:(NSError **)error; +- (std::unique_ptr)imageFrameWithError:(NSError **)error; @end @@ -120,9 +124,15 @@ @implementation MPPCVPixelBufferUtils -+ (uint8_t *)rgbPixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error { ++ (std::unique_ptr)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error { CVPixelBufferLockBaseAddress(pixelBuffer, 0); + size_t width = CVPixelBufferGetWidth(pixelBuffer); + size_t height = CVPixelBufferGetHeight(pixelBuffer); + size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer); + + + uint8_t *rgbPixelData = [MPPPixelDataUtils rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer) withWidth:CVPixelBufferGetWidth(pixelBuffer) @@ -133,10 +143,19 @@ CVPixelBufferUnlockBaseAddress(pixelBuffer, 0); - return rgbPixelData; + if (!rgbPixelData) { + return nullptr; + } + + std::unique_ptr imageFrame = absl::make_unique( + ::mediapipe::ImageFormat::SRGB, /*width=*/width, /*height=*/height, stride, + static_cast(rgbPixelData), + /*deleter=*/free); + + return imageFrame; } -+ (nullable uint8_t *)pixelDataFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer ++ (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error { uint8_t *pixelData = NULL; @@ -144,8 +163,7 @@ switch (pixelBufferFormat) { case kCVPixelFormatType_32BGRA: { - pixelData = [MPPCVPixelBufferUtils rgbPixelDataFromCVPixelBuffer:pixelBuffer error:error]; - break; + return [MPPCVPixelBufferUtils rgbImageFrameFromCVPixelBuffer:pixelBuffer error:error]; } default: { [MPPCommonUtils createCustomError:error @@ -155,20 +173,20 @@ 
} } - return pixelData; + return nullptr; } @end @implementation MPPCGImageUtils -+ (UInt8 *_Nullable)pixelDataFromCGImage:(CGImageRef)cgImage error:(NSError **)error { ++ (std::unique_ptr)imageFrameFromCGImage:(CGImageRef)cgImage error:(NSError **)error { size_t width = CGImageGetWidth(cgImage); size_t height = CGImageGetHeight(cgImage); NSInteger bitsPerComponent = 8; NSInteger channelCount = 4; - UInt8 *pixel_data_to_return = NULL; + UInt8 *pixelDataToReturn = NULL; CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); size_t bytesPerRow = channelCount * width; @@ -191,7 +209,7 @@ if (srcData) { // We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input // a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage. - pixel_data_to_return = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData + pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData withWidth:width height:height stride:bytesPerRow @@ -204,38 +222,42 @@ CGColorSpaceRelease(colorSpace); - return pixel_data_to_return; + std::unique_ptr imageFrame = absl::make_unique( + mediapipe::ImageFormat::SRGB, /*width=*/(int)width, /*height=*/(int)height, (int)bytesPerRow, + static_cast(pixelDataToReturn), + /*deleter=*/free); + + return imageFrame; } @end -@implementation UIImage (RawPixelDataUtils) +@implementation UIImage (ImageFrameUtils) -- (uint8_t *)pixelDataFromCIImageWithError:(NSError **)error { - uint8_t *pixelData = NULL; +- (std::unique_ptr)imageFrameFromCIImageWithError:(NSError **)error { if (self.CIImage.pixelBuffer) { - pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.CIImage.pixelBuffer + return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error]; } else if (self.CIImage.CGImage) { - pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CIImage.CGImage error:error]; + return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error]; } else { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError description:@"CIImage should have CGImage or CVPixelBuffer info."]; } - return pixelData; + return nullptr; } -- (uint8_t *)pixelDataWithError:(NSError **)error { +- (std::unique_ptr)imageFrameWithError:(NSError **)error { uint8_t *pixelData = nil; if (self.CGImage) { - pixelData = [MPPCGImageUtils pixelDataFromCGImage:self.CGImage error:error]; + return [MPPCGImageUtils imageFrameFromCGImage:self.CGImage error:error]; } else if (self.CIImage) { - pixelData = [self pixelDataFromCIImageWithError:error]; + return [self imageFrameFromCIImageWithError:error]; } else { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError @@ -243,46 +265,27 @@ " CIImage or CGImage."]; } - return pixelData; + return nullptr; } -- (CGSize)bitmapSize { - CGFloat width = 0; - CGFloat height = 0; - - if (self.CGImage) { - width = CGImageGetWidth(self.CGImage); - height = CGImageGetHeight(self.CGImage); - } else if (self.CIImage.pixelBuffer) { - width = CVPixelBufferGetWidth(self.CIImage.pixelBuffer); - height = CVPixelBufferGetHeight(self.CIImage.pixelBuffer); - } else if (self.CIImage.CGImage) { - width = CGImageGetWidth(self.CIImage.CGImage); - height = CGImageGetHeight(self.CIImage.CGImage); - } - return CGSizeMake(width, height); -} @end @implementation MPPImage (Utils) -- (nullable uint8_t *)rgbPixelDataWithError:(NSError **)error { +- (std::unique_ptr)imageFrameWithError:(NSError **)error { uint8_t *pixelData = NULL; switch 
(self.imageSourceType) { case MPPImageSourceTypeSampleBuffer: { CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer); - pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:sampleImagePixelBuffer + return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error]; - break; } case MPPImageSourceTypePixelBuffer: { - pixelData = [MPPCVPixelBufferUtils pixelDataFromCVPixelBuffer:self.pixelBuffer error:error]; - break; + return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error]; } case MPPImageSourceTypeImage: { - pixelData = [self.image pixelDataWithError:error]; - break; + return [self.image imageFrameWithError:error]; } default: [MPPCommonUtils createCustomError:error @@ -290,35 +293,7 @@ description:@"Invalid source type for MPPImage."]; } - return pixelData; -} - -- (CGSize)bitmapSize { - CGFloat width = 0; - CGFloat height = 0; - - switch (self.imageSourceType) { - case MPPImageSourceTypeSampleBuffer: { - CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer); - width = CVPixelBufferGetWidth(pixelBuffer); - height = CVPixelBufferGetHeight(pixelBuffer); - break; - } - case MPPImageSourceTypePixelBuffer: { - width = CVPixelBufferGetWidth(self.pixelBuffer); - height = CVPixelBufferGetHeight(self.pixelBuffer); - break; - } - case MPPImageSourceTypeImage: { - width = self.image.bitmapSize.width; - height = self.image.bitmapSize.height; - break; - } - default: - break; - } - - return CGSizeMake(width, height); + return nullptr; } @end From 825b30bccdeae1c5982e65bd0b3bd9abbb85f780 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:25:57 +0530 Subject: [PATCH 02/47] Added MPPImageClassifierResult --- .../tasks/ios/vision/image_classifier/BUILD | 28 ++++++++++++ .../sources/MPPImageClassifierResult.h | 44 +++++++++++++++++++ .../sources/MPPImageClassifierResult.m | 28 ++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_classifier/BUILD create mode 100644 mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h create mode 100644 mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.m diff --git a/mediapipe/tasks/ios/vision/image_classifier/BUILD b/mediapipe/tasks/ios/vision/image_classifier/BUILD new file mode 100644 index 000000000..2ecfcab07 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_classifier/BUILD @@ -0,0 +1,28 @@ +# Copyright 2023 The MediaPipe Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
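+# A quick way to check that this target compiles (the command is illustrative
+# and may require an iOS build config on your machine):
+#   bazel build //mediapipe/tasks/ios/vision/image_classifier:MPPImageClassifierResult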
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPImageClassifierResult", + srcs = ["sources/MPPImageClassifierResult.m"], + hdrs = ["sources/MPPImageClassifierResult.h"], + deps = [ + "//mediapipe/tasks/ios/components/containers:MPPClassificationResult", + "//mediapipe/tasks/ios/core:MPPTaskResult", + ], +) + diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h new file mode 100644 index 000000000..92fdb13cb --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h @@ -0,0 +1,44 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import "mediapipe/tasks/ios/components/containers/sources/MPPClassificationResult.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskResult.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Represents the classification results generated by `MPPImageClassifier`. **/ +NS_SWIFT_NAME(ImageClassifierResult) +@interface MPPImageClassifierResult : MPPTaskResult + +/** The `MPPClassificationResult` instance containing one set of results per classifier head. **/ +@property(nonatomic, readonly) MPPClassificationResult *classificationResult; + +/** + * Initializes a new `MPPImageClassifierResult` with the given `MPPClassificationResult` and + * timestamp (in milliseconds). + * + * @param classificationResult The `MPPClassificationResult` instance containing one set of results + * per classifier head. + * @param timestampMs The timestamp for this result. + * + * @return An instance of `MPPImageClassifierResult` initialized with the given + * `MPPClassificationResult` and timestamp (in milliseconds). + */ +- (instancetype)initWithClassificationResult:(MPPClassificationResult *)classificationResult + timestampMs:(NSInteger)timestampMs; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.m b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.m new file mode 100644 index 000000000..6dcd064eb --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.m @@ -0,0 +1,28 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
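+// A construction sketch (values are illustrative; assumes an existing
+// `MPPClassificationResult *classificationResult` produced elsewhere):
+//   MPPImageClassifierResult *result =
+//       [[MPPImageClassifierResult alloc] initWithClassificationResult:classificationResult
+//                                                            timestampMs:0];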
+ +#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h" + +@implementation MPPImageClassifierResult + +- (instancetype)initWithClassificationResult:(MPPClassificationResult *)classificationResult + timestampMs:(NSInteger)timestampMs { + self = [super initWithTimestampMs:timestampMs]; + if (self) { + _classificationResult = classificationResult; + } + return self; +} + +@end From 8c3e3456a3888a3d9a63cd8306ce743b6251731c Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:26:10 +0530 Subject: [PATCH 03/47] Added MPPImageClassifierOptions --- .../tasks/ios/vision/image_classifier/BUILD | 10 +++ .../sources/MPPImageClassifierOptions.h | 70 +++++++++++++++++++ .../sources/MPPImageClassifierOptions.m | 41 +++++++++++ 3 files changed, 121 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h create mode 100644 mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.m diff --git a/mediapipe/tasks/ios/vision/image_classifier/BUILD b/mediapipe/tasks/ios/vision/image_classifier/BUILD index 2ecfcab07..45e6e2156 100644 --- a/mediapipe/tasks/ios/vision/image_classifier/BUILD +++ b/mediapipe/tasks/ios/vision/image_classifier/BUILD @@ -26,3 +26,13 @@ objc_library( ], ) +objc_library( + name = "MPPImageClassifierOptions", + srcs = ["sources/MPPImageClassifierOptions.m"], + hdrs = ["sources/MPPImageClassifierOptions.h"], + deps = [ + ":MPPImageClassifierResult", + "//mediapipe/tasks/ios/core:MPPTaskOptions", + "//mediapipe/tasks/ios/vision/core:MPPRunningMode", + ], +) diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h new file mode 100644 index 000000000..2ca158113 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h @@ -0,0 +1,70 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h" +#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierResult.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * Options for setting up a `MPPImageClassifier`. + */ +NS_SWIFT_NAME(ImageClassifierOptions) +@interface MPPImageClassifierOptions : MPPTaskOptions + +@property(nonatomic) MPPRunningMode runningMode; + +/** + * The user-defined result callback for processing live stream data. The result callback should only be specified when the running mode is set to the live stream mode. + */ +@property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error); + +/** + * The locale to use for display names specified through the TFLite Model Metadata, if any. Defaults + * to English. 
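+ * For example, `options.displayNamesLocale = @"de";` would request German display
+ * names, provided the model metadata actually bundles that locale (the value
+ * here is illustrative).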
+ */
+@property(nonatomic, copy) NSString *displayNamesLocale;
+
+/**
+ * The maximum number of top-scored classification results to return. If < 0, all available results
+ * will be returned. If 0, an invalid argument error is returned.
+ */
+@property(nonatomic) NSInteger maxResults;
+
+/**
+ * Score threshold to override the one provided in the model metadata (if any). Results below this
+ * value are rejected.
+ */
+@property(nonatomic) float scoreThreshold;
+
+/**
+ * The allowlist of category names. If non-empty, classification results whose category name is
+ * not in this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
+ * exclusive with categoryDenylist.
+ */
+@property(nonatomic, copy) NSArray<NSString *> *categoryAllowlist;
+
+/**
+ * The denylist of category names. If non-empty, classification results whose category name is in
+ * this set will be filtered out. Duplicate or unknown category names are ignored. Mutually
+ * exclusive with categoryAllowlist.
+ */
+@property(nonatomic, copy) NSArray<NSString *> *categoryDenylist;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.m b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.m
new file mode 100644
index 000000000..e109dcc3b
--- /dev/null
+++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.m
@@ -0,0 +1,41 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
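+// A configuration sketch (all option values below are illustrative, not
+// recommendations):
+//   MPPImageClassifierOptions *options = [[MPPImageClassifierOptions alloc] init];
+//   options.maxResults = 3;
+//   options.scoreThreshold = 0.5f;
+//   options.categoryAllowlist = @[ @"cat", @"dog" ];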
+ +#import "mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h" + +@implementation MPPImageClassifierOptions + +- (instancetype)init { + self = [super init]; + if (self) { + _maxResults = -1; + _scoreThreshold = 0; + } + return self; +} + +- (id)copyWithZone:(NSZone *)zone { + MPPImageClassifierOptions *imageClassifierOptions = [super copyWithZone:zone]; + + imageClassifierOptions.scoreThreshold = self.scoreThreshold; + imageClassifierOptions.maxResults = self.maxResults; + imageClassifierOptions.categoryDenylist = self.categoryDenylist; + imageClassifierOptions.categoryAllowlist = self.categoryAllowlist; + imageClassifierOptions.displayNamesLocale = self.displayNamesLocale; + imageClassifierOptions.completion = self.completion; + + return imageClassifierOptions; +} + +@end From a0253274cc0b2e4139249989c443f28017ce44fb Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:31:09 +0530 Subject: [PATCH 04/47] Added MPPVisionPacketCreator --- mediapipe/tasks/ios/vision/core/BUILD | 15 +++++-- .../core/sources/MPPVisionPacketCreator.h | 26 +++++++++++ .../core/sources/MPPVisionPacketCreator.mm | 44 +++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h create mode 100644 mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm diff --git a/mediapipe/tasks/ios/vision/core/BUILD b/mediapipe/tasks/ios/vision/core/BUILD index 91b9078a5..360eb1cfc 100644 --- a/mediapipe/tasks/ios/vision/core/BUILD +++ b/mediapipe/tasks/ios/vision/core/BUILD @@ -19,9 +19,6 @@ objc_library( deps = [ "//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", - "//third_party/apple_frameworks:CoreMedia", - "//third_party/apple_frameworks:CoreVideo", - "//third_party/apple_frameworks:UIKit", ], ) @@ -44,5 +41,17 @@ objc_library( "//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/core:MPPTaskRunner", +objc_library( + name = "MPPVisionPacketCreator", + srcs = ["sources/MPPVisionPacketCreator.mm"], + hdrs = ["sources/MPPVisionPacketCreator.h"], + copts = [ + "-ObjC++", + "-std=c++17", + ], + deps = [ + "//mediapipe/framework:packet", + "//mediapipe/framework/formats:image", + "//mediapipe/tasks/ios/vision/core/utils:MPPImageFrameUtils", ], ) diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h new file mode 100644 index 000000000..b7b777c97 --- /dev/null +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h @@ -0,0 +1,26 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#include "mediapipe/framework/packet.h" + +/** + * This class helps create various kinds of packets for Mediapipe Vision Tasks. 
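+ *
+ * A usage sketch (illustrative; assumes an `MPPImage *image` created by the
+ * caller and a call site compiled as Objective-C++):
+ *
+ *   NSError *error = nil;
+ *   mediapipe::Packet packet = [MPPVisionPacketCreator createWithMPPImage:image error:&error];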
+ */ +@interface MPPVisionPacketCreator : NSObject + ++ (Packet)createWithMPPImage:(MPPImage *)image error:(NSError **)error; + +@end diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm new file mode 100644 index 000000000..ff5e41030 --- /dev/null +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm @@ -0,0 +1,44 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h" + +#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+ImageFrameUtils.h" + +#include "mediapipe/framework/formats/image.h" + +namespace { +using ::mediapipe::MakePacket; +using ::mediapipe::Packet; +using ::mediapipe::Image; +} // namespace + +struct freeDeleter { + void operator()(void* ptr) { free(ptr); } +} + +@implementation MPPVisionPacketCreator + ++ (Packet)createWithMPPImage:(MPPImage *)image error:(NSError **)error { + + std::unique_ptr imageFrame = [image imageFrameWithError:error]; + + if (!imageFrame) { + return nullptr; + } + + return MakePacket(std::move(imageFrame)); +} + +@end From a503fb53e0163dbe5ed9923c506a74252af5743f Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:31:29 +0530 Subject: [PATCH 05/47] Updated MPPVisionTaskRunner --- mediapipe/tasks/ios/vision/core/BUILD | 6 ++++-- .../ios/vision/core/sources/MPPVisionTaskRunner.h | 10 ++-------- .../ios/vision/core/sources/MPPVisionTaskRunner.mm | 8 +++++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/BUILD b/mediapipe/tasks/ios/vision/core/BUILD index 360eb1cfc..1364d967c 100644 --- a/mediapipe/tasks/ios/vision/core/BUILD +++ b/mediapipe/tasks/ios/vision/core/BUILD @@ -38,9 +38,11 @@ objc_library( ], deps = [ ":MPPRunningMode", - "//mediapipe/tasks/ios/common:MPPCommon", - "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", "//mediapipe/tasks/ios/core:MPPTaskRunner", + "//mediapipe/tasks/ios/core:MPPVisionPacketCreator", + ], +) + objc_library( name = "MPPVisionPacketCreator", srcs = ["sources/MPPVisionPacketCreator.mm"], diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h index 84b657305..b33cd3c8f 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.h @@ -13,7 +13,6 @@ // limitations under the License. #import - #import "mediapipe/tasks/ios/core/sources/MPPTaskRunner.h" #import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h" @@ -42,8 +41,8 @@ NS_ASSUME_NONNULL_BEGIN * intended to deliver inference results must be provided. In case of image or video running mode, * packets callback must be set to nil. * - * @param error Pointer to the memory location where errors if any should be saved. If @c NULL, no - * error will be saved. 
+ * @param error Pointer to the memory location where errors if any should be + * saved. If @c NULL, no error will be saved. * * @return An instance of `MPPVisionTaskRunner` initialized to the given MediaPipe calculator config * proto, running mode and packets callback. @@ -54,11 +53,6 @@ NS_ASSUME_NONNULL_BEGIN (mediapipe::tasks::core::PacketsCallback)packetsCallback error:(NSError **)error NS_DESIGNATED_INITIALIZER; -- (instancetype)initWithCalculatorGraphConfig:(mediapipe::CalculatorGraphConfig)graphConfig - packetsCallback: - (mediapipe::tasks::core::PacketsCallback)packetsCallback - error:(NSError **)error NS_UNAVAILABLE; - - (instancetype)init NS_UNAVAILABLE; + (instancetype)new NS_UNAVAILABLE; diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm index bfa9e34e5..7c39bf804 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm @@ -19,7 +19,8 @@ namespace { using ::mediapipe::CalculatorGraphConfig; -using ::mediapipe::tasks::core::PacketsCallback; +using ::mediapipe::Packet; +using ::mediapipe::tasks::core::PacketMap; } // namespace @interface MPPVisionTaskRunner () { @@ -29,9 +30,10 @@ using ::mediapipe::tasks::core::PacketsCallback; @implementation MPPVisionTaskRunner -- (nullable instancetype)initWithCalculatorGraphConfig:(CalculatorGraphConfig)graphConfig +- (nullable instancetype)initWithCalculatorGraphConfig:(mediapipe::CalculatorGraphConfig)graphConfig runningMode:(MPPRunningMode)runningMode - packetsCallback:(PacketsCallback)packetsCallback + packetsCallback: + (mediapipe::tasks::core::PacketsCallback)packetsCallback error:(NSError **)error { switch (runningMode) { case MPPRunningModeImage: From 42e35503d9c1071fceccd509a2e508ecd0ce5935 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:31:51 +0530 Subject: [PATCH 06/47] Removed unwanted declarations in namespace --- mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm | 2 -- 1 file changed, 2 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm index 7c39bf804..fddad964b 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionTaskRunner.mm @@ -19,8 +19,6 @@ namespace { using ::mediapipe::CalculatorGraphConfig; -using ::mediapipe::Packet; -using ::mediapipe::tasks::core::PacketMap; } // namespace @interface MPPVisionTaskRunner () { From ae05c784437d1408c501aa998a7968b55fbc205c Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:33:33 +0530 Subject: [PATCH 07/47] Updated formatting --- .../core/sources/MPPVisionPacketCreator.h | 2 +- .../core/sources/MPPVisionPacketCreator.mm | 7 ++-- .../core/utils/sources/MPPImage+Utils.h | 9 ++-- .../core/utils/sources/MPPImage+Utils.mm | 41 +++++++++---------- .../sources/MPPImageClassifierOptions.h | 5 ++- 5 files changed, 31 insertions(+), 33 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h index b7b777c97..3618d8de4 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h @@ -16,7 +16,7 @@ #include "mediapipe/framework/packet.h" -/** +/** * This class helps create various kinds of 
packets for Mediapipe Vision Tasks. */ @interface MPPVisionPacketCreator : NSObject diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm index ff5e41030..d0ee3f5df 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm @@ -1,4 +1,4 @@ -// Copyright 2019 The MediaPipe Authors. +// Copyright 2023 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -19,19 +19,18 @@ #include "mediapipe/framework/formats/image.h" namespace { +using ::mediapipe::Image; using ::mediapipe::MakePacket; using ::mediapipe::Packet; -using ::mediapipe::Image; } // namespace struct freeDeleter { - void operator()(void* ptr) { free(ptr); } + void operator()(void *ptr) { free(ptr); } } @implementation MPPVisionPacketCreator + (Packet)createWithMPPImage:(MPPImage *)image error:(NSError **)error { - std::unique_ptr imageFrame = [image imageFrameWithError:error]; if (!imageFrame) { diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h index 724bccdad..ac304d6a0 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h @@ -14,19 +14,20 @@ #import -#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" #include "mediapipe/framework/formats/image_frame.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" NS_ASSUME_NONNULL_BEGIN -/** +/** * Helper utility for converting `MPPImage` into a `mediapipe::ImageFrame`. */ @interface MPPImage (Utils) /** * Converts the `MPPImage` into a `mediapipe::ImageFrame`. - * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is converted to an RGB format. In case of grayscale images, the mono channel is duplicated - * in the R, G, B channels. + * Irrespective of whether the underlying buffer is grayscale, RGB, RGBA, BGRA etc., the MPPImage is + * converted to an RGB format. In case of grayscale images, the mono channel is duplicated in the R, + * G, B channels. * * @param error Pointer to the memory location where errors if any should be * saved. If @c NULL, no error will be saved. 
diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index 8d6efe91c..e9f2540f7 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -25,7 +25,7 @@ #include "mediapipe/framework/formats/image_format.pb.h" namespace { - using ::mediapipe::ImageFrame; +using ::mediapipe::ImageFrame; } @interface MPPPixelDataUtils : NSObject @@ -41,7 +41,8 @@ namespace { @interface MPPCVPixelBufferUtils : NSObject -+ (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; ++ (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer + error:(NSError **)error; @end @@ -124,15 +125,14 @@ namespace { @implementation MPPCVPixelBufferUtils -+ (std::unique_ptr)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error { ++ (std::unique_ptr)rgbImageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer + error:(NSError **)error { CVPixelBufferLockBaseAddress(pixelBuffer, 0); size_t width = CVPixelBufferGetWidth(pixelBuffer); size_t height = CVPixelBufferGetHeight(pixelBuffer); size_t stride = CVPixelBufferGetBytesPerRow(pixelBuffer); - - uint8_t *rgbPixelData = [MPPPixelDataUtils rgbPixelDataFromPixelData:(uint8_t *)CVPixelBufferGetBaseAddress(pixelBuffer) withWidth:CVPixelBufferGetWidth(pixelBuffer) @@ -146,17 +146,17 @@ namespace { if (!rgbPixelData) { return nullptr; } - - std::unique_ptr imageFrame = absl::make_unique( - ::mediapipe::ImageFormat::SRGB, /*width=*/width, /*height=*/height, stride, - static_cast(rgbPixelData), - /*deleter=*/free); + + std::unique_ptr imageFrame = + absl::make_unique(::mediapipe::ImageFormat::SRGB, /*width=*/width, + /*height=*/height, stride, static_cast(rgbPixelData), + /*deleter=*/free); return imageFrame; } + (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer - error:(NSError **)error { + error:(NSError **)error { uint8_t *pixelData = NULL; OSType pixelBufferFormat = CVPixelBufferGetPixelFormatType(pixelBuffer); @@ -210,11 +210,11 @@ namespace { // We have drawn the image as an RGBA image with 8 bitsPerComponent and hence can safely input // a pixel format of type kCVPixelFormatType_32RGBA for conversion by vImage. 
pixelDataToReturn = [MPPPixelDataUtils rgbPixelDataFromPixelData:srcData - withWidth:width - height:height - stride:bytesPerRow - pixelBufferFormat:kCVPixelFormatType_32RGBA - error:error]; + withWidth:width + height:height + stride:bytesPerRow + pixelBufferFormat:kCVPixelFormatType_32RGBA + error:error]; } CGContextRelease(context); @@ -224,7 +224,7 @@ namespace { std::unique_ptr imageFrame = absl::make_unique( mediapipe::ImageFormat::SRGB, /*width=*/(int)width, /*height=*/(int)height, (int)bytesPerRow, - static_cast(pixelDataToReturn), + static_cast(pixelDataToReturn), /*deleter=*/free); return imageFrame; @@ -235,10 +235,8 @@ namespace { @implementation UIImage (ImageFrameUtils) - (std::unique_ptr)imageFrameFromCIImageWithError:(NSError **)error { - if (self.CIImage.pixelBuffer) { - return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer - error:error]; + return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.CIImage.pixelBuffer error:error]; } else if (self.CIImage.CGImage) { return [MPPCGImageUtils imageFrameFromCGImage:self.CIImage.CGImage error:error]; @@ -278,8 +276,7 @@ namespace { switch (self.imageSourceType) { case MPPImageSourceTypeSampleBuffer: { CVPixelBufferRef sampleImagePixelBuffer = CMSampleBufferGetImageBuffer(self.sampleBuffer); - return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer - error:error]; + return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:sampleImagePixelBuffer error:error]; } case MPPImageSourceTypePixelBuffer: { return [MPPCVPixelBufferUtils imageFrameFromCVPixelBuffer:self.pixelBuffer error:error]; diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h index 2ca158113..f7e9a6297 100644 --- a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifierOptions.h @@ -28,8 +28,9 @@ NS_SWIFT_NAME(ImageClassifierOptions) @property(nonatomic) MPPRunningMode runningMode; -/** - * The user-defined result callback for processing live stream data. The result callback should only be specified when the running mode is set to the live stream mode. +/** + * The user-defined result callback for processing live stream data. The result callback should only + * be specified when the running mode is set to the live stream mode. 
*/ @property(nonatomic, copy) void (^completion)(MPPImageClassifierResult *result, NSError *error); From dce81342b6d622e51259c2d7364fca5f006ba29f Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:35:27 +0530 Subject: [PATCH 08/47] Updated imports of MPPImageUtils --- mediapipe/tasks/ios/vision/core/BUILD | 2 +- .../tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/BUILD b/mediapipe/tasks/ios/vision/core/BUILD index 1364d967c..47e6ce226 100644 --- a/mediapipe/tasks/ios/vision/core/BUILD +++ b/mediapipe/tasks/ios/vision/core/BUILD @@ -54,6 +54,6 @@ objc_library( deps = [ "//mediapipe/framework:packet", "//mediapipe/framework/formats:image", - "//mediapipe/tasks/ios/vision/core/utils:MPPImageFrameUtils", + "//mediapipe/tasks/ios/vision/core/utils:MPPImageUtils", ], ) diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm index d0ee3f5df..b0d3b1420 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm @@ -14,7 +14,7 @@ #import "mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h" -#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+ImageFrameUtils.h" +#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h" #include "mediapipe/framework/formats/image.h" From 5e5a1a733fc3619c7e0b1d4bc8d74f3060ee1f98 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 16 Feb 2023 01:37:50 +0530 Subject: [PATCH 09/47] Updated imports --- mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index e9f2540f7..13cfd3be4 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+ImageFrameUtils.h" +#import "mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.h" #import "mediapipe/tasks/ios/common/sources/MPPCommon.h" #import "mediapipe/tasks/ios/common/utils/sources/MPPCommonUtils.h" From 4d2dd50703c8165c0377f147f4465f613d411619 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Mon, 20 Feb 2023 11:23:04 +0530 Subject: [PATCH 10/47] Updated method name in MPPVisionPacketCreator --- .../tasks/ios/vision/core/sources/MPPVisionPacketCreator.h | 2 +- .../tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h index 3618d8de4..e8a7e91ad 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.h @@ -21,6 +21,6 @@ */ @interface MPPVisionPacketCreator : NSObject -+ (Packet)createWithMPPImage:(MPPImage *)image error:(NSError **)error; ++ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error; @end diff --git a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm index b0d3b1420..51dbc3254 100644 --- a/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm +++ b/mediapipe/tasks/ios/vision/core/sources/MPPVisionPacketCreator.mm @@ -30,7 +30,7 @@ struct freeDeleter { @implementation MPPVisionPacketCreator -+ (Packet)createWithMPPImage:(MPPImage *)image error:(NSError **)error { ++ (Packet)createPacketWithMPPImage:(MPPImage *)image error:(NSError **)error { std::unique_ptr imageFrame = [image imageFrameWithError:error]; if (!imageFrame) { From 68fdf6b6cbb99a32d04044def4ff15c141c41cdc Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Mon, 20 Feb 2023 11:27:21 +0530 Subject: [PATCH 11/47] Updated comment arguments in MPPImageUtils --- .../tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index 13cfd3be4..c8b2b8a6e 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -148,8 +148,8 @@ using ::mediapipe::ImageFrame; } std::unique_ptr imageFrame = - absl::make_unique(::mediapipe::ImageFormat::SRGB, /*width=*/width, - /*height=*/height, stride, static_cast(rgbPixelData), + absl::make_unique(::mediapipe::ImageFormat::SRGB, width, + height, stride, static_cast(rgbPixelData), /*deleter=*/free); return imageFrame; @@ -223,7 +223,7 @@ using ::mediapipe::ImageFrame; CGColorSpaceRelease(colorSpace); std::unique_ptr imageFrame = absl::make_unique( - mediapipe::ImageFormat::SRGB, /*width=*/(int)width, /*height=*/(int)height, (int)bytesPerRow, + mediapipe::ImageFormat::SRGB, (int)width, (int)height, (int)bytesPerRow, static_cast(pixelDataToReturn), /*deleter=*/free); From bc157ff990d7314b527aefe6228d4cc5c6f01078 Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 21:57:42 +0530 Subject: [PATCH 12/47] Update help.md --- docs/getting_started/help.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/getting_started/help.md 
b/docs/getting_started/help.md index a9d2ba7b9..0d950c371 100644 --- a/docs/getting_started/help.md +++ b/docs/getting_started/help.md @@ -37,8 +37,8 @@ If you open a GitHub issue, here is our policy: - **OS Platform and Distribution (e.g., Linux Ubuntu 16.04)**: - **Mobile device (e.g. iPhone 8, Pixel 2, Samsung Galaxy) if the issue happens on mobile device**: - **Bazel version**: -- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev enviroment)**: -- **Xcode & Tulsi version (if issue is related to building in mobile dev enviroment)**: +- **Android Studio, NDK, SDK versions (if issue is related to building in mobile dev environment)**: +- **Xcode & Tulsi version (if issue is related to building in mobile dev environment)**: - **Exact steps to reproduce**: ### Describe the problem From abc8c73aee79f561a087c91ad540d6f044fa9b85 Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:00:01 +0530 Subject: [PATCH 13/47] Update javascript.md --- docs/getting_started/javascript.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/javascript.md b/docs/getting_started/javascript.md index 71cec2632..3c8ba562e 100644 --- a/docs/getting_started/javascript.md +++ b/docs/getting_started/javascript.md @@ -33,7 +33,7 @@ snippets. | Browser | Platform | Notes | | ------- | ----------------------- | -------------------------------------- | -| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuschia | +| Chrome | Android / Windows / Mac | Pixel 4 and older unsupported. Fuchsia | | | | unsupported. | | Chrome | iOS | Camera unavailable in Chrome on iOS. | | Safari | iPad/iPhone/Mac | iOS and Safari on iPad / iPhone / | From 9e37b520fbefed2ee87fc25d0a25ac374cc604ca Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:01:19 +0530 Subject: [PATCH 14/47] Update faq.md --- docs/getting_started/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/faq.md b/docs/getting_started/faq.md index c42ef898c..4a694d031 100644 --- a/docs/getting_started/faq.md +++ b/docs/getting_started/faq.md @@ -59,7 +59,7 @@ The second approach allows up to [`max_in_flight`] invocations of the packets from [`CalculatorBase::Process`] are automatically ordered by timestamp before they are passed along to downstream calculators. -With either aproach, you must be aware that the calculator running in parallel +With either approach, you must be aware that the calculator running in parallel cannot maintain internal state in the same way as a normal sequential calculator. From 488b1e23a01111f183fd2a4f32b4487d85926b98 Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:01:58 +0530 Subject: [PATCH 15/47] Update troubleshooting.md --- docs/getting_started/troubleshooting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started/troubleshooting.md b/docs/getting_started/troubleshooting.md index a4f347aaa..7d371bcb8 100644 --- a/docs/getting_started/troubleshooting.md +++ b/docs/getting_started/troubleshooting.md @@ -65,7 +65,7 @@ WARNING: Download from https://storage.googleapis.com/mirror.tensorflow.org/gith ``` usually indicates that Bazel fails to download necessary dependency repositories -that MediaPipe needs. MedaiPipe has several dependency repositories that are +that MediaPipe needs. 
MediaPipe has several dependency repositories that are hosted by Google sites. In some regions, you may need to set up a network proxy or use a VPN to access those resources. You may also need to append `--host_jvm_args "-DsocksProxyHost= -DsocksProxyPort="` From 34e361f81e37a8211eddc8a2122310f8b066cfa8 Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:04:02 +0530 Subject: [PATCH 16/47] Update media_sequence.md --- docs/solutions/media_sequence.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/solutions/media_sequence.md b/docs/solutions/media_sequence.md index e6bd5fd44..48b4530c8 100644 --- a/docs/solutions/media_sequence.md +++ b/docs/solutions/media_sequence.md @@ -85,7 +85,7 @@ process new data sets, in the documentation of MediaSequence uses SequenceExamples as the format of both inputs and outputs. Annotations are encoded as inputs in a SequenceExample of metadata - that defines the labels and the path to the cooresponding video file. This + that defines the labels and the path to the corresponding video file. This metadata is passed as input to the C++ `media_sequence_demo` binary, and the output is a SequenceExample filled with images and annotations ready for model training. From 35d1b98433b36f96c759e4a7798366ed17c94d66 Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:04:49 +0530 Subject: [PATCH 17/47] Update objectron.md --- docs/solutions/objectron.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/solutions/objectron.md b/docs/solutions/objectron.md index 10483e499..190f251f2 100644 --- a/docs/solutions/objectron.md +++ b/docs/solutions/objectron.md @@ -170,7 +170,7 @@ and a The detection subgraph performs ML inference only once every few frames to reduce computation load, and decodes the output tensor to a FrameAnnotation that contains nine keypoints: the 3D bounding box's center and its eight vertices. -The tracking subgraph runs every frame, using the box traker in +The tracking subgraph runs every frame, using the box tracker in [MediaPipe Box Tracking](./box_tracking.md) to track the 2D box tightly enclosing the projection of the 3D bounding box, and lifts the tracked 2D keypoints to 3D with From 48d72e7ddc5d226b118325b943c1fd279959e772 Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:06:35 +0530 Subject: [PATCH 18/47] Update face_mesh.md --- docs/solutions/face_mesh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/solutions/face_mesh.md b/docs/solutions/face_mesh.md index 24ee760fc..87424c8fe 100644 --- a/docs/solutions/face_mesh.md +++ b/docs/solutions/face_mesh.md @@ -133,7 +133,7 @@ about the model in this [paper](https://arxiv.org/abs/2006.10962). The [Face Landmark Model](#face-landmark-model) performs a single-camera face landmark detection in the screen coordinate space: the X- and Y- coordinates are normalized screen coordinates, while the Z coordinate is relative and is scaled -as the X coodinate under the +as the X coordinate under the [weak perspective projection camera model](https://en.wikipedia.org/wiki/3D_projection#Weak_perspective_projection). 
This format is well-suited for some applications, however it does not directly enable the full spectrum of augmented reality (AR) features like aligning a From 1c37010ddefc0742529933a7f44e98e3e17c12ed Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:07:04 +0530 Subject: [PATCH 19/47] Update objectron.md --- docs/solutions/objectron.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/solutions/objectron.md b/docs/solutions/objectron.md index 190f251f2..f3b7af35a 100644 --- a/docs/solutions/objectron.md +++ b/docs/solutions/objectron.md @@ -613,7 +613,7 @@ z_ndc = 1 / Z ### Pixel Space -In this API we set upper-left coner of an image as the origin of pixel +In this API we set upper-left corner of an image as the origin of pixel coordinate. One can convert from NDC to pixel space as follows: ``` From f42b60e08f08526e813e6222d1ba7a5dcd1418ea Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:07:40 +0530 Subject: [PATCH 20/47] Update iris.md --- docs/solutions/iris.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/solutions/iris.md b/docs/solutions/iris.md index 1d36f74ca..9ca882527 100644 --- a/docs/solutions/iris.md +++ b/docs/solutions/iris.md @@ -38,7 +38,7 @@ camera, in real-time, without the need for specialized hardware. Through use of iris landmarks, the solution is also able to determine the metric distance between the subject and the camera with relative error less than 10%. Note that iris tracking does not infer the location at which people are looking, nor does -it provide any form of identity recognition. With the cross-platfrom capability +it provide any form of identity recognition. With the cross-platform capability of the MediaPipe framework, MediaPipe Iris can run on most modern [mobile phones](#mobile), [desktops/laptops](#desktop) and even on the [web](#web). From 86a2fa300b44dd55eae41cc130b8e6843494b62d Mon Sep 17 00:00:00 2001 From: kuaashish <98159216+kuaashish@users.noreply.github.com> Date: Thu, 23 Feb 2023 22:08:05 +0530 Subject: [PATCH 21/47] Update iris.md --- docs/solutions/iris.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/solutions/iris.md b/docs/solutions/iris.md index 9ca882527..ba7aaaa02 100644 --- a/docs/solutions/iris.md +++ b/docs/solutions/iris.md @@ -99,7 +99,7 @@ You can also find more details in this ### Iris Landmark Model The iris model takes an image patch of the eye region and estimates both the eye -landmarks (along the eyelid) and iris landmarks (along ths iris contour). You +landmarks (along the eyelid) and iris landmarks (along this iris contour). You can find more details in this [paper](https://arxiv.org/abs/2006.11341). ![iris_tracking_eye_and_iris_landmarks.png](https://mediapipe.dev/images/mobile/iris_tracking_eye_and_iris_landmarks.png) | From 9054ff728330009175bd4d4778da60fa54fc1b43 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 23 Feb 2023 17:20:59 -0800 Subject: [PATCH 22/47] Internal change PiperOrigin-RevId: 511928038 --- docs/_layouts/forward.html | 13 +++++++++++++ docs/tools/visualizer.md | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 docs/_layouts/forward.html diff --git a/docs/_layouts/forward.html b/docs/_layouts/forward.html new file mode 100644 index 000000000..ec97e98cd --- /dev/null +++ b/docs/_layouts/forward.html @@ -0,0 +1,13 @@ + + + + + + Redirecting + + +
+    <p>This page now lives on <a href="{{ page.target }}">https://developers.google.com/mediapipe/</a>. If you aren't automatically
+    redirected, follow this
+    <a href="{{ page.target }}">link</a>.</p>
+ + diff --git a/docs/tools/visualizer.md b/docs/tools/visualizer.md index 5ed2de2d2..45111a36e 100644 --- a/docs/tools/visualizer.md +++ b/docs/tools/visualizer.md @@ -1,5 +1,6 @@ --- -layout: default +layout: forward +target: https://developers.google.com/mediapipe/framework/tools/visualizer title: Visualizer parent: Tools nav_order: 1 From 17466fb7f1182f7634d51f63bd90e952e2d9bc8d Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 24 Feb 2023 10:54:46 -0800 Subject: [PATCH 23/47] Internal change PiperOrigin-RevId: 512111461 --- .../hand_detector/hand_detector_graph.cc | 21 +++-- .../tasks/cc/vision/hand_landmarker/BUILD | 1 + .../hand_landmarker/hand_landmarker_graph.cc | 79 +++++++++++------- .../testdata/vision/hand_landmarker.task | Bin 7819037 -> 7819041 bytes third_party/external_files.bzl | 8 +- 5 files changed, 68 insertions(+), 41 deletions(-) diff --git a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc index d7163e331..923eab1ca 100644 --- a/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_detector/hand_detector_graph.cc @@ -257,19 +257,28 @@ class HandDetectorGraph : public core::ModelTaskGraph { preprocessed_tensors >> inference.In("TENSORS"); auto model_output_tensors = inference.Out("TENSORS"); + // TODO: support hand detection metadata. + bool has_metadata = false; + // Generates a single side packet containing a vector of SSD anchors. auto& ssd_anchor = graph.AddNode("SsdAnchorsCalculator"); - ConfigureSsdAnchorsCalculator( - &ssd_anchor.GetOptions()); + auto& ssd_anchor_options = + ssd_anchor.GetOptions(); + if (!has_metadata) { + ConfigureSsdAnchorsCalculator(&ssd_anchor_options); + } auto anchors = ssd_anchor.SideOut(""); // Converts output tensors to Detections. auto& tensors_to_detections = graph.AddNode("TensorsToDetectionsCalculator"); - ConfigureTensorsToDetectionsCalculator( - subgraph_options, - &tensors_to_detections - .GetOptions()); + if (!has_metadata) { + ConfigureTensorsToDetectionsCalculator( + subgraph_options, + &tensors_to_detections + .GetOptions()); + } + model_output_tensors >> tensors_to_detections.In("TENSORS"); anchors >> tensors_to_detections.SideIn("ANCHORS"); auto detections = tensors_to_detections.Out("DETECTIONS"); diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD index 2552e7a10..7a83816b8 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/BUILD +++ b/mediapipe/tasks/cc/vision/hand_landmarker/BUILD @@ -148,6 +148,7 @@ cc_library( "//mediapipe/tasks/cc/vision/hand_landmarker/calculators:hand_landmarks_deduplication_calculator", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarker_graph_options_cc_proto", "//mediapipe/tasks/cc/vision/hand_landmarker/proto:hand_landmarks_detector_graph_options_cc_proto", + "//mediapipe/util:graph_builder_utils", ], alwayslink = 1, ) diff --git a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc index 74d288ac1..4a3db9f4d 100644 --- a/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc +++ b/mediapipe/tasks/cc/vision/hand_landmarker/hand_landmarker_graph.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include +#include #include #include #include @@ -41,6 +42,7 @@ limitations under the License. 
#include "mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator.pb.h" #include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options.pb.h" #include "mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options.pb.h" +#include "mediapipe/util/graph_builder_utils.h" namespace mediapipe { namespace tasks { @@ -53,7 +55,7 @@ using ::mediapipe::NormalizedRect; using ::mediapipe::api2::Input; using ::mediapipe::api2::Output; using ::mediapipe::api2::builder::Graph; -using ::mediapipe::api2::builder::Source; +using ::mediapipe::api2::builder::Stream; using ::mediapipe::tasks::components::utils::DisallowIf; using ::mediapipe::tasks::core::ModelAssetBundleResources; using ::mediapipe::tasks::metadata::SetExternalFile; @@ -78,40 +80,46 @@ constexpr char kHandLandmarksDetectorTFLiteName[] = "hand_landmarks_detector.tflite"; struct HandLandmarkerOutputs { - Source> landmark_lists; - Source> world_landmark_lists; - Source> hand_rects_next_frame; - Source> handednesses; - Source> palm_rects; - Source> palm_detections; - Source image; + Stream> landmark_lists; + Stream> world_landmark_lists; + Stream> hand_rects_next_frame; + Stream> handednesses; + Stream> palm_rects; + Stream> palm_detections; + Stream image; }; // Sets the base options in the sub tasks. absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources, HandLandmarkerGraphOptions* options, bool is_copy) { - ASSIGN_OR_RETURN(const auto hand_detector_file, - resources.GetModelFile(kHandDetectorTFLiteName)); auto* hand_detector_graph_options = options->mutable_hand_detector_graph_options(); - SetExternalFile(hand_detector_file, - hand_detector_graph_options->mutable_base_options() - ->mutable_model_asset(), - is_copy); + if (!hand_detector_graph_options->base_options().has_model_asset()) { + ASSIGN_OR_RETURN(const auto hand_detector_file, + resources.GetModelFile(kHandDetectorTFLiteName)); + SetExternalFile(hand_detector_file, + hand_detector_graph_options->mutable_base_options() + ->mutable_model_asset(), + is_copy); + } hand_detector_graph_options->mutable_base_options() ->mutable_acceleration() ->CopyFrom(options->base_options().acceleration()); hand_detector_graph_options->mutable_base_options()->set_use_stream_mode( options->base_options().use_stream_mode()); - ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file, - resources.GetModelFile(kHandLandmarksDetectorTFLiteName)); auto* hand_landmarks_detector_graph_options = options->mutable_hand_landmarks_detector_graph_options(); - SetExternalFile(hand_landmarks_detector_file, - hand_landmarks_detector_graph_options->mutable_base_options() - ->mutable_model_asset(), - is_copy); + if (!hand_landmarks_detector_graph_options->base_options() + .has_model_asset()) { + ASSIGN_OR_RETURN(const auto hand_landmarks_detector_file, + resources.GetModelFile(kHandLandmarksDetectorTFLiteName)); + SetExternalFile( + hand_landmarks_detector_file, + hand_landmarks_detector_graph_options->mutable_base_options() + ->mutable_model_asset(), + is_copy); + } hand_landmarks_detector_graph_options->mutable_base_options() ->mutable_acceleration() ->CopyFrom(options->base_options().acceleration()); @@ -119,7 +127,6 @@ absl::Status SetSubTaskBaseOptions(const ModelAssetBundleResources& resources, ->set_use_stream_mode(options->base_options().use_stream_mode()); return absl::OkStatus(); } - } // namespace // A "mediapipe.tasks.vision.hand_landmarker.HandLandmarkerGraph" performs hand @@ -219,12 +226,15 @@ class 
HandLandmarkerGraph : public core::ModelTaskGraph { !sc->Service(::mediapipe::tasks::core::kModelResourcesCacheService) .IsAvailable())); } + Stream image_in = graph.In(kImageTag).Cast(); + std::optional> norm_rect_in; + if (HasInput(sc->OriginalNode(), kNormRectTag)) { + norm_rect_in = graph.In(kNormRectTag).Cast(); + } ASSIGN_OR_RETURN( auto hand_landmarker_outputs, - BuildHandLandmarkerGraph( - sc->Options(), - graph[Input(kImageTag)], - graph[Input::Optional(kNormRectTag)], graph)); + BuildHandLandmarkerGraph(sc->Options(), + image_in, norm_rect_in, graph)); hand_landmarker_outputs.landmark_lists >> graph[Output>(kLandmarksTag)]; hand_landmarker_outputs.world_landmark_lists >> @@ -262,8 +272,8 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { // image_in: (mediapipe::Image) stream to run hand landmark detection on. // graph: the mediapipe graph instance to be updated. absl::StatusOr BuildHandLandmarkerGraph( - const HandLandmarkerGraphOptions& tasks_options, Source image_in, - Source norm_rect_in, Graph& graph) { + const HandLandmarkerGraphOptions& tasks_options, Stream image_in, + std::optional> norm_rect_in, Graph& graph) { const int max_num_hands = tasks_options.hand_detector_graph_options().num_hands(); @@ -293,10 +303,15 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { // track the hands from the last frame. auto image_for_hand_detector = DisallowIf(image_in, has_enough_hands, graph); - auto norm_rect_in_for_hand_detector = - DisallowIf(norm_rect_in, has_enough_hands, graph); + std::optional> norm_rect_in_for_hand_detector; + if (norm_rect_in) { + norm_rect_in_for_hand_detector = + DisallowIf(norm_rect_in.value(), has_enough_hands, graph); + } image_for_hand_detector >> hand_detector.In("IMAGE"); - norm_rect_in_for_hand_detector >> hand_detector.In("NORM_RECT"); + if (norm_rect_in_for_hand_detector) { + norm_rect_in_for_hand_detector.value() >> hand_detector.In("NORM_RECT"); + } auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS"); auto& hand_association = graph.AddNode("HandAssociationCalculator"); hand_association.GetOptions() @@ -313,7 +328,9 @@ class HandLandmarkerGraph : public core::ModelTaskGraph { // series, and we don't want to enable the tracking and hand associations // between input images. Always use the hand detector graph. 
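    // (Illustrative aside, not part of this patch: DisallowIf(stream,
    // condition, graph) gates a stream so that packets pass only while
    // `condition` is false. In the stream-mode branch above, the detector
    // therefore stops receiving frames once `has_enough_hands` is true and
    // tracking alone carries the landmarks forward.)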
image_in >> hand_detector.In("IMAGE"); - norm_rect_in >> hand_detector.In("NORM_RECT"); + if (norm_rect_in) { + norm_rect_in.value() >> hand_detector.In("NORM_RECT"); + } auto hand_rects_from_hand_detector = hand_detector.Out("HAND_RECTS"); hand_rects_from_hand_detector >> clip_hand_rects.In(""); } diff --git a/mediapipe/tasks/testdata/vision/hand_landmarker.task b/mediapipe/tasks/testdata/vision/hand_landmarker.task index 1ae9f7f6b33c52e16493b566cc76d014655c4d25..748b2f0138d4d0c18bf5abcf54d0ffa357c43c70 100644 GIT binary patch delta 550 zcmZY6NlsJ&6ouheR8fk^0Tl%SX)IAekw#EK6s0K}@QDrB0RlE40!sTvCOSBA36+>& zV&c$Y5fZk*8ko8c|H2_R`DQ1%gMZ`aZu}QYf?#+so+u4+_h)-#^qZ`hm1o7Rgq3e4 ztpnCUtH3&B6)TRcTdOr>ttL#;UdItkauH zHD>pH|DMjKa0c}_i*q=S1~lRVE}{vS(2N$eq79ePjt*SGRa`?Sx^NxcxPhDK!7bcI zFZ$4r0ic`RTNpYRz=SjGxgv9^`YuJ`ruY$K>=a)R2$t2%mzU}@E?;tjw4o9j(h5Pe;BJ){Rk#)cdTM_G^ z6}5`3606iYWR+Rv)?q7VRao>Zts_>IRc+N+N3B}xn04H$vrbqit+-Whow6FNMyu(E z63P63zqeBP1e$RgXK)rRIEN(8qZMtqfOcF&2QJ| Date: Sun, 26 Feb 2023 11:52:32 -0800 Subject: [PATCH 24/47] Internal change PiperOrigin-RevId: 512457466 --- mediapipe/framework/tool/status_util.cc | 9 +++++---- mediapipe/framework/tool/status_util.h | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/mediapipe/framework/tool/status_util.cc b/mediapipe/framework/tool/status_util.cc index 0e3a59246..401a1b63c 100644 --- a/mediapipe/framework/tool/status_util.cc +++ b/mediapipe/framework/tool/status_util.cc @@ -18,15 +18,16 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" namespace mediapipe { namespace tool { -absl::Status StatusInvalid(const std::string& message) { +absl::Status StatusInvalid(absl::string_view message) { return absl::Status(absl::StatusCode::kInvalidArgument, message); } -absl::Status StatusFail(const std::string& message) { +absl::Status StatusFail(absl::string_view message) { return absl::Status(absl::StatusCode::kUnknown, message); } @@ -35,12 +36,12 @@ absl::Status StatusStop() { "mediapipe::tool::StatusStop()"); } -absl::Status AddStatusPrefix(const std::string& prefix, +absl::Status AddStatusPrefix(absl::string_view prefix, const absl::Status& status) { return absl::Status(status.code(), absl::StrCat(prefix, status.message())); } -absl::Status CombinedStatus(const std::string& general_comment, +absl::Status CombinedStatus(absl::string_view general_comment, const std::vector& statuses) { // The final error code is absl::StatusCode::kUnknown if not all // the error codes are the same. Otherwise it is the same error code diff --git a/mediapipe/framework/tool/status_util.h b/mediapipe/framework/tool/status_util.h index 8b4bc02d2..0db03ec4d 100644 --- a/mediapipe/framework/tool/status_util.h +++ b/mediapipe/framework/tool/status_util.h @@ -19,6 +19,7 @@ #include #include "absl/base/macros.h" +#include "absl/strings/string_view.h" #include "mediapipe/framework/port/status.h" namespace mediapipe { @@ -34,16 +35,16 @@ absl::Status StatusStop(); // Return a status which signals an invalid initial condition (for // example an InputSidePacket does not include all necessary fields). ABSL_DEPRECATED("Use absl::InvalidArgumentError(error_message) instead.") -absl::Status StatusInvalid(const std::string& error_message); +absl::Status StatusInvalid(absl::string_view error_message); // Return a status which signals that something unexpectedly failed. 
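The `const std::string&` to `absl::string_view` migration above saves callers a temporary `std::string` when they hold a literal or a substring. A minimal sketch of the calling pattern (illustrative only, not code from this patch; it assumes nothing beyond Abseil's status and string_view headers):

#include <string>

#include "absl/status/status.h"
#include "absl/strings/string_view.h"

// Same shape as the declaration above.
absl::Status StatusInvalid(absl::string_view message) {
  return absl::Status(absl::StatusCode::kInvalidArgument, message);
}

int main() {
  std::string owned = "bad input: missing field";
  absl::Status a = StatusInvalid("literal");    // no std::string temporary
  absl::Status b = StatusInvalid(owned);        // implicit, copy-free view
  absl::Status c = StatusInvalid(absl::string_view(owned).substr(0, 9));
  return (a.ok() || b.ok() || c.ok()) ? 1 : 0;
}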
ABSL_DEPRECATED("Use absl::UnknownError(error_message) instead.") -absl::Status StatusFail(const std::string& error_message); +absl::Status StatusFail(absl::string_view error_message); // Prefixes the given string to the error message in status. // This function should be considered internal to the framework. // TODO Replace usage of AddStatusPrefix with util::Annotate(). -absl::Status AddStatusPrefix(const std::string& prefix, +absl::Status AddStatusPrefix(absl::string_view prefix, const absl::Status& status); // Combine a vector of absl::Status into a single composite status. @@ -51,7 +52,7 @@ absl::Status AddStatusPrefix(const std::string& prefix, // will be returned. // This function should be considered internal to the framework. // TODO Move this function to somewhere with less visibility. -absl::Status CombinedStatus(const std::string& general_comment, +absl::Status CombinedStatus(absl::string_view general_comment, const std::vector& statuses); } // namespace tool From 39b2fec60f5a2c43aad8b897988feb920f0885ce Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 27 Feb 2023 09:08:20 -0800 Subject: [PATCH 25/47] Internal change PiperOrigin-RevId: 512643827 --- .../components/GlSurfaceViewRenderer.java | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java b/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java index 7a6c547a2..b836a38f3 100644 --- a/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java +++ b/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java @@ -15,6 +15,7 @@ package com.google.mediapipe.components; import static java.lang.Math.max; +import static java.lang.Math.min; import android.graphics.SurfaceTexture; import android.opengl.GLES11Ext; @@ -56,6 +57,7 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { private int frameUniform; private int textureTarget = GLES11Ext.GL_TEXTURE_EXTERNAL_OES; private int textureTransformUniform; + private boolean shouldFitToWidth = false; // Controls the alignment between frame size and surface size, 0.5f default is centered. private float alignmentHorizontal = 0.5f; private float alignmentVertical = 0.5f; @@ -158,13 +160,17 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { // TODO: compute scale from surfaceTexture size. float scaleWidth = frameWidth > 0 ? (float) surfaceWidth / (float) frameWidth : 1.0f; float scaleHeight = frameHeight > 0 ? (float) surfaceHeight / (float) frameHeight : 1.0f; - // Whichever of the two scales is greater corresponds to the dimension where the image - // is proportionally smaller than the view. Dividing both scales by that number results + // By default whichever of the two scales is greater corresponds to the dimension where the + // image is proportionally smaller than the view. Dividing both scales by that number results // in that dimension having scale 1.0, and thus touching the edges of the view, while the - // other is cropped proportionally. - float maxScale = max(scaleWidth, scaleHeight); - scaleWidth /= maxScale; - scaleHeight /= maxScale; + // other is cropped proportionally. If shouldFitToWidth is set as true, use the min scale + // if frame width is greater than frame height. 
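To make the crop-versus-letterbox choice described above concrete, here is the same scale arithmetic in isolation (an illustrative C++ sketch, not code from this patch):

#include <algorithm>

struct Scale { float width; float height; };

// max(sw, sh) fills the surface and crops the overflowing dimension;
// min(sw, sh) fits the whole frame and letterboxes instead.
Scale ComputeScale(float frame_w, float frame_h, float surface_w,
                   float surface_h, bool fit_to_width) {
  const float sw = surface_w / frame_w;
  const float sh = surface_h / frame_h;
  const float s = (fit_to_width && frame_w > frame_h) ? std::min(sw, sh)
                                                      : std::max(sw, sh);
  // The dimension whose ratio equals s scales to exactly 1.0, touching both
  // edges of the view; the other is cropped (fill) or padded (fit).
  return {sw / s, sh / s};
}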
+ float scale = max(scaleWidth, scaleHeight); + if (shouldFitToWidth && (frameWidth > frameHeight)) { + scale = min(scaleWidth, scaleHeight); + } + scaleWidth /= scale; + scaleHeight /= scale; // Alignment controls where the visible section is placed within the full camera frame, with // (0, 0) being the bottom left, and (1, 1) being the top right. @@ -232,6 +238,11 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { frameHeight = height; } + /** Supports fit to width when the frame width is greater than the frame height. */ + public void setShouldFitToWidth(boolean shouldFitToWidth) { + this.shouldFitToWidth = shouldFitToWidth; + } + /** * When the aspect ratios between the camera frame and the surface size are mismatched, this * controls how the image is aligned. 0.0 means aligning the left/bottom edges; 1.0 means aligning From 9f59d4d01beb662370822ea35cff82b398cf2023 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Mon, 27 Feb 2023 10:46:49 -0800 Subject: [PATCH 26/47] Remove cosineSimilarity() from AudioEmbedder PiperOrigin-RevId: 512671255 --- mediapipe/tasks/cc/audio/audio_embedder/BUILD | 1 - .../cc/audio/audio_embedder/audio_embedder.cc | 7 ---- .../cc/audio/audio_embedder/audio_embedder.h | 10 ----- .../audio_embedder/audio_embedder_test.cc | 26 +----------- .../com/google/mediapipe/tasks/audio/BUILD | 2 - .../audio/audioembedder/AudioEmbedder.java | 13 ------ mediapipe/tasks/python/audio/BUILD | 1 - .../tasks/python/audio/audio_embedder.py | 24 ----------- .../python/test/audio/audio_embedder_test.py | 42 +++---------------- .../tasks/web/audio/audio_embedder/BUILD | 2 - .../audio/audio_embedder/audio_embedder.ts | 15 ------- 11 files changed, 7 insertions(+), 136 deletions(-) diff --git a/mediapipe/tasks/cc/audio/audio_embedder/BUILD b/mediapipe/tasks/cc/audio/audio_embedder/BUILD index 1dfdd6f1b..d79a6f01e 100644 --- a/mediapipe/tasks/cc/audio/audio_embedder/BUILD +++ b/mediapipe/tasks/cc/audio/audio_embedder/BUILD @@ -35,7 +35,6 @@ cc_library( "//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto", "//mediapipe/tasks/cc/components/processors:embedder_options", "//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto", - "//mediapipe/tasks/cc/components/utils:cosine_similarity", "//mediapipe/tasks/cc/core:base_options", "//mediapipe/tasks/cc/core:task_runner", "//mediapipe/tasks/cc/core/proto:inference_subgraph_cc_proto", diff --git a/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.cc b/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.cc index 1c4a524d6..8dd384c48 100644 --- a/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.cc +++ b/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.cc @@ -29,7 +29,6 @@ limitations under the License. 
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h" #include "mediapipe/tasks/cc/components/processors/embedder_options.h" #include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h" -#include "mediapipe/tasks/cc/components/utils/cosine_similarity.h" #include "mediapipe/tasks/cc/core/proto/inference_subgraph.pb.h" #include "mediapipe/tasks/cc/core/task_runner.h" #include "tensorflow/lite/core/api/op_resolver.h" @@ -147,10 +146,4 @@ absl::Status AudioEmbedder::EmbedAsync(Matrix audio_block, .At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}); } -absl::StatusOr AudioEmbedder::CosineSimilarity( - const components::containers::Embedding& u, - const components::containers::Embedding& v) { - return components::utils::CosineSimilarity(u, v); -} - } // namespace mediapipe::tasks::audio::audio_embedder diff --git a/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.h b/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.h index 31cb61422..c5f548a60 100644 --- a/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.h +++ b/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder.h @@ -125,16 +125,6 @@ class AudioEmbedder : core::BaseAudioTaskApi { // Shuts down the AudioEmbedder when all works are done. absl::Status Close() { return runner_->Close(); } - - // Utility function to compute cosine similarity [1] between two embeddings. - // May return an InvalidArgumentError if e.g. the embeddings are of different - // types (quantized vs. float), have different sizes, or have a an L2-norm of - // 0. - // - // [1]: https://en.wikipedia.org/wiki/Cosine_similarity - static absl::StatusOr CosineSimilarity( - const components::containers::Embedding& u, - const components::containers::Embedding& v); }; } // namespace mediapipe::tasks::audio::audio_embedder diff --git a/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder_test.cc b/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder_test.cc index 749066ead..e388423b1 100644 --- a/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder_test.cc +++ b/mediapipe/tasks/cc/audio/audio_embedder/audio_embedder_test.cc @@ -54,8 +54,6 @@ constexpr char kModelWithMetadata[] = "yamnet_embedding_metadata.tflite"; constexpr char k16kTestWavFilename[] = "speech_16000_hz_mono.wav"; constexpr char k48kTestWavFilename[] = "speech_48000_hz_mono.wav"; constexpr char k16kTestWavForTwoHeadsFilename[] = "two_heads_16000_hz_mono.wav"; -constexpr float kSpeechSimilarities[] = {0.985359, 0.994349, 0.993227, 0.996658, - 0.996384}; constexpr int kMilliSecondsPerSecond = 1000; constexpr int kYamnetNumOfAudioSamples = 15600; constexpr int kYamnetAudioSampleRate = 16000; @@ -163,15 +161,9 @@ TEST_F(EmbedTest, SucceedsWithSameAudioAtDifferentSampleRates) { audio_embedder->Embed(audio_buffer1, 16000)); MP_ASSERT_OK_AND_ASSIGN(auto result2, audio_embedder->Embed(audio_buffer2, 48000)); - int expected_size = sizeof(kSpeechSimilarities) / sizeof(float); + int expected_size = 5; ASSERT_EQ(result1.size(), expected_size); ASSERT_EQ(result2.size(), expected_size); - for (int i = 0; i < expected_size; ++i) { - MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity( - result1[i].embeddings[0], - result2[i].embeddings[0])); - EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6); - } MP_EXPECT_OK(audio_embedder->Close()); } @@ -192,10 +184,6 @@ TEST_F(EmbedTest, SucceedsWithDifferentAudios) { audio_embedder->Embed(audio_buffer2, kYamnetAudioSampleRate)); ASSERT_EQ(result1.size(), 5); ASSERT_EQ(result2.size(), 1); - 
MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity( - result1[0].embeddings[0], - result2[0].embeddings[0])); - EXPECT_NEAR(similarity, 0.09017f, 1e-6); MP_EXPECT_OK(audio_embedder->Close()); } @@ -258,15 +246,9 @@ TEST_F(EmbedAsyncTest, SucceedsWithSameAudioAtDifferentSampleRates) { RunAudioEmbedderInStreamMode(k16kTestWavFilename, 16000, &result1); std::vector result2; RunAudioEmbedderInStreamMode(k48kTestWavFilename, 48000, &result2); - int expected_size = sizeof(kSpeechSimilarities) / sizeof(float); + int expected_size = 5; ASSERT_EQ(result1.size(), expected_size); ASSERT_EQ(result2.size(), expected_size); - for (int i = 0; i < expected_size; ++i) { - MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity( - result1[i].embeddings[0], - result2[i].embeddings[0])); - EXPECT_NEAR(similarity, kSpeechSimilarities[i], 1e-6); - } } TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) { @@ -276,10 +258,6 @@ TEST_F(EmbedAsyncTest, SucceedsWithDifferentAudios) { RunAudioEmbedderInStreamMode(k16kTestWavForTwoHeadsFilename, 16000, &result2); ASSERT_EQ(result1.size(), 5); ASSERT_EQ(result2.size(), 1); - MP_ASSERT_OK_AND_ASSIGN(double similarity, AudioEmbedder::CosineSimilarity( - result1[0].embeddings[0], - result2[0].embeddings[0])); - EXPECT_NEAR(similarity, 0.09017f, 1e-6); } } // namespace diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/BUILD b/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/BUILD index 50ee56f66..7f9363340 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/BUILD +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/BUILD @@ -101,9 +101,7 @@ android_library( "//mediapipe/tasks/cc/core/proto:base_options_java_proto_lite", "//mediapipe/tasks/java/com/google/mediapipe/tasks/audio:libmediapipe_tasks_audio_jni_lib", "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:audiodata", - "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:embedding", "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/containers:embeddingresult", - "//mediapipe/tasks/java/com/google/mediapipe/tasks/components/utils:cosinesimilarity", "//mediapipe/tasks/java/com/google/mediapipe/tasks/core", "//third_party:autovalue", "@maven//:com_google_guava_guava", diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/audioembedder/AudioEmbedder.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/audioembedder/AudioEmbedder.java index 077f28ca2..67d3f8b5f 100644 --- a/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/audioembedder/AudioEmbedder.java +++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/audio/audioembedder/AudioEmbedder.java @@ -26,10 +26,8 @@ import com.google.mediapipe.tasks.audio.audioembedder.proto.AudioEmbedderGraphOp import com.google.mediapipe.tasks.audio.core.BaseAudioTaskApi; import com.google.mediapipe.tasks.audio.core.RunningMode; import com.google.mediapipe.tasks.components.containers.AudioData; -import com.google.mediapipe.tasks.components.containers.Embedding; import com.google.mediapipe.tasks.components.containers.proto.EmbeddingsProto; import com.google.mediapipe.tasks.components.processors.proto.EmbedderOptionsProto; -import com.google.mediapipe.tasks.components.utils.CosineSimilarity; import com.google.mediapipe.tasks.core.BaseOptions; import com.google.mediapipe.tasks.core.ErrorListener; import com.google.mediapipe.tasks.core.OutputHandler; @@ -273,17 +271,6 @@ public final class AudioEmbedder extends BaseAudioTaskApi { 
sendAudioStreamData(audioBlock, timestampMs); } - /** - * Utility function to compute cosine - * similarity between two {@link Embedding} objects. - * - * @throws IllegalArgumentException if the embeddings are of different types (float vs. - * quantized), have different sizes, or have an L2-norm of 0. - */ - public static double cosineSimilarity(Embedding u, Embedding v) { - return CosineSimilarity.compute(u, v); - } - /** Options for setting up and {@link AudioEmbedder}. */ @AutoValue public abstract static class AudioEmbedderOptions extends TaskOptions { diff --git a/mediapipe/tasks/python/audio/BUILD b/mediapipe/tasks/python/audio/BUILD index 6dda7a53c..9d8af1463 100644 --- a/mediapipe/tasks/python/audio/BUILD +++ b/mediapipe/tasks/python/audio/BUILD @@ -56,7 +56,6 @@ py_library( "//mediapipe/tasks/python/audio/core:base_audio_task_api", "//mediapipe/tasks/python/components/containers:audio_data", "//mediapipe/tasks/python/components/containers:embedding_result", - "//mediapipe/tasks/python/components/utils:cosine_similarity", "//mediapipe/tasks/python/core:base_options", "//mediapipe/tasks/python/core:optional_dependencies", "//mediapipe/tasks/python/core:task_info", diff --git a/mediapipe/tasks/python/audio/audio_embedder.py b/mediapipe/tasks/python/audio/audio_embedder.py index 4c37783e9..835dd0e31 100644 --- a/mediapipe/tasks/python/audio/audio_embedder.py +++ b/mediapipe/tasks/python/audio/audio_embedder.py @@ -26,7 +26,6 @@ from mediapipe.tasks.python.audio.core import audio_task_running_mode as running from mediapipe.tasks.python.audio.core import base_audio_task_api from mediapipe.tasks.python.components.containers import audio_data as audio_data_module from mediapipe.tasks.python.components.containers import embedding_result as embedding_result_module -from mediapipe.tasks.python.components.utils import cosine_similarity from mediapipe.tasks.python.core import base_options as base_options_module from mediapipe.tasks.python.core import task_info as task_info_module from mediapipe.tasks.python.core.optional_dependencies import doc_controls @@ -284,26 +283,3 @@ class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi): packet_creator.create_matrix(audio_block.buffer, transpose=True).at( timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND) }) - - @classmethod - def cosine_similarity(cls, u: embedding_result_module.Embedding, - v: embedding_result_module.Embedding) -> float: - """Utility function to compute cosine similarity between two embedding entries. - - May return an InvalidArgumentError if e.g. the feature vectors are - of different types (quantized vs. float), have different sizes, or have a - an L2-norm of 0. - - Args: - u: An embedding entry. - v: An embedding entry. - - Returns: - The cosine similarity for the two embeddings. - - Raises: - ValueError: May return an error if e.g. the feature vectors are of - different types (quantized vs. float), have different sizes, or have - an L2-norm of 0. 
- """ - return cosine_similarity.cosine_similarity(u, v) diff --git a/mediapipe/tasks/python/test/audio/audio_embedder_test.py b/mediapipe/tasks/python/test/audio/audio_embedder_test.py index f280235d7..934cdc8db 100644 --- a/mediapipe/tasks/python/test/audio/audio_embedder_test.py +++ b/mediapipe/tasks/python/test/audio/audio_embedder_test.py @@ -42,13 +42,10 @@ _SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav' _SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav' _TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav' _TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio' -_SPEECH_SIMILARITIES = [0.985359, 0.994349, 0.993227, 0.996658, 0.996384] _YAMNET_NUM_OF_SAMPLES = 15600 _MILLSECONDS_PER_SECOND = 1000 # Tolerance for embedding vector coordinate values. _EPSILON = 3e-6 -# Tolerance for cosine similarity evaluation. -_SIMILARITY_TOLERANCE = 1e-6 class ModelFileType(enum.Enum): @@ -98,27 +95,6 @@ class AudioEmbedderTest(parameterized.TestCase): else: self.assertEqual(embedding_result.embedding.dtype, float) - def _check_cosine_similarity(self, result0, result1, expected_similarity): - # Checks cosine similarity. - similarity = _AudioEmbedder.cosine_similarity(result0.embeddings[0], - result1.embeddings[0]) - self.assertAlmostEqual( - similarity, expected_similarity, delta=_SIMILARITY_TOLERANCE) - - def _check_yamnet_result(self, - embedding_result0_list: List[_AudioEmbedderResult], - embedding_result1_list: List[_AudioEmbedderResult], - expected_similarities: List[float]): - expected_size = len(expected_similarities) - self.assertLen(embedding_result0_list, expected_size) - self.assertLen(embedding_result1_list, expected_size) - - for idx in range(expected_size): - embedding_result0 = embedding_result0_list[idx] - embedding_result1 = embedding_result1_list[idx] - self._check_cosine_similarity(embedding_result0, embedding_result1, - expected_similarities[idx]) - def test_create_from_file_succeeds_with_valid_model_path(self): # Creates with default option and valid model file successfully. with _AudioEmbedder.create_from_model_path( @@ -176,7 +152,7 @@ class AudioEmbedderTest(parameterized.TestCase): embedding_result0_list = embedder.embed(self._read_wav_file(audio_file0)) embedding_result1_list = embedder.embed(self._read_wav_file(audio_file1)) - # Checks embeddings and cosine similarity. + # Checks embeddings. 
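The helpers removed across the C++, Java, Python, and web APIs in this patch all computed the same quantity, so callers that still need it can derive it from the embedding vectors directly. An illustrative C++ sketch (assumes float embeddings of equal, nonzero length):

#include <cmath>
#include <cstddef>
#include <vector>

double CosineSimilarity(const std::vector<float>& u,
                        const std::vector<float>& v) {
  double dot = 0.0, norm_u = 0.0, norm_v = 0.0;
  for (size_t i = 0; i < u.size(); ++i) {
    dot += u[i] * v[i];
    norm_u += u[i] * u[i];
    norm_v += v[i] * v[i];
  }
  // Undefined when either vector has an L2-norm of 0, the same error case
  // the removed utilities documented.
  return dot / (std::sqrt(norm_u) * std::sqrt(norm_v));
}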
expected_result0_value, expected_result1_value = expected_first_values self._check_embedding_size(embedding_result0_list[0], quantize, expected_size) @@ -186,10 +162,8 @@ class AudioEmbedderTest(parameterized.TestCase): expected_result0_value) self._check_embedding_value(embedding_result1_list[0], expected_result1_value) - self._check_yamnet_result( - embedding_result0_list, - embedding_result1_list, - expected_similarities=_SPEECH_SIMILARITIES) + self.assertLen(embedding_result0_list, 5) + self.assertLen(embedding_result1_list, 5) def test_embed_with_yamnet_model_and_different_inputs(self): with _AudioEmbedder.create_from_model_path( @@ -200,10 +174,6 @@ class AudioEmbedderTest(parameterized.TestCase): self._read_wav_file(_TWO_HEADS_WAV_16K_MONO)) self.assertLen(embedding_result0_list, 5) self.assertLen(embedding_result1_list, 1) - self._check_cosine_similarity( - embedding_result0_list[0], - embedding_result1_list[0], - expected_similarity=0.09017) def test_missing_sample_rate_in_audio_clips_mode(self): options = _AudioEmbedderOptions( @@ -304,10 +274,8 @@ class AudioEmbedderTest(parameterized.TestCase): embedder.embed_async(audio_data, timestamp_ms) embedding_result1_list = embedding_result_list - self._check_yamnet_result( - embedding_result0_list, - embedding_result1_list, - expected_similarities=_SPEECH_SIMILARITIES) + self.assertLen(embedding_result0_list, 5) + self.assertLen(embedding_result1_list, 5) if __name__ == '__main__': diff --git a/mediapipe/tasks/web/audio/audio_embedder/BUILD b/mediapipe/tasks/web/audio/audio_embedder/BUILD index 68a7f7bd5..69b0761d3 100644 --- a/mediapipe/tasks/web/audio/audio_embedder/BUILD +++ b/mediapipe/tasks/web/audio/audio_embedder/BUILD @@ -21,10 +21,8 @@ mediapipe_ts_library( "//mediapipe/tasks/cc/components/containers/proto:embeddings_jspb_proto", "//mediapipe/tasks/cc/core/proto:base_options_jspb_proto", "//mediapipe/tasks/web/audio/core:audio_task_runner", - "//mediapipe/tasks/web/components/containers:embedding_result", "//mediapipe/tasks/web/components/processors:embedder_options", "//mediapipe/tasks/web/components/processors:embedder_result", - "//mediapipe/tasks/web/components/utils:cosine_similarity", "//mediapipe/tasks/web/core", "//mediapipe/tasks/web/core:embedder_options", "//mediapipe/tasks/web/core:task_runner", diff --git a/mediapipe/tasks/web/audio/audio_embedder/audio_embedder.ts b/mediapipe/tasks/web/audio/audio_embedder/audio_embedder.ts index 7d8c7a5b4..e6d659b9b 100644 --- a/mediapipe/tasks/web/audio/audio_embedder/audio_embedder.ts +++ b/mediapipe/tasks/web/audio/audio_embedder/audio_embedder.ts @@ -20,10 +20,8 @@ import {AudioEmbedderGraphOptions as AudioEmbedderGraphOptionsProto} from '../.. 
import {EmbeddingResult} from '../../../../tasks/cc/components/containers/proto/embeddings_pb'; import {BaseOptions as BaseOptionsProto} from '../../../../tasks/cc/core/proto/base_options_pb'; import {AudioTaskRunner} from '../../../../tasks/web/audio/core/audio_task_runner'; -import {Embedding} from '../../../../tasks/web/components/containers/embedding_result'; import {convertEmbedderOptionsToProto} from '../../../../tasks/web/components/processors/embedder_options'; import {convertFromEmbeddingResultProto} from '../../../../tasks/web/components/processors/embedder_result'; -import {computeCosineSimilarity} from '../../../../tasks/web/components/utils/cosine_similarity'; import {CachedGraphRunner} from '../../../../tasks/web/core/task_runner'; import {WasmFileset} from '../../../../tasks/web/core/wasm_fileset'; import {WasmModule} from '../../../../web/graph_runner/graph_runner'; @@ -145,19 +143,6 @@ export class AudioEmbedder extends AudioTaskRunner { return this.processAudioClip(audioData, sampleRate); } - /** - * Utility function to compute cosine similarity[1] between two `Embedding` - * objects. - * - * [1]: https://en.wikipedia.org/wiki/Cosine_similarity - * - * @throws if the embeddings are of different types(float vs. quantized), have - * different sizes, or have an L2-norm of 0. - */ - static cosineSimilarity(u: Embedding, v: Embedding): number { - return computeCosineSimilarity(u, v); - } - protected override process( audioData: Float32Array, sampleRate: number, timestampMs: number): AudioEmbedderResult[] { From 75576700ed8420bae763b69dc4b97df00ac5cd9b Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 27 Feb 2023 11:08:58 -0800 Subject: [PATCH 27/47] Add Bitmap image capture capability to GlSurfaceViewRenderer. PiperOrigin-RevId: 512677893 --- .../components/GlSurfaceViewRenderer.java | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java b/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java index b836a38f3..23178b265 100644 --- a/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java +++ b/mediapipe/java/com/google/mediapipe/components/GlSurfaceViewRenderer.java @@ -17,6 +17,7 @@ package com.google.mediapipe.components; import static java.lang.Math.max; import static java.lang.Math.min; +import android.graphics.Bitmap; import android.graphics.SurfaceTexture; import android.opengl.GLES11Ext; import android.opengl.GLES20; @@ -26,9 +27,12 @@ import android.util.Log; import com.google.mediapipe.framework.TextureFrame; import com.google.mediapipe.glutil.CommonShaders; import com.google.mediapipe.glutil.ShaderUtil; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.FloatBuffer; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import javax.microedition.khronos.egl.EGLConfig; import javax.microedition.khronos.opengles.GL10; @@ -45,6 +49,13 @@ import javax.microedition.khronos.opengles.GL10; * {@link TextureFrame} (call {@link #setNextFrame(TextureFrame)}). */ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { + /** + * Listener for Bitmap capture requests. 
+ */ + public interface BitmapCaptureListener { + void onBitmapCaptured(Bitmap result); + } + private static final String TAG = "DemoRenderer"; private static final int ATTRIB_POSITION = 1; private static final int ATTRIB_TEXTURE_COORDINATE = 2; @@ -64,6 +75,25 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { private float[] textureTransformMatrix = new float[16]; private SurfaceTexture surfaceTexture = null; private final AtomicReference nextFrame = new AtomicReference<>(); + private final AtomicBoolean captureNextFrameBitmap = new AtomicBoolean(); + private BitmapCaptureListener bitmapCaptureListener; + + /** + * Sets the {@link BitmapCaptureListener}. + */ + public void setBitmapCaptureListener(BitmapCaptureListener bitmapCaptureListener) { + this.bitmapCaptureListener = bitmapCaptureListener; + } + + /** + * Request to capture Bitmap of the next frame. + * + * The result will be provided to the {@link BitmapCaptureListener} if one is set. Please note + * this is an expensive operation and the result may not be available for a while. + */ + public void captureNextFrameBitmap() { + captureNextFrameBitmap.set(true); + } @Override public void onSurfaceCreated(GL10 gl, EGLConfig config) { @@ -149,6 +179,31 @@ public class GlSurfaceViewRenderer implements GLSurfaceView.Renderer { GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4); ShaderUtil.checkGlError("glDrawArrays"); + + // Capture Bitmap if requested. + BitmapCaptureListener bitmapCaptureListener = this.bitmapCaptureListener; + if (captureNextFrameBitmap.getAndSet(false) && bitmapCaptureListener != null) { + int bitmapSize = surfaceWidth * surfaceHeight; + ByteBuffer byteBuffer = ByteBuffer.allocateDirect(bitmapSize * 4); + byteBuffer.order(ByteOrder.nativeOrder()); + GLES20.glReadPixels( + 0, 0, surfaceWidth, surfaceHeight, GLES20.GL_RGBA, GLES20.GL_UNSIGNED_BYTE, byteBuffer); + int[] pixelBuffer = new int[bitmapSize]; + byteBuffer.asIntBuffer().get(pixelBuffer); + for (int i = 0; i < bitmapSize; i++) { + // Swap R and B channels. 
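      // (Illustrative aside, not part of this patch: glReadPixels returns
      // RGBA bytes, which a little-endian int load reads as 0xAABBGGRR,
      // while Bitmap.setPixels expects 0xAARRGGBB. The 0xff00ff00 mask keeps
      // A and G in place and the shifts exchange the R and B bytes, e.g.
      // 0xFF0011EE becomes 0xFFEE1100.)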
+ pixelBuffer[i] = + (pixelBuffer[i] & 0xff00ff00) + | ((pixelBuffer[i] & 0x000000ff) << 16) + | ((pixelBuffer[i] & 0x00ff0000) >> 16); + } + Bitmap bitmap = Bitmap.createBitmap(surfaceWidth, surfaceHeight, Bitmap.Config.ARGB_8888); + bitmap.setPixels( + pixelBuffer, /* offset= */bitmapSize - surfaceWidth, /* stride= */-surfaceWidth, + /* x= */0, /* y= */0, surfaceWidth, surfaceHeight); + bitmapCaptureListener.onBitmapCaptured(bitmap); + } + GLES20.glBindTexture(textureTarget, 0); ShaderUtil.checkGlError("unbind surfaceTexture"); From 5913e5dd1d080f9bf27ca61b61a7a65207c31357 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 27 Feb 2023 11:40:05 -0800 Subject: [PATCH 28/47] Internal change PiperOrigin-RevId: 512686714 --- mediapipe/tasks/ios/test/vision/core/BUILD | 12 +++++------- mediapipe/tasks/ios/vision/core/BUILD | 5 ----- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/mediapipe/tasks/ios/test/vision/core/BUILD b/mediapipe/tasks/ios/test/vision/core/BUILD index ef95e4681..92954d065 100644 --- a/mediapipe/tasks/ios/test/vision/core/BUILD +++ b/mediapipe/tasks/ios/test/vision/core/BUILD @@ -34,16 +34,14 @@ objc_library( data = [ "//mediapipe/tasks/testdata/vision:test_images", ], - sdk_frameworks = [ - "CoreMedia", - "CoreVideo", - "CoreGraphics", - "UIKit", - "Accelerate", - ], deps = [ "//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/vision/core:MPPImage", + "//third_party/apple_frameworks:Accelerate", + "//third_party/apple_frameworks:CoreGraphics", + "//third_party/apple_frameworks:CoreMedia", + "//third_party/apple_frameworks:CoreVideo", + "//third_party/apple_frameworks:UIKit", ], ) diff --git a/mediapipe/tasks/ios/vision/core/BUILD b/mediapipe/tasks/ios/vision/core/BUILD index 91b9078a5..1961ca6b0 100644 --- a/mediapipe/tasks/ios/vision/core/BUILD +++ b/mediapipe/tasks/ios/vision/core/BUILD @@ -11,11 +11,6 @@ objc_library( "-std=c++17", ], module_name = "MPPImage", - sdk_frameworks = [ - "CoreMedia", - "CoreVideo", - "UIKit", - ], deps = [ "//mediapipe/tasks/ios/common:MPPCommon", "//mediapipe/tasks/ios/common/utils:MPPCommonUtils", From aa61abe386c1396944494e13d81d4faac5a9d316 Mon Sep 17 00:00:00 2001 From: Jiuqiang Tang Date: Mon, 27 Feb 2023 11:41:25 -0800 Subject: [PATCH 29/47] Implement TensorsToImageCalculator. PiperOrigin-RevId: 512687103 --- .../cc/vision/face_stylizer/calculators/BUILD | 108 +++++ .../tensors_to_image_calculator.cc | 439 ++++++++++++++++++ .../tensors_to_image_calculator.proto | 31 ++ 3 files changed, 578 insertions(+) create mode 100644 mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD create mode 100644 mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc create mode 100644 mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.proto diff --git a/mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD b/mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD new file mode 100644 index 000000000..be1ce9b3d --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_stylizer/calculators/BUILD @@ -0,0 +1,108 @@ +# Copyright 2023 The MediaPipe Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_proto_library") + +licenses(["notice"]) + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +mediapipe_proto_library( + name = "tensors_to_image_calculator_proto", + srcs = ["tensors_to_image_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_options_proto", + "//mediapipe/framework:calculator_proto", + "//mediapipe/gpu:gpu_origin_proto", + ], +) + +cc_library( + name = "tensors_to_image_calculator", + srcs = ["tensors_to_image_calculator.cc"], + copts = select({ + "//mediapipe:apple": [ + "-x objective-c++", + "-fobjc-arc", # enable reference-counting + ], + "//conditions:default": [], + }), + features = ["-layering_check"], # allow depending on tensor_to_image_calculator_gpu_deps + linkopts = select({ + "//mediapipe:apple": [ + "-framework CoreVideo", + "-framework MetalKit", + ], + "//conditions:default": [], + }), + deps = [ + ":tensors_to_image_calculator_cc_proto", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/api2:builder", + "//mediapipe/framework/api2:node", + "//mediapipe/framework/api2:packet", + "//mediapipe/framework/api2:port", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:tensor", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:vector", + "//mediapipe/gpu:gpu_origin_cc_proto", + ] + select({ + "//mediapipe/gpu:disable_gpu": [], + "//conditions:default": ["tensor_to_image_calculator_gpu_deps"], + }), + alwayslink = 1, +) + +cc_library( + name = "tensor_to_image_calculator_gpu_deps", + visibility = ["//visibility:private"], + deps = select({ + "//mediapipe:android": [ + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gl_quad_renderer", + "//mediapipe/gpu:gl_simple_shaders", + "//mediapipe/gpu:gpu_buffer", + "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate", + "@org_tensorflow//tensorflow/lite/delegates/gpu/common:util", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util", + ], + "//mediapipe:ios": [ + "//mediapipe/gpu:MPPMetalHelper", + "//mediapipe/gpu:MPPMetalUtil", + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gpu_buffer", + ], + "//mediapipe:macos": [], + "//conditions:default": [ + "//mediapipe/gpu:gl_calculator_helper", + "//mediapipe/gpu:gl_quad_renderer", + "//mediapipe/gpu:gpu_buffer", + "@org_tensorflow//tensorflow/lite/delegates/gpu:gl_delegate", + "@org_tensorflow//tensorflow/lite/delegates/gpu/common:util", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_program", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_shader", + "@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_texture", + 
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl/converters:util", + ], + }), +) diff --git a/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc b/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc new file mode 100644 index 000000000..7d7443761 --- /dev/null +++ b/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.cc @@ -0,0 +1,439 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/api2/node.h" +#include "mediapipe/framework/api2/packet.h" +#include "mediapipe/framework/api2/port.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/tensor.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/gpu/gpu_origin.pb.h" +#include "mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.pb.h" + +#if !MEDIAPIPE_DISABLE_GPU +#include "mediapipe/gpu/gpu_buffer.h" +#if MEDIAPIPE_METAL_ENABLED +#import +#import +#import + +#include "mediapipe/framework/formats/tensor_mtl_buffer_view.h" +#import "mediapipe/gpu/MPPMetalHelper.h" +#else +#include "mediapipe/gpu/gl_calculator_helper.h" +#include "mediapipe/gpu/gl_quad_renderer.h" +#include "mediapipe/gpu/gl_simple_shaders.h" +#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 +#include "tensorflow/lite/delegates/gpu/common/util.h" +#include "tensorflow/lite/delegates/gpu/gl/converters/util.h" +#include "tensorflow/lite/delegates/gpu/gl/gl_program.h" +#include "tensorflow/lite/delegates/gpu/gl/gl_shader.h" +#include "tensorflow/lite/delegates/gpu/gl/gl_texture.h" +#include "tensorflow/lite/delegates/gpu/gl_delegate.h" +#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 +#endif // MEDIAPIPE_METAL_ENABLED +#endif // !MEDIAPIPE_DISABLE_GPU + +namespace mediapipe { +namespace tasks { +namespace { + +using ::mediapipe::api2::Input; +using ::mediapipe::api2::Node; +using ::mediapipe::api2::Output; + +#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 +using ::tflite::gpu::gl::GlProgram; +using ::tflite::gpu::gl::GlShader; +#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + +enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES }; + +// Commonly used to compute the number of blocks to launch in a kernel. +static int NumGroups(const int size, const int group_size) { // NOLINT + return (size + group_size - 1) / group_size; +} + +} // namespace + +// Converts a MediaPipe tensor to a MediaPipe Image. +// +// Input streams: +// TENSORS - std::vector that only contains one element. +// +// Output streams: +// OUTPUT - mediapipe::Image. +// +// TODO: Enable TensorsToImageCalculator to run on CPU. 
+class TensorsToImageCalculator : public Node { + public: + static constexpr Input> kInputTensors{"TENSORS"}; + static constexpr Output kOutputImage{"IMAGE"}; + + MEDIAPIPE_NODE_CONTRACT(kInputTensors, kOutputImage); + + static absl::Status GetContract(CalculatorContract* cc); + absl::Status Open(CalculatorContext* cc); + absl::Status Process(CalculatorContext* cc); + absl::Status Close(CalculatorContext* cc); + + private: +#if !MEDIAPIPE_DISABLE_GPU +#if MEDIAPIPE_METAL_ENABLED + bool metal_initialized_ = false; + MPPMetalHelper* gpu_helper_ = nullptr; + id to_buffer_program_; + + absl::Status MetalSetup(CalculatorContext* cc); + absl::Status MetalProcess(CalculatorContext* cc); +#else + absl::Status GlSetup(CalculatorContext* cc); + + GlCalculatorHelper gl_helper_; + + bool gl_initialized_ = false; +#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + std::unique_ptr gl_compute_program_; + const tflite::gpu::uint3 workgroup_size_ = {8, 8, 1}; +#else + GLuint program_ = 0; + std::unique_ptr gl_renderer_; +#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 +#endif // MEDIAPIPE_METAL_ENABLED +#endif // !MEDIAPIPE_DISABLE_GPU +}; +MEDIAPIPE_REGISTER_NODE(::mediapipe::tasks::TensorsToImageCalculator); + +absl::Status TensorsToImageCalculator::GetContract(CalculatorContract* cc) { +#if !MEDIAPIPE_DISABLE_GPU +#if MEDIAPIPE_METAL_ENABLED + MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); +#else + return GlCalculatorHelper::UpdateContract(cc); +#endif // MEDIAPIPE_METAL_ENABLED +#endif // !MEDIAPIPE_DISABLE_GPU + return absl::OkStatus(); +} + +absl::Status TensorsToImageCalculator::Open(CalculatorContext* cc) { +#if !MEDIAPIPE_DISABLE_GPU +#if MEDIAPIPE_METAL_ENABLED + gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; + RET_CHECK(gpu_helper_); +#else + MP_RETURN_IF_ERROR(gl_helper_.Open(cc)); +#endif // MEDIAPIPE_METAL_ENABLED +#endif // !MEDIAPIPE_DISABLE_GPU + + return absl::OkStatus(); +} + +absl::Status TensorsToImageCalculator::Process(CalculatorContext* cc) { +#if !MEDIAPIPE_DISABLE_GPU +#if MEDIAPIPE_METAL_ENABLED + + return MetalProcess(cc); + +#else + + return gl_helper_.RunInGlContext([this, cc]() -> absl::Status { + if (!gl_initialized_) { + MP_RETURN_IF_ERROR(GlSetup(cc)); + gl_initialized_ = true; + } + + if (kInputTensors(cc).IsEmpty()) { + return absl::OkStatus(); + } + const auto& input_tensors = kInputTensors(cc).Get(); + RET_CHECK_EQ(input_tensors.size(), 1) + << "Expect 1 input tensor, but have " << input_tensors.size(); + const int tensor_width = input_tensors[0].shape().dims[2]; + const int tensor_height = input_tensors[0].shape().dims[1]; + +#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + + auto out_texture = std::make_unique(); + MP_RETURN_IF_ERROR(CreateReadWriteRgbaImageTexture( + tflite::gpu::DataType::UINT8, // GL_RGBA8 + {tensor_width, tensor_height}, out_texture.get())); + + const int output_index = 0; + glBindImageTexture(output_index, out_texture->id(), 0, GL_FALSE, 0, + GL_WRITE_ONLY, GL_RGBA8); + + auto read_view = input_tensors[0].GetOpenGlBufferReadView(); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, read_view.name()); + + const tflite::gpu::uint3 workload = {tensor_width, tensor_height, 1}; + const tflite::gpu::uint3 workgroups = + tflite::gpu::DivideRoundUp(workload, workgroup_size_); + + glUseProgram(gl_compute_program_->id()); + glUniform2i(glGetUniformLocation(gl_compute_program_->id(), "out_size"), + tensor_width, tensor_height); + + MP_RETURN_IF_ERROR(gl_compute_program_->Dispatch(workgroups)); 
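    // (Illustrative aside: DivideRoundUp is ceiling division, so a 641x480
    // tensor with the 8x8x1 workgroup size above dispatches
    // ceil(641/8) x ceil(480/8) = 81 x 60 groups; the shader's bounds check
    // simply returns for threads that land past the edge.)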
+ + auto texture_buffer = mediapipe::GlTextureBuffer::Wrap( + out_texture->target(), out_texture->id(), tensor_width, tensor_height, + mediapipe::GpuBufferFormat::kBGRA32, + [ptr = out_texture.release()]( + std::shared_ptr sync_token) mutable { + delete ptr; + }); + + auto output = + std::make_unique(std::move(texture_buffer)); + kOutputImage(cc).Send(Image(*output)); + ; + +#else + + if (!input_tensors[0].ready_as_opengl_texture_2d()) { + (void)input_tensors[0].GetCpuReadView(); + } + + auto output_texture = + gl_helper_.CreateDestinationTexture(tensor_width, tensor_height); + gl_helper_.BindFramebuffer(output_texture); // GL_TEXTURE0 + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, + input_tensors[0].GetOpenGlTexture2dReadView().name()); + + MP_RETURN_IF_ERROR(gl_renderer_->GlRender( + tensor_width, tensor_height, output_texture.width(), + output_texture.height(), mediapipe::FrameScaleMode::kStretch, + mediapipe::FrameRotation::kNone, + /*flip_horizontal=*/false, /*flip_vertical=*/false, + /*flip_texture=*/false)); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + + auto output = output_texture.GetFrame(); + kOutputImage(cc).Send(Image(*output)); + +#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + + return mediapipe::OkStatus(); + }); + +#endif // MEDIAPIPE_METAL_ENABLED +#endif // !MEDIAPIPE_DISABLE_GPU + return absl::OkStatus(); +} + +absl::Status TensorsToImageCalculator::Close(CalculatorContext* cc) { +#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED + gl_helper_.RunInGlContext([this] { +#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + gl_compute_program_.reset(); +#else + if (program_) glDeleteProgram(program_); + program_ = 0; +#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + }); +#endif // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED + return absl::OkStatus(); +} + +#if MEDIAPIPE_METAL_ENABLED + +absl::Status TensorsToImageCalculator::MetalProcess(CalculatorContext* cc) { + if (!metal_initialized_) { + MP_RETURN_IF_ERROR(MetalSetup(cc)); + metal_initialized_ = true; + } + + if (kInputTensors(cc).IsEmpty()) { + return absl::OkStatus(); + } + const auto& input_tensors = kInputTensors(cc).Get(); + RET_CHECK_EQ(input_tensors.size(), 1) + << "Expect 1 input tensor, but have " << input_tensors.size(); + const int tensor_width = input_tensors[0].shape().dims[2]; + const int tensor_height = input_tensors[0].shape().dims[1]; + + // TODO: Fix unused variable + [[maybe_unused]] id device = gpu_helper_.mtlDevice; + id command_buffer = [gpu_helper_ commandBuffer]; + command_buffer.label = @"TensorsToImageCalculatorConvert"; + id compute_encoder = + [command_buffer computeCommandEncoder]; + [compute_encoder setComputePipelineState:to_buffer_program_]; + + auto input_view = + mediapipe::MtlBufferView::GetReadView(input_tensors[0], command_buffer); + [compute_encoder setBuffer:input_view.buffer() offset:0 atIndex:0]; + + mediapipe::GpuBuffer output = + [gpu_helper_ mediapipeGpuBufferWithWidth:tensor_width + height:tensor_height]; + id dst_texture = [gpu_helper_ metalTextureWithGpuBuffer:output]; + [compute_encoder setTexture:dst_texture atIndex:1]; + + MTLSize threads_per_group = MTLSizeMake(8, 8, 1); + MTLSize threadgroups = + MTLSizeMake(NumGroups(tensor_width, 8), NumGroups(tensor_height, 8), 1); + [compute_encoder dispatchThreadgroups:threadgroups + threadsPerThreadgroup:threads_per_group]; + [compute_encoder endEncoding]; + [command_buffer commit]; + + kOutputImage(cc).Send(Image(output)); + return 
absl::OkStatus(); +} + +absl::Status TensorsToImageCalculator::MetalSetup(CalculatorContext* cc) { + id device = gpu_helper_.mtlDevice; + const std::string shader_source = + R"( + #include + + using namespace metal; + + kernel void convertKernel( + device float* in_buf [[ buffer(0) ]], + texture2d out_tex [[ texture(1) ]], + uint2 gid [[ thread_position_in_grid ]]) { + if (gid.x >= out_tex.get_width() || gid.y >= out_tex.get_height()) return; + uint linear_index = 3 * (gid.y * out_tex.get_width() + gid.x); + float4 out_value = float4(in_buf[linear_index], in_buf[linear_index + 1], in_buf[linear_index + 2], 1.0); + out_tex.write(out_value, gid); + } + )"; + NSString* library_source = + [NSString stringWithUTF8String:shader_source.c_str()]; + NSError* error = nil; + id library = + [device newLibraryWithSource:library_source options:nullptr error:&error]; + RET_CHECK(library != nil) << "Couldn't create shader library " + << [[error localizedDescription] UTF8String]; + id kernel_func = nil; + kernel_func = [library newFunctionWithName:@"convertKernel"]; + RET_CHECK(kernel_func != nil) << "Couldn't create kernel function."; + to_buffer_program_ = + [device newComputePipelineStateWithFunction:kernel_func error:&error]; + RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " << + [[error localizedDescription] UTF8String]; + + return mediapipe::OkStatus(); +} + +#endif // MEDIAPIPE_METAL_ENABLED + +#if !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED +absl::Status TensorsToImageCalculator::GlSetup(CalculatorContext* cc) { + std::string maybe_flip_y_define; +#if !defined(__APPLE__) + const auto& options = cc->Options(); + if (options.gpu_origin() != mediapipe::GpuOrigin::TOP_LEFT) { + maybe_flip_y_define = R"( + #define FLIP_Y_COORD + )"; + } +#endif // !defined(__APPLE__) + +#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 + + const std::string shader_header = + absl::StrCat(tflite::gpu::gl::GetShaderHeader(workgroup_size_), R"( + precision highp float; + layout(rgba8, binding = 0) writeonly uniform highp image2D output_texture; + uniform ivec2 out_size; + )"); + + const std::string shader_body = R"( + layout(std430, binding = 2) readonly buffer B0 { + float elements[]; + } input_data; // data tensor + + void main() { + int out_width = out_size.x; + int out_height = out_size.y; + + ivec2 gid = ivec2(gl_GlobalInvocationID.xy); + if (gid.x >= out_width || gid.y >= out_height) { return; } + int linear_index = 3 * (gid.y * out_width + gid.x); + +#ifdef FLIP_Y_COORD + int y_coord = out_height - gid.y - 1; +#else + int y_coord = gid.y; +#endif // defined(FLIP_Y_COORD) + + ivec2 out_coordinate = ivec2(gid.x, y_coord); + vec4 out_value = vec4(input_data.elements[linear_index], input_data.elements[linear_index + 1], input_data.elements[linear_index + 2], 1.0); + imageStore(output_texture, out_coordinate, out_value); + })"; + + const std::string shader_full = + absl::StrCat(shader_header, maybe_flip_y_define, shader_body); + + GlShader shader; + MP_RETURN_IF_ERROR( + GlShader::CompileShader(GL_COMPUTE_SHADER, shader_full, &shader)); + gl_compute_program_ = std::make_unique(); + MP_RETURN_IF_ERROR( + GlProgram::CreateWithShader(shader, gl_compute_program_.get())); + +#else + constexpr GLchar kFragColorOutputDeclaration[] = R"( + #ifdef GL_ES + #define fragColor gl_FragColor + #else + out vec4 fragColor; + #endif // defined(GL_ES); +)"; + + constexpr GLchar kBody[] = R"( + DEFAULT_PRECISION(mediump, float) + in vec2 sample_coordinate; + uniform sampler2D tensor; + void main() { 
+#ifdef FLIP_Y_COORD
+      float y_coord = 1.0 - sample_coordinate.y;
+#else
+      float y_coord = sample_coordinate.y;
+#endif  // defined(FLIP_Y_COORD)
+      vec3 color = texture2D(tensor, vec2(sample_coordinate.x, y_coord)).rgb;
+      fragColor = vec4(color, 1.0);
+    }
+  )";
+
+  const std::string src =
+      absl::StrCat(mediapipe::kMediaPipeFragmentShaderPreamble,
+                   kFragColorOutputDeclaration, maybe_flip_y_define, kBody);
+  gl_renderer_ = std::make_unique<QuadRenderer>();
+  MP_RETURN_IF_ERROR(gl_renderer_->GlSetup(src.c_str(), {"tensor"}));
+
+#endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
+
+  return mediapipe::OkStatus();
+}
+
+#endif  // !MEDIAPIPE_DISABLE_GPU && !MEDIAPIPE_METAL_ENABLED
+
+}  // namespace tasks
+}  // namespace mediapipe
diff --git a/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.proto b/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.proto
new file mode 100644
index 000000000..08bd7b080
--- /dev/null
+++ b/mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator.proto
@@ -0,0 +1,31 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package mediapipe.tasks;
+
+import "mediapipe/framework/calculator.proto";
+import "mediapipe/gpu/gpu_origin.proto";
+
+message TensorsToImageCalculatorOptions {
+  extend mediapipe.CalculatorOptions {
+    optional TensorsToImageCalculatorOptions ext = 511831156;
+  }
+
+  // For CONVENTIONAL mode in OpenGL, the input image starts at the bottom and
+  // needs to be flipped vertically, as tensors are expected to start at the
+  // top. (DEFAULT or unset is interpreted as CONVENTIONAL.)
+  optional mediapipe.GpuOrigin.Mode gpu_origin = 1;
+}

From a60d67eb1007acceabedb00632c66d85aeeaae8a Mon Sep 17 00:00:00 2001
From: MediaPipe Team
Date: Mon, 27 Feb 2023 12:19:24 -0800
Subject: [PATCH 30/47] Update ImageSegmenter API for image/video mode to have
 both callback API and returned result API.

PiperOrigin-RevId: 512697585
---
 .../vision/imagesegmenter/ImageSegmenter.java | 250 ++++++++++++++----
 .../imagesegmenter/ImageSegmenterTest.java    | 213 +++++++++------
 2 files changed, 334 insertions(+), 129 deletions(-)

diff --git a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java
index 8d07b7c68..2ef1b57d8 100644
--- a/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java
+++ b/mediapipe/tasks/java/com/google/mediapipe/tasks/vision/imagesegmenter/ImageSegmenter.java
@@ -47,10 +47,14 @@ import java.util.Optional;
 /**
  * Performs image segmentation on images.
  *
- *

Note that, unlike other vision tasks, the output of ImageSegmenter is provided through a - * user-defined callback function even for the synchronous API. This makes it possible for - * ImageSegmenter to return the output masks without any copy. {@link ResultListener} must be set in - * the {@link ImageSegmenterOptions} for all {@link RunningMode}. + *

Note that, in addition to the standard segmentation API, {@link segment} and {@link
+ * segmentForVideo}, which take an input image and return the outputs but involve a deep copy of
+ * the returned masks, ImageSegmenter also supports the callback API, {@link
+ * segmentWithResultListener} and {@link segmentForVideoWithResultListener}, which let you access
+ * the outputs with zero copy.
+ *
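+ * <p>For example, the two styles look roughly as follows (a sketch only; {@code imageSegmenter}
+ * and {@code image} are assumed to already exist):
+ *
+ * <pre>{@code
+ * // Returned result API: the masks are deep copies owned by the caller.
+ * ImageSegmenterResult result = imageSegmenter.segment(image);
+ *
+ * // Callback API: the masks are delivered to the ResultListener with zero copy.
+ * imageSegmenter.segmentWithResultListener(image);
+ * }</pre>
+ *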

The callback API is available for all {@link RunningMode} in ImageSegmenter. To use the callback
+ * API, set {@link ResultListener} in {@link ImageSegmenterOptions}.
 *
 *
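+ * <p>A minimal setup sketch for the callback API (the builder method names and the model path are
+ * illustrative assumptions, not taken from this change):
+ *
+ * <pre>{@code
+ * ImageSegmenterOptions options =
+ *     ImageSegmenterOptions.builder()
+ *         .setBaseOptions(BaseOptions.builder().setModelAssetPath("segmenter.tflite").build())
+ *         .setResultListener(
+ *             (result, inputImage) -> {
+ *               // The masks in result wrap the graph's memory; consume them here.
+ *             })
+ *         .build();
+ * ImageSegmenter imageSegmenter = ImageSegmenter.createFromOptions(context, options);
+ * imageSegmenter.segmentWithResultListener(image);
+ * }</pre>
+ *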

The API expects a TFLite model with TFLite Model Metadata.
@@ -85,6 +89,8 @@ public final class ImageSegmenter extends BaseVisionTaskApi {
   private static final String TASK_GRAPH_NAME =
       "mediapipe.tasks.vision.image_segmenter.ImageSegmenterGraph";
 
+  private boolean hasResultListener = false;
+
   /**
    * Creates an {@link ImageSegmenter} instance from an {@link ImageSegmenterOptions}.
    *
@@ -116,8 +122,19 @@ public final class ImageSegmenter extends BaseVisionTaskApi {
           int imageListSize =
               PacketGetter.getImageListSize(packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX));
           ByteBuffer[] buffersArray = new ByteBuffer[imageListSize];
+          // If no result listener is provided, the resulting MPImage is deep-copied out of the
+          // mediapipe graph. If one is provided, the resulting MPImage wraps the mediapipe packet
+          // memory directly.
+          if (!segmenterOptions.resultListener().isPresent()) {
+            for (int i = 0; i < imageListSize; i++) {
+              buffersArray[i] =
+                  ByteBuffer.allocateDirect(
+                      width * height * (imageFormat == MPImage.IMAGE_FORMAT_VEC32F1 ? 4 : 1));
+            }
+          }
           if (!PacketGetter.getImageList(
-              packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX), buffersArray, false)) {
+              packets.get(GROUPED_SEGMENTATION_OUT_STREAM_INDEX),
+              buffersArray,
+              !segmenterOptions.resultListener().isPresent())) {
             throw new MediaPipeException(
                 MediaPipeException.StatusCode.INTERNAL.ordinal(),
                 "There is an error getting segmented masks. It usually results from incorrect"
@@ -143,7 +160,7 @@
                     .build();
               }
             });
-    handler.setResultListener(segmenterOptions.resultListener());
+    segmenterOptions.resultListener().ifPresent(handler::setResultListener);
    segmenterOptions.errorListener().ifPresent(handler::setErrorListener);
     TaskRunner runner =
         TaskRunner.create(
@@ -158,7 +175,8 @@
             .setEnableFlowLimiting(segmenterOptions.runningMode() == RunningMode.LIVE_STREAM)
             .build(),
         handler);
-    return new ImageSegmenter(runner, segmenterOptions.runningMode());
+    return new ImageSegmenter(
+        runner, segmenterOptions.runningMode(), segmenterOptions.resultListener().isPresent());
   }
 
   /**
@@ -168,16 +186,17 @@
    * @param taskRunner a {@link TaskRunner}.
    * @param runningMode a mediapipe vision task {@link RunningMode}.
    */
-  private ImageSegmenter(TaskRunner taskRunner, RunningMode runningMode) {
+  private ImageSegmenter(
+      TaskRunner taskRunner, RunningMode runningMode, boolean hasResultListener) {
     super(taskRunner, runningMode, IMAGE_IN_STREAM_NAME, NORM_RECT_IN_STREAM_NAME);
+    this.hasResultListener = hasResultListener;
   }
 
   /**
    * Performs image segmentation on the provided single image with default image processing options,
-   * i.e. without any rotation applied, and the results will be available via the {@link
-   * ResultListener} provided in the {@link ImageSegmenterOptions}. Only use this method when the
-   * {@link ImageSegmenter} is created with {@link RunningMode.IMAGE}. TODO update java
-   * doc for input image format.
+   * i.e. without any rotation applied. Only use this method when the {@link ImageSegmenter} is
+   * created with {@link RunningMode.IMAGE}. TODO update java doc for input image
+   * format.
    *
-   *
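+   * <p>A minimal usage sketch (assuming an {@code imageSegmenter} created as in the class
+   * example above; the {@code segmentations()} accessor is an assumption about {@link
+   * ImageSegmenterResult}, not part of this change):
+   *
+   * <pre>{@code
+   * ImageSegmenterResult result = imageSegmenter.segment(mpImage);
+   * List<MPImage> masks = result.segmentations();
+   * }</pre>
+   *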

{@link ImageSegmenter} supports the following color space types:
   *
   *
   * @param image a MediaPipe {@link MPImage} object for processing.
-   * @throws MediaPipeException if there is an internal error.
+   * @throws MediaPipeException if there is an internal error, or if the {@link ImageSegmenter} is
+   *     created with a {@link ResultListener}.
    */
-  public void segment(MPImage image) {
-    segment(image, ImageProcessingOptions.builder().build());
+  public ImageSegmenterResult segment(MPImage image) {
+    return segment(image, ImageProcessingOptions.builder().build());
   }
 
   /**
-   * Performs image segmentation on the provided single image, and the results will be available via
-   * the {@link ResultListener} provided in the {@link ImageSegmenterOptions}. Only use this method
-   * when the {@link ImageSegmenter} is created with {@link RunningMode.IMAGE}. TODO
-   * update java doc for input image format.
+   * Performs image segmentation on the provided single image. Only use this method when the {@link
+   * ImageSegmenter} is created with {@link RunningMode.IMAGE}. TODO update java doc
+   * for input image format.
    *
-   *

{@link HandLandmarker} supports the following color space types: + *

{@link ImageSegmenter} supports the following color space types: * *
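+   * <p>For example, to segment an image that should be rotated 90 degrees before inference (a
+   * sketch; the rotation value is illustrative):
+   *
+   * <pre>{@code
+   * ImageProcessingOptions imageProcessingOptions =
+   *     ImageProcessingOptions.builder().setRotationDegrees(90).build();
+   * ImageSegmenterResult result = imageSegmenter.segment(image, imageProcessingOptions);
+   * }</pre>
+   *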