diff --git a/mediapipe/tasks/c/components/containers/embedding_result_converter.cc b/mediapipe/tasks/c/components/containers/embedding_result_converter.cc index cd4850c18..2e552801d 100644 --- a/mediapipe/tasks/c/components/containers/embedding_result_converter.cc +++ b/mediapipe/tasks/c/components/containers/embedding_result_converter.cc @@ -66,7 +66,7 @@ void CppConvertToEmbeddingResult( } } -void ConvertToCppEmbedding( +void CppConvertToCppEmbedding( const Embedding& in, // C struct as input mediapipe::tasks::components::containers::Embedding* out) { // Handle float embeddings @@ -85,7 +85,7 @@ void ConvertToCppEmbedding( // Copy head_name if it is present. if (in.head_name) { - out->head_name = std::make_optional(std::string(in.head_name)); + out->head_name = std::string(in.head_name); } } diff --git a/mediapipe/tasks/c/components/containers/embedding_result_converter.h b/mediapipe/tasks/c/components/containers/embedding_result_converter.h index 5ba4e4e2b..0955a949d 100644 --- a/mediapipe/tasks/c/components/containers/embedding_result_converter.h +++ b/mediapipe/tasks/c/components/containers/embedding_result_converter.h @@ -29,7 +29,7 @@ void CppConvertToEmbeddingResult( const mediapipe::tasks::components::containers::EmbeddingResult& in, EmbeddingResult* out); -void ConvertToCppEmbedding( +void CppConvertToCppEmbedding( const Embedding& in, mediapipe::tasks::components::containers::Embedding* out); diff --git a/mediapipe/tasks/c/text/text_embedder/text_embedder.cc b/mediapipe/tasks/c/text/text_embedder/text_embedder.cc index b19998985..6a016dd37 100644 --- a/mediapipe/tasks/c/text/text_embedder/text_embedder.cc +++ b/mediapipe/tasks/c/text/text_embedder/text_embedder.cc @@ -29,9 +29,8 @@ namespace mediapipe::tasks::c::text::text_embedder { namespace { - -using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding; using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; +using 
::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding; using ::mediapipe::tasks::c::components::containers:: CppConvertToEmbeddingResult; using ::mediapipe::tasks::c::components::processors:: @@ -97,9 +96,9 @@ int CppTextEmbedderClose(void* embedder, char** error_msg) { int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v, double* similarity, char** error_msg) { CppEmbedding cpp_u; - ConvertToCppEmbedding(u, &cpp_u); + CppConvertToCppEmbedding(u, &cpp_u); CppEmbedding cpp_v; - ConvertToCppEmbedding(v, &cpp_v); + CppConvertToCppEmbedding(v, &cpp_v); auto status_or_similarity = mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity( cpp_u, cpp_v); @@ -137,8 +136,8 @@ int text_embedder_close(void* embedder, char** error_ms) { embedder, error_ms); } -int cosine_similarity(const Embedding& u, const Embedding& v, - double* similarity, char** error_msg) { +int text_embedder_cosine_similarity(const Embedding& u, const Embedding& v, + double* similarity, char** error_msg) { return mediapipe::tasks::c::text::text_embedder:: CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg); } diff --git a/mediapipe/tasks/c/text/text_embedder/text_embedder.h b/mediapipe/tasks/c/text/text_embedder/text_embedder.h index 61a24044e..b737f47f1 100644 --- a/mediapipe/tasks/c/text/text_embedder/text_embedder.h +++ b/mediapipe/tasks/c/text/text_embedder/text_embedder.h @@ -72,8 +72,10 @@ MP_EXPORT int text_embedder_close(void* embedder, char** error_msg); // 0. 
// // [1]: https://en.wikipedia.org/wiki/Cosine_similarity -MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v, - double* similarity, char** error_msg = nullptr); +MP_EXPORT int text_embedder_cosine_similarity(const Embedding& u, + const Embedding& v, + double* similarity, + char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc b/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc index bce4ffe38..57021773e 100644 --- a/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc +++ b/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc @@ -33,10 +33,10 @@ constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/text/"; constexpr char kTestBertModelPath[] = "mobilebert_embedding_with_metadata.tflite"; constexpr char kTestString0[] = - "When you go to this restaurant, they hold the pancake upside-down " - "before they hand it to you. It's a great gimmick."; + "When you go to this restaurant, they hold the pancake upside-down " + "before they hand it to you. It's a great gimmick."; constexpr char kTestString1[] = - "Let's make a plan to steal the declaration of independence."; + "Let's make a plan to steal the declaration of independence."; constexpr float kPrecision = 1e-3; std::string GetFullPath(absl::string_view file_name) { @@ -81,14 +81,16 @@ TEST(TextEmbedderTest, SucceedsWithCosineSimilarity) { // Extract both embeddings. TextEmbedderResult result0; - text_embedder_embed(embedder, kTestString0, &result0, /* error_msg */ nullptr); + text_embedder_embed(embedder, kTestString0, &result0, + /* error_msg */ nullptr); TextEmbedderResult result1; - text_embedder_embed(embedder, kTestString1, &result1, /* error_msg */ nullptr); + text_embedder_embed(embedder, kTestString1, &result1, + /* error_msg */ nullptr); // Check cosine similarity. 
double similarity; - cosine_similarity(result0.embeddings[0], result1.embeddings[0], - &similarity); + text_embedder_cosine_similarity(result0.embeddings[0], result1.embeddings[0], + &similarity, nullptr); double expected_similarity = 0.98077; EXPECT_LE(abs(similarity - expected_similarity), kPrecision); text_embedder_close(embedder, /* error_msg */ nullptr); diff --git a/mediapipe/tasks/c/vision/core/BUILD b/mediapipe/tasks/c/vision/core/BUILD new file mode 100644 index 000000000..7d3b0f9a9 --- /dev/null +++ b/mediapipe/tasks/c/vision/core/BUILD @@ -0,0 +1,22 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "common", + hdrs = ["common.h"], +) diff --git a/mediapipe/tasks/c/vision/core/common.h b/mediapipe/tasks/c/vision/core/common.h new file mode 100644 index 000000000..8e88e2244 --- /dev/null +++ b/mediapipe/tasks/c/vision/core/common.h @@ -0,0 +1,69 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + + +#ifndef MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_ +#define MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +// Supported image formats. +enum ImageFormat { + UNKNOWN = 0, + SRGB = 1, + SRGBA = 2, + GRAY8 = 3, + SBGRA = 11 // compatible with Flutter `bgra8888` format. +}; + +// Supported processing modes. +enum RunningMode { + IMAGE = 1, + VIDEO = 2, + LIVE_STREAM = 3, +}; + +// Structure to hold image frame. +struct ImageFrame { + enum ImageFormat format; + const uint8_t* image_buffer; + int width; + int height; +}; + +// TODO: Add GPU buffer declaration and processing logic for it. +struct GpuBuffer { + int width; + int height; +}; + +// The object to contain an image, realizes `OneOf` concept. 
+struct MpImage { + enum { IMAGE_FRAME, GPU_BUFFER } type; + union { + struct ImageFrame image_frame; + struct GpuBuffer gpu_buffer; + }; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_ diff --git a/mediapipe/tasks/c/vision/image_classifier/BUILD b/mediapipe/tasks/c/vision/image_classifier/BUILD index a2c7ca290..08a0801d3 100644 --- a/mediapipe/tasks/c/vision/image_classifier/BUILD +++ b/mediapipe/tasks/c/vision/image_classifier/BUILD @@ -30,6 +30,7 @@ cc_library( "//mediapipe/tasks/c/components/processors:classifier_options_converter", "//mediapipe/tasks/c/core:base_options", "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", "//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/image_classifier", "//mediapipe/tasks/cc/vision/utils:image_utils", diff --git a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h index 51316bcbe..8d3231c2e 100644 --- a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h +++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h @@ -16,11 +16,10 @@ limitations under the License. #ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_ #define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_ -#include - #include "mediapipe/tasks/c/components/containers/classification_result.h" #include "mediapipe/tasks/c/components/processors/classifier_options.h" #include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" #ifndef MP_EXPORT #define MP_EXPORT __attribute__((visibility("default"))) @@ -32,46 +31,7 @@ extern "C" { typedef ClassificationResult ImageClassifierResult; -// Supported image formats. -enum ImageFormat { - UNKNOWN = 0, - SRGB = 1, - SRGBA = 2, - GRAY8 = 3, - SBGRA = 11 // compatible with Flutter `bgra8888` format. -}; - -// Supported processing modes. 
-enum RunningMode { - IMAGE = 1, - VIDEO = 2, - LIVE_STREAM = 3, -}; - -// Structure to hold image frame. -struct ImageFrame { - enum ImageFormat format; - const uint8_t* image_buffer; - int width; - int height; -}; - -// TODO: Add GPU buffer declaration and proccessing logic for it. -struct GpuBuffer { - int width; - int height; -}; - -// The object to contain an image, realizes `OneOf` concept. -struct MpImage { - enum { IMAGE_FRAME, GPU_BUFFER } type; - union { - struct ImageFrame image_frame; - struct GpuBuffer gpu_buffer; - }; -}; - -// The options for configuring a Mediapipe image classifier task. +// The options for configuring a MediaPipe image classifier task. struct ImageClassifierOptions { // Base options for configuring MediaPipe Tasks, such as specifying the model // file with metadata, accelerator options, op resolver, etc. @@ -122,12 +82,39 @@ MP_EXPORT int image_classifier_classify_image(void* classifier, ImageClassifierResult* result, char** error_msg); +// Performs image classification on the provided video frame. +// Only use this method when the ImageClassifier is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. MP_EXPORT int image_classifier_classify_for_video(void* classifier, const MpImage* image, int64_t timestamp_ms, ImageClassifierResult* result, char** error_msg); +// Sends live image data to image classification, and the results will be +// available via the "result_callback" provided in the ImageClassifierOptions. +// Only use this method when the ImageClassifier is created with the live +// stream running mode. 
+// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the image classifier. The input timestamps must be monotonically +// increasing. +// The "result_callback" provides: +// - The classification results as an ImageClassifierResult object. +// - The const reference to the corresponding input image that the image +// classifier runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. MP_EXPORT int image_classifier_classify_async(void* classifier, const MpImage* image, int64_t timestamp_ms, diff --git a/mediapipe/tasks/c/vision/image_embedder/BUILD b/mediapipe/tasks/c/vision/image_embedder/BUILD index 3300b4a0a..69f7cb08b 100644 --- a/mediapipe/tasks/c/vision/image_embedder/BUILD +++ b/mediapipe/tasks/c/vision/image_embedder/BUILD @@ -30,6 +30,8 @@ cc_library( "//mediapipe/tasks/c/components/processors:embedder_options_converter", "//mediapipe/tasks/c/core:base_options", "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/image_embedder", "//mediapipe/tasks/cc/vision/utils:image_utils", "@com_google_absl//absl/log:absl_log", diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc b/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc index ee042ba00..041962004 100644 --- a/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc @@ -36,8 +36,8 @@ namespace 
mediapipe::tasks::c::vision::image_embedder { namespace { -using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding; using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; +using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding; using ::mediapipe::tasks::c::components::containers:: CppConvertToEmbeddingResult; using ::mediapipe::tasks::c::components::processors:: @@ -235,9 +235,9 @@ int CppImageEmbedderClose(void* embedder, char** error_msg) { int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v, double* similarity, char** error_msg) { CppEmbedding cpp_u; - ConvertToCppEmbedding(u, &cpp_u); + CppConvertToCppEmbedding(u, &cpp_u); CppEmbedding cpp_v; - ConvertToCppEmbedding(v, &cpp_v); + CppConvertToCppEmbedding(v, &cpp_v); auto status_or_similarity = mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity( cpp_u, cpp_v); @@ -291,8 +291,8 @@ int image_embedder_close(void* embedder, char** error_msg) { embedder, error_msg); } -int cosine_similarity(const Embedding& u, const Embedding& v, - double* similarity, char** error_msg) { +int image_embedder_cosine_similarity(const Embedding& u, const Embedding& v, + double* similarity, char** error_msg) { return mediapipe::tasks::c::vision::image_embedder:: CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg); } diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder.h b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h index 3fdb448f0..f198d9810 100644 --- a/mediapipe/tasks/c/vision/image_embedder/image_embedder.h +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "mediapipe/tasks/c/components/containers/embedding_result.h" #include "mediapipe/tasks/c/components/processors/embedder_options.h" #include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" #ifndef MP_EXPORT #define MP_EXPORT __attribute__((visibility("default"))) @@ -32,45 +33,6 @@ extern "C" { typedef EmbeddingResult ImageEmbedderResult; -// Supported image formats. -enum ImageFormat { - UNKNOWN = 0, - SRGB = 1, - SRGBA = 2, - GRAY8 = 3, - SBGRA = 11 // compatible with Flutter `bgra8888` format. -}; - -// Supported processing modes. -enum RunningMode { - IMAGE = 1, - VIDEO = 2, - LIVE_STREAM = 3, -}; - -// Structure to hold image frame. -struct ImageFrame { - enum ImageFormat format; - const uint8_t* image_buffer; - int width; - int height; -}; - -// TODO: Add GPU buffer declaration and proccessing logic for it. -struct GpuBuffer { - int width; - int height; -}; - -// The object to contain an image, realizes `OneOf` concept. -struct MpImage { - enum { IMAGE_FRAME, GPU_BUFFER } type; - union { - struct ImageFrame image_frame; - struct GpuBuffer gpu_buffer; - }; -}; - // The options for configuring a MediaPipe image embedder task. struct ImageEmbedderOptions { // Base options for configuring MediaPipe Tasks, such as specifying the model @@ -121,12 +83,40 @@ MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image, ImageEmbedderResult* result, char** error_msg); +// Performs embedding extraction on the provided video frame. +// Only use this method when the ImageEmbedder is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). 
You must free the memory +// allocated for the error message. MP_EXPORT int image_embedder_embed_for_video(void* embedder, const MpImage* image, int64_t timestamp_ms, ImageEmbedderResult* result, char** error_msg); +// Sends live image data to embedder, and the results will be available via +// the "result_callback" provided in the ImageEmbedderOptions. +// Only use this method when the ImageEmbedder is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the image embedder. The input timestamps must be monotonically +// increasing. +// The "result_callback" provides: +// - The embedding results as a +// components::containers::proto::EmbeddingResult object. +// - The const reference to the corresponding input image that the image +// embedder runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image, int64_t timestamp_ms, char** error_msg); @@ -147,8 +137,10 @@ MP_EXPORT int image_embedder_close(void* embedder, char** error_msg); // 0. 
// // [1]: https://en.wikipedia.org/wiki/Cosine_similarity -MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v, - double* similarity, char** error_msg); +MP_EXPORT int image_embedder_cosine_similarity(const Embedding& u, + const Embedding& v, + double* similarity, + char** error_msg); #ifdef __cplusplus } // extern C diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc index 52a8b523a..c92a2427d 100644 --- a/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc @@ -143,8 +143,9 @@ TEST(ImageEmbedderTest, SucceedsWithCosineSimilarity) { CheckMobileNetV3Result(crop_result, false); // Check cosine similarity. double similarity; - cosine_similarity(image_result.embeddings[0], crop_result.embeddings[0], - &similarity, /* error_msg */ nullptr); + image_embedder_cosine_similarity(image_result.embeddings[0], + crop_result.embeddings[0], &similarity, + /* error_msg */ nullptr); double expected_similarity = 0.925519; EXPECT_LE(abs(similarity - expected_similarity), kPrecision); image_embedder_close_result(&image_result);