Resolved issues and added a common header to hold all the necessary structures for the vision tasks

2023-11-07 14:23:15 -08:00 · 2023-11-07 14:23:15 -08:00 · c442d6117e
commit c442d6117e
parent 197358dfee
13 changed files with 185 additions and 108 deletions
--- a/mediapipe/tasks/c/components/containers/embedding_result_converter.cc
+++ b/mediapipe/tasks/c/components/containers/embedding_result_converter.cc
@ -66,7 +66,7 @@ void CppConvertToEmbeddingResult(
  }
 }
-void ConvertToCppEmbedding(
+void CppConvertToCppEmbedding(
    const Embedding& in,  // C struct as input
    mediapipe::tasks::components::containers::Embedding* out) {
  // Handle float embeddings
@ -85,7 +85,7 @@ void ConvertToCppEmbedding(
  // Copy head_name if it is present.
  if (in.head_name) {
-    out->head_name = std::make_optional(std::string(in.head_name));
+    out->head_name = std::string(in.head_name);
  }
 }
--- a/mediapipe/tasks/c/components/containers/embedding_result_converter.h
+++ b/mediapipe/tasks/c/components/containers/embedding_result_converter.h
@ -29,7 +29,7 @@ void CppConvertToEmbeddingResult(
    const mediapipe::tasks::components::containers::EmbeddingResult& in,
    EmbeddingResult* out);
-void ConvertToCppEmbedding(
+void CppConvertToCppEmbedding(
    const Embedding& in,
    mediapipe::tasks::components::containers::Embedding* out);
--- a/mediapipe/tasks/c/text/text_embedder/text_embedder.cc
+++ b/mediapipe/tasks/c/text/text_embedder/text_embedder.cc
@ -29,9 +29,8 @@ namespace mediapipe::tasks::c::text::text_embedder {
 namespace {
 using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
 using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
 using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding;
 using ::mediapipe::tasks::c::components::containers::
    CppConvertToEmbeddingResult;
 using ::mediapipe::tasks::c::components::processors::
@ -97,9 +96,9 @@ int CppTextEmbedderClose(void* embedder, char** error_msg) {
 int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
                                    double* similarity, char** error_msg) {
  CppEmbedding cpp_u;
-  ConvertToCppEmbedding(u, &cpp_u);
+  CppConvertToCppEmbedding(u, &cpp_u);
  CppEmbedding cpp_v;
-  ConvertToCppEmbedding(v, &cpp_v);
+  CppConvertToCppEmbedding(v, &cpp_v);
  auto status_or_similarity =
      mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity(
          cpp_u, cpp_v);
@ -137,8 +136,8 @@ int text_embedder_close(void* embedder, char** error_ms) {
      embedder, error_ms);
 }
-int cosine_similarity(const Embedding& u, const Embedding& v,
+int text_embedder_cosine_similarity(const Embedding& u, const Embedding& v,
-                      double* similarity, char** error_msg) {
+                                    double* similarity, char** error_msg) {
  return mediapipe::tasks::c::text::text_embedder::
      CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg);
 }
--- a/mediapipe/tasks/c/text/text_embedder/text_embedder.h
+++ b/mediapipe/tasks/c/text/text_embedder/text_embedder.h
@ -72,8 +72,10 @@ MP_EXPORT int text_embedder_close(void* embedder, char** error_msg);
 // 0.
 //
 // [1]: https://en.wikipedia.org/wiki/Cosine_similarity
-MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v,
+MP_EXPORT int text_embedder_cosine_similarity(const Embedding& u,
-                                double* similarity, char** error_msg = nullptr);
+                                              const Embedding& v,
                                              double* similarity,
                                              char** error_msg);
 #ifdef __cplusplus
 }  // extern C
--- a/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc
+++ b/mediapipe/tasks/c/text/text_embedder/text_embedder_test.cc
@ -33,10 +33,10 @@ constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/text/";
 constexpr char kTestBertModelPath[] =
    "mobilebert_embedding_with_metadata.tflite";
 constexpr char kTestString0[] =
-        "When you go to this restaurant, they hold the pancake upside-down "
+    "When you go to this restaurant, they hold the pancake upside-down "
-        "before they hand it to you. It's a great gimmick.";
+    "before they hand it to you. It's a great gimmick.";
 constexpr char kTestString1[] =
-        "Let's make a plan to steal the declaration of independence.";
+    "Let's make a plan to steal the declaration of independence.";
 constexpr float kPrecision = 1e-3;
 std::string GetFullPath(absl::string_view file_name) {
@ -81,14 +81,16 @@ TEST(TextEmbedderTest, SucceedsWithCosineSimilarity) {
  // Extract both embeddings.
  TextEmbedderResult result0;
-  text_embedder_embed(embedder, kTestString0, &result0, /* error_msg */ nullptr);
+  text_embedder_embed(embedder, kTestString0, &result0,
                      /* error_msg */ nullptr);
  TextEmbedderResult result1;
-  text_embedder_embed(embedder, kTestString1, &result1, /* error_msg */ nullptr);
+  text_embedder_embed(embedder, kTestString1, &result1,
                      /* error_msg */ nullptr);
  // Check cosine similarity.
  double similarity;
-  cosine_similarity(result0.embeddings[0], result1.embeddings[0],
+  text_embedder_cosine_similarity(result0.embeddings[0], result1.embeddings[0],
-                    &similarity);
+                                  &similarity, nullptr);
  double expected_similarity = 0.98077;
  EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
  text_embedder_close(embedder, /* error_msg */ nullptr);
--- a/mediapipe/tasks/c/vision/core/BUILD
+++ b/mediapipe/tasks/c/vision/core/BUILD
@ -0,0 +1,22 @@
 # Copyright 2023 The MediaPipe Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 package(default_visibility = ["//mediapipe/tasks:internal"])
 licenses(["notice"])
 cc_library(
    name = "common",
    hdrs = ["common.h"],
 )
--- a/mediapipe/tasks/c/vision/core/common.h
+++ b/mediapipe/tasks/c/vision/core/common.h
@ -0,0 +1,69 @@
 /* Copyright 2023 The MediaPipe Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
 #define MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
 #include <cstdint>
 #ifdef __cplusplus
 extern "C" {
 #endif
 // Supported image formats.
 // Identifies the pixel layout of an image buffer (see the `format` field of
 // `ImageFrame`). Every enumerator has an explicit numeric value and the values
 // are not contiguous (SBGRA is 11), so never derive them by counting.
 // NOTE(review): the numbers presumably mirror the C++ MediaPipe image format
 // enum -- confirm before adding or renumbering entries.
 enum ImageFormat {
    UNKNOWN = 0,
    SRGB = 1,
    SRGBA = 2,
    GRAY8 = 3,
    SBGRA = 11  // compatible with Flutter `bgra8888` format.
 };
 // Supported processing modes.
 // Numbering starts at 1; no enumerator has the value 0, so a zero-initialized
 // variable of this type does not name any of the modes listed here.
 // NOTE(review): presumably IMAGE is for single images, VIDEO for decoded video
 // frames and LIVE_STREAM for callback-based streaming input -- confirm against
 // the task headers that consume this enum.
 enum RunningMode {
    IMAGE = 1,
    VIDEO = 2,
    LIVE_STREAM = 3,
 };
 // Structure to hold image frame.
 // Describes a block of image bytes: a format tag, a read-only pointer to the
 // data, and the image dimensions. The struct carries no row stride and no
 // buffer length field.
 // NOTE(review): width/height are presumably in pixels and the buffer is
 // presumably owned by the caller and tightly packed -- TODO confirm.
 struct ImageFrame {
    // Pixel layout of the bytes behind `image_buffer`.
    enum ImageFormat format;
    // Pointer to the image bytes; the pointee is const through this struct.
    const uint8_t* image_buffer;
    // Image dimensions.
    int width;
    int height;
 };
 // TODO: Add GPU buffer declaration and processing logic for it.
 // Placeholder for a GPU-backed image. At present it records only the image
 // dimensions; it holds no handle or pointer to any GPU resource.
 struct GpuBuffer {
    int width;
    int height;
 };
 // The object to contain an image, realizes `OneOf` concept.
 // `type` is the discriminator for the anonymous union below: IMAGE_FRAME
 // corresponds to `image_frame`, GPU_BUFFER to `gpu_buffer`. Both members share
 // the same storage, so only the member selected by `type` should be read.
 struct MpImage {
    // Selects which union member is in use.
    enum { IMAGE_FRAME, GPU_BUFFER } type;
    union {
        struct ImageFrame image_frame;
        struct GpuBuffer gpu_buffer;
    };
 };
 #ifdef __cplusplus
 }  // extern C
 #endif
 #endif  // MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
--- a/mediapipe/tasks/c/vision/image_classifier/BUILD
+++ b/mediapipe/tasks/c/vision/image_classifier/BUILD
@ -30,6 +30,7 @@ cc_library(
        "//mediapipe/tasks/c/components/processors:classifier_options_converter",
        "//mediapipe/tasks/c/core:base_options",
        "//mediapipe/tasks/c/core:base_options_converter",
        "//mediapipe/tasks/c/vision/core:common",
        "//mediapipe/tasks/cc/vision/core:running_mode",
        "//mediapipe/tasks/cc/vision/image_classifier",
        "//mediapipe/tasks/cc/vision/utils:image_utils",
--- a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h
+++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h
@ -16,11 +16,10 @@ limitations under the License.
 #ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
 #define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
 #include <cstdint>
 #include "mediapipe/tasks/c/components/containers/classification_result.h"
 #include "mediapipe/tasks/c/components/processors/classifier_options.h"
 #include "mediapipe/tasks/c/core/base_options.h"
 #include "mediapipe/tasks/c/vision/core/common.h"
 #ifndef MP_EXPORT
 #define MP_EXPORT __attribute__((visibility("default")))
@ -32,46 +31,7 @@ extern "C" {
 typedef ClassificationResult ImageClassifierResult;
-// Supported image formats.
+// The options for configuring a MediaPipe image classifier task.
 enum ImageFormat {
  UNKNOWN = 0,
  SRGB = 1,
  SRGBA = 2,
  GRAY8 = 3,
  SBGRA = 11  // compatible with Flutter `bgra8888` format.
 };
 // Supported processing modes.
 enum RunningMode {
  IMAGE = 1,
  VIDEO = 2,
  LIVE_STREAM = 3,
 };
 // Structure to hold image frame.
 struct ImageFrame {
  enum ImageFormat format;
  const uint8_t* image_buffer;
  int width;
  int height;
 };
 // TODO: Add GPU buffer declaration and proccessing logic for it.
 struct GpuBuffer {
  int width;
  int height;
 };
 // The object to contain an image, realizes `OneOf` concept.
 struct MpImage {
  enum { IMAGE_FRAME, GPU_BUFFER } type;
  union {
    struct ImageFrame image_frame;
    struct GpuBuffer gpu_buffer;
  };
 };
 // The options for configuring a Mediapipe image classifier task.
 struct ImageClassifierOptions {
  // Base options for configuring MediaPipe Tasks, such as specifying the model
  // file with metadata, accelerator options, op resolver, etc.
@ -122,12 +82,39 @@ MP_EXPORT int image_classifier_classify_image(void* classifier,
                                              ImageClassifierResult* result,
                                              char** error_msg);
 // Performs image classification on the provided video frame.
 // Only use this method when the ImageClassifier is created with the video
 // running mode.
 // The image can be of any size with format RGB or RGBA. It's required to
 // provide the video frame's timestamp (in milliseconds). The input timestamps
 // must be monotonically increasing.
 // If an error occurs, returns an error code and sets the error parameter to an
 // error message (if `error_msg` is not `nullptr`). You must free the memory
 // allocated for the error message.
 MP_EXPORT int image_classifier_classify_for_video(void* classifier,
                                                  const MpImage* image,
                                                  int64_t timestamp_ms,
                                                  ImageClassifierResult* result,
                                                  char** error_msg);
 // Sends live image data to image classification, and the results will be
 // available via the "result_callback" provided in the ImageClassifierOptions.
 // Only use this method when the ImageClassifier is created with the live
 // stream running mode.
 // The image can be of any size with format RGB or RGBA. It's required to
 // provide a timestamp (in milliseconds) to indicate when the input image is
 // sent to the image classifier. The input timestamps must be monotonically
 // increasing.
 // The "result_callback" provides:
 //   - The classification results as an ImageClassifierResult object.
 //   - The const reference to the corresponding input image that the image
 //     classifier runs on. Note that the const reference to the image will no
 //     longer be valid when the callback returns. To access the image data
 //     outside of the callback, callers need to make a copy of the image.
 //   - The input timestamp in milliseconds.
 // If an error occurs, returns an error code and sets the error parameter to an
 // error message (if `error_msg` is not `nullptr`). You must free the memory
 // allocated for the error message.
 MP_EXPORT int image_classifier_classify_async(void* classifier,
                                              const MpImage* image,
                                              int64_t timestamp_ms,
--- a/mediapipe/tasks/c/vision/image_embedder/BUILD
+++ b/mediapipe/tasks/c/vision/image_embedder/BUILD
@ -30,6 +30,8 @@ cc_library(
        "//mediapipe/tasks/c/components/processors:embedder_options_converter",
        "//mediapipe/tasks/c/core:base_options",
        "//mediapipe/tasks/c/core:base_options_converter",
        "//mediapipe/tasks/c/vision/core:common",
        "//mediapipe/tasks/cc/vision/core:running_mode",
        "//mediapipe/tasks/cc/vision/image_embedder",
        "//mediapipe/tasks/cc/vision/utils:image_utils",
        "@com_google_absl//absl/log:absl_log",
--- a/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc
+++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.cc
@ -36,8 +36,8 @@ namespace mediapipe::tasks::c::vision::image_embedder {
 namespace {
 using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
 using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
 using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding;
 using ::mediapipe::tasks::c::components::containers::
    CppConvertToEmbeddingResult;
 using ::mediapipe::tasks::c::components::processors::
@ -235,9 +235,9 @@ int CppImageEmbedderClose(void* embedder, char** error_msg) {
 int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
                                     double* similarity, char** error_msg) {
  CppEmbedding cpp_u;
-  ConvertToCppEmbedding(u, &cpp_u);
+  CppConvertToCppEmbedding(u, &cpp_u);
  CppEmbedding cpp_v;
-  ConvertToCppEmbedding(v, &cpp_v);
+  CppConvertToCppEmbedding(v, &cpp_v);
  auto status_or_similarity =
      mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity(
          cpp_u, cpp_v);
@ -291,8 +291,8 @@ int image_embedder_close(void* embedder, char** error_msg) {
      embedder, error_msg);
 }
-int cosine_similarity(const Embedding& u, const Embedding& v,
+int image_embedder_cosine_similarity(const Embedding& u, const Embedding& v,
-                      double* similarity, char** error_msg) {
+                                     double* similarity, char** error_msg) {
  return mediapipe::tasks::c::vision::image_embedder::
      CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg);
 }
--- a/mediapipe/tasks/c/vision/image_embedder/image_embedder.h
+++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h
@ -21,6 +21,7 @@ limitations under the License.
 #include "mediapipe/tasks/c/components/containers/embedding_result.h"
 #include "mediapipe/tasks/c/components/processors/embedder_options.h"
 #include "mediapipe/tasks/c/core/base_options.h"
 #include "mediapipe/tasks/c/vision/core/common.h"
 #ifndef MP_EXPORT
 #define MP_EXPORT __attribute__((visibility("default")))
@ -32,45 +33,6 @@ extern "C" {
 typedef EmbeddingResult ImageEmbedderResult;
 // Supported image formats.
 enum ImageFormat {
  UNKNOWN = 0,
  SRGB = 1,
  SRGBA = 2,
  GRAY8 = 3,
  SBGRA = 11  // compatible with Flutter `bgra8888` format.
 };
 // Supported processing modes.
 enum RunningMode {
  IMAGE = 1,
  VIDEO = 2,
  LIVE_STREAM = 3,
 };
 // Structure to hold image frame.
 struct ImageFrame {
  enum ImageFormat format;
  const uint8_t* image_buffer;
  int width;
  int height;
 };
 // TODO: Add GPU buffer declaration and proccessing logic for it.
 struct GpuBuffer {
  int width;
  int height;
 };
 // The object to contain an image, realizes `OneOf` concept.
 struct MpImage {
  enum { IMAGE_FRAME, GPU_BUFFER } type;
  union {
    struct ImageFrame image_frame;
    struct GpuBuffer gpu_buffer;
  };
 };
 // The options for configuring a MediaPipe image embedder task.
 struct ImageEmbedderOptions {
  // Base options for configuring MediaPipe Tasks, such as specifying the model
@ -121,12 +83,40 @@ MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image,
                                         ImageEmbedderResult* result,
                                         char** error_msg);
 // Performs embedding extraction on the provided video frame.
 // Only use this method when the ImageEmbedder is created with the video
 // running mode.
 // The image can be of any size with format RGB or RGBA. It's required to
 // provide the video frame's timestamp (in milliseconds). The input timestamps
 // must be monotonically increasing.
 // If an error occurs, returns an error code and sets the error parameter to an
 // error message (if `error_msg` is not `nullptr`). You must free the memory
 // allocated for the error message.
 MP_EXPORT int image_embedder_embed_for_video(void* embedder,
                                             const MpImage* image,
                                             int64_t timestamp_ms,
                                             ImageEmbedderResult* result,
                                             char** error_msg);
 // Sends live image data to embedder, and the results will be available via
 // the "result_callback" provided in the ImageEmbedderOptions.
 // Only use this method when the ImageEmbedder is created with the live
 // stream running mode.
 // The image can be of any size with format RGB or RGBA. It's required to
 // provide a timestamp (in milliseconds) to indicate when the input image is
 // sent to the image embedder. The input timestamps must be monotonically
 // increasing.
 // The "result_callback" provides
 //   - The embedding results as a
 //     components::containers::proto::EmbeddingResult object.
 //   - The const reference to the corresponding input image that the image
 //     embedder runs on. Note that the const reference to the image will no
 //     longer be valid when the callback returns. To access the image data
 //     outside of the callback, callers need to make a copy of the image.
 //   - The input timestamp in milliseconds.
 // If an error occurs, returns an error code and sets the error parameter to an
 // error message (if `error_msg` is not `nullptr`). You must free the memory
 // allocated for the error message.
 MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image,
                                         int64_t timestamp_ms,
                                         char** error_msg);
@ -147,8 +137,10 @@ MP_EXPORT int image_embedder_close(void* embedder, char** error_msg);
 // 0.
 //
 // [1]: https://en.wikipedia.org/wiki/Cosine_similarity
-MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v,
+MP_EXPORT int image_embedder_cosine_similarity(const Embedding& u,
-                                double* similarity, char** error_msg);
+                                               const Embedding& v,
                                               double* similarity,
                                               char** error_msg);
 #ifdef __cplusplus
 }  // extern C
--- a/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc
+++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc
@ -143,8 +143,9 @@ TEST(ImageEmbedderTest, SucceedsWithCosineSimilarity) {
  CheckMobileNetV3Result(crop_result, false);
  // Check cosine similarity.
  double similarity;
-  cosine_similarity(image_result.embeddings[0], crop_result.embeddings[0],
+  image_embedder_cosine_similarity(image_result.embeddings[0],
-                    &similarity, /* error_msg */ nullptr);
+                                   crop_result.embeddings[0], &similarity,
                                   /* error_msg */ nullptr);
  double expected_similarity = 0.925519;
  EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
  image_embedder_close_result(&image_result);