Resolved issues and added a common header to hold all the necessary structures for the vision tasks

This commit is contained in:
Kinar 2023-11-07 14:23:15 -08:00
parent 197358dfee
commit c442d6117e
13 changed files with 185 additions and 108 deletions

View File

@ -66,7 +66,7 @@ void CppConvertToEmbeddingResult(
} }
} }
void ConvertToCppEmbedding( void CppConvertToCppEmbedding(
const Embedding& in, // C struct as input const Embedding& in, // C struct as input
mediapipe::tasks::components::containers::Embedding* out) { mediapipe::tasks::components::containers::Embedding* out) {
// Handle float embeddings // Handle float embeddings
@ -85,7 +85,7 @@ void ConvertToCppEmbedding(
// Copy head_name if it is present. // Copy head_name if it is present.
if (in.head_name) { if (in.head_name) {
out->head_name = std::make_optional(std::string(in.head_name)); out->head_name = std::string(in.head_name);
} }
} }

View File

@ -29,7 +29,7 @@ void CppConvertToEmbeddingResult(
const mediapipe::tasks::components::containers::EmbeddingResult& in, const mediapipe::tasks::components::containers::EmbeddingResult& in,
EmbeddingResult* out); EmbeddingResult* out);
void ConvertToCppEmbedding( void CppConvertToCppEmbedding(
const Embedding& in, const Embedding& in,
mediapipe::tasks::components::containers::Embedding* out); mediapipe::tasks::components::containers::Embedding* out);

View File

@ -29,9 +29,8 @@ namespace mediapipe::tasks::c::text::text_embedder {
namespace { namespace {
using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding;
using ::mediapipe::tasks::c::components::containers:: using ::mediapipe::tasks::c::components::containers::
CppConvertToEmbeddingResult; CppConvertToEmbeddingResult;
using ::mediapipe::tasks::c::components::processors:: using ::mediapipe::tasks::c::components::processors::
@ -97,9 +96,9 @@ int CppTextEmbedderClose(void* embedder, char** error_msg) {
int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v, int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) { double* similarity, char** error_msg) {
CppEmbedding cpp_u; CppEmbedding cpp_u;
ConvertToCppEmbedding(u, &cpp_u); CppConvertToCppEmbedding(u, &cpp_u);
CppEmbedding cpp_v; CppEmbedding cpp_v;
ConvertToCppEmbedding(v, &cpp_v); CppConvertToCppEmbedding(v, &cpp_v);
auto status_or_similarity = auto status_or_similarity =
mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity( mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity(
cpp_u, cpp_v); cpp_u, cpp_v);
@ -137,7 +136,7 @@ int text_embedder_close(void* embedder, char** error_ms) {
embedder, error_ms); embedder, error_ms);
} }
int cosine_similarity(const Embedding& u, const Embedding& v, int text_embedder_cosine_similarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) { double* similarity, char** error_msg) {
return mediapipe::tasks::c::text::text_embedder:: return mediapipe::tasks::c::text::text_embedder::
CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg); CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg);

View File

@ -72,8 +72,10 @@ MP_EXPORT int text_embedder_close(void* embedder, char** error_msg);
// 0. // 0.
// //
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity // [1]: https://en.wikipedia.org/wiki/Cosine_similarity
MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v, MP_EXPORT int text_embedder_cosine_similarity(const Embedding& u,
double* similarity, char** error_msg = nullptr); const Embedding& v,
double* similarity,
char** error_msg);
#ifdef __cplusplus #ifdef __cplusplus
} // extern C } // extern C

View File

@ -81,14 +81,16 @@ TEST(TextEmbedderTest, SucceedsWithCosineSimilarity) {
// Extract both embeddings. // Extract both embeddings.
TextEmbedderResult result0; TextEmbedderResult result0;
text_embedder_embed(embedder, kTestString0, &result0, /* error_msg */ nullptr); text_embedder_embed(embedder, kTestString0, &result0,
/* error_msg */ nullptr);
TextEmbedderResult result1; TextEmbedderResult result1;
text_embedder_embed(embedder, kTestString1, &result1, /* error_msg */ nullptr); text_embedder_embed(embedder, kTestString1, &result1,
/* error_msg */ nullptr);
// Check cosine similarity. // Check cosine similarity.
double similarity; double similarity;
cosine_similarity(result0.embeddings[0], result1.embeddings[0], text_embedder_cosine_similarity(result0.embeddings[0], result1.embeddings[0],
&similarity); &similarity, nullptr);
double expected_similarity = 0.98077; double expected_similarity = 0.98077;
EXPECT_LE(abs(similarity - expected_similarity), kPrecision); EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
text_embedder_close(embedder, /* error_msg */ nullptr); text_embedder_close(embedder, /* error_msg */ nullptr);

View File

@ -0,0 +1,22 @@
# Copyright 2023 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
package(default_visibility = ["//mediapipe/tasks:internal"])
licenses(["notice"])
# Header-only target (no srcs) exposing common.h; the C vision task libraries
# depend on it as "//mediapipe/tasks/c/vision/core:common".
cc_library(
name = "common",
hdrs = ["common.h"],
)

View File

@ -0,0 +1,69 @@
/* Copyright 2023 The MediaPipe Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
#define MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_

// Use the C header instead of <cstdint>: the `#ifdef __cplusplus` guards below
// make this header consumable from plain C, where <cstdint> does not exist.
// <stdint.h> also guarantees the unqualified `uint8_t` used by ImageFrame.
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Supported image formats.
// NOTE(review): the numeric values presumably mirror the C++ MediaPipe image
// format enum (hence the gap before SBGRA) - confirm before renumbering.
enum ImageFormat {
  UNKNOWN = 0,
  SRGB = 1,
  SRGBA = 2,
  GRAY8 = 3,
  SBGRA = 11  // compatible with Flutter `bgra8888` format.
};

// Supported processing modes. Values start at 1; there is no zero enumerator.
enum RunningMode {
  IMAGE = 1,
  VIDEO = 2,
  LIVE_STREAM = 3,
};

// Structure to hold image frame.
struct ImageFrame {
  // Format describing how `image_buffer` is to be interpreted.
  enum ImageFormat format;
  // Read-only pointer to the image data; this struct declares no ownership.
  // NOTE(review): buffer lifetime and row stride are not specified here -
  // confirm against the code that consumes this struct.
  const uint8_t* image_buffer;
  // Image dimensions (presumably in pixels - TODO confirm).
  int width;
  int height;
};

// TODO: Add GPU buffer declaration and processing logic for it.
// Currently only carries the dimensions; no GPU handle is declared yet.
struct GpuBuffer {
  int width;
  int height;
};

// The object to contain an image, realizes `OneOf` concept.
struct MpImage {
  // Tag selecting which member of the union below holds the image.
  enum { IMAGE_FRAME, GPU_BUFFER } type;
  union {
    struct ImageFrame image_frame;
    struct GpuBuffer gpu_buffer;
  };
};

#ifdef __cplusplus
}  // extern C
#endif

#endif  // MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_

View File

@ -30,6 +30,7 @@ cc_library(
"//mediapipe/tasks/c/components/processors:classifier_options_converter", "//mediapipe/tasks/c/components/processors:classifier_options_converter",
"//mediapipe/tasks/c/core:base_options", "//mediapipe/tasks/c/core:base_options",
"//mediapipe/tasks/c/core:base_options_converter", "//mediapipe/tasks/c/core:base_options_converter",
"//mediapipe/tasks/c/vision/core:common",
"//mediapipe/tasks/cc/vision/core:running_mode", "//mediapipe/tasks/cc/vision/core:running_mode",
"//mediapipe/tasks/cc/vision/image_classifier", "//mediapipe/tasks/cc/vision/image_classifier",
"//mediapipe/tasks/cc/vision/utils:image_utils", "//mediapipe/tasks/cc/vision/utils:image_utils",

View File

@ -16,11 +16,10 @@ limitations under the License.
#ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_ #ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
#define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_ #define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
#include <cstdint>
#include "mediapipe/tasks/c/components/containers/classification_result.h" #include "mediapipe/tasks/c/components/containers/classification_result.h"
#include "mediapipe/tasks/c/components/processors/classifier_options.h" #include "mediapipe/tasks/c/components/processors/classifier_options.h"
#include "mediapipe/tasks/c/core/base_options.h" #include "mediapipe/tasks/c/core/base_options.h"
#include "mediapipe/tasks/c/vision/core/common.h"
#ifndef MP_EXPORT #ifndef MP_EXPORT
#define MP_EXPORT __attribute__((visibility("default"))) #define MP_EXPORT __attribute__((visibility("default")))
@ -32,46 +31,7 @@ extern "C" {
typedef ClassificationResult ImageClassifierResult; typedef ClassificationResult ImageClassifierResult;
// Supported image formats. // The options for configuring a MediaPipe image classifier task.
enum ImageFormat {
UNKNOWN = 0,
SRGB = 1,
SRGBA = 2,
GRAY8 = 3,
SBGRA = 11 // compatible with Flutter `bgra8888` format.
};
// Supported processing modes.
enum RunningMode {
IMAGE = 1,
VIDEO = 2,
LIVE_STREAM = 3,
};
// Structure to hold image frame.
struct ImageFrame {
enum ImageFormat format;
const uint8_t* image_buffer;
int width;
int height;
};
// TODO: Add GPU buffer declaration and proccessing logic for it.
struct GpuBuffer {
int width;
int height;
};
// The object to contain an image, realizes `OneOf` concept.
struct MpImage {
enum { IMAGE_FRAME, GPU_BUFFER } type;
union {
struct ImageFrame image_frame;
struct GpuBuffer gpu_buffer;
};
};
// The options for configuring a Mediapipe image classifier task.
struct ImageClassifierOptions { struct ImageClassifierOptions {
// Base options for configuring MediaPipe Tasks, such as specifying the model // Base options for configuring MediaPipe Tasks, such as specifying the model
// file with metadata, accelerator options, op resolver, etc. // file with metadata, accelerator options, op resolver, etc.
@ -122,12 +82,39 @@ MP_EXPORT int image_classifier_classify_image(void* classifier,
ImageClassifierResult* result, ImageClassifierResult* result,
char** error_msg); char** error_msg);
// Performs image classification on the provided video frame.
// Only use this method when the ImageClassifier is created with the video
// running mode.
// The image can be of any size with format RGB or RGBA. It's required to
// provide the video frame's timestamp (in milliseconds). The input timestamps
// must be monotonically increasing.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int image_classifier_classify_for_video(void* classifier, MP_EXPORT int image_classifier_classify_for_video(void* classifier,
const MpImage* image, const MpImage* image,
int64_t timestamp_ms, int64_t timestamp_ms,
ImageClassifierResult* result, ImageClassifierResult* result,
char** error_msg); char** error_msg);
// Sends live image data to image classification, and the results will be
// available via the "result_callback" provided in the ImageClassifierOptions.
// Only use this method when the ImageClassifier is created with the live
// stream running mode.
// The image can be of any size with format RGB or RGBA. It's required to
// provide a timestamp (in milliseconds) to indicate when the input image is
// sent to the image classifier. The input timestamps must be monotonically
// increasing.
// The "result_callback" provides:
// - The classification results as an ImageClassifierResult object.
// - The const reference to the corresponding input image that the image
// classifier runs on. Note that the const reference to the image will no
// longer be valid when the callback returns. To access the image data
// outside of the callback, callers need to make a copy of the image.
// - The input timestamp in milliseconds.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int image_classifier_classify_async(void* classifier, MP_EXPORT int image_classifier_classify_async(void* classifier,
const MpImage* image, const MpImage* image,
int64_t timestamp_ms, int64_t timestamp_ms,

View File

@ -30,6 +30,8 @@ cc_library(
"//mediapipe/tasks/c/components/processors:embedder_options_converter", "//mediapipe/tasks/c/components/processors:embedder_options_converter",
"//mediapipe/tasks/c/core:base_options", "//mediapipe/tasks/c/core:base_options",
"//mediapipe/tasks/c/core:base_options_converter", "//mediapipe/tasks/c/core:base_options_converter",
"//mediapipe/tasks/c/vision/core:common",
"//mediapipe/tasks/cc/vision/core:running_mode",
"//mediapipe/tasks/cc/vision/image_embedder", "//mediapipe/tasks/cc/vision/image_embedder",
"//mediapipe/tasks/cc/vision/utils:image_utils", "//mediapipe/tasks/cc/vision/utils:image_utils",
"@com_google_absl//absl/log:absl_log", "@com_google_absl//absl/log:absl_log",

View File

@ -36,8 +36,8 @@ namespace mediapipe::tasks::c::vision::image_embedder {
namespace { namespace {
using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding;
using ::mediapipe::tasks::c::components::containers:: using ::mediapipe::tasks::c::components::containers::
CppConvertToEmbeddingResult; CppConvertToEmbeddingResult;
using ::mediapipe::tasks::c::components::processors:: using ::mediapipe::tasks::c::components::processors::
@ -235,9 +235,9 @@ int CppImageEmbedderClose(void* embedder, char** error_msg) {
int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v, int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) { double* similarity, char** error_msg) {
CppEmbedding cpp_u; CppEmbedding cpp_u;
ConvertToCppEmbedding(u, &cpp_u); CppConvertToCppEmbedding(u, &cpp_u);
CppEmbedding cpp_v; CppEmbedding cpp_v;
ConvertToCppEmbedding(v, &cpp_v); CppConvertToCppEmbedding(v, &cpp_v);
auto status_or_similarity = auto status_or_similarity =
mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity( mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity(
cpp_u, cpp_v); cpp_u, cpp_v);
@ -291,7 +291,7 @@ int image_embedder_close(void* embedder, char** error_msg) {
embedder, error_msg); embedder, error_msg);
} }
int cosine_similarity(const Embedding& u, const Embedding& v, int image_embedder_cosine_similarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) { double* similarity, char** error_msg) {
return mediapipe::tasks::c::vision::image_embedder:: return mediapipe::tasks::c::vision::image_embedder::
CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg); CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg);

View File

@ -21,6 +21,7 @@ limitations under the License.
#include "mediapipe/tasks/c/components/containers/embedding_result.h" #include "mediapipe/tasks/c/components/containers/embedding_result.h"
#include "mediapipe/tasks/c/components/processors/embedder_options.h" #include "mediapipe/tasks/c/components/processors/embedder_options.h"
#include "mediapipe/tasks/c/core/base_options.h" #include "mediapipe/tasks/c/core/base_options.h"
#include "mediapipe/tasks/c/vision/core/common.h"
#ifndef MP_EXPORT #ifndef MP_EXPORT
#define MP_EXPORT __attribute__((visibility("default"))) #define MP_EXPORT __attribute__((visibility("default")))
@ -32,45 +33,6 @@ extern "C" {
typedef EmbeddingResult ImageEmbedderResult; typedef EmbeddingResult ImageEmbedderResult;
// Supported image formats.
enum ImageFormat {
UNKNOWN = 0,
SRGB = 1,
SRGBA = 2,
GRAY8 = 3,
SBGRA = 11 // compatible with Flutter `bgra8888` format.
};
// Supported processing modes.
enum RunningMode {
IMAGE = 1,
VIDEO = 2,
LIVE_STREAM = 3,
};
// Structure to hold image frame.
struct ImageFrame {
enum ImageFormat format;
const uint8_t* image_buffer;
int width;
int height;
};
// TODO: Add GPU buffer declaration and proccessing logic for it.
struct GpuBuffer {
int width;
int height;
};
// The object to contain an image, realizes `OneOf` concept.
struct MpImage {
enum { IMAGE_FRAME, GPU_BUFFER } type;
union {
struct ImageFrame image_frame;
struct GpuBuffer gpu_buffer;
};
};
// The options for configuring a MediaPipe image embedder task. // The options for configuring a MediaPipe image embedder task.
struct ImageEmbedderOptions { struct ImageEmbedderOptions {
// Base options for configuring MediaPipe Tasks, such as specifying the model // Base options for configuring MediaPipe Tasks, such as specifying the model
@ -121,12 +83,40 @@ MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image,
ImageEmbedderResult* result, ImageEmbedderResult* result,
char** error_msg); char** error_msg);
// Performs embedding extraction on the provided video frame.
// Only use this method when the ImageEmbedder is created with the video
// running mode.
// The image can be of any size with format RGB or RGBA. It's required to
// provide the video frame's timestamp (in milliseconds). The input timestamps
// must be monotonically increasing.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int image_embedder_embed_for_video(void* embedder, MP_EXPORT int image_embedder_embed_for_video(void* embedder,
const MpImage* image, const MpImage* image,
int64_t timestamp_ms, int64_t timestamp_ms,
ImageEmbedderResult* result, ImageEmbedderResult* result,
char** error_msg); char** error_msg);
// Sends live image data to embedder, and the results will be available via
// the "result_callback" provided in the ImageEmbedderOptions.
// Only use this method when the ImageEmbedder is created with the live
// stream running mode.
// The image can be of any size with format RGB or RGBA. It's required to
// provide a timestamp (in milliseconds) to indicate when the input image is
// sent to the image embedder. The input timestamps must be monotonically
// increasing.
// The "result_callback" provides:
// - The embedding results as an ImageEmbedderResult object.
// - The const reference to the corresponding input image that the image
// embedder runs on. Note that the const reference to the image will no
// longer be valid when the callback returns. To access the image data
// outside of the callback, callers need to make a copy of the image.
// - The input timestamp in milliseconds.
// If an error occurs, returns an error code and sets the error parameter to
// an error message (if `error_msg` is not `nullptr`). You must free the memory
// allocated for the error message.
MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image, MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image,
int64_t timestamp_ms, int64_t timestamp_ms,
char** error_msg); char** error_msg);
@ -147,8 +137,10 @@ MP_EXPORT int image_embedder_close(void* embedder, char** error_msg);
// 0. // 0.
// //
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity // [1]: https://en.wikipedia.org/wiki/Cosine_similarity
MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v, MP_EXPORT int image_embedder_cosine_similarity(const Embedding& u,
double* similarity, char** error_msg); const Embedding& v,
double* similarity,
char** error_msg);
#ifdef __cplusplus #ifdef __cplusplus
} // extern C } // extern C

View File

@ -143,8 +143,9 @@ TEST(ImageEmbedderTest, SucceedsWithCosineSimilarity) {
CheckMobileNetV3Result(crop_result, false); CheckMobileNetV3Result(crop_result, false);
// Check cosine similarity. // Check cosine similarity.
double similarity; double similarity;
cosine_similarity(image_result.embeddings[0], crop_result.embeddings[0], image_embedder_cosine_similarity(image_result.embeddings[0],
&similarity, /* error_msg */ nullptr); crop_result.embeddings[0], &similarity,
/* error_msg */ nullptr);
double expected_similarity = 0.925519; double expected_similarity = 0.925519;
EXPECT_LE(abs(similarity - expected_similarity), kPrecision); EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
image_embedder_close_result(&image_result); image_embedder_close_result(&image_result);