Updated the Image Embedder C API and added tests for cosine similarity

This commit is contained in:
Kinar 2023-11-07 07:02:08 -08:00
parent 3b122a1e61
commit d9080c0d38
8 changed files with 459 additions and 19 deletions

View File

@ -66,6 +66,29 @@ void CppConvertToEmbeddingResult(
} }
} }
// Converts a C `Embedding` struct into its C++ container counterpart.
//
// Only the pieces that are present in `in` are written: a null
// `float_embedding`, `quantized_embedding` or `head_name` leaves the matching
// member of `out` untouched. Both value buffers are read as `values_count`
// elements. `head_index` is always copied.
void ConvertToCppEmbedding(
    const Embedding& in,  // C struct as input
    mediapipe::tasks::components::containers::Embedding* out) {
  out->head_index = in.head_index;

  // Float values, when provided.
  if (const auto* float_begin = in.float_embedding; float_begin != nullptr) {
    out->float_embedding.assign(float_begin, float_begin + in.values_count);
  }

  // Quantized values, when provided.
  if (const auto* quantized_begin = in.quantized_embedding;
      quantized_begin != nullptr) {
    out->quantized_embedding.assign(quantized_begin,
                                    quantized_begin + in.values_count);
  }

  // The head name is optional on the C side as well.
  if (in.head_name != nullptr) {
    out->head_name = std::make_optional<std::string>(in.head_name);
  }
}
void CppCloseEmbeddingResult(EmbeddingResult* in) { void CppCloseEmbeddingResult(EmbeddingResult* in) {
for (uint32_t i = 0; i < in->embeddings_count; ++i) { for (uint32_t i = 0; i < in->embeddings_count; ++i) {
auto embedding_in = in->embeddings[i]; auto embedding_in = in->embeddings[i];

View File

@ -29,6 +29,10 @@ void CppConvertToEmbeddingResult(
const mediapipe::tasks::components::containers::EmbeddingResult& in, const mediapipe::tasks::components::containers::EmbeddingResult& in,
EmbeddingResult* out); EmbeddingResult* out);
void ConvertToCppEmbedding(
const Embedding& in,
mediapipe::tasks::components::containers::Embedding* out);
void CppCloseEmbedding(Embedding* in); void CppCloseEmbedding(Embedding* in);
void CppCloseEmbeddingResult(EmbeddingResult* in); void CppCloseEmbeddingResult(EmbeddingResult* in);

View File

@ -29,6 +29,8 @@ namespace mediapipe::tasks::c::text::text_embedder {
namespace { namespace {
using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
using ::mediapipe::tasks::c::components::containers:: using ::mediapipe::tasks::c::components::containers::
CppConvertToEmbeddingResult; CppConvertToEmbeddingResult;
@ -36,6 +38,7 @@ using ::mediapipe::tasks::c::components::processors::
CppConvertToEmbedderOptions; CppConvertToEmbedderOptions;
using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; using ::mediapipe::tasks::c::core::CppConvertToBaseOptions;
using ::mediapipe::tasks::text::text_embedder::TextEmbedder; using ::mediapipe::tasks::text::text_embedder::TextEmbedder;
// Short local name for the C++ embedding container, to tell it apart from the
// C `Embedding` struct used by this API.
using CppEmbedding = ::mediapipe::tasks::components::containers::Embedding;
int CppProcessError(absl::Status status, char** error_msg) { int CppProcessError(absl::Status status, char** error_msg) {
if (error_msg) { if (error_msg) {
@ -91,6 +94,24 @@ int CppTextEmbedderClose(void* embedder, char** error_msg) {
return 0; return 0;
} }
int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) {
CppEmbedding cpp_u;
ConvertToCppEmbedding(u, &cpp_u);
CppEmbedding cpp_v;
ConvertToCppEmbedding(v, &cpp_v);
auto status_or_similarity =
mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity(
cpp_u, cpp_v);
if (status_or_similarity.ok()) {
*similarity = status_or_similarity.value();
} else {
ABSL_LOG(ERROR) << "Cannot computer cosine similarity.";
return CppProcessError(status_or_similarity.status(), error_msg);
}
return 0;
}
} // namespace mediapipe::tasks::c::text::text_embedder } // namespace mediapipe::tasks::c::text::text_embedder
extern "C" { extern "C" {
@ -116,4 +137,10 @@ int text_embedder_close(void* embedder, char** error_ms) {
embedder, error_ms); embedder, error_ms);
} }
int cosine_similarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) {
return mediapipe::tasks::c::text::text_embedder::
CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg);
}
} // extern "C" } // extern "C"

View File

@ -67,6 +67,15 @@ MP_EXPORT void text_embedder_close_result(TextEmbedderResult* result);
// allocated for the error message. // allocated for the error message.
MP_EXPORT int text_embedder_close(void* embedder, char** error_msg = nullptr); MP_EXPORT int text_embedder_close(void* embedder, char** error_msg = nullptr);
// Utility function to compute cosine similarity [1] between two embeddings.
// May return an InvalidArgumentError if e.g. the embeddings are of different
// types (quantized vs. float), have different sizes, or have an L2-norm of
// 0.
//
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg = nullptr);
#ifdef __cplusplus #ifdef __cplusplus
} // extern C } // extern C
#endif #endif

View File

@ -32,7 +32,12 @@ using testing::HasSubstr;
constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/text/"; constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/text/";
constexpr char kTestBertModelPath[] = constexpr char kTestBertModelPath[] =
"mobilebert_embedding_with_metadata.tflite"; "mobilebert_embedding_with_metadata.tflite";
constexpr char kTestString[] = "It's beautiful outside."; constexpr char kTestString0[] =
"When you go to this restaurant, they hold the pancake upside-down "
"before they hand it to you. It's a great gimmick.";
constexpr char kTestString1[] =
"Let's make a plan to steal the declaration of independence.";
constexpr float kPrecision = 1e-3;
std::string GetFullPath(absl::string_view file_name) { std::string GetFullPath(absl::string_view file_name) {
return JoinPath("./", kTestDataDirectory, file_name); return JoinPath("./", kTestDataDirectory, file_name);
@ -51,7 +56,7 @@ TEST(TextEmbedderTest, SmokeTest) {
EXPECT_NE(embedder, nullptr); EXPECT_NE(embedder, nullptr);
TextEmbedderResult result; TextEmbedderResult result;
text_embedder_embed(embedder, kTestString, &result); text_embedder_embed(embedder, kTestString0, &result);
EXPECT_EQ(result.embeddings_count, 1); EXPECT_EQ(result.embeddings_count, 1);
EXPECT_EQ(result.embeddings[0].values_count, 512); EXPECT_EQ(result.embeddings[0].values_count, 512);
@ -59,6 +64,33 @@ TEST(TextEmbedderTest, SmokeTest) {
text_embedder_close(embedder); text_embedder_close(embedder);
} }
// Embeds two unrelated sentences with the same embedder and checks that the
// cosine similarity reported by the C API matches the reference value.
TEST(TextEmbedderTest, SucceedsWithCosineSimilarity) {
  const std::string model_path = GetFullPath(kTestBertModelPath);
  TextEmbedderOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_path= */ model_path.c_str()},
      /* embedder_options= */
      {/* l2_normalize= */ false,
       /* quantize= */ false}};

  void* embedder = text_embedder_create(&options);
  // Fatal: every call below would dereference a null embedder.
  ASSERT_NE(embedder, nullptr);

  // Extract both embeddings.
  TextEmbedderResult result0;
  text_embedder_embed(embedder, kTestString0, &result0);
  TextEmbedderResult result1;
  text_embedder_embed(embedder, kTestString1, &result1);

  // Check cosine similarity. The call returns 0 on success; only then is
  // `similarity` written.
  double similarity = 0.0;
  ASSERT_EQ(cosine_similarity(result0.embeddings[0], result1.embeddings[0],
                              &similarity),
            0);
  const double expected_similarity = 0.98077;
  // EXPECT_NEAR compares as floating point; an unqualified abs() may resolve
  // to the integer overload and truncate the difference to 0.
  EXPECT_NEAR(similarity, expected_similarity, kPrecision);

  // Release the memory owned by both results before closing the embedder.
  text_embedder_close_result(&result0);
  text_embedder_close_result(&result1);
  text_embedder_close(embedder);
}
TEST(TextEmbedderTest, ErrorHandling) { TEST(TextEmbedderTest, ErrorHandling) {
// It is an error to set neither the asset buffer nor the path. // It is an error to set neither the asset buffer nor the path.
TextEmbedderOptions options = { TextEmbedderOptions options = {

View File

@ -15,6 +15,8 @@ limitations under the License.
#include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h" #include "mediapipe/tasks/c/vision/image_embedder/image_embedder.h"
#include <cstdint>
#include <cstdlib>
#include <memory> #include <memory>
#include <utility> #include <utility>
@ -26,6 +28,7 @@ limitations under the License.
#include "mediapipe/tasks/c/components/containers/embedding_result_converter.h" #include "mediapipe/tasks/c/components/containers/embedding_result_converter.h"
#include "mediapipe/tasks/c/components/processors/embedder_options_converter.h" #include "mediapipe/tasks/c/components/processors/embedder_options_converter.h"
#include "mediapipe/tasks/c/core/base_options_converter.h" #include "mediapipe/tasks/c/core/base_options_converter.h"
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
#include "mediapipe/tasks/cc/vision/image_embedder/image_embedder.h" #include "mediapipe/tasks/cc/vision/image_embedder/image_embedder.h"
#include "mediapipe/tasks/cc/vision/utils/image_utils.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h"
@ -33,6 +36,7 @@ namespace mediapipe::tasks::c::vision::image_embedder {
namespace { namespace {
using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult; using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
using ::mediapipe::tasks::c::components::containers:: using ::mediapipe::tasks::c::components::containers::
CppConvertToEmbeddingResult; CppConvertToEmbeddingResult;
@ -40,7 +44,11 @@ using ::mediapipe::tasks::c::components::processors::
CppConvertToEmbedderOptions; CppConvertToEmbedderOptions;
using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; using ::mediapipe::tasks::c::core::CppConvertToBaseOptions;
using ::mediapipe::tasks::vision::CreateImageFromBuffer; using ::mediapipe::tasks::vision::CreateImageFromBuffer;
using ::mediapipe::tasks::vision::core::RunningMode;
using ::mediapipe::tasks::vision::image_embedder::ImageEmbedder; using ::mediapipe::tasks::vision::image_embedder::ImageEmbedder;
// Short local names for the C++ types, to tell them apart from the C structs
// of the same name used by this API.
using CppEmbedding = ::mediapipe::tasks::components::containers::Embedding;
using CppImageEmbedderResult =
    ::mediapipe::tasks::vision::image_embedder::ImageEmbedderResult;
int CppProcessError(absl::Status status, char** error_msg) { int CppProcessError(absl::Status status, char** error_msg) {
if (error_msg) { if (error_msg) {
@ -59,6 +67,54 @@ ImageEmbedder* CppImageEmbedderCreate(const ImageEmbedderOptions& options,
CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); CppConvertToBaseOptions(options.base_options, &cpp_options->base_options);
CppConvertToEmbedderOptions(options.embedder_options, CppConvertToEmbedderOptions(options.embedder_options,
&cpp_options->embedder_options); &cpp_options->embedder_options);
cpp_options->running_mode = static_cast<RunningMode>(options.running_mode);
// Enable callback for processing live stream data when the running mode is
// set to RunningMode::LIVE_STREAM.
if (cpp_options->running_mode == RunningMode::LIVE_STREAM) {
if (options.result_callback == nullptr) {
const absl::Status status = absl::InvalidArgumentError(
"Provided null pointer to callback function.");
ABSL_LOG(ERROR) << "Failed to create ImageEmbedder: " << status;
CppProcessError(status, error_msg);
return nullptr;
}
ImageEmbedderOptions::result_callback_fn result_callback =
options.result_callback;
cpp_options->result_callback =
[result_callback](absl::StatusOr<CppImageEmbedderResult> cpp_result,
const Image& image, int64_t timestamp) {
char* error_msg = nullptr;
if (!cpp_result.ok()) {
ABSL_LOG(ERROR)
<< "Embedding extraction failed: " << cpp_result.status();
CppProcessError(cpp_result.status(), &error_msg);
result_callback(nullptr, MpImage(), timestamp, error_msg);
free(error_msg);
return;
}
// Result is valid for the lifetime of the callback function.
ImageEmbedderResult result;
CppConvertToEmbeddingResult(*cpp_result, &result);
const auto& image_frame = image.GetImageFrameSharedPtr();
const MpImage mp_image = {
.type = MpImage::IMAGE_FRAME,
.image_frame = {
.format = static_cast<::ImageFormat>(image_frame->Format()),
.image_buffer = image_frame->PixelData(),
.width = image_frame->Width(),
.height = image_frame->Height()}};
result_callback(&result, mp_image, timestamp,
/* error_msg= */ nullptr);
CppCloseEmbeddingResult(&result);
};
}
auto embedder = ImageEmbedder::Create(std::move(cpp_options)); auto embedder = ImageEmbedder::Create(std::move(cpp_options));
if (!embedder.ok()) { if (!embedder.ok()) {
@ -72,10 +128,10 @@ ImageEmbedder* CppImageEmbedderCreate(const ImageEmbedderOptions& options,
int CppImageEmbedderEmbed(void* embedder, const MpImage* image, int CppImageEmbedderEmbed(void* embedder, const MpImage* image,
ImageEmbedderResult* result, char** error_msg) { ImageEmbedderResult* result, char** error_msg) {
if (image->type == MpImage::GPU_BUFFER) { if (image->type == MpImage::GPU_BUFFER) {
absl::Status status = const absl::Status status =
absl::InvalidArgumentError("gpu buffer not supported yet"); absl::InvalidArgumentError("GPU Buffer not supported yet.");
ABSL_LOG(ERROR) << "Classification failed: " << status.message(); ABSL_LOG(ERROR) << "Embedding extraction failed: " << status.message();
return CppProcessError(status, error_msg); return CppProcessError(status, error_msg);
} }
@ -92,13 +148,75 @@ int CppImageEmbedderEmbed(void* embedder, const MpImage* image,
auto cpp_embedder = static_cast<ImageEmbedder*>(embedder); auto cpp_embedder = static_cast<ImageEmbedder*>(embedder);
auto cpp_result = cpp_embedder->Embed(*img); auto cpp_result = cpp_embedder->Embed(*img);
if (!cpp_result.ok()) { if (!cpp_result.ok()) {
ABSL_LOG(ERROR) << "Classification failed: " << cpp_result.status(); ABSL_LOG(ERROR) << "Embedding extraction failed: " << cpp_result.status();
return CppProcessError(cpp_result.status(), error_msg); return CppProcessError(cpp_result.status(), error_msg);
} }
CppConvertToEmbeddingResult(*cpp_result, result); CppConvertToEmbeddingResult(*cpp_result, result);
return 0; return 0;
} }
int CppImageEmbedderEmbedForVideo(void* embedder, const MpImage* image,
int64_t timestamp_ms,
ImageEmbedderResult* result,
char** error_msg) {
if (image->type == MpImage::GPU_BUFFER) {
absl::Status status =
absl::InvalidArgumentError("GPU Buffer not supported yet");
ABSL_LOG(ERROR) << "Embedding extraction failed: " << status.message();
return CppProcessError(status, error_msg);
}
const auto img = CreateImageFromBuffer(
static_cast<ImageFormat::Format>(image->image_frame.format),
image->image_frame.image_buffer, image->image_frame.width,
image->image_frame.height);
if (!img.ok()) {
ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
return CppProcessError(img.status(), error_msg);
}
auto cpp_embedder = static_cast<ImageEmbedder*>(embedder);
auto cpp_result = cpp_embedder->EmbedForVideo(*img, timestamp_ms);
if (!cpp_result.ok()) {
ABSL_LOG(ERROR) << "Embedding extraction failed: " << cpp_result.status();
return CppProcessError(cpp_result.status(), error_msg);
}
CppConvertToEmbeddingResult(*cpp_result, result);
return 0;
}
int CppImageEmbedderEmbedAsync(void* embedder, const MpImage* image,
int64_t timestamp_ms, char** error_msg) {
if (image->type == MpImage::GPU_BUFFER) {
absl::Status status =
absl::InvalidArgumentError("GPU Buffer not supported yet");
ABSL_LOG(ERROR) << "Embedding extraction failed: " << status.message();
return CppProcessError(status, error_msg);
}
const auto img = CreateImageFromBuffer(
static_cast<ImageFormat::Format>(image->image_frame.format),
image->image_frame.image_buffer, image->image_frame.width,
image->image_frame.height);
if (!img.ok()) {
ABSL_LOG(ERROR) << "Failed to create Image: " << img.status();
return CppProcessError(img.status(), error_msg);
}
auto cpp_embedder = static_cast<ImageEmbedder*>(embedder);
auto cpp_result = cpp_embedder->EmbedAsync(*img, timestamp_ms);
if (!cpp_result.ok()) {
ABSL_LOG(ERROR) << "Data preparation for the embedding extraction failed: "
<< cpp_result;
return CppProcessError(cpp_result, error_msg);
}
return 0;
}
void CppImageEmbedderCloseResult(ImageEmbedderResult* result) { void CppImageEmbedderCloseResult(ImageEmbedderResult* result) {
CppCloseEmbeddingResult(result); CppCloseEmbeddingResult(result);
} }
@ -114,6 +232,24 @@ int CppImageEmbedderClose(void* embedder, char** error_msg) {
return 0; return 0;
} }
int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) {
CppEmbedding cpp_u;
ConvertToCppEmbedding(u, &cpp_u);
CppEmbedding cpp_v;
ConvertToCppEmbedding(v, &cpp_v);
auto status_or_similarity =
mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity(
cpp_u, cpp_v);
if (status_or_similarity.ok()) {
*similarity = status_or_similarity.value();
} else {
ABSL_LOG(ERROR) << "Cannot computer cosine similarity.";
return CppProcessError(status_or_similarity.status(), error_msg);
}
return 0;
}
} // namespace mediapipe::tasks::c::vision::image_embedder } // namespace mediapipe::tasks::c::vision::image_embedder
extern "C" { extern "C" {
@ -130,14 +266,35 @@ int image_embedder_embed_image(void* embedder, const MpImage* image,
embedder, image, result, error_msg); embedder, image, result, error_msg);
} }
int image_embedder_embed_for_video(void* embedder, const MpImage* image,
int64_t timestamp_ms,
ImageEmbedderResult* result,
char** error_msg) {
return mediapipe::tasks::c::vision::image_embedder::
CppImageEmbedderEmbedForVideo(embedder, image, timestamp_ms, result,
error_msg);
}
int image_embedder_embed_async(void* embedder, const MpImage* image,
int64_t timestamp_ms, char** error_msg) {
return mediapipe::tasks::c::vision::image_embedder::
CppImageEmbedderEmbedAsync(embedder, image, timestamp_ms, error_msg);
}
void image_embedder_close_result(ImageEmbedderResult* result) { void image_embedder_close_result(ImageEmbedderResult* result) {
mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderCloseResult( mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderCloseResult(
result); result);
} }
int image_embedder_close(void* embedder, char** error_ms) { int image_embedder_close(void* embedder, char** error_msg) {
return mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderClose( return mediapipe::tasks::c::vision::image_embedder::CppImageEmbedderClose(
embedder, error_ms); embedder, error_msg);
}
int cosine_similarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg) {
return mediapipe::tasks::c::vision::image_embedder::
CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg);
} }
} // extern "C" } // extern "C"

View File

@ -92,9 +92,16 @@ struct ImageEmbedderOptions {
// The user-defined result callback for processing live stream data. // The user-defined result callback for processing live stream data.
// The result callback should only be specified when the running mode is set // The result callback should only be specified when the running mode is set
// to RunningMode::LIVE_STREAM. // to RunningMode::LIVE_STREAM. Arguments of the callback function include:
typedef void (*result_callback_fn)(ImageEmbedderResult*, const MpImage*, // the pointer to embedding result, the image that result was obtained
int64_t); // on, the timestamp relevant to embedding extraction results and pointer to
// error message in case of any failure. The validity of the passed arguments
// is true for the lifetime of the callback function.
//
// The embedder closes the result itself as soon as the callback returns, so
// the callback must neither close it nor keep pointers into it.
typedef void (*result_callback_fn)(ImageEmbedderResult* result,
const MpImage image, int64_t timestamp_ms,
char* error_msg);
result_callback_fn result_callback; result_callback_fn result_callback;
}; };
@ -110,12 +117,20 @@ MP_EXPORT void* image_embedder_create(struct ImageEmbedderOptions* options,
// If an error occurs, returns an error code and sets the error parameter to an // If an error occurs, returns an error code and sets the error parameter to an
// an error message (if `error_msg` is not nullptr). You must free the memory // an error message (if `error_msg` is not nullptr). You must free the memory
// allocated for the error message. // allocated for the error message.
//
// TODO: Add API for video and live stream processing.
MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image, MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image,
ImageEmbedderResult* result, ImageEmbedderResult* result,
char** error_msg = nullptr); char** error_msg = nullptr);
// Performs embedding extraction on the provided video frame; `timestamp_ms`
// is passed through to the embedder together with the frame. GPU buffers are
// rejected with an error.
// On success, returns 0 and stores the embeddings in `result`; free them with
// image_embedder_close_result.
// If an error occurs, returns an error code and sets the error parameter to an
// an error message (if `error_msg` is not nullptr). You must free the memory
// allocated for the error message.
MP_EXPORT int image_embedder_embed_for_video(void* embedder,
const MpImage* image,
int64_t timestamp_ms,
ImageEmbedderResult* result,
char** error_msg = nullptr);
// Sends the provided frame, together with `timestamp_ms`, to the embedder for
// asynchronous embedding extraction. GPU buffers are rejected with an error.
// Returns 0 when the frame was accepted; no result is returned by this call.
// NOTE(review): results are presumably delivered through
// `ImageEmbedderOptions::result_callback` in live stream mode — confirm.
// If an error occurs, returns an error code and sets the error parameter to an
// an error message (if `error_msg` is not nullptr). You must free the memory
// allocated for the error message.
MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image,
int64_t timestamp_ms,
char** error_msg = nullptr);
// Frees the memory allocated inside a ImageEmbedderResult result. // Frees the memory allocated inside a ImageEmbedderResult result.
// Does not free the result pointer itself. // Does not free the result pointer itself.
MP_EXPORT void image_embedder_close_result(ImageEmbedderResult* result); MP_EXPORT void image_embedder_close_result(ImageEmbedderResult* result);
@ -126,6 +141,15 @@ MP_EXPORT void image_embedder_close_result(ImageEmbedderResult* result);
// allocated for the error message. // allocated for the error message.
MP_EXPORT int image_embedder_close(void* embedder, char** error_msg = nullptr); MP_EXPORT int image_embedder_close(void* embedder, char** error_msg = nullptr);
// Utility function to compute cosine similarity [1] between two embeddings.
// May return an InvalidArgumentError if e.g. the embeddings are of different
// types (quantized vs. float), have different sizes, or have an L2-norm of
// 0.
//
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v,
double* similarity, char** error_msg = nullptr);
#ifdef __cplusplus #ifdef __cplusplus
} // extern C } // extern C
#endif #endif

View File

@ -37,13 +37,27 @@ constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
constexpr char kModelName[] = "mobilenet_v3_small_100_224_embedder.tflite"; constexpr char kModelName[] = "mobilenet_v3_small_100_224_embedder.tflite";
constexpr char kImageFile[] = "burger.jpg"; constexpr char kImageFile[] = "burger.jpg";
constexpr float kPrecision = 1e-6; constexpr float kPrecision = 1e-6;
constexpr int kIterations = 100;
std::string GetFullPath(absl::string_view file_name) { std::string GetFullPath(absl::string_view file_name) {
return JoinPath("./", kTestDataDirectory, file_name); return JoinPath("./", kTestDataDirectory, file_name);
} }
TEST(ImageEmbedderTest, SmokeTest) { // Utility function to check the sizes, head_index and head_names of a result
const auto image = DecodeImageFromFile(GetFullPath("burger.jpg")); // produced by kMobileNetV3Embedder.
void CheckMobileNetV3Result(const ImageEmbedderResult& result, bool quantized) {
EXPECT_EQ(result.embeddings_count, 1);
EXPECT_EQ(result.embeddings[0].head_index, 0);
EXPECT_EQ(std::string{result.embeddings[0].head_name}, "feature");
if (quantized) {
EXPECT_EQ(result.embeddings[0].values_count, 1024);
} else {
EXPECT_EQ(result.embeddings[0].values_count, 1024);
}
}
TEST(ImageEmbedderTest, ImageModeTest) {
const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
ASSERT_TRUE(image.ok()); ASSERT_TRUE(image.ok());
const std::string model_path = GetFullPath(kModelName); const std::string model_path = GetFullPath(kModelName);
@ -53,8 +67,7 @@ TEST(ImageEmbedderTest, SmokeTest) {
/* running_mode= */ RunningMode::IMAGE, /* running_mode= */ RunningMode::IMAGE,
/* embedder_options= */ /* embedder_options= */
{/* l2_normalize= */ true, {/* l2_normalize= */ true,
/* quantize= */ false}, /* quantize= */ false}};
};
void* embedder = image_embedder_create(&options); void* embedder = image_embedder_create(&options);
EXPECT_NE(embedder, nullptr); EXPECT_NE(embedder, nullptr);
@ -70,12 +83,163 @@ TEST(ImageEmbedderTest, SmokeTest) {
ImageEmbedderResult result; ImageEmbedderResult result;
image_embedder_embed_image(embedder, &mp_image, &result); image_embedder_embed_image(embedder, &mp_image, &result);
EXPECT_EQ(result.embeddings_count, 1); CheckMobileNetV3Result(result, false);
EXPECT_NEAR(result.embeddings[0].float_embedding[0], -0.0142344, kPrecision); EXPECT_NEAR(result.embeddings[0].float_embedding[0], -0.0142344, kPrecision);
image_embedder_close_result(&result); image_embedder_close_result(&result);
image_embedder_close(embedder); image_embedder_close(embedder);
} }
// Embeds an image and a crop of it with the same embedder and checks that the
// cosine similarity reported by the C API matches the reference value.
TEST(ImageEmbedderTest, SucceedsWithCosineSimilarity) {
  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
  ASSERT_TRUE(image.ok());
  const auto crop = DecodeImageFromFile(GetFullPath("burger_crop.jpg"));
  ASSERT_TRUE(crop.ok());

  const std::string model_path = GetFullPath(kModelName);
  ImageEmbedderOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::IMAGE,
      /* embedder_options= */
      {/* l2_normalize= */ true,
       /* quantize= */ false}};

  void* embedder = image_embedder_create(&options);
  // Fatal: every call below would dereference a null embedder.
  ASSERT_NE(embedder, nullptr);

  const auto& image_frame = image->GetImageFrameSharedPtr();
  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
                      .image_buffer = image_frame->PixelData(),
                      .width = image_frame->Width(),
                      .height = image_frame->Height()}};

  const auto& crop_frame = crop->GetImageFrameSharedPtr();
  const MpImage mp_crop = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(crop_frame->Format()),
                      .image_buffer = crop_frame->PixelData(),
                      .width = crop_frame->Width(),
                      .height = crop_frame->Height()}};

  // Extract both embeddings. A non-zero return means the result struct was
  // never filled in, so stop before reading it.
  ImageEmbedderResult image_result;
  ASSERT_EQ(image_embedder_embed_image(embedder, &mp_image, &image_result), 0);
  ImageEmbedderResult crop_result;
  ASSERT_EQ(image_embedder_embed_image(embedder, &mp_crop, &crop_result), 0);

  // Check results.
  CheckMobileNetV3Result(image_result, false);
  CheckMobileNetV3Result(crop_result, false);

  // Check cosine similarity.
  double similarity = 0.0;
  EXPECT_EQ(cosine_similarity(image_result.embeddings[0],
                              crop_result.embeddings[0], &similarity),
            0);
  const double expected_similarity = 0.925519;
  // EXPECT_NEAR compares as floating point; an unqualified abs() may resolve
  // to the integer overload and truncate the difference to 0.
  EXPECT_NEAR(similarity, expected_similarity, kPrecision);

  image_embedder_close_result(&image_result);
  image_embedder_close_result(&crop_result);
  image_embedder_close(embedder);
}
// Feeds the same frame to a VIDEO-mode embedder `kIterations` times, using the
// loop index as the timestamp, and checks every returned embedding.
TEST(ImageEmbedderTest, VideoModeTest) {
  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
  ASSERT_TRUE(image.ok());

  const std::string model_path = GetFullPath(kModelName);
  ImageEmbedderOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::VIDEO,
      /* embedder_options= */
      {/* l2_normalize= */ true,
       /* quantize= */ false}};

  void* embedder = image_embedder_create(&options);
  // Fatal: every call below would dereference a null embedder.
  ASSERT_NE(embedder, nullptr);

  const auto& image_frame = image->GetImageFrameSharedPtr();
  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
                      .image_buffer = image_frame->PixelData(),
                      .width = image_frame->Width(),
                      .height = image_frame->Height()}};

  for (int i = 0; i < kIterations; ++i) {
    ImageEmbedderResult result;
    // A non-zero return means `result` was never filled in, so stop before
    // reading it.
    ASSERT_EQ(image_embedder_embed_for_video(embedder, &mp_image, i, &result),
              0);
    CheckMobileNetV3Result(result, false);
    EXPECT_NEAR(result.embeddings[0].float_embedding[0], -0.0142344,
                kPrecision);
    image_embedder_close_result(&result);
  }
  image_embedder_close(embedder);
}
// A structure to support LiveStreamModeTest below. This structure holds a
// static method `Fn` for a callback function of C API. A `static` qualifier
// allows to take an address of the method to follow API style. Another static
// struct member is `last_timestamp` that is used to verify that current
// timestamp is greater than the previous one.
struct LiveStreamModeCallback {
  // Starts at -1 and is advanced by exactly one per callback invocation, so
  // after a run it equals the number of callbacks received minus one.
  inline static int64_t last_timestamp = -1;

  // Result callback handed to the C API. Verifies the delivered embedding and
  // image, checks that `timestamp` is greater than `last_timestamp`, then
  // advances `last_timestamp` by one.
  static void Fn(ImageEmbedderResult* embedder_result, const MpImage image,
                 int64_t timestamp, char* error_msg) {
    ASSERT_NE(embedder_result, nullptr);
    ASSERT_EQ(error_msg, nullptr);
    CheckMobileNetV3Result(*embedder_result, false);

    const auto& first_embedding = embedder_result->embeddings[0];
    EXPECT_NEAR(first_embedding.float_embedding[0], -0.0142344, kPrecision);

    const auto& frame = image.image_frame;
    EXPECT_GT(frame.width, 0);
    EXPECT_GT(frame.height, 0);

    EXPECT_GT(timestamp, last_timestamp);
    ++last_timestamp;
  }
};
// Streams the same frame to a LIVE_STREAM-mode embedder `kIterations` times
// and checks, via LiveStreamModeCallback, that results were delivered.
TEST(ImageEmbedderTest, LiveStreamModeTest) {
  // Start from a known state so a repeated run of this test does not inherit
  // the count left behind by an earlier one.
  LiveStreamModeCallback::last_timestamp = -1;

  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
  ASSERT_TRUE(image.ok());

  const std::string model_path = GetFullPath(kModelName);

  ImageEmbedderOptions options = {
      /* base_options= */ {/* model_asset_buffer= */ nullptr,
                           /* model_asset_path= */ model_path.c_str()},
      /* running_mode= */ RunningMode::LIVE_STREAM,
      /* embedder_options= */
      {/* l2_normalize= */ true,
       /* quantize= */ false},
      /* result_callback= */ LiveStreamModeCallback::Fn,
  };

  void* embedder = image_embedder_create(&options);
  // Fatal: every call below would dereference a null embedder.
  ASSERT_NE(embedder, nullptr);

  const auto& image_frame = image->GetImageFrameSharedPtr();
  const MpImage mp_image = {
      .type = MpImage::IMAGE_FRAME,
      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
                      .image_buffer = image_frame->PixelData(),
                      .width = image_frame->Width(),
                      .height = image_frame->Height()}};

  for (int i = 0; i < kIterations; ++i) {
    EXPECT_GE(image_embedder_embed_async(embedder, &mp_image, i), 0);
  }
  image_embedder_close(embedder);

  // Due to the flow limiter, the total of outputs might be smaller than the
  // number of iterations.
  EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations);
  EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0);
}
TEST(ImageEmbedderTest, InvalidArgumentHandling) { TEST(ImageEmbedderTest, InvalidArgumentHandling) {
// It is an error to set neither the asset buffer nor the path. // It is an error to set neither the asset buffer nor the path.
ImageEmbedderOptions options = { ImageEmbedderOptions options = {
@ -111,7 +275,7 @@ TEST(ImageEmbedderTest, FailedEmbeddingHandling) {
ImageEmbedderResult result; ImageEmbedderResult result;
char* error_msg; char* error_msg;
image_embedder_embed_image(embedder, &mp_image, &result, &error_msg); image_embedder_embed_image(embedder, &mp_image, &result, &error_msg);
EXPECT_THAT(error_msg, HasSubstr("gpu buffer not supported yet")); EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet."));
free(error_msg); free(error_msg);
image_embedder_close(embedder); image_embedder_close(embedder);
} }