Resolved issues and added a common header to hold all the necessary structures for the vision tasks
This commit is contained in:
parent
197358dfee
commit
c442d6117e
|
@ -66,7 +66,7 @@ void CppConvertToEmbeddingResult(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConvertToCppEmbedding(
|
void CppConvertToCppEmbedding(
|
||||||
const Embedding& in, // C struct as input
|
const Embedding& in, // C struct as input
|
||||||
mediapipe::tasks::components::containers::Embedding* out) {
|
mediapipe::tasks::components::containers::Embedding* out) {
|
||||||
// Handle float embeddings
|
// Handle float embeddings
|
||||||
|
@ -85,7 +85,7 @@ void ConvertToCppEmbedding(
|
||||||
|
|
||||||
// Copy head_name if it is present.
|
// Copy head_name if it is present.
|
||||||
if (in.head_name) {
|
if (in.head_name) {
|
||||||
out->head_name = std::make_optional(std::string(in.head_name));
|
out->head_name = std::string(in.head_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ void CppConvertToEmbeddingResult(
|
||||||
const mediapipe::tasks::components::containers::EmbeddingResult& in,
|
const mediapipe::tasks::components::containers::EmbeddingResult& in,
|
||||||
EmbeddingResult* out);
|
EmbeddingResult* out);
|
||||||
|
|
||||||
void ConvertToCppEmbedding(
|
void CppConvertToCppEmbedding(
|
||||||
const Embedding& in,
|
const Embedding& in,
|
||||||
mediapipe::tasks::components::containers::Embedding* out);
|
mediapipe::tasks::components::containers::Embedding* out);
|
||||||
|
|
||||||
|
|
|
@ -29,9 +29,8 @@ namespace mediapipe::tasks::c::text::text_embedder {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
|
||||||
using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
|
|
||||||
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
|
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
|
||||||
|
using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding;
|
||||||
using ::mediapipe::tasks::c::components::containers::
|
using ::mediapipe::tasks::c::components::containers::
|
||||||
CppConvertToEmbeddingResult;
|
CppConvertToEmbeddingResult;
|
||||||
using ::mediapipe::tasks::c::components::processors::
|
using ::mediapipe::tasks::c::components::processors::
|
||||||
|
@ -97,9 +96,9 @@ int CppTextEmbedderClose(void* embedder, char** error_msg) {
|
||||||
int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
|
int CppTextEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
|
||||||
double* similarity, char** error_msg) {
|
double* similarity, char** error_msg) {
|
||||||
CppEmbedding cpp_u;
|
CppEmbedding cpp_u;
|
||||||
ConvertToCppEmbedding(u, &cpp_u);
|
CppConvertToCppEmbedding(u, &cpp_u);
|
||||||
CppEmbedding cpp_v;
|
CppEmbedding cpp_v;
|
||||||
ConvertToCppEmbedding(v, &cpp_v);
|
CppConvertToCppEmbedding(v, &cpp_v);
|
||||||
auto status_or_similarity =
|
auto status_or_similarity =
|
||||||
mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity(
|
mediapipe::tasks::text::text_embedder::TextEmbedder::CosineSimilarity(
|
||||||
cpp_u, cpp_v);
|
cpp_u, cpp_v);
|
||||||
|
@ -137,8 +136,8 @@ int text_embedder_close(void* embedder, char** error_ms) {
|
||||||
embedder, error_ms);
|
embedder, error_ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cosine_similarity(const Embedding& u, const Embedding& v,
|
int text_embedder_cosine_similarity(const Embedding& u, const Embedding& v,
|
||||||
double* similarity, char** error_msg) {
|
double* similarity, char** error_msg) {
|
||||||
return mediapipe::tasks::c::text::text_embedder::
|
return mediapipe::tasks::c::text::text_embedder::
|
||||||
CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg);
|
CppTextEmbedderCosineSimilarity(u, v, similarity, error_msg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,8 +72,10 @@ MP_EXPORT int text_embedder_close(void* embedder, char** error_msg);
|
||||||
// 0.
|
// 0.
|
||||||
//
|
//
|
||||||
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
|
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
|
||||||
MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v,
|
MP_EXPORT int text_embedder_cosine_similarity(const Embedding& u,
|
||||||
double* similarity, char** error_msg = nullptr);
|
const Embedding& v,
|
||||||
|
double* similarity,
|
||||||
|
char** error_msg);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern C
|
} // extern C
|
||||||
|
|
|
@ -33,10 +33,10 @@ constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/text/";
|
||||||
constexpr char kTestBertModelPath[] =
|
constexpr char kTestBertModelPath[] =
|
||||||
"mobilebert_embedding_with_metadata.tflite";
|
"mobilebert_embedding_with_metadata.tflite";
|
||||||
constexpr char kTestString0[] =
|
constexpr char kTestString0[] =
|
||||||
"When you go to this restaurant, they hold the pancake upside-down "
|
"When you go to this restaurant, they hold the pancake upside-down "
|
||||||
"before they hand it to you. It's a great gimmick.";
|
"before they hand it to you. It's a great gimmick.";
|
||||||
constexpr char kTestString1[] =
|
constexpr char kTestString1[] =
|
||||||
"Let's make a plan to steal the declaration of independence.";
|
"Let's make a plan to steal the declaration of independence.";
|
||||||
constexpr float kPrecision = 1e-3;
|
constexpr float kPrecision = 1e-3;
|
||||||
|
|
||||||
std::string GetFullPath(absl::string_view file_name) {
|
std::string GetFullPath(absl::string_view file_name) {
|
||||||
|
@ -81,14 +81,16 @@ TEST(TextEmbedderTest, SucceedsWithCosineSimilarity) {
|
||||||
|
|
||||||
// Extract both embeddings.
|
// Extract both embeddings.
|
||||||
TextEmbedderResult result0;
|
TextEmbedderResult result0;
|
||||||
text_embedder_embed(embedder, kTestString0, &result0, /* error_msg */ nullptr);
|
text_embedder_embed(embedder, kTestString0, &result0,
|
||||||
|
/* error_msg */ nullptr);
|
||||||
TextEmbedderResult result1;
|
TextEmbedderResult result1;
|
||||||
text_embedder_embed(embedder, kTestString1, &result1, /* error_msg */ nullptr);
|
text_embedder_embed(embedder, kTestString1, &result1,
|
||||||
|
/* error_msg */ nullptr);
|
||||||
|
|
||||||
// Check cosine similarity.
|
// Check cosine similarity.
|
||||||
double similarity;
|
double similarity;
|
||||||
cosine_similarity(result0.embeddings[0], result1.embeddings[0],
|
text_embedder_cosine_similarity(result0.embeddings[0], result1.embeddings[0],
|
||||||
&similarity);
|
&similarity, nullptr);
|
||||||
double expected_similarity = 0.98077;
|
double expected_similarity = 0.98077;
|
||||||
EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
|
EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
|
||||||
text_embedder_close(embedder, /* error_msg */ nullptr);
|
text_embedder_close(embedder, /* error_msg */ nullptr);
|
||||||
|
|
22
mediapipe/tasks/c/vision/core/BUILD
Normal file
22
mediapipe/tasks/c/vision/core/BUILD
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# Copyright 2023 The MediaPipe Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
package(default_visibility = ["//mediapipe/tasks:internal"])
|
||||||
|
|
||||||
|
licenses(["notice"])
|
||||||
|
|
||||||
|
cc_library(
|
||||||
|
name = "common",
|
||||||
|
hdrs = ["common.h"],
|
||||||
|
)
|
69
mediapipe/tasks/c/vision/core/common.h
Normal file
69
mediapipe/tasks/c/vision/core/common.h
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
/* Copyright 2023 The MediaPipe Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
|
||||||
|
#define MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Supported image formats.
|
||||||
|
enum ImageFormat {
|
||||||
|
UNKNOWN = 0,
|
||||||
|
SRGB = 1,
|
||||||
|
SRGBA = 2,
|
||||||
|
GRAY8 = 3,
|
||||||
|
SBGRA = 11 // compatible with Flutter `bgra8888` format.
|
||||||
|
};
|
||||||
|
|
||||||
|
// Supported processing modes.
|
||||||
|
enum RunningMode {
|
||||||
|
IMAGE = 1,
|
||||||
|
VIDEO = 2,
|
||||||
|
LIVE_STREAM = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Structure to hold image frame.
|
||||||
|
struct ImageFrame {
|
||||||
|
enum ImageFormat format;
|
||||||
|
const uint8_t* image_buffer;
|
||||||
|
int width;
|
||||||
|
int height;
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: Add GPU buffer declaration and processing logic for it.
|
||||||
|
struct GpuBuffer {
|
||||||
|
int width;
|
||||||
|
int height;
|
||||||
|
};
|
||||||
|
|
||||||
|
// The object to contain an image, realizes `OneOf` concept.
|
||||||
|
struct MpImage {
|
||||||
|
enum { IMAGE_FRAME, GPU_BUFFER } type;
|
||||||
|
union {
|
||||||
|
struct ImageFrame image_frame;
|
||||||
|
struct GpuBuffer gpu_buffer;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern C
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // MEDIAPIPE_TASKS_C_VISION_CORE_COMMON_H_
|
|
@ -30,6 +30,7 @@ cc_library(
|
||||||
"//mediapipe/tasks/c/components/processors:classifier_options_converter",
|
"//mediapipe/tasks/c/components/processors:classifier_options_converter",
|
||||||
"//mediapipe/tasks/c/core:base_options",
|
"//mediapipe/tasks/c/core:base_options",
|
||||||
"//mediapipe/tasks/c/core:base_options_converter",
|
"//mediapipe/tasks/c/core:base_options_converter",
|
||||||
|
"//mediapipe/tasks/c/vision/core:common",
|
||||||
"//mediapipe/tasks/cc/vision/core:running_mode",
|
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||||
"//mediapipe/tasks/cc/vision/image_classifier",
|
"//mediapipe/tasks/cc/vision/image_classifier",
|
||||||
"//mediapipe/tasks/cc/vision/utils:image_utils",
|
"//mediapipe/tasks/cc/vision/utils:image_utils",
|
||||||
|
|
|
@ -16,11 +16,10 @@ limitations under the License.
|
||||||
#ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
|
#ifndef MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
|
||||||
#define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
|
#define MEDIAPIPE_TASKS_C_VISION_IMAGE_CLASSIFIER_IMAGE_CLASSIFIER_H_
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
#include "mediapipe/tasks/c/components/containers/classification_result.h"
|
#include "mediapipe/tasks/c/components/containers/classification_result.h"
|
||||||
#include "mediapipe/tasks/c/components/processors/classifier_options.h"
|
#include "mediapipe/tasks/c/components/processors/classifier_options.h"
|
||||||
#include "mediapipe/tasks/c/core/base_options.h"
|
#include "mediapipe/tasks/c/core/base_options.h"
|
||||||
|
#include "mediapipe/tasks/c/vision/core/common.h"
|
||||||
|
|
||||||
#ifndef MP_EXPORT
|
#ifndef MP_EXPORT
|
||||||
#define MP_EXPORT __attribute__((visibility("default")))
|
#define MP_EXPORT __attribute__((visibility("default")))
|
||||||
|
@ -32,46 +31,7 @@ extern "C" {
|
||||||
|
|
||||||
typedef ClassificationResult ImageClassifierResult;
|
typedef ClassificationResult ImageClassifierResult;
|
||||||
|
|
||||||
// Supported image formats.
|
// The options for configuring a MediaPipe image classifier task.
|
||||||
enum ImageFormat {
|
|
||||||
UNKNOWN = 0,
|
|
||||||
SRGB = 1,
|
|
||||||
SRGBA = 2,
|
|
||||||
GRAY8 = 3,
|
|
||||||
SBGRA = 11 // compatible with Flutter `bgra8888` format.
|
|
||||||
};
|
|
||||||
|
|
||||||
// Supported processing modes.
|
|
||||||
enum RunningMode {
|
|
||||||
IMAGE = 1,
|
|
||||||
VIDEO = 2,
|
|
||||||
LIVE_STREAM = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Structure to hold image frame.
|
|
||||||
struct ImageFrame {
|
|
||||||
enum ImageFormat format;
|
|
||||||
const uint8_t* image_buffer;
|
|
||||||
int width;
|
|
||||||
int height;
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: Add GPU buffer declaration and proccessing logic for it.
|
|
||||||
struct GpuBuffer {
|
|
||||||
int width;
|
|
||||||
int height;
|
|
||||||
};
|
|
||||||
|
|
||||||
// The object to contain an image, realizes `OneOf` concept.
|
|
||||||
struct MpImage {
|
|
||||||
enum { IMAGE_FRAME, GPU_BUFFER } type;
|
|
||||||
union {
|
|
||||||
struct ImageFrame image_frame;
|
|
||||||
struct GpuBuffer gpu_buffer;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
// The options for configuring a Mediapipe image classifier task.
|
|
||||||
struct ImageClassifierOptions {
|
struct ImageClassifierOptions {
|
||||||
// Base options for configuring MediaPipe Tasks, such as specifying the model
|
// Base options for configuring MediaPipe Tasks, such as specifying the model
|
||||||
// file with metadata, accelerator options, op resolver, etc.
|
// file with metadata, accelerator options, op resolver, etc.
|
||||||
|
@ -122,12 +82,39 @@ MP_EXPORT int image_classifier_classify_image(void* classifier,
|
||||||
ImageClassifierResult* result,
|
ImageClassifierResult* result,
|
||||||
char** error_msg);
|
char** error_msg);
|
||||||
|
|
||||||
|
// Performs image classification on the provided video frame.
|
||||||
|
// Only use this method when the ImageClassifier is created with the video
|
||||||
|
// running mode.
|
||||||
|
// The image can be of any size with format RGB or RGBA. It's required to
|
||||||
|
// provide the video frame's timestamp (in milliseconds). The input timestamps
|
||||||
|
// must be monotonically increasing.
|
||||||
|
// If an error occurs, returns an error code and sets the error parameter to an
|
||||||
|
// error message (if `error_msg` is not `nullptr`). You must free the memory
|
||||||
|
// allocated for the error message.
|
||||||
MP_EXPORT int image_classifier_classify_for_video(void* classifier,
|
MP_EXPORT int image_classifier_classify_for_video(void* classifier,
|
||||||
const MpImage* image,
|
const MpImage* image,
|
||||||
int64_t timestamp_ms,
|
int64_t timestamp_ms,
|
||||||
ImageClassifierResult* result,
|
ImageClassifierResult* result,
|
||||||
char** error_msg);
|
char** error_msg);
|
||||||
|
|
||||||
|
// Sends live image data to image classification, and the results will be
|
||||||
|
// available via the "result_callback" provided in the ImageClassifierOptions.
|
||||||
|
// Only use this method when the ImageClassifier is created with the live
|
||||||
|
// stream running mode.
|
||||||
|
// The image can be of any size with format RGB or RGBA. It's required to
|
||||||
|
// provide a timestamp (in milliseconds) to indicate when the input image is
|
||||||
|
// sent to the image classifier. The input timestamps must be monotonically
|
||||||
|
// increasing.
|
||||||
|
// The "result_callback" provides:
|
||||||
|
// - The classification results as an ImageClassifierResult object.
|
||||||
|
// - The const reference to the corresponding input image that the image
|
||||||
|
// classifier runs on. Note that the const reference to the image will no
|
||||||
|
// longer be valid when the callback returns. To access the image data
|
||||||
|
// outside of the callback, callers need to make a copy of the image.
|
||||||
|
// - The input timestamp in milliseconds.
|
||||||
|
// If an error occurs, returns an error code and sets the error parameter to an
|
||||||
|
// error message (if `error_msg` is not `nullptr`). You must free the memory
|
||||||
|
// allocated for the error message.
|
||||||
MP_EXPORT int image_classifier_classify_async(void* classifier,
|
MP_EXPORT int image_classifier_classify_async(void* classifier,
|
||||||
const MpImage* image,
|
const MpImage* image,
|
||||||
int64_t timestamp_ms,
|
int64_t timestamp_ms,
|
||||||
|
|
|
@ -30,6 +30,8 @@ cc_library(
|
||||||
"//mediapipe/tasks/c/components/processors:embedder_options_converter",
|
"//mediapipe/tasks/c/components/processors:embedder_options_converter",
|
||||||
"//mediapipe/tasks/c/core:base_options",
|
"//mediapipe/tasks/c/core:base_options",
|
||||||
"//mediapipe/tasks/c/core:base_options_converter",
|
"//mediapipe/tasks/c/core:base_options_converter",
|
||||||
|
"//mediapipe/tasks/c/vision/core:common",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||||
"//mediapipe/tasks/cc/vision/image_embedder",
|
"//mediapipe/tasks/cc/vision/image_embedder",
|
||||||
"//mediapipe/tasks/cc/vision/utils:image_utils",
|
"//mediapipe/tasks/cc/vision/utils:image_utils",
|
||||||
"@com_google_absl//absl/log:absl_log",
|
"@com_google_absl//absl/log:absl_log",
|
||||||
|
|
|
@ -36,8 +36,8 @@ namespace mediapipe::tasks::c::vision::image_embedder {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
using ::mediapipe::tasks::c::components::containers::ConvertToCppEmbedding;
|
|
||||||
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
|
using ::mediapipe::tasks::c::components::containers::CppCloseEmbeddingResult;
|
||||||
|
using ::mediapipe::tasks::c::components::containers::CppConvertToCppEmbedding;
|
||||||
using ::mediapipe::tasks::c::components::containers::
|
using ::mediapipe::tasks::c::components::containers::
|
||||||
CppConvertToEmbeddingResult;
|
CppConvertToEmbeddingResult;
|
||||||
using ::mediapipe::tasks::c::components::processors::
|
using ::mediapipe::tasks::c::components::processors::
|
||||||
|
@ -235,9 +235,9 @@ int CppImageEmbedderClose(void* embedder, char** error_msg) {
|
||||||
int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
|
int CppImageEmbedderCosineSimilarity(const Embedding& u, const Embedding& v,
|
||||||
double* similarity, char** error_msg) {
|
double* similarity, char** error_msg) {
|
||||||
CppEmbedding cpp_u;
|
CppEmbedding cpp_u;
|
||||||
ConvertToCppEmbedding(u, &cpp_u);
|
CppConvertToCppEmbedding(u, &cpp_u);
|
||||||
CppEmbedding cpp_v;
|
CppEmbedding cpp_v;
|
||||||
ConvertToCppEmbedding(v, &cpp_v);
|
CppConvertToCppEmbedding(v, &cpp_v);
|
||||||
auto status_or_similarity =
|
auto status_or_similarity =
|
||||||
mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity(
|
mediapipe::tasks::vision::image_embedder::ImageEmbedder::CosineSimilarity(
|
||||||
cpp_u, cpp_v);
|
cpp_u, cpp_v);
|
||||||
|
@ -291,8 +291,8 @@ int image_embedder_close(void* embedder, char** error_msg) {
|
||||||
embedder, error_msg);
|
embedder, error_msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cosine_similarity(const Embedding& u, const Embedding& v,
|
int image_embedder_cosine_similarity(const Embedding& u, const Embedding& v,
|
||||||
double* similarity, char** error_msg) {
|
double* similarity, char** error_msg) {
|
||||||
return mediapipe::tasks::c::vision::image_embedder::
|
return mediapipe::tasks::c::vision::image_embedder::
|
||||||
CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg);
|
CppImageEmbedderCosineSimilarity(u, v, similarity, error_msg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||||
#include "mediapipe/tasks/c/components/containers/embedding_result.h"
|
#include "mediapipe/tasks/c/components/containers/embedding_result.h"
|
||||||
#include "mediapipe/tasks/c/components/processors/embedder_options.h"
|
#include "mediapipe/tasks/c/components/processors/embedder_options.h"
|
||||||
#include "mediapipe/tasks/c/core/base_options.h"
|
#include "mediapipe/tasks/c/core/base_options.h"
|
||||||
|
#include "mediapipe/tasks/c/vision/core/common.h"
|
||||||
|
|
||||||
#ifndef MP_EXPORT
|
#ifndef MP_EXPORT
|
||||||
#define MP_EXPORT __attribute__((visibility("default")))
|
#define MP_EXPORT __attribute__((visibility("default")))
|
||||||
|
@ -32,45 +33,6 @@ extern "C" {
|
||||||
|
|
||||||
typedef EmbeddingResult ImageEmbedderResult;
|
typedef EmbeddingResult ImageEmbedderResult;
|
||||||
|
|
||||||
// Supported image formats.
|
|
||||||
enum ImageFormat {
|
|
||||||
UNKNOWN = 0,
|
|
||||||
SRGB = 1,
|
|
||||||
SRGBA = 2,
|
|
||||||
GRAY8 = 3,
|
|
||||||
SBGRA = 11 // compatible with Flutter `bgra8888` format.
|
|
||||||
};
|
|
||||||
|
|
||||||
// Supported processing modes.
|
|
||||||
enum RunningMode {
|
|
||||||
IMAGE = 1,
|
|
||||||
VIDEO = 2,
|
|
||||||
LIVE_STREAM = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Structure to hold image frame.
|
|
||||||
struct ImageFrame {
|
|
||||||
enum ImageFormat format;
|
|
||||||
const uint8_t* image_buffer;
|
|
||||||
int width;
|
|
||||||
int height;
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: Add GPU buffer declaration and proccessing logic for it.
|
|
||||||
struct GpuBuffer {
|
|
||||||
int width;
|
|
||||||
int height;
|
|
||||||
};
|
|
||||||
|
|
||||||
// The object to contain an image, realizes `OneOf` concept.
|
|
||||||
struct MpImage {
|
|
||||||
enum { IMAGE_FRAME, GPU_BUFFER } type;
|
|
||||||
union {
|
|
||||||
struct ImageFrame image_frame;
|
|
||||||
struct GpuBuffer gpu_buffer;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
// The options for configuring a MediaPipe image embedder task.
|
// The options for configuring a MediaPipe image embedder task.
|
||||||
struct ImageEmbedderOptions {
|
struct ImageEmbedderOptions {
|
||||||
// Base options for configuring MediaPipe Tasks, such as specifying the model
|
// Base options for configuring MediaPipe Tasks, such as specifying the model
|
||||||
|
@ -121,12 +83,40 @@ MP_EXPORT int image_embedder_embed_image(void* embedder, const MpImage* image,
|
||||||
ImageEmbedderResult* result,
|
ImageEmbedderResult* result,
|
||||||
char** error_msg);
|
char** error_msg);
|
||||||
|
|
||||||
|
// Performs embedding extraction on the provided video frame.
|
||||||
|
// Only use this method when the ImageEmbedder is created with the video
|
||||||
|
// running mode.
|
||||||
|
// The image can be of any size with format RGB or RGBA. It's required to
|
||||||
|
// provide the video frame's timestamp (in milliseconds). The input timestamps
|
||||||
|
// must be monotonically increasing.
|
||||||
|
// If an error occurs, returns an error code and sets the error parameter to an
|
||||||
|
// error message (if `error_msg` is not `nullptr`). You must free the memory
|
||||||
|
// allocated for the error message.
|
||||||
MP_EXPORT int image_embedder_embed_for_video(void* embedder,
|
MP_EXPORT int image_embedder_embed_for_video(void* embedder,
|
||||||
const MpImage* image,
|
const MpImage* image,
|
||||||
int64_t timestamp_ms,
|
int64_t timestamp_ms,
|
||||||
ImageEmbedderResult* result,
|
ImageEmbedderResult* result,
|
||||||
char** error_msg);
|
char** error_msg);
|
||||||
|
|
||||||
|
// Sends live image data to embedder, and the results will be available via
|
||||||
|
// the "result_callback" provided in the ImageEmbedderOptions.
|
||||||
|
// Only use this method when the ImageEmbedder is created with the live
|
||||||
|
// stream running mode.
|
||||||
|
// The image can be of any size with format RGB or RGBA. It's required to
|
||||||
|
// provide a timestamp (in milliseconds) to indicate when the input image is
|
||||||
|
// sent to the image embedder. The input timestamps must be monotonically
|
||||||
|
// increasing.
|
||||||
|
// The "result_callback" provides:
|
||||||
|
// - The embedding results as a
|
||||||
|
// components::containers::proto::EmbeddingResult object.
|
||||||
|
// - The const reference to the corresponding input image that the image
|
||||||
|
// embedder runs on. Note that the const reference to the image will no
|
||||||
|
// longer be valid when the callback returns. To access the image data
|
||||||
|
// outside of the callback, callers need to make a copy of the image.
|
||||||
|
// - The input timestamp in milliseconds.
|
||||||
|
// If an error occurs, returns an error code and sets the error parameter to an
|
||||||
|
// error message (if `error_msg` is not `nullptr`). You must free the memory
|
||||||
|
// allocated for the error message.
|
||||||
MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image,
|
MP_EXPORT int image_embedder_embed_async(void* embedder, const MpImage* image,
|
||||||
int64_t timestamp_ms,
|
int64_t timestamp_ms,
|
||||||
char** error_msg);
|
char** error_msg);
|
||||||
|
@ -147,8 +137,10 @@ MP_EXPORT int image_embedder_close(void* embedder, char** error_msg);
|
||||||
// 0.
|
// 0.
|
||||||
//
|
//
|
||||||
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
|
// [1]: https://en.wikipedia.org/wiki/Cosine_similarity
|
||||||
MP_EXPORT int cosine_similarity(const Embedding& u, const Embedding& v,
|
MP_EXPORT int image_embedder_cosine_similarity(const Embedding& u,
|
||||||
double* similarity, char** error_msg);
|
const Embedding& v,
|
||||||
|
double* similarity,
|
||||||
|
char** error_msg);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern C
|
} // extern C
|
||||||
|
|
|
@ -143,8 +143,9 @@ TEST(ImageEmbedderTest, SucceedsWithCosineSimilarity) {
|
||||||
CheckMobileNetV3Result(crop_result, false);
|
CheckMobileNetV3Result(crop_result, false);
|
||||||
// Check cosine similarity.
|
// Check cosine similarity.
|
||||||
double similarity;
|
double similarity;
|
||||||
cosine_similarity(image_result.embeddings[0], crop_result.embeddings[0],
|
image_embedder_cosine_similarity(image_result.embeddings[0],
|
||||||
&similarity, /* error_msg */ nullptr);
|
crop_result.embeddings[0], &similarity,
|
||||||
|
/* error_msg */ nullptr);
|
||||||
double expected_similarity = 0.925519;
|
double expected_similarity = 0.925519;
|
||||||
EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
|
EXPECT_LE(abs(similarity - expected_similarity), kPrecision);
|
||||||
image_embedder_close_result(&image_result);
|
image_embedder_close_result(&image_result);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user