Add support for rotation in ImageEmbedder & ImageSegmenter C++ APIs
PiperOrigin-RevId: 483416498
This commit is contained in:
parent
0fd69e8d83
commit
2f2baeff68
|
@ -58,6 +58,7 @@ cc_library(
|
||||||
"//mediapipe/tasks/cc/core:utils",
|
"//mediapipe/tasks/cc/core:utils",
|
||||||
"//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
|
"//mediapipe/tasks/cc/core/proto:base_options_cc_proto",
|
||||||
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:image_processing_options",
|
||||||
"//mediapipe/tasks/cc/vision/core:running_mode",
|
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||||
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
||||||
"//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_cc_proto",
|
"//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_cc_proto",
|
||||||
|
|
|
@ -29,6 +29,7 @@ limitations under the License.
|
||||||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||||
#include "mediapipe/tasks/cc/core/task_runner.h"
|
#include "mediapipe/tasks/cc/core/task_runner.h"
|
||||||
#include "mediapipe/tasks/cc/core/utils.h"
|
#include "mediapipe/tasks/cc/core/utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
||||||
#include "mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options.pb.h"
|
#include "mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options.pb.h"
|
||||||
|
@ -58,16 +59,6 @@ using ::mediapipe::tasks::core::PacketMap;
|
||||||
using ::mediapipe::tasks::vision::image_embedder::proto::
|
using ::mediapipe::tasks::vision::image_embedder::proto::
|
||||||
ImageEmbedderGraphOptions;
|
ImageEmbedderGraphOptions;
|
||||||
|
|
||||||
// Builds a NormalizedRect covering the entire image.
|
|
||||||
NormalizedRect BuildFullImageNormRect() {
|
|
||||||
NormalizedRect norm_rect;
|
|
||||||
norm_rect.set_x_center(0.5);
|
|
||||||
norm_rect.set_y_center(0.5);
|
|
||||||
norm_rect.set_width(1);
|
|
||||||
norm_rect.set_height(1);
|
|
||||||
return norm_rect;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creates a MediaPipe graph config that contains a single node of type
|
// Creates a MediaPipe graph config that contains a single node of type
|
||||||
// "mediapipe.tasks.vision.image_embedder.ImageEmbedderGraph". If the task is
|
// "mediapipe.tasks.vision.image_embedder.ImageEmbedderGraph". If the task is
|
||||||
// running in the live stream mode, a "FlowLimiterCalculator" will be added to
|
// running in the live stream mode, a "FlowLimiterCalculator" will be added to
|
||||||
|
@ -148,15 +139,16 @@ absl::StatusOr<std::unique_ptr<ImageEmbedder>> ImageEmbedder::Create(
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::StatusOr<EmbeddingResult> ImageEmbedder::Embed(
|
absl::StatusOr<EmbeddingResult> ImageEmbedder::Embed(
|
||||||
Image image, std::optional<NormalizedRect> roi) {
|
Image image,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
if (image.UsesGpu()) {
|
if (image.UsesGpu()) {
|
||||||
return CreateStatusWithPayload(
|
return CreateStatusWithPayload(
|
||||||
absl::StatusCode::kInvalidArgument,
|
absl::StatusCode::kInvalidArgument,
|
||||||
"GPU input images are currently not supported.",
|
"GPU input images are currently not supported.",
|
||||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||||
}
|
}
|
||||||
NormalizedRect norm_rect =
|
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||||
roi.has_value() ? roi.value() : BuildFullImageNormRect();
|
ConvertToNormalizedRect(image_processing_options));
|
||||||
ASSIGN_OR_RETURN(
|
ASSIGN_OR_RETURN(
|
||||||
auto output_packets,
|
auto output_packets,
|
||||||
ProcessImageData(
|
ProcessImageData(
|
||||||
|
@ -167,15 +159,16 @@ absl::StatusOr<EmbeddingResult> ImageEmbedder::Embed(
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::StatusOr<EmbeddingResult> ImageEmbedder::EmbedForVideo(
|
absl::StatusOr<EmbeddingResult> ImageEmbedder::EmbedForVideo(
|
||||||
Image image, int64 timestamp_ms, std::optional<NormalizedRect> roi) {
|
Image image, int64 timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
if (image.UsesGpu()) {
|
if (image.UsesGpu()) {
|
||||||
return CreateStatusWithPayload(
|
return CreateStatusWithPayload(
|
||||||
absl::StatusCode::kInvalidArgument,
|
absl::StatusCode::kInvalidArgument,
|
||||||
"GPU input images are currently not supported.",
|
"GPU input images are currently not supported.",
|
||||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||||
}
|
}
|
||||||
NormalizedRect norm_rect =
|
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||||
roi.has_value() ? roi.value() : BuildFullImageNormRect();
|
ConvertToNormalizedRect(image_processing_options));
|
||||||
ASSIGN_OR_RETURN(
|
ASSIGN_OR_RETURN(
|
||||||
auto output_packets,
|
auto output_packets,
|
||||||
ProcessVideoData(
|
ProcessVideoData(
|
||||||
|
@ -188,16 +181,17 @@ absl::StatusOr<EmbeddingResult> ImageEmbedder::EmbedForVideo(
|
||||||
return output_packets[kEmbeddingResultStreamName].Get<EmbeddingResult>();
|
return output_packets[kEmbeddingResultStreamName].Get<EmbeddingResult>();
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status ImageEmbedder::EmbedAsync(Image image, int64 timestamp_ms,
|
absl::Status ImageEmbedder::EmbedAsync(
|
||||||
std::optional<NormalizedRect> roi) {
|
Image image, int64 timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
if (image.UsesGpu()) {
|
if (image.UsesGpu()) {
|
||||||
return CreateStatusWithPayload(
|
return CreateStatusWithPayload(
|
||||||
absl::StatusCode::kInvalidArgument,
|
absl::StatusCode::kInvalidArgument,
|
||||||
"GPU input images are currently not supported.",
|
"GPU input images are currently not supported.",
|
||||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||||
}
|
}
|
||||||
NormalizedRect norm_rect =
|
ASSIGN_OR_RETURN(NormalizedRect norm_rect,
|
||||||
roi.has_value() ? roi.value() : BuildFullImageNormRect();
|
ConvertToNormalizedRect(image_processing_options));
|
||||||
return SendLiveStreamData(
|
return SendLiveStreamData(
|
||||||
{{kImageInStreamName,
|
{{kImageInStreamName,
|
||||||
MakePacket<Image>(std::move(image))
|
MakePacket<Image>(std::move(image))
|
||||||
|
|
|
@ -21,11 +21,11 @@ limitations under the License.
|
||||||
|
|
||||||
#include "absl/status/statusor.h"
|
#include "absl/status/statusor.h"
|
||||||
#include "mediapipe/framework/formats/image.h"
|
#include "mediapipe/framework/formats/image.h"
|
||||||
#include "mediapipe/framework/formats/rect.pb.h"
|
|
||||||
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
|
#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h"
|
||||||
#include "mediapipe/tasks/cc/components/embedder_options.h"
|
#include "mediapipe/tasks/cc/components/embedder_options.h"
|
||||||
#include "mediapipe/tasks/cc/core/base_options.h"
|
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
|
@ -88,9 +88,17 @@ class ImageEmbedder : core::BaseVisionTaskApi {
|
||||||
static absl::StatusOr<std::unique_ptr<ImageEmbedder>> Create(
|
static absl::StatusOr<std::unique_ptr<ImageEmbedder>> Create(
|
||||||
std::unique_ptr<ImageEmbedderOptions> options);
|
std::unique_ptr<ImageEmbedderOptions> options);
|
||||||
|
|
||||||
// Performs embedding extraction on the provided single image. Extraction
|
// Performs embedding extraction on the provided single image.
|
||||||
// is performed on the region of interest specified by the `roi` argument if
|
//
|
||||||
// provided, or on the entire image otherwise.
|
// The optional 'image_processing_options' parameter can be used to specify:
|
||||||
|
// - the rotation to apply to the image before performing embedding
|
||||||
|
// extraction, by setting its 'rotation_degrees' field.
|
||||||
|
// and/or
|
||||||
|
// - the region-of-interest on which to perform embedding extraction, by
|
||||||
|
// setting its 'region_of_interest' field. If not specified, the full image
|
||||||
|
// is used.
|
||||||
|
// If both are specified, the crop around the region-of-interest is extracted
|
||||||
|
// first, then the specified rotation is applied to the crop.
|
||||||
//
|
//
|
||||||
// Only use this method when the ImageEmbedder is created with the image
|
// Only use this method when the ImageEmbedder is created with the image
|
||||||
// running mode.
|
// running mode.
|
||||||
|
@ -98,11 +106,20 @@ class ImageEmbedder : core::BaseVisionTaskApi {
|
||||||
// The image can be of any size with format RGB or RGBA.
|
// The image can be of any size with format RGB or RGBA.
|
||||||
absl::StatusOr<components::containers::proto::EmbeddingResult> Embed(
|
absl::StatusOr<components::containers::proto::EmbeddingResult> Embed(
|
||||||
mediapipe::Image image,
|
mediapipe::Image image,
|
||||||
std::optional<mediapipe::NormalizedRect> roi = std::nullopt);
|
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||||
|
std::nullopt);
|
||||||
|
|
||||||
// Performs embedding extraction on the provided video frame. Extraction
|
// Performs embedding extraction on the provided video frame.
|
||||||
// is performed on the region of interest specified by the `roi` argument if
|
//
|
||||||
// provided, or on the entire image otherwise.
|
// The optional 'image_processing_options' parameter can be used to specify:
|
||||||
|
// - the rotation to apply to the image before performing embedding
|
||||||
|
// extraction, by setting its 'rotation_degrees' field.
|
||||||
|
// and/or
|
||||||
|
// - the region-of-interest on which to perform embedding extraction, by
|
||||||
|
// setting its 'region_of_interest' field. If not specified, the full image
|
||||||
|
// is used.
|
||||||
|
// If both are specified, the crop around the region-of-interest is extracted
|
||||||
|
// first, then the specified rotation is applied to the crop.
|
||||||
//
|
//
|
||||||
// Only use this method when the ImageEmbedder is created with the video
|
// Only use this method when the ImageEmbedder is created with the video
|
||||||
// running mode.
|
// running mode.
|
||||||
|
@ -112,12 +129,21 @@ class ImageEmbedder : core::BaseVisionTaskApi {
|
||||||
// must be monotonically increasing.
|
// must be monotonically increasing.
|
||||||
absl::StatusOr<components::containers::proto::EmbeddingResult> EmbedForVideo(
|
absl::StatusOr<components::containers::proto::EmbeddingResult> EmbedForVideo(
|
||||||
mediapipe::Image image, int64 timestamp_ms,
|
mediapipe::Image image, int64 timestamp_ms,
|
||||||
std::optional<mediapipe::NormalizedRect> roi = std::nullopt);
|
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||||
|
std::nullopt);
|
||||||
|
|
||||||
// Sends live image data to embedder, and the results will be available via
|
// Sends live image data to embedder, and the results will be available via
|
||||||
// the "result_callback" provided in the ImageEmbedderOptions. Embedding
|
// the "result_callback" provided in the ImageEmbedderOptions.
|
||||||
// extraction is performed on the region of interest specified by the `roi`
|
//
|
||||||
// argument if provided, or on the entire image otherwise.
|
// The optional 'image_processing_options' parameter can be used to specify:
|
||||||
|
// - the rotation to apply to the image before performing embedding
|
||||||
|
// extraction, by setting its 'rotation_degrees' field.
|
||||||
|
// and/or
|
||||||
|
// - the region-of-interest on which to perform embedding extraction, by
|
||||||
|
// setting its 'region_of_interest' field. If not specified, the full image
|
||||||
|
// is used.
|
||||||
|
// If both are specified, the crop around the region-of-interest is extracted
|
||||||
|
// first, then the specified rotation is applied to the crop.
|
||||||
//
|
//
|
||||||
// Only use this method when the ImageEmbedder is created with the live
|
// Only use this method when the ImageEmbedder is created with the live
|
||||||
// stream running mode.
|
// stream running mode.
|
||||||
|
@ -135,9 +161,9 @@ class ImageEmbedder : core::BaseVisionTaskApi {
|
||||||
// longer be valid when the callback returns. To access the image data
|
// longer be valid when the callback returns. To access the image data
|
||||||
// outside of the callback, callers need to make a copy of the image.
|
// outside of the callback, callers need to make a copy of the image.
|
||||||
// - The input timestamp in milliseconds.
|
// - The input timestamp in milliseconds.
|
||||||
absl::Status EmbedAsync(
|
absl::Status EmbedAsync(mediapipe::Image image, int64 timestamp_ms,
|
||||||
mediapipe::Image image, int64 timestamp_ms,
|
std::optional<core::ImageProcessingOptions>
|
||||||
std::optional<mediapipe::NormalizedRect> roi = std::nullopt);
|
image_processing_options = std::nullopt);
|
||||||
|
|
||||||
// Shuts down the ImageEmbedder when all work is done.
|
// Shuts down the ImageEmbedder when all work is done.
|
||||||
absl::Status Close() { return runner_->Close(); }
|
absl::Status Close() { return runner_->Close(); }
|
||||||
|
|
|
@ -23,7 +23,6 @@ limitations under the License.
|
||||||
#include "absl/status/statusor.h"
|
#include "absl/status/statusor.h"
|
||||||
#include "mediapipe/framework/deps/file_path.h"
|
#include "mediapipe/framework/deps/file_path.h"
|
||||||
#include "mediapipe/framework/formats/image.h"
|
#include "mediapipe/framework/formats/image.h"
|
||||||
#include "mediapipe/framework/formats/rect.pb.h"
|
|
||||||
#include "mediapipe/framework/port/gmock.h"
|
#include "mediapipe/framework/port/gmock.h"
|
||||||
#include "mediapipe/framework/port/gtest.h"
|
#include "mediapipe/framework/port/gtest.h"
|
||||||
#include "mediapipe/framework/port/status_matchers.h"
|
#include "mediapipe/framework/port/status_matchers.h"
|
||||||
|
@ -42,7 +41,9 @@ namespace image_embedder {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
using ::mediapipe::file::JoinPath;
|
using ::mediapipe::file::JoinPath;
|
||||||
|
using ::mediapipe::tasks::components::containers::Rect;
|
||||||
using ::mediapipe::tasks::components::containers::proto::EmbeddingResult;
|
using ::mediapipe::tasks::components::containers::proto::EmbeddingResult;
|
||||||
|
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
|
||||||
using ::testing::HasSubstr;
|
using ::testing::HasSubstr;
|
||||||
using ::testing::Optional;
|
using ::testing::Optional;
|
||||||
|
|
||||||
|
@ -326,16 +327,14 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterest) {
|
||||||
MP_ASSERT_OK_AND_ASSIGN(
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
Image crop, DecodeImageFromFile(
|
Image crop, DecodeImageFromFile(
|
||||||
JoinPath("./", kTestDataDirectory, "burger_crop.jpg")));
|
JoinPath("./", kTestDataDirectory, "burger_crop.jpg")));
|
||||||
// Bounding box in "burger.jpg" corresponding to "burger_crop.jpg".
|
// Region-of-interest in "burger.jpg" corresponding to "burger_crop.jpg".
|
||||||
NormalizedRect roi;
|
Rect roi{/*left=*/0, /*top=*/0, /*right=*/0.833333, /*bottom=*/1};
|
||||||
roi.set_x_center(200.0 / 480);
|
ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};
|
||||||
roi.set_y_center(0.5);
|
|
||||||
roi.set_width(400.0 / 480);
|
|
||||||
roi.set_height(1.0f);
|
|
||||||
|
|
||||||
// Extract both embeddings.
|
// Extract both embeddings.
|
||||||
MP_ASSERT_OK_AND_ASSIGN(const EmbeddingResult& image_result,
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
image_embedder->Embed(image, roi));
|
const EmbeddingResult& image_result,
|
||||||
|
image_embedder->Embed(image, image_processing_options));
|
||||||
MP_ASSERT_OK_AND_ASSIGN(const EmbeddingResult& crop_result,
|
MP_ASSERT_OK_AND_ASSIGN(const EmbeddingResult& crop_result,
|
||||||
image_embedder->Embed(crop));
|
image_embedder->Embed(crop));
|
||||||
|
|
||||||
|
@ -351,6 +350,77 @@ TEST_F(ImageModeTest, SucceedsWithRegionOfInterest) {
|
||||||
EXPECT_LE(abs(similarity - expected_similarity), kSimilarityTolerancy);
|
EXPECT_LE(abs(similarity - expected_similarity), kSimilarityTolerancy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(ImageModeTest, SucceedsWithRotation) {
|
||||||
|
auto options = std::make_unique<ImageEmbedderOptions>();
|
||||||
|
options->base_options.model_asset_path =
|
||||||
|
JoinPath("./", kTestDataDirectory, kMobileNetV3Embedder);
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageEmbedder> image_embedder,
|
||||||
|
ImageEmbedder::Create(std::move(options)));
|
||||||
|
// Load images: one is a rotated version of the other.
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
|
Image image,
|
||||||
|
DecodeImageFromFile(JoinPath("./", kTestDataDirectory, "burger.jpg")));
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(Image rotated,
|
||||||
|
DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
|
||||||
|
"burger_rotated.jpg")));
|
||||||
|
ImageProcessingOptions image_processing_options;
|
||||||
|
image_processing_options.rotation_degrees = -90;
|
||||||
|
|
||||||
|
// Extract both embeddings.
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(const EmbeddingResult& image_result,
|
||||||
|
image_embedder->Embed(image));
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
|
const EmbeddingResult& rotated_result,
|
||||||
|
image_embedder->Embed(rotated, image_processing_options));
|
||||||
|
|
||||||
|
// Check results.
|
||||||
|
CheckMobileNetV3Result(image_result, false);
|
||||||
|
CheckMobileNetV3Result(rotated_result, false);
|
||||||
|
// Check cosine similarity.
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
|
double similarity,
|
||||||
|
ImageEmbedder::CosineSimilarity(image_result.embeddings(0).entries(0),
|
||||||
|
rotated_result.embeddings(0).entries(0)));
|
||||||
|
double expected_similarity = 0.572265;
|
||||||
|
EXPECT_LE(abs(similarity - expected_similarity), kSimilarityTolerancy);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(ImageModeTest, SucceedsWithRegionOfInterestAndRotation) {
|
||||||
|
auto options = std::make_unique<ImageEmbedderOptions>();
|
||||||
|
options->base_options.model_asset_path =
|
||||||
|
JoinPath("./", kTestDataDirectory, kMobileNetV3Embedder);
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageEmbedder> image_embedder,
|
||||||
|
ImageEmbedder::Create(std::move(options)));
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
|
Image crop, DecodeImageFromFile(
|
||||||
|
JoinPath("./", kTestDataDirectory, "burger_crop.jpg")));
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(Image rotated,
|
||||||
|
DecodeImageFromFile(JoinPath("./", kTestDataDirectory,
|
||||||
|
"burger_rotated.jpg")));
|
||||||
|
// Region-of-interest corresponding to burger_crop.jpg.
|
||||||
|
Rect roi{/*left=*/0, /*top=*/0, /*right=*/1, /*bottom=*/0.8333333};
|
||||||
|
ImageProcessingOptions image_processing_options{roi,
|
||||||
|
/*rotation_degrees=*/-90};
|
||||||
|
|
||||||
|
// Extract both embeddings.
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(const EmbeddingResult& crop_result,
|
||||||
|
image_embedder->Embed(crop));
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
|
const EmbeddingResult& rotated_result,
|
||||||
|
image_embedder->Embed(rotated, image_processing_options));
|
||||||
|
|
||||||
|
// Check results.
|
||||||
|
CheckMobileNetV3Result(crop_result, false);
|
||||||
|
CheckMobileNetV3Result(rotated_result, false);
|
||||||
|
// Check cosine similarity.
|
||||||
|
MP_ASSERT_OK_AND_ASSIGN(
|
||||||
|
double similarity,
|
||||||
|
ImageEmbedder::CosineSimilarity(crop_result.embeddings(0).entries(0),
|
||||||
|
rotated_result.embeddings(0).entries(0)));
|
||||||
|
double expected_similarity = 0.62838;
|
||||||
|
EXPECT_LE(abs(similarity - expected_similarity), kSimilarityTolerancy);
|
||||||
|
}
|
||||||
|
|
||||||
class VideoModeTest : public tflite_shims::testing::Test {};
|
class VideoModeTest : public tflite_shims::testing::Test {};
|
||||||
|
|
||||||
TEST_F(VideoModeTest, FailsWithCallingWrongMethod) {
|
TEST_F(VideoModeTest, FailsWithCallingWrongMethod) {
|
||||||
|
|
|
@ -24,10 +24,12 @@ cc_library(
|
||||||
":image_segmenter_graph",
|
":image_segmenter_graph",
|
||||||
"//mediapipe/framework/api2:builder",
|
"//mediapipe/framework/api2:builder",
|
||||||
"//mediapipe/framework/formats:image",
|
"//mediapipe/framework/formats:image",
|
||||||
|
"//mediapipe/framework/formats:rect_cc_proto",
|
||||||
"//mediapipe/tasks/cc/components/proto:segmenter_options_cc_proto",
|
"//mediapipe/tasks/cc/components/proto:segmenter_options_cc_proto",
|
||||||
"//mediapipe/tasks/cc/core:base_options",
|
"//mediapipe/tasks/cc/core:base_options",
|
||||||
"//mediapipe/tasks/cc/core:utils",
|
"//mediapipe/tasks/cc/core:utils",
|
||||||
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
"//mediapipe/tasks/cc/vision/core:base_vision_task_api",
|
||||||
|
"//mediapipe/tasks/cc/vision/core:image_processing_options",
|
||||||
"//mediapipe/tasks/cc/vision/core:running_mode",
|
"//mediapipe/tasks/cc/vision/core:running_mode",
|
||||||
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
"//mediapipe/tasks/cc/vision/core:vision_task_api_factory",
|
||||||
"//mediapipe/tasks/cc/vision/image_segmenter/proto:image_segmenter_options_cc_proto",
|
"//mediapipe/tasks/cc/vision/image_segmenter/proto:image_segmenter_options_cc_proto",
|
||||||
|
@ -48,6 +50,7 @@ cc_library(
|
||||||
"//mediapipe/framework/api2:builder",
|
"//mediapipe/framework/api2:builder",
|
||||||
"//mediapipe/framework/api2:port",
|
"//mediapipe/framework/api2:port",
|
||||||
"//mediapipe/framework/formats:image",
|
"//mediapipe/framework/formats:image",
|
||||||
|
"//mediapipe/framework/formats:rect_cc_proto",
|
||||||
"//mediapipe/framework/port:status",
|
"//mediapipe/framework/port:status",
|
||||||
"//mediapipe/tasks/cc:common",
|
"//mediapipe/tasks/cc:common",
|
||||||
"//mediapipe/tasks/cc/components:image_preprocessing",
|
"//mediapipe/tasks/cc/components:image_preprocessing",
|
||||||
|
|
|
@ -17,8 +17,10 @@ limitations under the License.
|
||||||
|
|
||||||
#include "mediapipe/framework/api2/builder.h"
|
#include "mediapipe/framework/api2/builder.h"
|
||||||
#include "mediapipe/framework/formats/image.h"
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/framework/formats/rect.pb.h"
|
||||||
#include "mediapipe/tasks/cc/components/proto/segmenter_options.pb.h"
|
#include "mediapipe/tasks/cc/components/proto/segmenter_options.pb.h"
|
||||||
#include "mediapipe/tasks/cc/core/utils.h"
|
#include "mediapipe/tasks/cc/core/utils.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
#include "mediapipe/tasks/cc/vision/core/running_mode.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
#include "mediapipe/tasks/cc/vision/core/vision_task_api_factory.h"
|
||||||
|
|
||||||
|
@ -32,6 +34,8 @@ constexpr char kGroupedSegmentationTag[] = "GROUPED_SEGMENTATION";
|
||||||
constexpr char kImageInStreamName[] = "image_in";
|
constexpr char kImageInStreamName[] = "image_in";
|
||||||
constexpr char kImageOutStreamName[] = "image_out";
|
constexpr char kImageOutStreamName[] = "image_out";
|
||||||
constexpr char kImageTag[] = "IMAGE";
|
constexpr char kImageTag[] = "IMAGE";
|
||||||
|
constexpr char kNormRectStreamName[] = "norm_rect_in";
|
||||||
|
constexpr char kNormRectTag[] = "NORM_RECT";
|
||||||
constexpr char kSubgraphTypeName[] =
|
constexpr char kSubgraphTypeName[] =
|
||||||
"mediapipe.tasks.vision.ImageSegmenterGraph";
|
"mediapipe.tasks.vision.ImageSegmenterGraph";
|
||||||
constexpr int kMicroSecondsPerMilliSecond = 1000;
|
constexpr int kMicroSecondsPerMilliSecond = 1000;
|
||||||
|
@ -51,15 +55,18 @@ CalculatorGraphConfig CreateGraphConfig(
|
||||||
auto& task_subgraph = graph.AddNode(kSubgraphTypeName);
|
auto& task_subgraph = graph.AddNode(kSubgraphTypeName);
|
||||||
task_subgraph.GetOptions<ImageSegmenterOptionsProto>().Swap(options.get());
|
task_subgraph.GetOptions<ImageSegmenterOptionsProto>().Swap(options.get());
|
||||||
graph.In(kImageTag).SetName(kImageInStreamName);
|
graph.In(kImageTag).SetName(kImageInStreamName);
|
||||||
|
graph.In(kNormRectTag).SetName(kNormRectStreamName);
|
||||||
task_subgraph.Out(kGroupedSegmentationTag).SetName(kSegmentationStreamName) >>
|
task_subgraph.Out(kGroupedSegmentationTag).SetName(kSegmentationStreamName) >>
|
||||||
graph.Out(kGroupedSegmentationTag);
|
graph.Out(kGroupedSegmentationTag);
|
||||||
task_subgraph.Out(kImageTag).SetName(kImageOutStreamName) >>
|
task_subgraph.Out(kImageTag).SetName(kImageOutStreamName) >>
|
||||||
graph.Out(kImageTag);
|
graph.Out(kImageTag);
|
||||||
if (enable_flow_limiting) {
|
if (enable_flow_limiting) {
|
||||||
return tasks::core::AddFlowLimiterCalculator(
|
return tasks::core::AddFlowLimiterCalculator(graph, task_subgraph,
|
||||||
graph, task_subgraph, {kImageTag}, kGroupedSegmentationTag);
|
{kImageTag, kNormRectTag},
|
||||||
|
kGroupedSegmentationTag);
|
||||||
}
|
}
|
||||||
graph.In(kImageTag) >> task_subgraph.In(kImageTag);
|
graph.In(kImageTag) >> task_subgraph.In(kImageTag);
|
||||||
|
graph.In(kNormRectTag) >> task_subgraph.In(kNormRectTag);
|
||||||
return graph.GetConfig();
|
return graph.GetConfig();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -139,47 +146,68 @@ absl::StatusOr<std::unique_ptr<ImageSegmenter>> ImageSegmenter::Create(
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::StatusOr<std::vector<Image>> ImageSegmenter::Segment(
|
absl::StatusOr<std::vector<Image>> ImageSegmenter::Segment(
|
||||||
mediapipe::Image image) {
|
mediapipe::Image image,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
if (image.UsesGpu()) {
|
if (image.UsesGpu()) {
|
||||||
return CreateStatusWithPayload(
|
return CreateStatusWithPayload(
|
||||||
absl::StatusCode::kInvalidArgument,
|
absl::StatusCode::kInvalidArgument,
|
||||||
absl::StrCat("GPU input images are currently not supported."),
|
absl::StrCat("GPU input images are currently not supported."),
|
||||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||||
}
|
}
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
NormalizedRect norm_rect,
|
||||||
|
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||||
ASSIGN_OR_RETURN(
|
ASSIGN_OR_RETURN(
|
||||||
auto output_packets,
|
auto output_packets,
|
||||||
ProcessImageData({{kImageInStreamName,
|
ProcessImageData(
|
||||||
mediapipe::MakePacket<Image>(std::move(image))}}));
|
{{kImageInStreamName, mediapipe::MakePacket<Image>(std::move(image))},
|
||||||
|
{kNormRectStreamName,
|
||||||
|
MakePacket<NormalizedRect>(std::move(norm_rect))}}));
|
||||||
return output_packets[kSegmentationStreamName].Get<std::vector<Image>>();
|
return output_packets[kSegmentationStreamName].Get<std::vector<Image>>();
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::StatusOr<std::vector<Image>> ImageSegmenter::SegmentForVideo(
|
absl::StatusOr<std::vector<Image>> ImageSegmenter::SegmentForVideo(
|
||||||
mediapipe::Image image, int64 timestamp_ms) {
|
mediapipe::Image image, int64 timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
if (image.UsesGpu()) {
|
if (image.UsesGpu()) {
|
||||||
return CreateStatusWithPayload(
|
return CreateStatusWithPayload(
|
||||||
absl::StatusCode::kInvalidArgument,
|
absl::StatusCode::kInvalidArgument,
|
||||||
absl::StrCat("GPU input images are currently not supported."),
|
absl::StrCat("GPU input images are currently not supported."),
|
||||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||||
}
|
}
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
NormalizedRect norm_rect,
|
||||||
|
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||||
ASSIGN_OR_RETURN(
|
ASSIGN_OR_RETURN(
|
||||||
auto output_packets,
|
auto output_packets,
|
||||||
ProcessVideoData(
|
ProcessVideoData(
|
||||||
{{kImageInStreamName,
|
{{kImageInStreamName,
|
||||||
MakePacket<Image>(std::move(image))
|
MakePacket<Image>(std::move(image))
|
||||||
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
|
||||||
|
{kNormRectStreamName,
|
||||||
|
MakePacket<NormalizedRect>(std::move(norm_rect))
|
||||||
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}));
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}}));
|
||||||
return output_packets[kSegmentationStreamName].Get<std::vector<Image>>();
|
return output_packets[kSegmentationStreamName].Get<std::vector<Image>>();
|
||||||
}
|
}
|
||||||
|
|
||||||
absl::Status ImageSegmenter::SegmentAsync(Image image, int64 timestamp_ms) {
|
absl::Status ImageSegmenter::SegmentAsync(
|
||||||
|
Image image, int64 timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options) {
|
||||||
if (image.UsesGpu()) {
|
if (image.UsesGpu()) {
|
||||||
return CreateStatusWithPayload(
|
return CreateStatusWithPayload(
|
||||||
absl::StatusCode::kInvalidArgument,
|
absl::StatusCode::kInvalidArgument,
|
||||||
absl::StrCat("GPU input images are currently not supported."),
|
absl::StrCat("GPU input images are currently not supported."),
|
||||||
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
MediaPipeTasksStatus::kRunnerUnexpectedInputError);
|
||||||
}
|
}
|
||||||
|
ASSIGN_OR_RETURN(
|
||||||
|
NormalizedRect norm_rect,
|
||||||
|
ConvertToNormalizedRect(image_processing_options, /*roi_allowed=*/false));
|
||||||
return SendLiveStreamData(
|
return SendLiveStreamData(
|
||||||
{{kImageInStreamName,
|
{{kImageInStreamName,
|
||||||
MakePacket<Image>(std::move(image))
|
MakePacket<Image>(std::move(image))
|
||||||
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))},
|
||||||
|
{kNormRectStreamName,
|
||||||
|
MakePacket<NormalizedRect>(std::move(norm_rect))
|
||||||
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
|
.At(Timestamp(timestamp_ms * kMicroSecondsPerMilliSecond))}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ limitations under the License.
|
||||||
#include "mediapipe/framework/formats/image.h"
|
#include "mediapipe/framework/formats/image.h"
|
||||||
#include "mediapipe/tasks/cc/core/base_options.h"
|
#include "mediapipe/tasks/cc/core/base_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
#include "mediapipe/tasks/cc/vision/core/base_vision_task_api.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_options.pb.h"
|
#include "mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_options.pb.h"
|
||||||
#include "tensorflow/lite/kernels/register.h"
|
#include "tensorflow/lite/kernels/register.h"
|
||||||
|
|
||||||
|
@ -116,14 +117,21 @@ class ImageSegmenter : tasks::vision::core::BaseVisionTaskApi {
|
||||||
// running mode.
|
// running mode.
|
||||||
//
|
//
|
||||||
// The image can be of any size with format RGB or RGBA.
|
// The image can be of any size with format RGB or RGBA.
|
||||||
// TODO: Describes how the input image will be preprocessed
|
//
|
||||||
// after the yuv support is implemented.
|
// The optional 'image_processing_options' parameter can be used to specify
|
||||||
|
// the rotation to apply to the image before performing segmentation, by
|
||||||
|
// setting its 'rotation_degrees' field. Note that specifying a
|
||||||
|
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||||
|
// and will result in an invalid argument error being returned.
|
||||||
//
|
//
|
||||||
// If the output_type is CATEGORY_MASK, the returned vector of images is
|
// If the output_type is CATEGORY_MASK, the returned vector of images is
|
||||||
// per-category segmented image mask.
|
// per-category segmented image mask.
|
||||||
// If the output_type is CONFIDENCE_MASK, the returned vector of images
|
// If the output_type is CONFIDENCE_MASK, the returned vector of images
|
||||||
// contains only one confidence image mask.
|
// contains only one confidence image mask.
|
||||||
absl::StatusOr<std::vector<mediapipe::Image>> Segment(mediapipe::Image image);
|
absl::StatusOr<std::vector<mediapipe::Image>> Segment(
|
||||||
|
mediapipe::Image image,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||||
|
std::nullopt);
|
||||||
|
|
||||||
// Performs image segmentation on the provided video frame.
|
// Performs image segmentation on the provided video frame.
|
||||||
// Only use this method when the ImageSegmenter is created with the video
|
// Only use this method when the ImageSegmenter is created with the video
|
||||||
|
@ -133,12 +141,20 @@ class ImageSegmenter : tasks::vision::core::BaseVisionTaskApi {
|
||||||
// provide the video frame's timestamp (in milliseconds). The input timestamps
|
// provide the video frame's timestamp (in milliseconds). The input timestamps
|
||||||
// must be monotonically increasing.
|
// must be monotonically increasing.
|
||||||
//
|
//
|
||||||
|
// The optional 'image_processing_options' parameter can be used to specify
|
||||||
|
// the rotation to apply to the image before performing segmentation, by
|
||||||
|
// setting its 'rotation_degrees' field. Note that specifying a
|
||||||
|
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||||
|
// and will result in an invalid argument error being returned.
|
||||||
|
//
|
||||||
// If the output_type is CATEGORY_MASK, the returned vector of images is
|
// If the output_type is CATEGORY_MASK, the returned vector of images is
|
||||||
// per-category segmented image mask.
|
// per-category segmented image mask.
|
||||||
// If the output_type is CONFIDENCE_MASK, the returned vector of images
|
// If the output_type is CONFIDENCE_MASK, the returned vector of images
|
||||||
// contains only one confidence image mask.
|
// contains only one confidence image mask.
|
||||||
absl::StatusOr<std::vector<mediapipe::Image>> SegmentForVideo(
|
absl::StatusOr<std::vector<mediapipe::Image>> SegmentForVideo(
|
||||||
mediapipe::Image image, int64 timestamp_ms);
|
mediapipe::Image image, int64 timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions> image_processing_options =
|
||||||
|
std::nullopt);
|
||||||
|
|
||||||
// Sends live image data to perform image segmentation, and the results will
|
// Sends live image data to perform image segmentation, and the results will
|
||||||
// be available via the "result_callback" provided in the
|
// be available via the "result_callback" provided in the
|
||||||
|
@ -150,6 +166,12 @@ class ImageSegmenter : tasks::vision::core::BaseVisionTaskApi {
|
||||||
// sent to the image segmenter. The input timestamps must be monotonically
|
// sent to the image segmenter. The input timestamps must be monotonically
|
||||||
// increasing.
|
// increasing.
|
||||||
//
|
//
|
||||||
|
// The optional 'image_processing_options' parameter can be used to specify
|
||||||
|
// the rotation to apply to the image before performing segmentation, by
|
||||||
|
// setting its 'rotation_degrees' field. Note that specifying a
|
||||||
|
// region-of-interest using the 'region_of_interest' field is NOT supported
|
||||||
|
// and will result in an invalid argument error being returned.
|
||||||
|
//
|
||||||
// The "result_callback" provides
|
// The "result_callback" provides
|
||||||
// - A vector of segmented image masks.
|
// - A vector of segmented image masks.
|
||||||
// If the output_type is CATEGORY_MASK, the returned vector of images is
|
// If the output_type is CATEGORY_MASK, the returned vector of images is
|
||||||
|
@ -161,7 +183,9 @@ class ImageSegmenter : tasks::vision::core::BaseVisionTaskApi {
|
||||||
// no longer be valid when the callback returns. To access the image data
|
// no longer be valid when the callback returns. To access the image data
|
||||||
// outside of the callback, callers need to make a copy of the image.
|
// outside of the callback, callers need to make a copy of the image.
|
||||||
// - The input timestamp in milliseconds.
|
// - The input timestamp in milliseconds.
|
||||||
absl::Status SegmentAsync(mediapipe::Image image, int64 timestamp_ms);
|
absl::Status SegmentAsync(mediapipe::Image image, int64 timestamp_ms,
|
||||||
|
std::optional<core::ImageProcessingOptions>
|
||||||
|
image_processing_options = std::nullopt);
|
||||||
|
|
||||||
// Shuts down the ImageSegmenter when all work is done.
|
// Shuts down the ImageSegmenter when all work is done.
|
||||||
absl::Status Close() { return runner_->Close(); }
|
absl::Status Close() { return runner_->Close(); }
|
||||||
|
|
|
@ -23,6 +23,7 @@ limitations under the License.
|
||||||
#include "mediapipe/framework/api2/builder.h"
|
#include "mediapipe/framework/api2/builder.h"
|
||||||
#include "mediapipe/framework/api2/port.h"
|
#include "mediapipe/framework/api2/port.h"
|
||||||
#include "mediapipe/framework/formats/image.h"
|
#include "mediapipe/framework/formats/image.h"
|
||||||
|
#include "mediapipe/framework/formats/rect.pb.h"
|
||||||
#include "mediapipe/framework/port/status_macros.h"
|
#include "mediapipe/framework/port/status_macros.h"
|
||||||
#include "mediapipe/tasks/cc/common.h"
|
#include "mediapipe/tasks/cc/common.h"
|
||||||
#include "mediapipe/tasks/cc/components/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
|
#include "mediapipe/tasks/cc/components/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
|
||||||
|
@ -62,6 +63,7 @@ using LabelItems = mediapipe::proto_ns::Map<int64, ::mediapipe::LabelMapItem>;
|
||||||
constexpr char kSegmentationTag[] = "SEGMENTATION";
|
constexpr char kSegmentationTag[] = "SEGMENTATION";
|
||||||
constexpr char kGroupedSegmentationTag[] = "GROUPED_SEGMENTATION";
|
constexpr char kGroupedSegmentationTag[] = "GROUPED_SEGMENTATION";
|
||||||
constexpr char kImageTag[] = "IMAGE";
|
constexpr char kImageTag[] = "IMAGE";
|
||||||
|
constexpr char kNormRectTag[] = "NORM_RECT";
|
||||||
constexpr char kTensorsTag[] = "TENSORS";
|
constexpr char kTensorsTag[] = "TENSORS";
|
||||||
constexpr char kOutputSizeTag[] = "OUTPUT_SIZE";
|
constexpr char kOutputSizeTag[] = "OUTPUT_SIZE";
|
||||||
|
|
||||||
|
@ -159,6 +161,10 @@ absl::StatusOr<const Tensor*> GetOutputTensor(
|
||||||
// Inputs:
|
// Inputs:
|
||||||
// IMAGE - Image
|
// IMAGE - Image
|
||||||
// Image to perform segmentation on.
|
// Image to perform segmentation on.
|
||||||
|
// NORM_RECT - NormalizedRect @Optional
|
||||||
|
// Describes image rotation and region of image to perform segmentation
|
||||||
|
// on.
|
||||||
|
// @Optional: rect covering the whole image is used if not specified.
|
||||||
//
|
//
|
||||||
// Outputs:
|
// Outputs:
|
||||||
// SEGMENTATION - mediapipe::Image @Multiple
|
// SEGMENTATION - mediapipe::Image @Multiple
|
||||||
|
@ -196,10 +202,12 @@ class ImageSegmenterGraph : public core::ModelTaskGraph {
|
||||||
ASSIGN_OR_RETURN(const auto* model_resources,
|
ASSIGN_OR_RETURN(const auto* model_resources,
|
||||||
CreateModelResources<ImageSegmenterOptions>(sc));
|
CreateModelResources<ImageSegmenterOptions>(sc));
|
||||||
Graph graph;
|
Graph graph;
|
||||||
ASSIGN_OR_RETURN(auto output_streams,
|
ASSIGN_OR_RETURN(
|
||||||
BuildSegmentationTask(
|
auto output_streams,
|
||||||
sc->Options<ImageSegmenterOptions>(), *model_resources,
|
BuildSegmentationTask(
|
||||||
graph[Input<Image>(kImageTag)], graph));
|
sc->Options<ImageSegmenterOptions>(), *model_resources,
|
||||||
|
graph[Input<Image>(kImageTag)],
|
||||||
|
graph[Input<NormalizedRect>::Optional(kNormRectTag)], graph));
|
||||||
|
|
||||||
auto& merge_images_to_vector =
|
auto& merge_images_to_vector =
|
||||||
graph.AddNode("MergeImagesToVectorCalculator");
|
graph.AddNode("MergeImagesToVectorCalculator");
|
||||||
|
@ -228,7 +236,7 @@ class ImageSegmenterGraph : public core::ModelTaskGraph {
|
||||||
absl::StatusOr<ImageSegmenterOutputs> BuildSegmentationTask(
|
absl::StatusOr<ImageSegmenterOutputs> BuildSegmentationTask(
|
||||||
const ImageSegmenterOptions& task_options,
|
const ImageSegmenterOptions& task_options,
|
||||||
const core::ModelResources& model_resources, Source<Image> image_in,
|
const core::ModelResources& model_resources, Source<Image> image_in,
|
||||||
Graph& graph) {
|
Source<NormalizedRect> norm_rect_in, Graph& graph) {
|
||||||
MP_RETURN_IF_ERROR(SanityCheckOptions(task_options));
|
MP_RETURN_IF_ERROR(SanityCheckOptions(task_options));
|
||||||
|
|
||||||
// Adds preprocessing calculators and connects them to the graph input image
|
// Adds preprocessing calculators and connects them to the graph input image
|
||||||
|
@ -240,6 +248,7 @@ class ImageSegmenterGraph : public core::ModelTaskGraph {
|
||||||
&preprocessing
|
&preprocessing
|
||||||
.GetOptions<tasks::components::ImagePreprocessingOptions>()));
|
.GetOptions<tasks::components::ImagePreprocessingOptions>()));
|
||||||
image_in >> preprocessing.In(kImageTag);
|
image_in >> preprocessing.In(kImageTag);
|
||||||
|
norm_rect_in >> preprocessing.In(kNormRectTag);
|
||||||
|
|
||||||
// Adds inference subgraph and connects its input stream to the output
|
// Adds inference subgraph and connects its input stream to the output
|
||||||
// tensors produced by the ImageToTensorCalculator.
|
// tensors produced by the ImageToTensorCalculator.
|
||||||
|
|
|
@ -29,8 +29,10 @@ limitations under the License.
|
||||||
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
#include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
|
||||||
#include "mediapipe/framework/port/status_matchers.h"
|
#include "mediapipe/framework/port/status_matchers.h"
|
||||||
#include "mediapipe/tasks/cc/components/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
|
#include "mediapipe/tasks/cc/components/calculators/tensor/tensors_to_segmentation_calculator.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/components/containers/rect.h"
|
||||||
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
#include "mediapipe/tasks/cc/core/proto/base_options.pb.h"
|
||||||
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
#include "mediapipe/tasks/cc/core/proto/external_file.pb.h"
|
||||||
|
#include "mediapipe/tasks/cc/vision/core/image_processing_options.h"
|
||||||
#include "mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_options.pb.h"
|
#include "mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_options.pb.h"
|
||||||
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
|
||||||
#include "tensorflow/lite/core/shims/cc/shims_test_util.h"
|
#include "tensorflow/lite/core/shims/cc/shims_test_util.h"
|
||||||
|
@ -44,6 +46,8 @@ namespace {
|
||||||
|
|
||||||
using ::mediapipe::Image;
|
using ::mediapipe::Image;
|
||||||
using ::mediapipe::file::JoinPath;
|
using ::mediapipe::file::JoinPath;
|
||||||
|
using ::mediapipe::tasks::components::containers::Rect;
|
||||||
|
using ::mediapipe::tasks::vision::core::ImageProcessingOptions;
|
||||||
using ::testing::HasSubstr;
|
using ::testing::HasSubstr;
|
||||||
using ::testing::Optional;
|
using ::testing::Optional;
|
||||||
|
|
||||||
|
@ -237,7 +241,6 @@ TEST_F(ImageModeTest, SucceedsWithConfidenceMask) {
|
||||||
|
|
||||||
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageSegmenter> segmenter,
|
MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageSegmenter> segmenter,
|
||||||
ImageSegmenter::Create(std::move(options)));
|
ImageSegmenter::Create(std::move(options)));
|
||||||
MP_ASSERT_OK_AND_ASSIGN(auto results, segmenter->Segment(image));
|
|
||||||
MP_ASSERT_OK_AND_ASSIGN(auto confidence_masks, segmenter->Segment(image));
|
MP_ASSERT_OK_AND_ASSIGN(auto confidence_masks, segmenter->Segment(image));
|
||||||
EXPECT_EQ(confidence_masks.size(), 21);
|
EXPECT_EQ(confidence_masks.size(), 21);
|
||||||
|
|
||||||
|
@ -253,6 +256,61 @@ TEST_F(ImageModeTest, SucceedsWithConfidenceMask) {
|
||||||
SimilarToFloatMask(expected_mask_float, kGoldenMaskSimilarity));
|
SimilarToFloatMask(expected_mask_float, kGoldenMaskSimilarity));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Segments "cat_rotated.jpg" while requesting a -90 degree rotation through
// ImageProcessingOptions, then compares the confidence mask of the cat
// category against the "cat_rotated_mask.jpg" golden.
TEST_F(ImageModeTest, SucceedsWithRotation) {
  MP_ASSERT_OK_AND_ASSIGN(
      Image image, DecodeImageFromFile(
                       JoinPath("./", kTestDataDirectory, "cat_rotated.jpg")));
  auto options = std::make_unique<ImageSegmenterOptions>();
  options->base_options.model_asset_path =
      JoinPath("./", kTestDataDirectory, kDeeplabV3WithMetadata);
  options->output_type = ImageSegmenterOptions::OutputType::CONFIDENCE_MASK;
  options->activation = ImageSegmenterOptions::Activation::SOFTMAX;

  MP_ASSERT_OK_AND_ASSIGN(std::unique_ptr<ImageSegmenter> segmenter,
                          ImageSegmenter::Create(std::move(options)));
  ImageProcessingOptions image_processing_options;
  image_processing_options.rotation_degrees = -90;
  // The options must actually be forwarded to Segment(); otherwise the
  // rotation path is never exercised and the options object is dead code.
  MP_ASSERT_OK_AND_ASSIGN(auto confidence_masks,
                          segmenter->Segment(image, image_processing_options));
  // ASSERT (not EXPECT) so that the indexed access below is never reached
  // with an unexpected number of masks.
  ASSERT_EQ(confidence_masks.size(), 21);

  cv::Mat expected_mask =
      cv::imread(JoinPath("./", kTestDataDirectory, "cat_rotated_mask.jpg"),
                 cv::IMREAD_GRAYSCALE);
  cv::Mat expected_mask_float;
  // Rescale the 8-bit golden into the [0, 1] float range before comparing.
  expected_mask.convertTo(expected_mask_float, CV_32FC1, 1 / 255.f);

  // Cat category index 8.
  cv::Mat cat_mask = mediapipe::formats::MatView(
      confidence_masks[8].GetImageFrameSharedPtr().get());
  EXPECT_THAT(cat_mask,
              SimilarToFloatMask(expected_mask_float, kGoldenMaskSimilarity));
}
|
||||||
|
|
||||||
|
// Checks that Segment() rejects a region-of-interest request: the returned
// status must be kInvalidArgument, mention the unsupported feature, and carry
// the kImageProcessingInvalidArgumentError payload.
TEST_F(ImageModeTest, FailsWithRegionOfInterest) {
  MP_ASSERT_OK_AND_ASSIGN(
      Image image,
      DecodeImageFromFile(JoinPath("./", kTestDataDirectory, "cat.jpg")));

  auto segmenter_options = std::make_unique<ImageSegmenterOptions>();
  segmenter_options->base_options.model_asset_path =
      JoinPath("./", kTestDataDirectory, kDeeplabV3WithMetadata);
  segmenter_options->output_type =
      ImageSegmenterOptions::OutputType::CONFIDENCE_MASK;
  segmenter_options->activation = ImageSegmenterOptions::Activation::SOFTMAX;
  MP_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<ImageSegmenter> segmenter,
      ImageSegmenter::Create(std::move(segmenter_options)));

  // Any region-of-interest is expected to be refused by this task.
  Rect roi{/*left=*/0.1, /*top=*/0, /*right=*/0.9, /*bottom=*/1};
  ImageProcessingOptions image_processing_options{roi, /*rotation_degrees=*/0};

  auto results = segmenter->Segment(image, image_processing_options);
  const absl::Status& status = results.status();

  EXPECT_EQ(status.code(), absl::StatusCode::kInvalidArgument);
  EXPECT_THAT(status.message(),
              HasSubstr("This task doesn't support region-of-interest"));

  const absl::Cord expected_payload(absl::StrCat(
      MediaPipeTasksStatus::kImageProcessingInvalidArgumentError));
  EXPECT_THAT(status.GetPayload(kMediaPipeTasksPayload),
              Optional(expected_payload));
}
|
||||||
|
|
||||||
TEST_F(ImageModeTest, SucceedsSelfie128x128Segmentation) {
|
TEST_F(ImageModeTest, SucceedsSelfie128x128Segmentation) {
|
||||||
Image image =
|
Image image =
|
||||||
GetSRGBImage(JoinPath("./", kTestDataDirectory, "mozart_square.jpg"));
|
GetSRGBImage(JoinPath("./", kTestDataDirectory, "mozart_square.jpg"));
|
||||||
|
|
4
mediapipe/tasks/testdata/vision/BUILD
vendored
4
mediapipe/tasks/testdata/vision/BUILD
vendored
|
@ -28,6 +28,8 @@ mediapipe_files(srcs = [
|
||||||
"burger_rotated.jpg",
|
"burger_rotated.jpg",
|
||||||
"cat.jpg",
|
"cat.jpg",
|
||||||
"cat_mask.jpg",
|
"cat_mask.jpg",
|
||||||
|
"cat_rotated.jpg",
|
||||||
|
"cat_rotated_mask.jpg",
|
||||||
"cats_and_dogs.jpg",
|
"cats_and_dogs.jpg",
|
||||||
"cats_and_dogs_no_resizing.jpg",
|
"cats_and_dogs_no_resizing.jpg",
|
||||||
"cats_and_dogs_rotated.jpg",
|
"cats_and_dogs_rotated.jpg",
|
||||||
|
@ -84,6 +86,8 @@ filegroup(
|
||||||
"burger_rotated.jpg",
|
"burger_rotated.jpg",
|
||||||
"cat.jpg",
|
"cat.jpg",
|
||||||
"cat_mask.jpg",
|
"cat_mask.jpg",
|
||||||
|
"cat_rotated.jpg",
|
||||||
|
"cat_rotated_mask.jpg",
|
||||||
"cats_and_dogs.jpg",
|
"cats_and_dogs.jpg",
|
||||||
"cats_and_dogs_no_resizing.jpg",
|
"cats_and_dogs_no_resizing.jpg",
|
||||||
"cats_and_dogs_rotated.jpg",
|
"cats_and_dogs_rotated.jpg",
|
||||||
|
|
32
third_party/external_files.bzl
vendored
32
third_party/external_files.bzl
vendored
|
@ -76,6 +76,18 @@ def external_files():
|
||||||
urls = ["https://storage.googleapis.com/mediapipe-assets/cat_mask.jpg?generation=1661875677203533"],
|
urls = ["https://storage.googleapis.com/mediapipe-assets/cat_mask.jpg?generation=1661875677203533"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
http_file(
|
||||||
|
name = "com_google_mediapipe_cat_rotated_jpg",
|
||||||
|
sha256 = "b78cee5ad14c9f36b1c25d103db371d81ca74d99030063c46a38e80bb8f38649",
|
||||||
|
urls = ["https://storage.googleapis.com/mediapipe-assets/cat_rotated.jpg?generation=1666304165042123"],
|
||||||
|
)
|
||||||
|
|
||||||
|
http_file(
|
||||||
|
name = "com_google_mediapipe_cat_rotated_mask_jpg",
|
||||||
|
sha256 = "f336973e7621d602f2ebc9a6ab1c62d8502272d391713f369d3b99541afda861",
|
||||||
|
urls = ["https://storage.googleapis.com/mediapipe-assets/cat_rotated_mask.jpg?generation=1666304167148173"],
|
||||||
|
)
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
name = "com_google_mediapipe_cats_and_dogs_jpg",
|
name = "com_google_mediapipe_cats_and_dogs_jpg",
|
||||||
sha256 = "a2eaa7ad3a1aae4e623dd362a5f737e8a88d122597ecd1a02b3e1444db56df9c",
|
sha256 = "a2eaa7ad3a1aae4e623dd362a5f737e8a88d122597ecd1a02b3e1444db56df9c",
|
||||||
|
@ -162,8 +174,8 @@ def external_files():
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
name = "com_google_mediapipe_expected_left_down_hand_rotated_landmarks_prototxt",
|
name = "com_google_mediapipe_expected_left_down_hand_rotated_landmarks_prototxt",
|
||||||
sha256 = "a16d6cb8dd07d60f0678ddeb6a7447b73b9b03d4ddde365c8770b472205bb6cf",
|
sha256 = "c4dfdcc2e4cd366eb5f8ad227be94049eb593e3a528564611094687912463687",
|
||||||
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_down_hand_rotated_landmarks.prototxt?generation=1666037061297507"],
|
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_down_hand_rotated_landmarks.prototxt?generation=1666304169636598"],
|
||||||
)
|
)
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
|
@ -174,8 +186,8 @@ def external_files():
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
name = "com_google_mediapipe_expected_left_up_hand_rotated_landmarks_prototxt",
|
name = "com_google_mediapipe_expected_left_up_hand_rotated_landmarks_prototxt",
|
||||||
sha256 = "a9b9789c274d48a7cb9cc10af7bc644eb2512bb934529790d0a5404726daa86a",
|
sha256 = "7fb2d33cf69d2da50952a45bad0c0618f30859e608958fee95948a6e0de63ccb",
|
||||||
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_up_hand_rotated_landmarks.prototxt?generation=1666037063443676"],
|
urls = ["https://storage.googleapis.com/mediapipe-assets/expected_left_up_hand_rotated_landmarks.prototxt?generation=1666304171758037"],
|
||||||
)
|
)
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
|
@ -258,8 +270,8 @@ def external_files():
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
name = "com_google_mediapipe_hand_detector_result_one_hand_rotated_pbtxt",
|
name = "com_google_mediapipe_hand_detector_result_one_hand_rotated_pbtxt",
|
||||||
sha256 = "ff5ca0654028d78a3380df90054273cae79abe1b7369b164063fd1d5758ec370",
|
sha256 = "555079c274ea91699757a0b9888c9993a8ab450069103b1bcd4ebb805a8e023c",
|
||||||
urls = ["https://storage.googleapis.com/mediapipe-assets/hand_detector_result_one_hand_rotated.pbtxt?generation=1666037065601724"],
|
urls = ["https://storage.googleapis.com/mediapipe-assets/hand_detector_result_one_hand_rotated.pbtxt?generation=1666304174234283"],
|
||||||
)
|
)
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
|
@ -606,8 +618,8 @@ def external_files():
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
name = "com_google_mediapipe_pointing_up_rotated_landmarks_pbtxt",
|
name = "com_google_mediapipe_pointing_up_rotated_landmarks_pbtxt",
|
||||||
sha256 = "ccf67e5867094ffb6c465a4dfbf2ef1eb3f9db2465803fc25a0b84c958e050de",
|
sha256 = "5ec37218d8b613436f5c10121dc689bf9ee69af0656a6ccf8c2e3e8b652e2ad6",
|
||||||
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_rotated_landmarks.pbtxt?generation=1666037074376515"],
|
urls = ["https://storage.googleapis.com/mediapipe-assets/pointing_up_rotated_landmarks.pbtxt?generation=1666304178388806"],
|
||||||
)
|
)
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
|
@ -798,8 +810,8 @@ def external_files():
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
name = "com_google_mediapipe_thumb_up_rotated_landmarks_pbtxt",
|
name = "com_google_mediapipe_thumb_up_rotated_landmarks_pbtxt",
|
||||||
sha256 = "5d0a465959cacbd201ac8dd8fc8a66c5997a172b71809b12d27296db6a28a102",
|
sha256 = "6645bbd98ea7f90b3e1ba297e16ea5280847fc5bf5400726d98c282f6c597257",
|
||||||
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_rotated_landmarks.pbtxt?generation=1666037079490527"],
|
urls = ["https://storage.googleapis.com/mediapipe-assets/thumb_up_rotated_landmarks.pbtxt?generation=1666304181397432"],
|
||||||
)
|
)
|
||||||
|
|
||||||
http_file(
|
http_file(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user