From 01c64082f133f0703113ea0292332d5a86257a8b Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 23 Feb 2023 14:10:21 -0800 Subject: [PATCH] ImageToTensorCalculator decides the output tensor size in runtime from the input image size. PiperOrigin-RevId: 511882195 --- mediapipe/calculators/tensor/BUILD | 1 + .../tensor/image_to_tensor_calculator.cc | 10 +-- .../tensor/image_to_tensor_calculator.proto | 2 + .../tensor/image_to_tensor_calculator_test.cc | 65 ++++++++++++------- .../tensor/image_to_tensor_utils.h | 25 ++++--- .../tensor/image_to_tensor_utils_test.cc | 27 ++++++-- 6 files changed, 88 insertions(+), 42 deletions(-) diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD index 1ac5644c1..c66665b68 100644 --- a/mediapipe/calculators/tensor/BUILD +++ b/mediapipe/calculators/tensor/BUILD @@ -1033,6 +1033,7 @@ cc_test( "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", ], ) diff --git a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc index 5af4cdb60..499b497b0 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc @@ -195,8 +195,9 @@ class ImageToTensorCalculator : public Node { #endif // MEDIAPIPE_DISABLE_GPU RotatedRect roi = GetRoi(image->width(), image->height(), norm_rect); - ASSIGN_OR_RETURN(auto padding, PadRoi(options_.output_tensor_width(), - options_.output_tensor_height(), + const int tensor_width = params_.output_width.value_or(image->width()); + const int tensor_height = params_.output_height.value_or(image->height()); + ASSIGN_OR_RETURN(auto padding, PadRoi(tensor_width, tensor_height, options_.keep_aspect_ratio(), &roi)); if (kOutLetterboxPadding(cc).IsConnected()) { kOutLetterboxPadding(cc).Send(padding); @@ -214,9 +215,8 @@ class ImageToTensorCalculator : public Node { Tensor::ElementType output_tensor_type = GetOutputTensorType(image->UsesGpu(), params_); - Tensor tensor(output_tensor_type, - {1, params_.output_height, params_.output_width, - GetNumOutputChannels(*image)}); + Tensor tensor(output_tensor_type, {1, tensor_height, tensor_width, + GetNumOutputChannels(*image)}); MP_RETURN_IF_ERROR((image->UsesGpu() ? gpu_converter_ : cpu_converter_) ->Convert(*image, roi, params_.range_min, params_.range_max, diff --git a/mediapipe/calculators/tensor/image_to_tensor_calculator.proto b/mediapipe/calculators/tensor/image_to_tensor_calculator.proto index 780ee8021..183f933df 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_calculator.proto +++ b/mediapipe/calculators/tensor/image_to_tensor_calculator.proto @@ -54,6 +54,8 @@ message ImageToTensorCalculatorOptions { BORDER_REPLICATE = 2; } + // The width and height of output tensor. The output tensor would have the + // input image width/height if not set. optional int32 output_tensor_width = 1; optional int32 output_tensor_height = 2; diff --git a/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc b/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc index ceb1fc502..ed7d93886 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_calculator_test.cc @@ -13,10 +13,13 @@ // limitations under the License. #include +#include +#include #include #include "absl/flags/flag.h" #include "absl/memory/memory.h" +#include "absl/strings/str_format.h" #include "absl/strings/substitute.h" #include "mediapipe/calculators/tensor/image_to_tensor_converter.h" #include "mediapipe/calculators/tensor/image_to_tensor_utils.h" @@ -51,13 +54,12 @@ std::string GetFilePath(absl::string_view filename) { // Image to tensor test template. // No processing/assertions should be done after the function is invoked. -void RunTestWithInputImagePacket(const Packet& input_image_packet, - cv::Mat expected_result, float range_min, - float range_max, int tensor_width, - int tensor_height, bool keep_aspect, - absl::optional border_mode, - const mediapipe::NormalizedRect& roi, - bool output_int_tensor) { +void RunTestWithInputImagePacket( + const Packet& input_image_packet, cv::Mat expected_result, float range_min, + float range_max, std::optional tensor_width, + std::optional tensor_height, bool keep_aspect, + absl::optional border_mode, + const mediapipe::NormalizedRect& roi, bool output_int_tensor) { std::string border_mode_str; if (border_mode) { switch (*border_mode) { @@ -93,8 +95,9 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet, })", range_min, range_max); } - auto graph_config = mediapipe::ParseTextProtoOrDie( - absl::Substitute(R"( + auto graph_config = + mediapipe::ParseTextProtoOrDie(absl::Substitute( + R"( input_stream: "input_image" input_stream: "roi" node { @@ -104,8 +107,8 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet, output_stream: "TENSORS:tensor" options { [mediapipe.ImageToTensorCalculatorOptions.ext] { - output_tensor_width: $0 - output_tensor_height: $1 + $0 # output tensor width + $1 # output tensor height keep_aspect_ratio: $2 $3 # output range $4 # border mode @@ -113,11 +116,16 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet, } } )", - /*$0=*/tensor_width, - /*$1=*/tensor_height, - /*$2=*/keep_aspect ? "true" : "false", - /*$3=*/output_tensor_range, - /*$4=*/border_mode_str)); + /*$0=*/tensor_width.has_value() + ? absl::StrFormat("output_tensor_width: %d", tensor_width.value()) + : "", + /*$1=*/tensor_height.has_value() + ? absl::StrFormat("output_tensor_height: %d", + tensor_height.value()) + : "", + /*$2=*/keep_aspect ? "true" : "false", + /*$3=*/output_tensor_range, + /*$4=*/border_mode_str)); std::vector output_packets; tool::AddVectorSink("tensor", &graph_config, &output_packets); @@ -149,18 +157,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet, if (output_int_tensor) { if (range_min < 0) { EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kInt8); - tensor_mat = cv::Mat(tensor_height, tensor_width, + tensor_mat = cv::Mat(expected_result.rows, expected_result.cols, channels == 1 ? CV_8SC1 : CV_8SC3, const_cast(view.buffer())); } else { EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kUInt8); - tensor_mat = cv::Mat(tensor_height, tensor_width, + tensor_mat = cv::Mat(expected_result.rows, expected_result.cols, channels == 1 ? CV_8UC1 : CV_8UC3, const_cast(view.buffer())); } } else { EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32); - tensor_mat = cv::Mat(tensor_height, tensor_width, + tensor_mat = cv::Mat(expected_result.rows, expected_result.cols, channels == 1 ? CV_32FC1 : CV_32FC3, const_cast(view.buffer())); } @@ -216,9 +224,9 @@ const std::vector kInputTypesToTest = {InputType::kImageFrame, void RunTest(cv::Mat input, cv::Mat expected_result, std::vector> float_ranges, - std::vector> int_ranges, int tensor_width, - int tensor_height, bool keep_aspect, - absl::optional border_mode, + std::vector> int_ranges, + std::optional tensor_width, std::optional tensor_height, + bool keep_aspect, absl::optional border_mode, const mediapipe::NormalizedRect& roi) { for (auto input_type : kInputTypesToTest) { for (auto float_range : float_ranges) { @@ -486,5 +494,18 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRangeBorderZero) { BorderMode::kZero, roi); } +TEST(ImageToTensorCalculatorTest, NoOpExceptRangeAndUseInputImageDims) { + mediapipe::NormalizedRect roi; + roi.set_x_center(0.5f); + roi.set_y_center(0.5f); + roi.set_width(1.0f); + roi.set_height(1.0f); + RunTest(GetRgb(GetFilePath("input.jpg")), + GetRgb(GetFilePath("noop_except_range.png")), + /*float_ranges=*/{{-1.0f, 1.0f}}, + /*int_ranges=*/{{0, 255}, {-128, 127}}, + /*tensor_width=*/std::nullopt, /*tensor_height=*/std::nullopt, + /*keep_aspect=*/false, BorderMode::kZero, roi); +} } // namespace } // namespace mediapipe diff --git a/mediapipe/calculators/tensor/image_to_tensor_utils.h b/mediapipe/calculators/tensor/image_to_tensor_utils.h index dc38ac7bc..a73529dce 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_utils.h +++ b/mediapipe/calculators/tensor/image_to_tensor_utils.h @@ -16,6 +16,7 @@ #define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_ #include +#include #include "absl/types/optional.h" #include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h" @@ -51,8 +52,8 @@ enum class BorderMode { kZero, kReplicate }; // Struct that host commonly accessed parameters used in the // ImageTo[Batch]TensorCalculator. struct OutputTensorParams { - int output_height; - int output_width; + std::optional output_height; + std::optional output_width; int output_batch; bool is_float_output; float range_min; @@ -161,10 +162,14 @@ absl::Status ValidateOptionOutputDims(const T& options) { << "The maximum of the output int tensor range must be less than or " "equal to 127."; } - RET_CHECK_GT(options.output_tensor_width(), 0) - << "Valid output tensor width is required."; - RET_CHECK_GT(options.output_tensor_height(), 0) - << "Valid output tensor height is required."; + if (options.has_output_tensor_width()) { + RET_CHECK_GT(options.output_tensor_width(), 0) + << "Valid output tensor width is required."; + } + if (options.has_output_tensor_height()) { + RET_CHECK_GT(options.output_tensor_height(), 0) + << "Valid output tensor height is required."; + } return absl::OkStatus(); } @@ -185,8 +190,12 @@ OutputTensorParams GetOutputTensorParams(const T& options) { params.range_min = options.output_tensor_float_range().min(); params.range_max = options.output_tensor_float_range().max(); } - params.output_width = options.output_tensor_width(); - params.output_height = options.output_tensor_height(); + if (options.has_output_tensor_width()) { + params.output_width = options.output_tensor_width(); + } + if (options.has_output_tensor_height()) { + params.output_height = options.output_tensor_height(); + } params.is_float_output = options.has_output_tensor_float_range(); params.output_batch = 1; return params; diff --git a/mediapipe/calculators/tensor/image_to_tensor_utils_test.cc b/mediapipe/calculators/tensor/image_to_tensor_utils_test.cc index 450bcba31..70f39d52e 100644 --- a/mediapipe/calculators/tensor/image_to_tensor_utils_test.cc +++ b/mediapipe/calculators/tensor/image_to_tensor_utils_test.cc @@ -14,6 +14,8 @@ #include "mediapipe/calculators/tensor/image_to_tensor_utils.h" +#include + #include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/parse_text_proto.h" @@ -172,6 +174,10 @@ constexpr char kValidIntProto[] = R"( output_tensor_height: 200 )"; +constexpr char kValidNoTensorDimsProto[] = R"( + output_tensor_float_range { min: 0 max: 255 } +)"; + TEST(ValidateOptionOutputDims, ImageToTensorCalcOptions) { const auto float_options = mediapipe::ParseTextProtoOrDie( @@ -193,13 +199,6 @@ TEST(ValidateOptionOutputDims, EmptyProto) { ValidateOptionOutputDims(options), StatusIs(absl::StatusCode::kInternal, HasSubstr("Valid output float tensor range is required"))); - - // Output width/height is not set. - options.mutable_output_tensor_float_range()->set_min(0.0); - options.mutable_output_tensor_float_range()->set_max(1.0); - EXPECT_THAT(ValidateOptionOutputDims(options), - StatusIs(absl::StatusCode::kInternal, - HasSubstr("Valid output tensor width is required"))); } TEST(GetOutputTensorParams, ImageToTensorCalcOptionsSetValues) { @@ -215,6 +214,20 @@ TEST(GetOutputTensorParams, ImageToTensorCalcOptionsSetValues) { EXPECT_EQ(params2.output_height, 200); } +TEST(GetOutputTensorParams, ImageToTensorCalcOptionsNoTensorDims) { + // Test valid option for ImageToTensorCalculatorOptions without output + // width/height. + const auto options = + mediapipe::ParseTextProtoOrDie( + kValidNoTensorDimsProto); + const auto params3 = GetOutputTensorParams(options); + EXPECT_EQ(params3.range_min, 0.0f); + EXPECT_EQ(params3.range_max, 255.0f); + EXPECT_EQ(params3.output_batch, 1); + EXPECT_EQ(params3.output_width, std::nullopt); + EXPECT_EQ(params3.output_height, std::nullopt); +} + TEST(GetBorderMode, GetBorderMode) { // Default to REPLICATE. auto border_mode =