ImageToTensorCalculator determines the output tensor size at runtime from the input image size when the output tensor width/height options are not set.
PiperOrigin-RevId: 511882195
This commit is contained in:
parent
0981367e84
commit
01c64082f1
|
@ -1033,6 +1033,7 @@ cc_test(
|
||||||
"@com_google_absl//absl/flags:flag",
|
"@com_google_absl//absl/flags:flag",
|
||||||
"@com_google_absl//absl/memory",
|
"@com_google_absl//absl/memory",
|
||||||
"@com_google_absl//absl/strings",
|
"@com_google_absl//absl/strings",
|
||||||
|
"@com_google_absl//absl/strings:str_format",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -195,8 +195,9 @@ class ImageToTensorCalculator : public Node {
|
||||||
#endif // MEDIAPIPE_DISABLE_GPU
|
#endif // MEDIAPIPE_DISABLE_GPU
|
||||||
|
|
||||||
RotatedRect roi = GetRoi(image->width(), image->height(), norm_rect);
|
RotatedRect roi = GetRoi(image->width(), image->height(), norm_rect);
|
||||||
ASSIGN_OR_RETURN(auto padding, PadRoi(options_.output_tensor_width(),
|
const int tensor_width = params_.output_width.value_or(image->width());
|
||||||
options_.output_tensor_height(),
|
const int tensor_height = params_.output_height.value_or(image->height());
|
||||||
|
ASSIGN_OR_RETURN(auto padding, PadRoi(tensor_width, tensor_height,
|
||||||
options_.keep_aspect_ratio(), &roi));
|
options_.keep_aspect_ratio(), &roi));
|
||||||
if (kOutLetterboxPadding(cc).IsConnected()) {
|
if (kOutLetterboxPadding(cc).IsConnected()) {
|
||||||
kOutLetterboxPadding(cc).Send(padding);
|
kOutLetterboxPadding(cc).Send(padding);
|
||||||
|
@ -214,8 +215,7 @@ class ImageToTensorCalculator : public Node {
|
||||||
|
|
||||||
Tensor::ElementType output_tensor_type =
|
Tensor::ElementType output_tensor_type =
|
||||||
GetOutputTensorType(image->UsesGpu(), params_);
|
GetOutputTensorType(image->UsesGpu(), params_);
|
||||||
Tensor tensor(output_tensor_type,
|
Tensor tensor(output_tensor_type, {1, tensor_height, tensor_width,
|
||||||
{1, params_.output_height, params_.output_width,
|
|
||||||
GetNumOutputChannels(*image)});
|
GetNumOutputChannels(*image)});
|
||||||
MP_RETURN_IF_ERROR((image->UsesGpu() ? gpu_converter_ : cpu_converter_)
|
MP_RETURN_IF_ERROR((image->UsesGpu() ? gpu_converter_ : cpu_converter_)
|
||||||
->Convert(*image, roi, params_.range_min,
|
->Convert(*image, roi, params_.range_min,
|
||||||
|
|
|
@ -54,6 +54,8 @@ message ImageToTensorCalculatorOptions {
|
||||||
BORDER_REPLICATE = 2;
|
BORDER_REPLICATE = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The width and height of the output tensor. If either is not set, the
|
||||||
|
// output tensor takes the corresponding dimension of the input image.
|
||||||
optional int32 output_tensor_width = 1;
|
optional int32 output_tensor_width = 1;
|
||||||
optional int32 output_tensor_height = 2;
|
optional int32 output_tensor_height = 2;
|
||||||
|
|
||||||
|
|
|
@ -13,10 +13,13 @@
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <optional>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "absl/flags/flag.h"
|
#include "absl/flags/flag.h"
|
||||||
#include "absl/memory/memory.h"
|
#include "absl/memory/memory.h"
|
||||||
|
#include "absl/strings/str_format.h"
|
||||||
#include "absl/strings/substitute.h"
|
#include "absl/strings/substitute.h"
|
||||||
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
|
#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
|
||||||
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
||||||
|
@ -51,13 +54,12 @@ std::string GetFilePath(absl::string_view filename) {
|
||||||
|
|
||||||
// Image to tensor test template.
|
// Image to tensor test template.
|
||||||
// No processing/assertions should be done after the function is invoked.
|
// No processing/assertions should be done after the function is invoked.
|
||||||
void RunTestWithInputImagePacket(const Packet& input_image_packet,
|
void RunTestWithInputImagePacket(
|
||||||
cv::Mat expected_result, float range_min,
|
const Packet& input_image_packet, cv::Mat expected_result, float range_min,
|
||||||
float range_max, int tensor_width,
|
float range_max, std::optional<int> tensor_width,
|
||||||
int tensor_height, bool keep_aspect,
|
std::optional<int> tensor_height, bool keep_aspect,
|
||||||
absl::optional<BorderMode> border_mode,
|
absl::optional<BorderMode> border_mode,
|
||||||
const mediapipe::NormalizedRect& roi,
|
const mediapipe::NormalizedRect& roi, bool output_int_tensor) {
|
||||||
bool output_int_tensor) {
|
|
||||||
std::string border_mode_str;
|
std::string border_mode_str;
|
||||||
if (border_mode) {
|
if (border_mode) {
|
||||||
switch (*border_mode) {
|
switch (*border_mode) {
|
||||||
|
@ -93,8 +95,9 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
|
||||||
})",
|
})",
|
||||||
range_min, range_max);
|
range_min, range_max);
|
||||||
}
|
}
|
||||||
auto graph_config = mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
auto graph_config =
|
||||||
absl::Substitute(R"(
|
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
|
||||||
|
R"(
|
||||||
input_stream: "input_image"
|
input_stream: "input_image"
|
||||||
input_stream: "roi"
|
input_stream: "roi"
|
||||||
node {
|
node {
|
||||||
|
@ -104,8 +107,8 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
|
||||||
output_stream: "TENSORS:tensor"
|
output_stream: "TENSORS:tensor"
|
||||||
options {
|
options {
|
||||||
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
[mediapipe.ImageToTensorCalculatorOptions.ext] {
|
||||||
output_tensor_width: $0
|
$0 # output tensor width
|
||||||
output_tensor_height: $1
|
$1 # output tensor height
|
||||||
keep_aspect_ratio: $2
|
keep_aspect_ratio: $2
|
||||||
$3 # output range
|
$3 # output range
|
||||||
$4 # border mode
|
$4 # border mode
|
||||||
|
@ -113,8 +116,13 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)",
|
)",
|
||||||
/*$0=*/tensor_width,
|
/*$0=*/tensor_width.has_value()
|
||||||
/*$1=*/tensor_height,
|
? absl::StrFormat("output_tensor_width: %d", tensor_width.value())
|
||||||
|
: "",
|
||||||
|
/*$1=*/tensor_height.has_value()
|
||||||
|
? absl::StrFormat("output_tensor_height: %d",
|
||||||
|
tensor_height.value())
|
||||||
|
: "",
|
||||||
/*$2=*/keep_aspect ? "true" : "false",
|
/*$2=*/keep_aspect ? "true" : "false",
|
||||||
/*$3=*/output_tensor_range,
|
/*$3=*/output_tensor_range,
|
||||||
/*$4=*/border_mode_str));
|
/*$4=*/border_mode_str));
|
||||||
|
@ -149,18 +157,18 @@ void RunTestWithInputImagePacket(const Packet& input_image_packet,
|
||||||
if (output_int_tensor) {
|
if (output_int_tensor) {
|
||||||
if (range_min < 0) {
|
if (range_min < 0) {
|
||||||
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kInt8);
|
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kInt8);
|
||||||
tensor_mat = cv::Mat(tensor_height, tensor_width,
|
tensor_mat = cv::Mat(expected_result.rows, expected_result.cols,
|
||||||
channels == 1 ? CV_8SC1 : CV_8SC3,
|
channels == 1 ? CV_8SC1 : CV_8SC3,
|
||||||
const_cast<int8*>(view.buffer<int8>()));
|
const_cast<int8*>(view.buffer<int8>()));
|
||||||
} else {
|
} else {
|
||||||
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kUInt8);
|
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kUInt8);
|
||||||
tensor_mat = cv::Mat(tensor_height, tensor_width,
|
tensor_mat = cv::Mat(expected_result.rows, expected_result.cols,
|
||||||
channels == 1 ? CV_8UC1 : CV_8UC3,
|
channels == 1 ? CV_8UC1 : CV_8UC3,
|
||||||
const_cast<uint8*>(view.buffer<uint8>()));
|
const_cast<uint8*>(view.buffer<uint8>()));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
|
EXPECT_EQ(tensor.element_type(), Tensor::ElementType::kFloat32);
|
||||||
tensor_mat = cv::Mat(tensor_height, tensor_width,
|
tensor_mat = cv::Mat(expected_result.rows, expected_result.cols,
|
||||||
channels == 1 ? CV_32FC1 : CV_32FC3,
|
channels == 1 ? CV_32FC1 : CV_32FC3,
|
||||||
const_cast<float*>(view.buffer<float>()));
|
const_cast<float*>(view.buffer<float>()));
|
||||||
}
|
}
|
||||||
|
@ -216,9 +224,9 @@ const std::vector<InputType> kInputTypesToTest = {InputType::kImageFrame,
|
||||||
|
|
||||||
void RunTest(cv::Mat input, cv::Mat expected_result,
|
void RunTest(cv::Mat input, cv::Mat expected_result,
|
||||||
std::vector<std::pair<float, float>> float_ranges,
|
std::vector<std::pair<float, float>> float_ranges,
|
||||||
std::vector<std::pair<int, int>> int_ranges, int tensor_width,
|
std::vector<std::pair<int, int>> int_ranges,
|
||||||
int tensor_height, bool keep_aspect,
|
std::optional<int> tensor_width, std::optional<int> tensor_height,
|
||||||
absl::optional<BorderMode> border_mode,
|
bool keep_aspect, absl::optional<BorderMode> border_mode,
|
||||||
const mediapipe::NormalizedRect& roi) {
|
const mediapipe::NormalizedRect& roi) {
|
||||||
for (auto input_type : kInputTypesToTest) {
|
for (auto input_type : kInputTypesToTest) {
|
||||||
for (auto float_range : float_ranges) {
|
for (auto float_range : float_ranges) {
|
||||||
|
@ -486,5 +494,18 @@ TEST(ImageToTensorCalculatorTest, NoOpExceptRangeBorderZero) {
|
||||||
BorderMode::kZero, roi);
|
BorderMode::kZero, roi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(ImageToTensorCalculatorTest, NoOpExceptRangeAndUseInputImageDims) {
|
||||||
|
mediapipe::NormalizedRect roi;
|
||||||
|
roi.set_x_center(0.5f);
|
||||||
|
roi.set_y_center(0.5f);
|
||||||
|
roi.set_width(1.0f);
|
||||||
|
roi.set_height(1.0f);
|
||||||
|
RunTest(GetRgb(GetFilePath("input.jpg")),
|
||||||
|
GetRgb(GetFilePath("noop_except_range.png")),
|
||||||
|
/*float_ranges=*/{{-1.0f, 1.0f}},
|
||||||
|
/*int_ranges=*/{{0, 255}, {-128, 127}},
|
||||||
|
/*tensor_width=*/std::nullopt, /*tensor_height=*/std::nullopt,
|
||||||
|
/*keep_aspect=*/false, BorderMode::kZero, roi);
|
||||||
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace mediapipe
|
} // namespace mediapipe
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_
|
#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_UTILS_H_
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
#include "absl/types/optional.h"
|
#include "absl/types/optional.h"
|
||||||
#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
|
#include "mediapipe/calculators/tensor/image_to_tensor_calculator.pb.h"
|
||||||
|
@ -51,8 +52,8 @@ enum class BorderMode { kZero, kReplicate };
|
||||||
// Struct that hosts commonly accessed parameters used in the
|
// Struct that hosts commonly accessed parameters used in the
|
||||||
// ImageTo[Batch]TensorCalculator.
|
// ImageTo[Batch]TensorCalculator.
|
||||||
struct OutputTensorParams {
|
struct OutputTensorParams {
|
||||||
int output_height;
|
std::optional<int> output_height;
|
||||||
int output_width;
|
std::optional<int> output_width;
|
||||||
int output_batch;
|
int output_batch;
|
||||||
bool is_float_output;
|
bool is_float_output;
|
||||||
float range_min;
|
float range_min;
|
||||||
|
@ -161,10 +162,14 @@ absl::Status ValidateOptionOutputDims(const T& options) {
|
||||||
<< "The maximum of the output int tensor range must be less than or "
|
<< "The maximum of the output int tensor range must be less than or "
|
||||||
"equal to 127.";
|
"equal to 127.";
|
||||||
}
|
}
|
||||||
|
if (options.has_output_tensor_width()) {
|
||||||
RET_CHECK_GT(options.output_tensor_width(), 0)
|
RET_CHECK_GT(options.output_tensor_width(), 0)
|
||||||
<< "Valid output tensor width is required.";
|
<< "Valid output tensor width is required.";
|
||||||
|
}
|
||||||
|
if (options.has_output_tensor_height()) {
|
||||||
RET_CHECK_GT(options.output_tensor_height(), 0)
|
RET_CHECK_GT(options.output_tensor_height(), 0)
|
||||||
<< "Valid output tensor height is required.";
|
<< "Valid output tensor height is required.";
|
||||||
|
}
|
||||||
return absl::OkStatus();
|
return absl::OkStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,8 +190,12 @@ OutputTensorParams GetOutputTensorParams(const T& options) {
|
||||||
params.range_min = options.output_tensor_float_range().min();
|
params.range_min = options.output_tensor_float_range().min();
|
||||||
params.range_max = options.output_tensor_float_range().max();
|
params.range_max = options.output_tensor_float_range().max();
|
||||||
}
|
}
|
||||||
|
if (options.has_output_tensor_width()) {
|
||||||
params.output_width = options.output_tensor_width();
|
params.output_width = options.output_tensor_width();
|
||||||
|
}
|
||||||
|
if (options.has_output_tensor_height()) {
|
||||||
params.output_height = options.output_tensor_height();
|
params.output_height = options.output_tensor_height();
|
||||||
|
}
|
||||||
params.is_float_output = options.has_output_tensor_float_range();
|
params.is_float_output = options.has_output_tensor_float_range();
|
||||||
params.output_batch = 1;
|
params.output_batch = 1;
|
||||||
return params;
|
return params;
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
|
|
||||||
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
#include "mediapipe/framework/formats/rect.pb.h"
|
#include "mediapipe/framework/formats/rect.pb.h"
|
||||||
#include "mediapipe/framework/port/gtest.h"
|
#include "mediapipe/framework/port/gtest.h"
|
||||||
#include "mediapipe/framework/port/parse_text_proto.h"
|
#include "mediapipe/framework/port/parse_text_proto.h"
|
||||||
|
@ -172,6 +174,10 @@ constexpr char kValidIntProto[] = R"(
|
||||||
output_tensor_height: 200
|
output_tensor_height: 200
|
||||||
)";
|
)";
|
||||||
|
|
||||||
|
constexpr char kValidNoTensorDimsProto[] = R"(
|
||||||
|
output_tensor_float_range { min: 0 max: 255 }
|
||||||
|
)";
|
||||||
|
|
||||||
TEST(ValidateOptionOutputDims, ImageToTensorCalcOptions) {
|
TEST(ValidateOptionOutputDims, ImageToTensorCalcOptions) {
|
||||||
const auto float_options =
|
const auto float_options =
|
||||||
mediapipe::ParseTextProtoOrDie<mediapipe::ImageToTensorCalculatorOptions>(
|
mediapipe::ParseTextProtoOrDie<mediapipe::ImageToTensorCalculatorOptions>(
|
||||||
|
@ -193,13 +199,6 @@ TEST(ValidateOptionOutputDims, EmptyProto) {
|
||||||
ValidateOptionOutputDims(options),
|
ValidateOptionOutputDims(options),
|
||||||
StatusIs(absl::StatusCode::kInternal,
|
StatusIs(absl::StatusCode::kInternal,
|
||||||
HasSubstr("Valid output float tensor range is required")));
|
HasSubstr("Valid output float tensor range is required")));
|
||||||
|
|
||||||
// Output width/height is not set.
|
|
||||||
options.mutable_output_tensor_float_range()->set_min(0.0);
|
|
||||||
options.mutable_output_tensor_float_range()->set_max(1.0);
|
|
||||||
EXPECT_THAT(ValidateOptionOutputDims(options),
|
|
||||||
StatusIs(absl::StatusCode::kInternal,
|
|
||||||
HasSubstr("Valid output tensor width is required")));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(GetOutputTensorParams, ImageToTensorCalcOptionsSetValues) {
|
TEST(GetOutputTensorParams, ImageToTensorCalcOptionsSetValues) {
|
||||||
|
@ -215,6 +214,20 @@ TEST(GetOutputTensorParams, ImageToTensorCalcOptionsSetValues) {
|
||||||
EXPECT_EQ(params2.output_height, 200);
|
EXPECT_EQ(params2.output_height, 200);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(GetOutputTensorParams, ImageToTensorCalcOptionsNoTensorDims) {
|
||||||
|
// Test valid option for ImageToTensorCalculatorOptions without output
|
||||||
|
// width/height.
|
||||||
|
const auto options =
|
||||||
|
mediapipe::ParseTextProtoOrDie<mediapipe::ImageToTensorCalculatorOptions>(
|
||||||
|
kValidNoTensorDimsProto);
|
||||||
|
const auto params3 = GetOutputTensorParams(options);
|
||||||
|
EXPECT_EQ(params3.range_min, 0.0f);
|
||||||
|
EXPECT_EQ(params3.range_max, 255.0f);
|
||||||
|
EXPECT_EQ(params3.output_batch, 1);
|
||||||
|
EXPECT_EQ(params3.output_width, std::nullopt);
|
||||||
|
EXPECT_EQ(params3.output_height, std::nullopt);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(GetBorderMode, GetBorderMode) {
|
TEST(GetBorderMode, GetBorderMode) {
|
||||||
// Default to REPLICATE.
|
// Default to REPLICATE.
|
||||||
auto border_mode =
|
auto border_mode =
|
||||||
|
|
Loading…
Reference in New Issue
Block a user