# Patch summary (text before the first "diff --git" header is ignored by
# patch tools):
#   * image_transformation_calculator.cc: on the GPU path, when scale_mode_ is
#     FIT, clear the bound framebuffer to padding_color_ before drawing.
#   * image_transformation_calculator.proto: update the comments on
#     constant_padding and padding_color accordingly.
#   * image_transformation_calculator_test.cc: use ABSL_QCHECK_* for the
#     graph/runner setup checks and add the FitScalingClearsBackground
#     regression test.
#   * BUILD: add the deps needed by the test changes.
# NOTE(review): this copy of the patch had lost its line breaks, leading
# indentation and all text inside angle brackets. Line structure was rebuilt
# so that every hunk matches its "@@" line counts; template arguments and
# system header names (in particular the three context headers in the first
# test-file hunk) were reconstructed from usage. Confirm against the upstream
# commit before applying.
diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD
index e32e7165d..24d8e4fea 100644
--- a/mediapipe/calculators/image/BUILD
+++ b/mediapipe/calculators/image/BUILD
@@ -301,9 +301,11 @@ cc_test(
         "//mediapipe/framework/port:parse_text_proto",
         "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
         "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator",
+        "//mediapipe/gpu:multi_pool",
         "//third_party:opencv",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/flags:flag",
+        "@com_google_absl//absl/log:absl_check",
         "@com_google_absl//absl/strings",
         "@com_google_googletest//:gtest_main",
     ],
diff --git a/mediapipe/calculators/image/image_transformation_calculator.cc b/mediapipe/calculators/image/image_transformation_calculator.cc
index 8c6f715a0..15b534517 100644
--- a/mediapipe/calculators/image/image_transformation_calculator.cc
+++ b/mediapipe/calculators/image/image_transformation_calculator.cc
@@ -656,6 +656,15 @@ absl::Status ImageTransformationCalculator::RenderGpu(CalculatorContext* cc) {
                                                   input.format());
 
   gpu_helper_.BindFramebuffer(dst);
+
+  if (scale_mode_ == mediapipe::ScaleMode::FIT) {
+    // In kFit scale mode, the rendered quad does not fill the whole
+    // framebuffer, so clear it beforehand.
+    glClearColor(padding_color_[0] / 255.0f, padding_color_[1] / 255.0f,
+                 padding_color_[2] / 255.0f, 1.0f);
+    glClear(GL_COLOR_BUFFER_BIT);
+  }
+
   glActiveTexture(GL_TEXTURE1);
   glBindTexture(src1.target(), src1.name());
 
diff --git a/mediapipe/calculators/image/image_transformation_calculator.proto b/mediapipe/calculators/image/image_transformation_calculator.proto
index 0e2453a46..732f0c55e 100644
--- a/mediapipe/calculators/image/image_transformation_calculator.proto
+++ b/mediapipe/calculators/image/image_transformation_calculator.proto
@@ -46,13 +46,14 @@ message ImageTransformationCalculatorOptions {
   optional bool flip_horizontally = 5 [default = false];
   // Scale mode.
   optional ScaleMode.Mode scale_mode = 6;
-  // Padding type. This option is only used when the scale mode is FIT.
-  // Default is to use BORDER_CONSTANT. If set to false, it will use
-  // BORDER_REPLICATE instead.
+  // Padding type. This option is only used when the scale mode is FIT. If set
+  // to true (default), a constant border is added with color specified by
+  // padding_color. If set to false, a border is added by replicating edge
+  // pixels (only supported for CPU).
   optional bool constant_padding = 7 [default = true];
 
   // The color for the padding. This option is only used when the scale mode is
-  // FIT. Default is black. This is for CPU only.
+  // FIT. Default is black.
   optional Color padding_color = 8;
 
   // Interpolation method to use. Note that on CPU when LINEAR is specified,
diff --git a/mediapipe/calculators/image/image_transformation_calculator_test.cc b/mediapipe/calculators/image/image_transformation_calculator_test.cc
index 48828cc70..09b9b0076 100644
--- a/mediapipe/calculators/image/image_transformation_calculator_test.cc
+++ b/mediapipe/calculators/image/image_transformation_calculator_test.cc
@@ -1,9 +1,11 @@
+#include <algorithm>
 #include <string>
 #include <utility>
 #include <vector>
 
 #include "absl/container/flat_hash_set.h"
 #include "absl/flags/flag.h"
+#include "absl/log/absl_check.h"
 #include "absl/strings/substitute.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/calculator_framework.h"
@@ -16,10 +18,14 @@
 #include "mediapipe/framework/port/opencv_imgcodecs_inc.h"
 #include "mediapipe/framework/port/opencv_imgproc_inc.h"
 #include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/gpu/multi_pool.h"
 #include "testing/base/public/gmock.h"
 #include "testing/base/public/googletest.h"
+#include "testing/base/public/gunit.h"
 #include "third_party/OpenCV/core.hpp"  // IWYU pragma: keep
+#include "third_party/OpenCV/core/base.hpp"
 #include "third_party/OpenCV/core/mat.hpp"
+#include "third_party/OpenCV/core/types.hpp"
 
 namespace mediapipe {
 
@@ -76,11 +82,12 @@ TEST(ImageTransformationCalculatorTest, NearestNeighborResizing) {
         ->Tag("OUTPUT_DIMENSIONS")
         .packets.push_back(input_output_dim_packet.At(Timestamp(0)));
 
-    MP_ASSERT_OK(runner.Run());
+    ABSL_QCHECK_OK(runner.Run());
     const auto& outputs = runner.Outputs();
-    ASSERT_EQ(outputs.NumEntries(), 1);
+    ABSL_QCHECK_EQ(outputs.NumEntries(), 1);
     const std::vector<Packet>& packets = outputs.Tag("IMAGE").packets;
-    ASSERT_EQ(packets.size(), 1);
+    ABSL_QCHECK_EQ(packets.size(), 1);
+
     const auto& result = packets[0].Get<ImageFrame>();
     ASSERT_EQ(output_dim.first, result.Width());
     ASSERT_EQ(output_dim.second, result.Height());
@@ -137,11 +144,12 @@ TEST(ImageTransformationCalculatorTest,
         ->Tag("OUTPUT_DIMENSIONS")
         .packets.push_back(input_output_dim_packet.At(Timestamp(0)));
 
-    MP_ASSERT_OK(runner.Run());
+    ABSL_QCHECK_OK(runner.Run());
     const auto& outputs = runner.Outputs();
-    ASSERT_EQ(outputs.NumEntries(), 1);
+    ABSL_QCHECK_EQ(outputs.NumEntries(), 1);
     const std::vector<Packet>& packets = outputs.Tag("IMAGE").packets;
-    ASSERT_EQ(packets.size(), 1);
+    ABSL_QCHECK_EQ(packets.size(), 1);
+
     const auto& result = packets[0].Get<ImageFrame>();
     ASSERT_EQ(output_dim.first, result.Width());
     ASSERT_EQ(output_dim.second, result.Height());
@@ -207,17 +215,17 @@ TEST(ImageTransformationCalculatorTest, NearestNeighborResizingGpu) {
     tool::AddVectorSink("output_image", &graph_config, &output_image_packets);
 
     CalculatorGraph graph(graph_config);
-    MP_ASSERT_OK(graph.StartRun({}));
+    ABSL_QCHECK_OK(graph.StartRun({}));
 
-    MP_ASSERT_OK(graph.AddPacketToInputStream(
+    ABSL_QCHECK_OK(graph.AddPacketToInputStream(
         "input_image",
         MakePacket<ImageFrame>(std::move(input_image)).At(Timestamp(0))));
-    MP_ASSERT_OK(graph.AddPacketToInputStream(
+    ABSL_QCHECK_OK(graph.AddPacketToInputStream(
         "image_size",
         MakePacket<std::pair<int, int>>(output_dim).At(Timestamp(0))));
 
-    MP_ASSERT_OK(graph.WaitUntilIdle());
-    ASSERT_THAT(output_image_packets, testing::SizeIs(1));
+    ABSL_QCHECK_OK(graph.WaitUntilIdle());
+    ABSL_QCHECK_EQ(output_image_packets.size(), 1);
 
     const auto& output_image = output_image_packets[0].Get<ImageFrame>();
     ASSERT_EQ(output_dim.first, output_image.Width());
@@ -287,16 +295,16 @@ TEST(ImageTransformationCalculatorTest,
     tool::AddVectorSink("output_image", &graph_config, &output_image_packets);
 
     CalculatorGraph graph(graph_config);
-    MP_ASSERT_OK(graph.StartRun({}));
+    ABSL_QCHECK_OK(graph.StartRun({}));
 
-    MP_ASSERT_OK(graph.AddPacketToInputStream(
+    ABSL_QCHECK_OK(graph.AddPacketToInputStream(
         "input_image", input_image_packet.At(Timestamp(0))));
-    MP_ASSERT_OK(graph.AddPacketToInputStream(
+    ABSL_QCHECK_OK(graph.AddPacketToInputStream(
         "image_size",
         MakePacket<std::pair<int, int>>(output_dim).At(Timestamp(0))));
 
-    MP_ASSERT_OK(graph.WaitUntilIdle());
-    ASSERT_THAT(output_image_packets, testing::SizeIs(1));
+    ABSL_QCHECK_OK(graph.WaitUntilIdle());
+    ABSL_QCHECK_EQ(output_image_packets.size(), 1);
 
     const auto& output_image = output_image_packets[0].Get<ImageFrame>();
     ASSERT_EQ(output_dim.first, output_image.Width());
@@ -311,5 +319,112 @@ TEST(ImageTransformationCalculatorTest,
   }
 }
 
+TEST(ImageTransformationCalculatorTest, FitScalingClearsBackground) {
+  // Regression test for not clearing the background in FIT scaling mode.
+  // First scale an all-red (=r) image from 8x4 to 8x4, so it's a plain copy:
+  //   rrrrrrrr
+  //   rrrrrrrr
+  //   rrrrrrrr
+  //   rrrrrrrr
+  // Then scale an all-blue image from 4x4 to 8x4 in FIT mode. This should
+  // introduce dark yellow (=y) letterboxes left and right due to padding_color:
+  //   yybbbbyy
+  //   yybbbbyy
+  //   yybbbbyy
+  //   yybbbbyy
+  // We make sure that the all-red buffer gets reused. Without clearing the
+  // background, the blue (=b) image will have red letterboxes:
+  //   rrbbbbrr
+  //   rrbbbbrr
+  //   rrbbbbrr
+  //   rrbbbbrr
+
+  constexpr int kSmall = 4, kLarge = 8;
+  ImageFrame input_image_red(ImageFormat::SRGBA, kLarge, kSmall);
+  cv::Mat input_image_red_mat = formats::MatView(&input_image_red);
+  input_image_red_mat = cv::Scalar(255, 0, 0, 255);
+
+  ImageFrame input_image_blue(ImageFormat::SRGBA, kSmall, kSmall);
+  cv::Mat input_image_blue_mat = formats::MatView(&input_image_blue);
+  input_image_blue_mat = cv::Scalar(0, 0, 255, 255);
+
+  Packet input_image_red_packet =
+      MakePacket<ImageFrame>(std::move(input_image_red));
+  Packet input_image_blue_packet =
+      MakePacket<ImageFrame>(std::move(input_image_blue));
+
+  CalculatorGraphConfig graph_config =
+      ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
+          R"pb(
+            input_stream: "input_image"
+            output_stream: "output_image"
+
+            node {
+              calculator: "ImageFrameToGpuBufferCalculator"
+              input_stream: "input_image"
+              output_stream: "input_image_gpu"
+            }
+
+            node {
+              calculator: "ImageTransformationCalculator"
+              input_stream: "IMAGE_GPU:input_image_gpu"
+              output_stream: "IMAGE_GPU:output_image_gpu"
+              options: {
+                [mediapipe.ImageTransformationCalculatorOptions.ext]: {
+                  scale_mode: FIT
+                  output_width: $0,
+                  output_height: $1,
+                  padding_color: { red: 128, green: 128, blue: 0 }
+                }
+              }
+            }
+
+            node {
+              calculator: "GpuBufferToImageFrameCalculator"
+              input_stream: "output_image_gpu"
+              output_stream: "output_image"
+            })pb",
+          kLarge, kSmall));
+
+  std::vector<Packet> output_image_packets;
+  tool::AddVectorSink("output_image", &graph_config, &output_image_packets);
+
+  CalculatorGraph graph(graph_config);
+  ABSL_QCHECK_OK(graph.StartRun({}));
+
+  // Send the red image multiple times to cause the GPU pool to actually use
+  // a pool.
+  int num_red_packets =
+      std::max(kDefaultMultiPoolOptions.min_requests_before_pool, 1);
+  for (int n = 0; n < num_red_packets; ++n) {
+    ABSL_QCHECK_OK(graph.AddPacketToInputStream(
+        "input_image", input_image_red_packet.At(Timestamp(n))));
+  }
+  ABSL_QCHECK_OK(graph.AddPacketToInputStream(
+      "input_image", input_image_blue_packet.At(Timestamp(num_red_packets))));
+
+  ABSL_QCHECK_OK(graph.WaitUntilIdle());
+  ABSL_QCHECK_EQ(output_image_packets.size(), num_red_packets + 1);
+
+  const auto& output_image_red = output_image_packets[0].Get<ImageFrame>();
+  const auto& output_image_blue =
+      output_image_packets[num_red_packets].Get<ImageFrame>();
+
+  ABSL_QCHECK_EQ(output_image_red.Width(), kLarge);
+  ABSL_QCHECK_EQ(output_image_red.Height(), kSmall);
+  ABSL_QCHECK_EQ(output_image_blue.Width(), kLarge);
+  ABSL_QCHECK_EQ(output_image_blue.Height(), kSmall);
+
+  cv::Mat output_image_blue_mat = formats::MatView(&output_image_blue);
+  ImageFrame expected_image_blue(ImageFormat::SRGBA, kLarge, kSmall);
+  cv::Mat expected_image_blue_mat = formats::MatView(&expected_image_blue);
+  expected_image_blue_mat = cv::Scalar(128, 128, 0, 255);
+  cv::Rect rect((kLarge - kSmall) / 2, 0, kSmall, kSmall);
+  cv::rectangle(expected_image_blue_mat, rect, cv::Scalar(0, 0, 255, 255),
+                cv::FILLED);
+  EXPECT_EQ(cv::sum(cv::sum(output_image_blue_mat != expected_image_blue_mat)),
+            cv::Scalar(0));
+}
+
 }  // namespace
 }  // namespace mediapipe