From 0ba35cf1a714af06bbc171252e27f91fa85661d2 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 2 Mar 2023 12:26:42 -0800 Subject: [PATCH] Internal change PiperOrigin-RevId: 513608516 --- mediapipe/calculators/image/BUILD | 6 + .../image/image_cropping_calculator.cc | 68 +++++--- .../image/image_cropping_calculator.h | 4 +- .../image/image_cropping_calculator_test.cc | 164 ++++++++++++++++++ 4 files changed, 219 insertions(+), 23 deletions(-) diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index 9aae8cfbc..18a1d60ae 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -301,12 +301,18 @@ cc_test( ":image_cropping_calculator", ":image_cropping_calculator_cc_proto", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", "//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/port:status", "//mediapipe/framework/tool:tag_map", "//mediapipe/framework/tool:tag_map_helper", + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", + "//mediapipe/gpu:image_frame_to_gpu_buffer_calculator", ], ) diff --git a/mediapipe/calculators/image/image_cropping_calculator.cc b/mediapipe/calculators/image/image_cropping_calculator.cc index 1a2b2e5b0..6776da7c8 100644 --- a/mediapipe/calculators/image/image_cropping_calculator.cc +++ b/mediapipe/calculators/image/image_cropping_calculator.cc @@ -223,8 +223,8 @@ absl::Status ImageCroppingCalculator::RenderCpu(CalculatorContext* cc) { cv::Mat input_mat = formats::MatView(&input_img); RectSpec specs = GetCropSpecs(cc, input_img.Width(), input_img.Height()); - int target_width = specs.width, target_height = specs.height, - rect_center_x = specs.center_x, rect_center_y = specs.center_y; 
+ int target_width = specs.width, target_height = specs.height; + float rect_center_x = specs.center_x, rect_center_y = specs.center_y; float rotation = specs.rotation; // Get border mode and value for OpenCV. @@ -244,19 +244,29 @@ absl::Status ImageCroppingCalculator::RenderCpu(CalculatorContext* cc) { output_width *= scale; output_height *= scale; - float dst_corners[8] = {0, - output_height - 1, - 0, - 0, - output_width - 1, - 0, - output_width - 1, - output_height - 1}; - cv::Mat dst_points = cv::Mat(4, 2, CV_32F, dst_corners); - cv::Mat projection_matrix = + float dst_corners[8] = { + 0, output_height, 0, 0, output_width, 0, output_width, output_height}; + const cv::Mat dst_points = cv::Mat(4, 2, CV_32F, dst_corners); + // The projection matrix is computed using the corners of rects, not the + // centers of corner pixels. + const cv::Mat projection_matrix = cv::getPerspectiveTransform(src_points, dst_points); + // The projection matrix needs to be adjusted because `cv::warpPerspective` is + // based on integer centers. 
+ // clang-format off + double shift_src_vec[9] = {1.0, 0.0, 0.5, + 0.0, 1.0, 0.5, + 0.0, 0.0, 1.0}; + double shift_dst_vec[9] = {1.0, 0.0, -0.5, + 0.0, 1.0, -0.5, + 0.0, 0.0, 1.0}; + // clang-format on + const cv::Mat shift_src = cv::Mat(3, 3, CV_64F, shift_src_vec); + const cv::Mat shift_dst = cv::Mat(3, 3, CV_64F, shift_dst_vec); + const cv::Mat adjusted_projection_matrix = + shift_dst * projection_matrix * shift_src; cv::Mat cropped_image; - cv::warpPerspective(input_mat, cropped_image, projection_matrix, + cv::warpPerspective(input_mat, cropped_image, adjusted_projection_matrix, cv::Size(output_width, output_height), /* flags = */ 0, /* borderMode = */ border_mode); @@ -416,8 +426,8 @@ void ImageCroppingCalculator::GetOutputDimensions(CalculatorContext* cc, int* dst_width, int* dst_height) { RectSpec specs = GetCropSpecs(cc, src_width, src_height); - int crop_width = specs.width, crop_height = specs.height, - x_center = specs.center_x, y_center = specs.center_y; + int crop_width = specs.width, crop_height = specs.height; + float x_center = specs.center_x, y_center = specs.center_y; float rotation = specs.rotation; const float half_width = crop_width / 2.0f; @@ -466,8 +476,8 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc, int crop_width = src_width; int crop_height = src_height; // Get the center of cropping box. Default is the at the center. - int x_center = src_width / 2; - int y_center = src_height / 2; + float x_center = src_width / 2.0f; + float y_center = src_height / 2.0f; // Get the rotation of the cropping box. float rotation = 0.0f; // Get the normalized width and height if specified by the inputs or options. 
@@ -494,8 +504,8 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc, if (norm_rect.width() > 0.0 && norm_rect.height() > 0.0) { normalized_width = norm_rect.width(); normalized_height = norm_rect.height(); - x_center = std::round(norm_rect.x_center() * src_width); - y_center = std::round(norm_rect.y_center() * src_height); + x_center = norm_rect.x_center() * src_width; + y_center = norm_rect.y_center() * src_height; rotation = norm_rect.rotation(); } } else if (cc->Inputs().HasTag(kWidthTag) && @@ -521,14 +531,30 @@ RectSpec ImageCroppingCalculator::GetCropSpecs(const CalculatorContext* cc, // present from the inputs. if (!cc->Inputs().HasTag(kRectTag) && !cc->Inputs().HasTag(kNormRectTag)) { if (options.has_norm_center_x() && options.has_norm_center_y()) { - x_center = std::round(options.norm_center_x() * src_width); - y_center = std::round(options.norm_center_y() * src_height); + x_center = options.norm_center_x() * src_width; + y_center = options.norm_center_y() * src_height; } if (options.has_rotation()) { rotation = options.rotation(); } } + if (rotation == 0.0f) { + // Adjust the center to the closest integer when the crop size is an + // even number and to the closest half-integer when the crop size is an + // odd number. 
+ if (crop_width % 2 == 0) { + x_center = std::round(x_center); + } else { + x_center = std::round(x_center + 0.5f) - 0.5f; + } + if (crop_height % 2 == 0) { + y_center = std::round(y_center); + } else { + y_center = std::round(y_center + 0.5f) - 0.5f; + } + } + return {crop_width, crop_height, x_center, y_center, rotation}; } diff --git a/mediapipe/calculators/image/image_cropping_calculator.h b/mediapipe/calculators/image/image_cropping_calculator.h index 39d99cc55..a371ee689 100644 --- a/mediapipe/calculators/image/image_cropping_calculator.h +++ b/mediapipe/calculators/image/image_cropping_calculator.h @@ -42,8 +42,8 @@ namespace mediapipe { struct RectSpec { int width; int height; - int center_x; - int center_y; + float center_x; + float center_y; float rotation; bool operator==(const RectSpec& rect) const { diff --git a/mediapipe/calculators/image/image_cropping_calculator_test.cc b/mediapipe/calculators/image/image_cropping_calculator_test.cc index 3c565282b..140d6b4e0 100644 --- a/mediapipe/calculators/image/image_cropping_calculator_test.cc +++ b/mediapipe/calculators/image/image_cropping_calculator_test.cc @@ -19,8 +19,12 @@ #include "mediapipe/calculators/image/image_cropping_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" #include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" #include "mediapipe/framework/port/parse_text_proto.h" #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/tool/tag_map.h" @@ -37,6 +41,166 @@ constexpr char kRectTag[] = "RECT"; constexpr char kHeightTag[] = "HEIGHT"; constexpr char kWidthTag[] = "WIDTH"; +std::unique_ptr GetInputFrame(int width, int height, + int channel) { + const int total_size = width * height * 
channel; + + auto image_format = channel == 4 ? mediapipe::ImageFormat::SRGBA + : mediapipe::ImageFormat::SRGB; + + auto input_frame = std::make_unique( + image_format, width, height, /*alignment_boundary =*/1); + for (int i = 0; i < total_size; ++i) { + input_frame->MutablePixelData()[i] = i % 256; + } + + return input_frame; +} + +// Test identity function, where cropping size is same as input size +TEST(ImageCroppingCalculatorTest, IdentityFunctionCropWithOriginalSize) { + auto calculator_node = + ParseTextProtoOrDie( + absl::Substitute( + R"pb( + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE:input_frames" + output_stream: "IMAGE:cropped_output_frames" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + width: $0 + height: $1 + } + } + )pb", + input_width, input_height)); + mediapipe::CalculatorRunner runner(calculator_node); + + // Input frame. + const auto input_frame = GetInputFrame(input_width, input_height, 3); + auto input_frame_packet = + mediapipe::MakePacket(std::move(*input_frame)); + runner.MutableInputs()->Tag("IMAGE").packets.push_back( + input_frame_packet.At(mediapipe::Timestamp(1))); + + MP_ASSERT_OK(runner.Run()); + + const auto& outputs = runner.Outputs(); + EXPECT_EQ(outputs.NumEntries(), 1); + const auto& output_image = + outputs.Tag("IMAGE").packets[0].Get(); + + const auto expected_output = GetInputFrame(input_width, input_height, 3); + cv::Mat output_mat = formats::MatView(&output_image); + cv::Mat expected_mat = formats::MatView(expected_output.get()); + double max_diff = cv::norm(expected_mat, output_mat, cv::NORM_INF); + EXPECT_EQ(max_diff, 0); +} // TEST + +// Test identity function, where cropping size is same as input size. +// When an image has an odd number for its size, its center falls on a +// fractional pixel. As a result, the values for center_x and center_y need to +// be of type float. 
+TEST(ImageCroppingCalculatorTest, IdentityFunctionCropWithOddSize) { + const int input_width = 99; + const int input_height = 99; + + auto calculator_node = + ParseTextProtoOrDie( + absl::Substitute( + R"pb( + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE:input_frames" + output_stream: "IMAGE:cropped_output_frames" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + width: $0 + height: $1 + } + } + )pb", + input_width, input_height)); + mediapipe::CalculatorRunner runner(calculator_node); + + // Input frame. + const auto input_frame = GetInputFrame(input_width, input_height, 3); + auto input_frame_packet = + mediapipe::MakePacket(std::move(*input_frame)); + runner.MutableInputs()->Tag("IMAGE").packets.push_back( + input_frame_packet.At(mediapipe::Timestamp(1))); + + MP_ASSERT_OK(runner.Run()); + + const auto& outputs = runner.Outputs(); + EXPECT_EQ(outputs.NumEntries(), 1); + const auto& output_image = + outputs.Tag("IMAGE").packets[0].Get(); + + const auto expected_output = GetInputFrame(input_width, input_height, 3); + cv::Mat output_mat = formats::MatView(&output_image); + cv::Mat expected_mat = formats::MatView(expected_output.get()); + double max_diff = cv::norm(expected_mat, output_mat, cv::NORM_INF); + EXPECT_EQ(max_diff, 0); +} // TEST + +// Test identity function on GPU, where cropping size is same as input size. 
+TEST(ImageCroppingCalculatorTest, IdentityFunctionCropWithOriginalSizeGPU) { + mediapipe::CalculatorGraphConfig config = + ParseTextProtoOrDie(absl::Substitute( + R"pb( + input_stream: "input_frames" + node { + calculator: "ImageFrameToGpuBufferCalculator" + input_stream: "input_frames" + output_stream: "input_frames_gpu" + } + node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE_GPU:input_frames_gpu" + output_stream: "IMAGE_GPU:cropped_output_frames_gpu" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + width: $0 + height: $1 + } + } + } + node { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "cropped_output_frames_gpu" + output_stream: "cropped_output_frames" + } + )pb", + input_width, input_height)); + + std::vector output_packets; + tool::AddVectorSink("cropped_output_frames", &config, &output_packets); + + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(config)); + + // Input frame. + const auto input_frame = GetInputFrame(input_width, input_height, 4); + auto input_frame_packet = + mediapipe::MakePacket(std::move(*input_frame)); + + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_frames", input_frame_packet.At(mediapipe::Timestamp(1)))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Get and process results. + const ImageFrame& output_image = output_packets[0].Get(); + std::cout << output_image.Width(); + + const auto expected_output = GetInputFrame(input_width, input_height, 4); + cv::Mat output_mat = formats::MatView(&output_image); + cv::Mat expected_mat = formats::MatView(expected_output.get()); + double max_diff = cv::norm(expected_mat, output_mat, cv::NORM_INF); + + EXPECT_EQ(max_diff, 0); +} // TEST + // Test normal case, where norm_width and norm_height in options are set. TEST(ImageCroppingCalculatorTest, GetCroppingDimensionsNormal) { auto calculator_node =