From 124a4de08df42a1487406ebe4b6a721bd771e2d3 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Thu, 14 Sep 2023 02:53:07 -0700 Subject: [PATCH] Clean up TensorConverterCalculator flipping behavior Returns an error if - gpu_origin is specified for a CPU image, and - gpu_origin and flip_vertically are both specified. Adds a test for an IMAGE_GPU input to validate flipping. PiperOrigin-RevId: 565311456 --- mediapipe/calculators/tensor/BUILD | 6 + .../tensor/tensor_converter_calculator.cc | 188 ++++++++++-------- .../tensor/tensor_converter_calculator.proto | 6 +- .../tensor_converter_calculator_test.cc | 59 +++++- 4 files changed, 165 insertions(+), 94 deletions(-) diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD index e74ecffe6..7ebba901c 100644 --- a/mediapipe/calculators/tensor/BUILD +++ b/mediapipe/calculators/tensor/BUILD @@ -660,7 +660,12 @@ cc_library( "//mediapipe/gpu:gpu_buffer_format", "//mediapipe/gpu:gpu_origin_cc_proto", "//mediapipe/util:resource_util", + "@com_google_absl//absl/log", "@com_google_absl//absl/log:absl_check", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings:str_format", ] + select({ "//mediapipe/gpu:disable_gpu": [], @@ -715,6 +720,7 @@ cc_test( "//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/tool:validate_type", "@com_google_absl//absl/memory", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", ], ) diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.cc b/mediapipe/calculators/tensor/tensor_converter_calculator.cc index af5b71d3a..f8268f3d2 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.cc @@ -17,6 +17,7 @@ #include #include "absl/log/absl_check.h" +#include "absl/log/absl_log.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_format.h" @@ -57,11 +58,25 @@ int NumGroups(const int size, const int group_size) { // NOLINT } absl::StatusOr ShouldFlipVertically( - const mediapipe::TensorConverterCalculatorOptions& options) { + const mediapipe::TensorConverterCalculatorOptions& options, bool use_gpu) { + if (options.has_flip_vertically() && options.has_gpu_origin()) { + return absl::FailedPreconditionError(absl::StrFormat( + "Cannot specify both flip_vertically and gpu_origin options")); + } + if (!options.has_gpu_origin()) { + // Fall back to flip_vertically. return options.flip_vertically(); } + // Warn if gpu_origin is specified with a CPU input image. + // Those are always TOP_LEFT, so no flipping is necessary. + if (!use_gpu) { + ABSL_LOG(WARNING) + << "Ignoring gpu_origin option since IMAGE_GPU input is not specified"; + return false; + } + switch (options.gpu_origin()) { case mediapipe::GpuOrigin::TOP_LEFT: return false; @@ -140,7 +155,7 @@ class TensorConverterCalculator : public CalculatorBase { private: absl::Status InitGpu(CalculatorContext* cc); - absl::Status LoadOptions(CalculatorContext* cc); + absl::Status LoadOptions(CalculatorContext* cc, bool use_gpu); template absl::Status NormalizeImage(const ImageFrame& image_frame, bool flip_vertically, float* tensor_ptr); @@ -176,7 +191,8 @@ absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) { RET_CHECK(static_cast(cc->Inputs().HasTag(kImageFrameTag)) + static_cast(cc->Inputs().HasTag(kGpuBufferTag)) + static_cast(cc->Inputs().HasTag(kMatrixTag)) == - 1); + 1) + << "Only one input tag of {IMAGE, IMAGE_GPU, MATRIX} may be specified"; if (cc->Inputs().HasTag(kImageFrameTag)) { cc->Inputs().Tag(kImageFrameTag).Set(); @@ -204,8 +220,6 @@ absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) { absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) { cc->SetOffset(TimestampDiff(0)); - MP_RETURN_IF_ERROR(LoadOptions(cc)); - #if !MEDIAPIPE_DISABLE_GPU if (cc->Inputs().HasTag(kGpuBufferTag)) { use_gpu_ = true; @@ -218,6 +232,8 @@ absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) { } #endif // !MEDIAPIPE_DISABLE_GPU + MP_RETURN_IF_ERROR(LoadOptions(cc, use_gpu_)); + return absl::OkStatus(); } @@ -436,7 +452,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { // Shader to convert GL Texture to Metal Buffer, // with normalization to either: [0,1] or [-1,1]. const std::string shader_source = absl::Substitute( - R"( + R"glsl( #include using namespace metal; @@ -455,7 +471,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { $3 // g & b channels $4 // alpha channel } - )", + )glsl", /*$0=*/ output_range_.has_value() ? absl::Substitute("pixel = pixel * half($0) + half($1);", @@ -465,8 +481,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { /*$1=*/max_num_channels_, /*$2=*/flip_vertically_ ? "(in_tex.get_height() - 1 - gid.y)" : "gid.y", /*$3=*/ - single_channel ? "" : R"(out_buf[linear_index + 1] = pixel.y; - out_buf[linear_index + 2] = pixel.z;)", + single_channel ? "" : R"glsl(out_buf[linear_index + 1] = pixel.y; + out_buf[linear_index + 2] = pixel.z;)glsl", /*$4=*/include_alpha ? "out_buf[linear_index + 3] = pixel.w;" : ""); NSString* library_source = @@ -484,17 +500,17 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " << [[error localizedDescription] UTF8String]; #elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 - MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &include_alpha, + MP_RETURN_IF_ERROR( + gpu_helper_.RunInGlContext([this, &include_alpha, #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 - &input, + &input, #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 - &single_channel]() - -> absl::Status { + &single_channel]() -> absl::Status { #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 - // Shader to convert GL Texture to Shader Storage Buffer Object (SSBO), - // with normalization to either: [0,1] or [-1,1]. - const std::string shader_source = absl::Substitute( - R"( #version 310 es + // Shader to convert GL Texture to Shader Storage Buffer Object (SSBO), + // with normalization to either: [0,1] or [-1,1]. + const std::string shader_source = absl::Substitute( + R"glsl( #version 310 es layout(local_size_x = $0, local_size_y = $0) in; layout(binding = 0) uniform sampler2D input_texture; layout(std430, binding = 1) buffer Output {float elements[];} output_data; @@ -508,38 +524,40 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { output_data.elements[linear_index + 0] = pixel.x; // r channel $5 // g & b channels $6 // alpha channel - })", - /*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(), - /*$3=*/ - output_range_.has_value() - ? absl::Substitute("pixel = pixel * float($0) + float($1);", - (output_range_->second - output_range_->first), - output_range_->first) - : "", - /*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y", - /*$5=*/ - single_channel ? "" - : R"(output_data.elements[linear_index + 1] = pixel.y; - output_data.elements[linear_index + 2] = pixel.z;)", - /*$6=*/ - include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;" - : "", - /*$7=*/max_num_channels_); - GLuint shader = glCreateShader(GL_COMPUTE_SHADER); - const GLchar* sources[] = {shader_source.c_str()}; - glShaderSource(shader, 1, sources, NULL); - glCompileShader(shader); - GLint compiled = GL_FALSE; - glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); - RET_CHECK(compiled == GL_TRUE); - to_buffer_program_ = glCreateProgram(); - glAttachShader(to_buffer_program_, shader); - glDeleteShader(shader); - glLinkProgram(to_buffer_program_); + })glsl", + /*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(), + /*$3=*/ + output_range_.has_value() + ? absl::Substitute( + "pixel = pixel * float($0) + float($1);", + (output_range_->second - output_range_->first), + output_range_->first) + : "", + /*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y", + /*$5=*/ + single_channel + ? "" + : R"glsl(output_data.elements[linear_index + 1] = pixel.y; + output_data.elements[linear_index + 2] = pixel.z;)glsl", + /*$6=*/ + include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;" + : "", + /*$7=*/max_num_channels_); + GLuint shader = glCreateShader(GL_COMPUTE_SHADER); + const GLchar* sources[] = {shader_source.c_str()}; + glShaderSource(shader, 1, sources, NULL); + glCompileShader(shader); + GLint compiled = GL_FALSE; + glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); + RET_CHECK(compiled == GL_TRUE); + to_buffer_program_ = glCreateProgram(); + glAttachShader(to_buffer_program_, shader); + glDeleteShader(shader); + glLinkProgram(to_buffer_program_); #else - // OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion. - const std::string shader_source = absl::Substitute( - R"( + // OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion. + const std::string shader_source = absl::Substitute( + R"glsl( #if __VERSION__ < 130 #define in varying #endif // __VERSION__ < 130 @@ -565,49 +583,51 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) { fragColor.r = pixel.r; // r channel $3 // g & b channels $4 // alpha channel - })", - /*$0=*/single_channel ? "vec1" : "vec4", - /*$1=*/ - flip_vertically_ - ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);" - : "sample_coordinate;", - /*$2=*/output_range_.has_value() - ? absl::Substitute("pixel = pixel * float($0) + float($1);", - (output_range_->second - output_range_->first), - output_range_->first) - : "", - /*$3=*/single_channel ? "" : R"(fragColor.g = pixel.g; - fragColor.b = pixel.b;)", - /*$4=*/ - include_alpha ? "fragColor.a = pixel.a;" - : (single_channel ? "" : "fragColor.a = 1.0;")); + })glsl", + /*$0=*/single_channel ? "vec1" : "vec4", + /*$1=*/ + flip_vertically_ + ? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);" + : "sample_coordinate;", + /*$2=*/output_range_.has_value() + ? absl::Substitute( + "pixel = pixel * float($0) + float($1);", + (output_range_->second - output_range_->first), + output_range_->first) + : "", + /*$3=*/single_channel ? "" : R"glsl(fragColor.g = pixel.g; + fragColor.b = pixel.b;)glsl", + /*$4=*/ + include_alpha ? "fragColor.a = pixel.a;" + : (single_channel ? "" : "fragColor.a = 1.0;")); - const GLint attr_location[NUM_ATTRIBUTES] = { - ATTRIB_VERTEX, - ATTRIB_TEXTURE_POSITION, - }; - const GLchar* attr_name[NUM_ATTRIBUTES] = { - "position", - "texture_coordinate", - }; - // shader program and params - mediapipe::GlhCreateProgram( - mediapipe::kBasicVertexShader, shader_source.c_str(), NUM_ATTRIBUTES, - &attr_name[0], attr_location, &to_tex2d_program_); - RET_CHECK(to_tex2d_program_) << "Problem initializing the program."; - glUseProgram(to_tex2d_program_); - glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1); - glGenFramebuffers(1, &framebuffer_); + const GLint attr_location[NUM_ATTRIBUTES] = { + ATTRIB_VERTEX, + ATTRIB_TEXTURE_POSITION, + }; + const GLchar* attr_name[NUM_ATTRIBUTES] = { + "position", + "texture_coordinate", + }; + // shader program and params + mediapipe::GlhCreateProgram( + mediapipe::kBasicVertexShader, shader_source.c_str(), + NUM_ATTRIBUTES, &attr_name[0], attr_location, &to_tex2d_program_); + RET_CHECK(to_tex2d_program_) << "Problem initializing the program."; + glUseProgram(to_tex2d_program_); + glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1); + glGenFramebuffers(1, &framebuffer_); #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 - return absl::OkStatus(); - })); + return absl::OkStatus(); + })); #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 #endif // !MEDIAPIPE_DISABLE_GPU return absl::OkStatus(); } -absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) { +absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc, + bool use_gpu) { // Get calculator options specified in the graph. const auto& options = cc->Options<::mediapipe::TensorConverterCalculatorOptions>(); @@ -635,7 +655,7 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) { } // Get y-flip mode. - ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options)); + ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options, use_gpu)); // Get row_major_matrix mode. row_major_matrix_ = options.row_major_matrix(); diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.proto b/mediapipe/calculators/tensor/tensor_converter_calculator.proto index 194dd417e..2c5e0be56 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.proto +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.proto @@ -44,12 +44,14 @@ message TensorConverterCalculatorOptions { // with a coordinate system where the origin is at the bottom-left corner // (e.g., in OpenGL) whereas the ML model expects an image with a top-left // origin. - // Prefer gpu_origin over this field. + // Prefer gpu_origin over this field when using GPU input images. optional bool flip_vertically = 2 [default = false]; - // Determines when the input image should be flipped vertically. + // Determines when the input GPU image should be flipped vertically. // See GpuOrigin.Mode for more information. + // Affects only IMAGE_GPU inputs. // If unset, falls back to flip_vertically for backwards compatibility. + // Cannot set both gpu_origin and flip_vertically. optional GpuOrigin.Mode gpu_origin = 10; // Controls how many channels of the input image get passed through to the diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc b/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc index b3df01522..0394daebd 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -19,6 +20,7 @@ #include #include "absl/memory/memory.h" +#include "absl/status/status.h" #include "absl/strings/substitute.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_runner.h" @@ -45,6 +47,7 @@ constexpr char kTransposeOptionsString[] = } // namespace using RandomEngine = std::mt19937_64; +using ::testing::HasSubstr; const uint32_t kSeed = 1234; const int kNumSizes = 8; const int sizes[kNumSizes][2] = {{1, 1}, {12, 1}, {1, 9}, {2, 2}, @@ -57,7 +60,7 @@ class TensorConverterCalculatorTest : public ::testing::Test { bool row_major_matrix = false) { RandomEngine random(kSeed); std::uniform_real_distribution<> uniform_dist(0, 1.0); - auto matrix = ::absl::make_unique(); + auto matrix = std::make_unique(); matrix->resize(num_rows, num_columns); if (row_major_matrix) { for (int y = 0; y < num_rows; ++y) { @@ -105,7 +108,7 @@ TEST_F(TensorConverterCalculatorTest, RandomMatrixColMajor) { tool::AddVectorSink("tensor", &graph_config, &output_packets); // Run the graph. - graph_ = absl::make_unique(); + graph_ = std::make_unique(); MP_ASSERT_OK(graph_->Initialize(graph_config)); MP_ASSERT_OK(graph_->StartRun({})); @@ -167,7 +170,7 @@ TEST_F(TensorConverterCalculatorTest, RandomMatrixRowMajor) { tool::AddVectorSink("tensor", &graph_config, &output_packets); // Run the graph. - graph_ = absl::make_unique(); + graph_ = std::make_unique(); MP_ASSERT_OK(graph_->Initialize(graph_config)); MP_ASSERT_OK(graph_->StartRun({})); @@ -231,7 +234,7 @@ TEST_F(TensorConverterCalculatorTest, CustomDivAndSub) { // Run the graph. MP_ASSERT_OK(graph.Initialize(graph_config)); MP_ASSERT_OK(graph.StartRun({})); - auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 1); + auto input_image = std::make_unique(ImageFormat::GRAY8, 1, 1); cv::Mat mat = mediapipe::formats::MatView(input_image.get()); mat.at(0, 0) = 200; MP_ASSERT_OK(graph.AddPacketToInputStream( @@ -285,7 +288,7 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) { // Run the graph. MP_ASSERT_OK(graph.Initialize(graph_config)); MP_ASSERT_OK(graph.StartRun({})); - auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 1); + auto input_image = std::make_unique(ImageFormat::GRAY8, 1, 1); cv::Mat mat = mediapipe::formats::MatView(input_image.get()); mat.at(0, 0) = 200; MP_ASSERT_OK(graph.AddPacketToInputStream( @@ -341,7 +344,7 @@ TEST_F(TensorConverterCalculatorTest, FlipVertically) { // Run the graph. MP_ASSERT_OK(graph.Initialize(graph_config)); MP_ASSERT_OK(graph.StartRun({})); - auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 2); + auto input_image = std::make_unique(ImageFormat::GRAY8, 1, 2); cv::Mat mat = mediapipe::formats::MatView(input_image.get()); constexpr uint8_t kY0Value = 100; constexpr uint8_t kY1Value = 200; @@ -372,7 +375,8 @@ TEST_F(TensorConverterCalculatorTest, FlipVertically) { MP_ASSERT_OK(graph.WaitUntilDone()); } -TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) { +TEST_F(TensorConverterCalculatorTest, + CannotSpecifyBothFlipVerticallyAndGpuOrigin) { CalculatorGraph graph; CalculatorGraphConfig graph_config = mediapipe::ParseTextProtoOrDie(R"pb( @@ -396,7 +400,46 @@ TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) { // Run the graph. MP_ASSERT_OK(graph.Initialize(graph_config)); MP_ASSERT_OK(graph.StartRun({})); - auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 2); + auto input_image = std::make_unique(ImageFormat::GRAY8, 1, 1); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", Adopt(input_image.release()).At(Timestamp(0)))); + + // Processing should fail as we specified both flip_vertically and gpu_origin. + absl::Status status = graph.WaitUntilIdle(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT(status.message(), HasSubstr("flip_vertically and gpu_origin")); + EXPECT_EQ(output_packets.size(), 0); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). + MP_ASSERT_OK(graph.CloseInputStream("input_image")); + EXPECT_FALSE(graph.WaitUntilDone().ok()); +} + +TEST_F(TensorConverterCalculatorTest, GpuOriginIsIgnoredWithCpuImage) { + CalculatorGraph graph; + CalculatorGraphConfig graph_config = + mediapipe::ParseTextProtoOrDie(R"pb( + input_stream: "input_image" + node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TensorConverterCalculatorOptions.ext] { + gpu_origin: CONVENTIONAL + output_tensor_float_range { min: 0 max: 255 } + } + } + } + )pb"); + std::vector output_packets; + tool::AddVectorSink("tensor", &graph_config, &output_packets); + + // Run the graph. + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + auto input_image = std::make_unique(ImageFormat::GRAY8, 1, 2); cv::Mat mat = mediapipe::formats::MatView(input_image.get()); constexpr uint8_t kY0Value = 100; constexpr uint8_t kY1Value = 200;