diff --git a/mediapipe/calculators/tensor/BUILD b/mediapipe/calculators/tensor/BUILD index a3e61c063..46c0f6f3e 100644 --- a/mediapipe/calculators/tensor/BUILD +++ b/mediapipe/calculators/tensor/BUILD @@ -620,6 +620,7 @@ mediapipe_proto_library( deps = [ "//mediapipe/framework:calculator_options_proto", "//mediapipe/framework:calculator_proto", + "//mediapipe/gpu:gpu_origin_proto", ], ) @@ -649,7 +650,11 @@ cc_library( "//mediapipe/framework/formats:matrix", "//mediapipe/framework/formats:tensor", "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/gpu:gpu_origin_cc_proto", "//mediapipe/util:resource_util", + "@com_google_absl//absl/strings:str_format", ] + select({ "//mediapipe/gpu:disable_gpu": [], "//conditions:default": ["tensor_converter_calculator_gpu_deps"], diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.cc b/mediapipe/calculators/tensor/tensor_converter_calculator.cc index c1bd92968..56b0099cc 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.cc @@ -15,6 +15,9 @@ #include #include +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/str_format.h" #include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_frame.h" @@ -22,7 +25,7 @@ #include "mediapipe/framework/formats/tensor.h" #include "mediapipe/framework/port.h" #include "mediapipe/framework/port/ret_check.h" -#include "mediapipe/util/resource_util.h" +#include "mediapipe/gpu/gpu_origin.pb.h" #if !MEDIAPIPE_DISABLE_GPU #include "mediapipe/gpu/gpu_buffer.h" @@ -43,12 +46,36 @@ #endif // !MEDIAPIPE_DISABLE_GPU namespace { + constexpr int kWorkgroupSize = 8; // Block size for GPU shader. // Commonly used to compute the number of blocks to launch in a kernel. 
int NumGroups(const int size, const int group_size) { // NOLINT return (size + group_size - 1) / group_size; } +absl::StatusOr ShouldFlipVertically( + const mediapipe::TensorConverterCalculatorOptions& options) { + if (!options.has_gpu_origin()) { + return options.flip_vertically(); + } + // gpu_origin is set: it alone decides the flip; flip_vertically is not read. + switch (options.gpu_origin()) { + case mediapipe::GpuOrigin::TOP_LEFT: + return false; + case mediapipe::GpuOrigin::DEFAULT: + case mediapipe::GpuOrigin::CONVENTIONAL: + // TOP_LEFT on Metal, BOTTOM_LEFT on OpenGL. +#ifdef __APPLE__ + return false; +#else + return true; +#endif + } + // Reached only when gpu_origin holds a value the switch above does not list. + return absl::InvalidArgumentError( + absl::StrFormat("Unhandled GPU origin %i", options.gpu_origin())); +} + typedef Eigen::Matrix RowMajorMatrixXf; typedef Eigen::Matrix @@ -58,6 +85,7 @@ constexpr char kImageFrameTag[] = "IMAGE"; constexpr char kGpuBufferTag[] = "IMAGE_GPU"; constexpr char kTensorsTag[] = "TENSORS"; constexpr char kMatrixTag[] = "MATRIX"; + } // namespace namespace mediapipe { @@ -593,7 +621,7 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) { } // Get y-flip mode. - flip_vertically_ = options.flip_vertically(); + ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options)); // Get row_major_matrix mode. row_major_matrix_ = options.row_major_matrix(); diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator.proto b/mediapipe/calculators/tensor/tensor_converter_calculator.proto index 97c2154a0..194dd417e 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator.proto +++ b/mediapipe/calculators/tensor/tensor_converter_calculator.proto @@ -3,6 +3,7 @@ syntax = "proto2"; package mediapipe; import "mediapipe/framework/calculator.proto"; +import "mediapipe/gpu/gpu_origin.proto"; // Full Example: // @@ -43,8 +44,14 @@ message TensorConverterCalculatorOptions { // with a coordinate system where the origin is at the bottom-left corner // (e.g., in OpenGL) whereas the ML model expects an image with a top-left // origin. 
+ // Prefer gpu_origin over this field. optional bool flip_vertically = 2 [default = false]; + // Determines when the input image should be flipped vertically. + // See GpuOrigin.Mode for more information. + // If unset, falls back to flip_vertically for backwards compatibility. + optional GpuOrigin.Mode gpu_origin = 10; + // Controls how many channels of the input image get passed through to the // tensor. Valid values are 1,3,4 only. Ignored for iOS GPU. optional int32 max_num_channels = 3 [default = 3]; diff --git a/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc b/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc index 2cfbd3d1e..172541bf0 100644 --- a/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc +++ b/mediapipe/calculators/tensor/tensor_converter_calculator_test.cc @@ -259,25 +259,22 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) { for (std::pair range : range_values) { CalculatorGraph graph; CalculatorGraphConfig graph_config = - mediapipe::ParseTextProtoOrDie( - absl::Substitute(R"( - input_stream: "input_image" - node { - calculator: "TensorConverterCalculator" - input_stream: "IMAGE:input_image" - output_stream: "TENSORS:tensor" - options { - [mediapipe.TensorConverterCalculatorOptions.ext] { - output_tensor_float_range { - min: $0 - max: $1 + mediapipe::ParseTextProtoOrDie(absl::Substitute( + R"pb( + input_stream: "input_image" + node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TensorConverterCalculatorOptions.ext] { + output_tensor_float_range { min: $0 max: $1 } + } + } } - } - } - } - )", - /*$0=*/range.first, - /*$1=*/range.second)); + )pb", + /*$0=*/range.first, + /*$1=*/range.second)); std::vector output_packets; tool::AddVectorSink("tensor", &graph_config, &output_packets); @@ -320,4 +317,113 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) { } } +TEST_F(TensorConverterCalculatorTest, 
FlipVertically) { + CalculatorGraph graph; + CalculatorGraphConfig graph_config = + mediapipe::ParseTextProtoOrDie(R"pb( + input_stream: "input_image" + node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TensorConverterCalculatorOptions.ext] { + flip_vertically: true + output_tensor_float_range { min: 0 max: 255 } + } + } + } + )pb"); + std::vector output_packets; + tool::AddVectorSink("tensor", &graph_config, &output_packets); + + // Run the graph. + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 2); + cv::Mat mat = mediapipe::formats::MatView(input_image.get()); + constexpr uint8_t kY0Value = 100; + constexpr uint8_t kY1Value = 200; + mat.at(0, 0) = kY0Value; + mat.at(1, 0) = kY1Value; // Note: y, x! + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", Adopt(input_image.release()).At(Timestamp(0)))); + + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + ASSERT_THAT(output_packets.size(), Eq(1)); + + // Get and process results. + const std::vector& tensor_vec = + output_packets[0].Get>(); + ASSERT_THAT(tensor_vec.size(), Eq(1)); // Fatal: [0] is read below. + + const Tensor* tensor = &tensor_vec[0]; + + EXPECT_THAT(tensor->element_type(), Eq(Tensor::ElementType::kFloat32)); + const float* dataf = tensor->GetCpuReadView().buffer(); + EXPECT_EQ(kY1Value, static_cast(roundf(dataf[0]))); // Y0, Y1 flipped! + EXPECT_EQ(kY0Value, static_cast(roundf(dataf[1]))); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). 
+ MP_ASSERT_OK(graph.CloseInputStream("input_image")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) { + CalculatorGraph graph; + CalculatorGraphConfig graph_config = + mediapipe::ParseTextProtoOrDie(R"pb( + input_stream: "input_image" + node { + calculator: "TensorConverterCalculator" + input_stream: "IMAGE:input_image" + output_stream: "TENSORS:tensor" + options { + [mediapipe.TensorConverterCalculatorOptions.ext] { + flip_vertically: true + gpu_origin: TOP_LEFT + output_tensor_float_range { min: 0 max: 255 } + } + } + } + )pb"); + std::vector output_packets; + tool::AddVectorSink("tensor", &graph_config, &output_packets); + + // Run the graph. + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + auto input_image = absl::make_unique(ImageFormat::GRAY8, 1, 2); + cv::Mat mat = mediapipe::formats::MatView(input_image.get()); + constexpr uint8_t kY0Value = 100; + constexpr uint8_t kY1Value = 200; + mat.at(0, 0) = kY0Value; + mat.at(1, 0) = kY1Value; // Note: y, x! + MP_ASSERT_OK(graph.AddPacketToInputStream( + "input_image", Adopt(input_image.release()).At(Timestamp(0)))); + + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + ASSERT_THAT(output_packets.size(), Eq(1)); // Fatal: [0] is read below. + + // Get and process results. + const std::vector& tensor_vec = + output_packets[0].Get>(); + ASSERT_THAT(tensor_vec.size(), Eq(1)); // Fatal: [0] is read below. + + const Tensor* tensor = &tensor_vec[0]; + + EXPECT_THAT(tensor->element_type(), Eq(Tensor::ElementType::kFloat32)); + const float* dataf = tensor->GetCpuReadView().buffer(); + EXPECT_EQ(kY0Value, static_cast(roundf(dataf[0]))); // Not flipped! + EXPECT_EQ(kY1Value, static_cast(roundf(dataf[1]))); + + // Fully close graph at end, otherwise calculator+tensors are destroyed + // after calling WaitUntilDone(). 
+ MP_ASSERT_OK(graph.CloseInputStream("input_image")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +} // namespace mediapipe