Clean up TensorConverterCalculator flipping behavior

Returns an error if
- gpu_origin is specified for a CPU image, and
- gpu_origin and flip_vertically are both specified.
Adds a test for an IMAGE_GPU input to validate flipping.

PiperOrigin-RevId: 565311456
This commit is contained in:
MediaPipe Team 2023-09-14 02:53:07 -07:00 committed by Copybara-Service
parent 21646008d5
commit 124a4de08d
4 changed files with 165 additions and 94 deletions

View File

@ -660,7 +660,12 @@ cc_library(
"//mediapipe/gpu:gpu_buffer_format",
"//mediapipe/gpu:gpu_origin_cc_proto",
"//mediapipe/util:resource_util",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/log:absl_log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings:str_format",
] + select({
"//mediapipe/gpu:disable_gpu": [],
@ -715,6 +720,7 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/tool:validate_type",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
],
)

View File

@ -17,6 +17,7 @@
#include <vector>
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
@ -57,11 +58,25 @@ int NumGroups(const int size, const int group_size) { // NOLINT
}
absl::StatusOr<bool> ShouldFlipVertically(
const mediapipe::TensorConverterCalculatorOptions& options) {
const mediapipe::TensorConverterCalculatorOptions& options, bool use_gpu) {
if (options.has_flip_vertically() && options.has_gpu_origin()) {
return absl::FailedPreconditionError(absl::StrFormat(
"Cannot specify both flip_vertically and gpu_origin options"));
}
if (!options.has_gpu_origin()) {
// Fall back to flip_vertically.
return options.flip_vertically();
}
// Warn if gpu_origin is specified with a CPU input image.
// Those are always TOP_LEFT, so no flipping is necessary.
if (!use_gpu) {
ABSL_LOG(WARNING)
<< "Ignoring gpu_origin option since IMAGE_GPU input is not specified";
return false;
}
switch (options.gpu_origin()) {
case mediapipe::GpuOrigin::TOP_LEFT:
return false;
@ -140,7 +155,7 @@ class TensorConverterCalculator : public CalculatorBase {
private:
absl::Status InitGpu(CalculatorContext* cc);
absl::Status LoadOptions(CalculatorContext* cc);
absl::Status LoadOptions(CalculatorContext* cc, bool use_gpu);
template <class T>
absl::Status NormalizeImage(const ImageFrame& image_frame,
bool flip_vertically, float* tensor_ptr);
@ -176,7 +191,8 @@ absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) {
RET_CHECK(static_cast<int>(cc->Inputs().HasTag(kImageFrameTag)) +
static_cast<int>(cc->Inputs().HasTag(kGpuBufferTag)) +
static_cast<int>(cc->Inputs().HasTag(kMatrixTag)) ==
1);
1)
<< "Only one input tag of {IMAGE, IMAGE_GPU, MATRIX} may be specified";
if (cc->Inputs().HasTag(kImageFrameTag)) {
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
@ -204,8 +220,6 @@ absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) {
absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
MP_RETURN_IF_ERROR(LoadOptions(cc));
#if !MEDIAPIPE_DISABLE_GPU
if (cc->Inputs().HasTag(kGpuBufferTag)) {
use_gpu_ = true;
@ -218,6 +232,8 @@ absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
}
#endif // !MEDIAPIPE_DISABLE_GPU
MP_RETURN_IF_ERROR(LoadOptions(cc, use_gpu_));
return absl::OkStatus();
}
@ -436,7 +452,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
// Shader to convert GL Texture to Metal Buffer,
// with normalization to either: [0,1] or [-1,1].
const std::string shader_source = absl::Substitute(
R"(
R"glsl(
#include <metal_stdlib>
using namespace metal;
@ -455,7 +471,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
$3 // g & b channels
$4 // alpha channel
}
)",
)glsl",
/*$0=*/
output_range_.has_value()
? absl::Substitute("pixel = pixel * half($0) + half($1);",
@ -465,8 +481,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
/*$1=*/max_num_channels_,
/*$2=*/flip_vertically_ ? "(in_tex.get_height() - 1 - gid.y)" : "gid.y",
/*$3=*/
single_channel ? "" : R"(out_buf[linear_index + 1] = pixel.y;
out_buf[linear_index + 2] = pixel.z;)",
single_channel ? "" : R"glsl(out_buf[linear_index + 1] = pixel.y;
out_buf[linear_index + 2] = pixel.z;)glsl",
/*$4=*/include_alpha ? "out_buf[linear_index + 3] = pixel.w;" : "");
NSString* library_source =
@ -484,17 +500,17 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
[[error localizedDescription] UTF8String];
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &include_alpha,
MP_RETURN_IF_ERROR(
gpu_helper_.RunInGlContext([this, &include_alpha,
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
&input,
&input,
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
&single_channel]()
-> absl::Status {
&single_channel]() -> absl::Status {
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
// Shader to convert GL Texture to Shader Storage Buffer Object (SSBO),
// with normalization to either: [0,1] or [-1,1].
const std::string shader_source = absl::Substitute(
R"( #version 310 es
// Shader to convert GL Texture to Shader Storage Buffer Object (SSBO),
// with normalization to either: [0,1] or [-1,1].
const std::string shader_source = absl::Substitute(
R"glsl( #version 310 es
layout(local_size_x = $0, local_size_y = $0) in;
layout(binding = 0) uniform sampler2D input_texture;
layout(std430, binding = 1) buffer Output {float elements[];} output_data;
@ -508,38 +524,40 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
output_data.elements[linear_index + 0] = pixel.x; // r channel
$5 // g & b channels
$6 // alpha channel
})",
/*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(),
/*$3=*/
output_range_.has_value()
? absl::Substitute("pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y",
/*$5=*/
single_channel ? ""
: R"(output_data.elements[linear_index + 1] = pixel.y;
output_data.elements[linear_index + 2] = pixel.z;)",
/*$6=*/
include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;"
: "",
/*$7=*/max_num_channels_);
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
const GLchar* sources[] = {shader_source.c_str()};
glShaderSource(shader, 1, sources, NULL);
glCompileShader(shader);
GLint compiled = GL_FALSE;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
RET_CHECK(compiled == GL_TRUE);
to_buffer_program_ = glCreateProgram();
glAttachShader(to_buffer_program_, shader);
glDeleteShader(shader);
glLinkProgram(to_buffer_program_);
})glsl",
/*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(),
/*$3=*/
output_range_.has_value()
? absl::Substitute(
"pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y",
/*$5=*/
single_channel
? ""
: R"glsl(output_data.elements[linear_index + 1] = pixel.y;
output_data.elements[linear_index + 2] = pixel.z;)glsl",
/*$6=*/
include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;"
: "",
/*$7=*/max_num_channels_);
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
const GLchar* sources[] = {shader_source.c_str()};
glShaderSource(shader, 1, sources, NULL);
glCompileShader(shader);
GLint compiled = GL_FALSE;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
RET_CHECK(compiled == GL_TRUE);
to_buffer_program_ = glCreateProgram();
glAttachShader(to_buffer_program_, shader);
glDeleteShader(shader);
glLinkProgram(to_buffer_program_);
#else
// OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion.
const std::string shader_source = absl::Substitute(
R"(
// OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion.
const std::string shader_source = absl::Substitute(
R"glsl(
#if __VERSION__ < 130
#define in varying
#endif // __VERSION__ < 130
@ -565,49 +583,51 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
fragColor.r = pixel.r; // r channel
$3 // g & b channels
$4 // alpha channel
})",
/*$0=*/single_channel ? "vec1" : "vec4",
/*$1=*/
flip_vertically_
? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
: "sample_coordinate;",
/*$2=*/output_range_.has_value()
? absl::Substitute("pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$3=*/single_channel ? "" : R"(fragColor.g = pixel.g;
fragColor.b = pixel.b;)",
/*$4=*/
include_alpha ? "fragColor.a = pixel.a;"
: (single_channel ? "" : "fragColor.a = 1.0;"));
})glsl",
/*$0=*/single_channel ? "vec1" : "vec4",
/*$1=*/
flip_vertically_
? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
: "sample_coordinate;",
/*$2=*/output_range_.has_value()
? absl::Substitute(
"pixel = pixel * float($0) + float($1);",
(output_range_->second - output_range_->first),
output_range_->first)
: "",
/*$3=*/single_channel ? "" : R"glsl(fragColor.g = pixel.g;
fragColor.b = pixel.b;)glsl",
/*$4=*/
include_alpha ? "fragColor.a = pixel.a;"
: (single_channel ? "" : "fragColor.a = 1.0;"));
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
};
const GLchar* attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
};
// shader program and params
mediapipe::GlhCreateProgram(
mediapipe::kBasicVertexShader, shader_source.c_str(), NUM_ATTRIBUTES,
&attr_name[0], attr_location, &to_tex2d_program_);
RET_CHECK(to_tex2d_program_) << "Problem initializing the program.";
glUseProgram(to_tex2d_program_);
glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1);
glGenFramebuffers(1, &framebuffer_);
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
};
const GLchar* attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
};
// shader program and params
mediapipe::GlhCreateProgram(
mediapipe::kBasicVertexShader, shader_source.c_str(),
NUM_ATTRIBUTES, &attr_name[0], attr_location, &to_tex2d_program_);
RET_CHECK(to_tex2d_program_) << "Problem initializing the program.";
glUseProgram(to_tex2d_program_);
glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1);
glGenFramebuffers(1, &framebuffer_);
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
return absl::OkStatus();
}));
return absl::OkStatus();
}));
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#endif // !MEDIAPIPE_DISABLE_GPU
return absl::OkStatus();
}
absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) {
absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc,
bool use_gpu) {
// Get calculator options specified in the graph.
const auto& options =
cc->Options<::mediapipe::TensorConverterCalculatorOptions>();
@ -635,7 +655,7 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) {
}
// Get y-flip mode.
ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options));
ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options, use_gpu));
// Get row_major_matrix mode.
row_major_matrix_ = options.row_major_matrix();

View File

@ -44,12 +44,14 @@ message TensorConverterCalculatorOptions {
// with a coordinate system where the origin is at the bottom-left corner
// (e.g., in OpenGL) whereas the ML model expects an image with a top-left
// origin.
// Prefer gpu_origin over this field.
// Prefer gpu_origin over this field when using GPU input images.
optional bool flip_vertically = 2 [default = false];
// Determines when the input image should be flipped vertically.
// Determines when the input GPU image should be flipped vertically.
// See GpuOrigin.Mode for more information.
// Affects only IMAGE_GPU inputs.
// If unset, falls back to flip_vertically for backwards compatibility.
// Cannot set both gpu_origin and flip_vertically.
optional GpuOrigin.Mode gpu_origin = 10;
// Controls how many channels of the input image get passed through to the

View File

@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstdint>
#include <memory>
#include <random>
@ -19,6 +20,7 @@
#include <vector>
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/strings/substitute.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
@ -45,6 +47,7 @@ constexpr char kTransposeOptionsString[] =
} // namespace
using RandomEngine = std::mt19937_64;
using ::testing::HasSubstr;
const uint32_t kSeed = 1234;
const int kNumSizes = 8;
const int sizes[kNumSizes][2] = {{1, 1}, {12, 1}, {1, 9}, {2, 2},
@ -57,7 +60,7 @@ class TensorConverterCalculatorTest : public ::testing::Test {
bool row_major_matrix = false) {
RandomEngine random(kSeed);
std::uniform_real_distribution<> uniform_dist(0, 1.0);
auto matrix = ::absl::make_unique<Matrix>();
auto matrix = std::make_unique<Matrix>();
matrix->resize(num_rows, num_columns);
if (row_major_matrix) {
for (int y = 0; y < num_rows; ++y) {
@ -105,7 +108,7 @@ TEST_F(TensorConverterCalculatorTest, RandomMatrixColMajor) {
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
graph_ = absl::make_unique<CalculatorGraph>();
graph_ = std::make_unique<CalculatorGraph>();
MP_ASSERT_OK(graph_->Initialize(graph_config));
MP_ASSERT_OK(graph_->StartRun({}));
@ -167,7 +170,7 @@ TEST_F(TensorConverterCalculatorTest, RandomMatrixRowMajor) {
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
graph_ = absl::make_unique<CalculatorGraph>();
graph_ = std::make_unique<CalculatorGraph>();
MP_ASSERT_OK(graph_->Initialize(graph_config));
MP_ASSERT_OK(graph_->StartRun({}));
@ -231,7 +234,7 @@ TEST_F(TensorConverterCalculatorTest, CustomDivAndSub) {
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
mat.at<uint8_t>(0, 0) = 200;
MP_ASSERT_OK(graph.AddPacketToInputStream(
@ -285,7 +288,7 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
mat.at<uint8_t>(0, 0) = 200;
MP_ASSERT_OK(graph.AddPacketToInputStream(
@ -341,7 +344,7 @@ TEST_F(TensorConverterCalculatorTest, FlipVertically) {
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
constexpr uint8_t kY0Value = 100;
constexpr uint8_t kY1Value = 200;
@ -372,7 +375,8 @@ TEST_F(TensorConverterCalculatorTest, FlipVertically) {
MP_ASSERT_OK(graph.WaitUntilDone());
}
TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) {
TEST_F(TensorConverterCalculatorTest,
CannotSpecifyBothFlipVerticallyAndGpuOrigin) {
CalculatorGraph graph;
CalculatorGraphConfig graph_config =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
@ -396,7 +400,46 @@ TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) {
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
MP_ASSERT_OK(graph.AddPacketToInputStream(
"input_image", Adopt(input_image.release()).At(Timestamp(0))));
// Processing should fail as we specified both flip_vertically and gpu_origin.
absl::Status status = graph.WaitUntilIdle();
EXPECT_FALSE(status.ok());
EXPECT_THAT(status.message(), HasSubstr("flip_vertically and gpu_origin"));
EXPECT_EQ(output_packets.size(), 0);
// Fully close graph at end, otherwise calculator+tensors are destroyed
// after calling WaitUntilDone().
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
EXPECT_FALSE(graph.WaitUntilDone().ok());
}
TEST_F(TensorConverterCalculatorTest, GpuOriginIsIgnoredWithCpuImage) {
CalculatorGraph graph;
CalculatorGraphConfig graph_config =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
input_stream: "input_image"
node {
calculator: "TensorConverterCalculator"
input_stream: "IMAGE:input_image"
output_stream: "TENSORS:tensor"
options {
[mediapipe.TensorConverterCalculatorOptions.ext] {
gpu_origin: CONVENTIONAL
output_tensor_float_range { min: 0 max: 255 }
}
}
}
)pb");
std::vector<Packet> output_packets;
tool::AddVectorSink("tensor", &graph_config, &output_packets);
// Run the graph.
MP_ASSERT_OK(graph.Initialize(graph_config));
MP_ASSERT_OK(graph.StartRun({}));
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
constexpr uint8_t kY0Value = 100;
constexpr uint8_t kY1Value = 200;