Clean up TensorConverterCalculator flipping behavior
Returns an error if gpu_origin and flip_vertically are both specified. If gpu_origin is specified for a CPU image (no IMAGE_GPU input), it is ignored and a warning is logged. Adds a test for an IMAGE_GPU input to validate flipping. PiperOrigin-RevId: 565311456
This commit is contained in:
parent
21646008d5
commit
124a4de08d
|
@ -660,7 +660,12 @@ cc_library(
|
|||
"//mediapipe/gpu:gpu_buffer_format",
|
||||
"//mediapipe/gpu:gpu_origin_cc_proto",
|
||||
"//mediapipe/util:resource_util",
|
||||
"@com_google_absl//absl/log",
|
||||
"@com_google_absl//absl/log:absl_check",
|
||||
"@com_google_absl//absl/log:absl_log",
|
||||
"@com_google_absl//absl/log:check",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/status:statusor",
|
||||
"@com_google_absl//absl/strings:str_format",
|
||||
] + select({
|
||||
"//mediapipe/gpu:disable_gpu": [],
|
||||
|
@ -715,6 +720,7 @@ cc_test(
|
|||
"//mediapipe/framework/port:parse_text_proto",
|
||||
"//mediapipe/framework/tool:validate_type",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/status",
|
||||
"@com_google_absl//absl/strings",
|
||||
],
|
||||
)
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "absl/log/absl_check.h"
|
||||
#include "absl/log/absl_log.h"
|
||||
#include "absl/status/status.h"
|
||||
#include "absl/status/statusor.h"
|
||||
#include "absl/strings/str_format.h"
|
||||
|
@ -57,11 +58,25 @@ int NumGroups(const int size, const int group_size) { // NOLINT
|
|||
}
|
||||
|
||||
absl::StatusOr<bool> ShouldFlipVertically(
|
||||
const mediapipe::TensorConverterCalculatorOptions& options) {
|
||||
const mediapipe::TensorConverterCalculatorOptions& options, bool use_gpu) {
|
||||
if (options.has_flip_vertically() && options.has_gpu_origin()) {
|
||||
return absl::FailedPreconditionError(absl::StrFormat(
|
||||
"Cannot specify both flip_vertically and gpu_origin options"));
|
||||
}
|
||||
|
||||
if (!options.has_gpu_origin()) {
|
||||
// Fall back to flip_vertically.
|
||||
return options.flip_vertically();
|
||||
}
|
||||
|
||||
// Warn if gpu_origin is specified with a CPU input image.
|
||||
// Those are always TOP_LEFT, so no flipping is necessary.
|
||||
if (!use_gpu) {
|
||||
ABSL_LOG(WARNING)
|
||||
<< "Ignoring gpu_origin option since IMAGE_GPU input is not specified";
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (options.gpu_origin()) {
|
||||
case mediapipe::GpuOrigin::TOP_LEFT:
|
||||
return false;
|
||||
|
@ -140,7 +155,7 @@ class TensorConverterCalculator : public CalculatorBase {
|
|||
|
||||
private:
|
||||
absl::Status InitGpu(CalculatorContext* cc);
|
||||
absl::Status LoadOptions(CalculatorContext* cc);
|
||||
absl::Status LoadOptions(CalculatorContext* cc, bool use_gpu);
|
||||
template <class T>
|
||||
absl::Status NormalizeImage(const ImageFrame& image_frame,
|
||||
bool flip_vertically, float* tensor_ptr);
|
||||
|
@ -176,7 +191,8 @@ absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) {
|
|||
RET_CHECK(static_cast<int>(cc->Inputs().HasTag(kImageFrameTag)) +
|
||||
static_cast<int>(cc->Inputs().HasTag(kGpuBufferTag)) +
|
||||
static_cast<int>(cc->Inputs().HasTag(kMatrixTag)) ==
|
||||
1);
|
||||
1)
|
||||
<< "Only one input tag of {IMAGE, IMAGE_GPU, MATRIX} may be specified";
|
||||
|
||||
if (cc->Inputs().HasTag(kImageFrameTag)) {
|
||||
cc->Inputs().Tag(kImageFrameTag).Set<ImageFrame>();
|
||||
|
@ -204,8 +220,6 @@ absl::Status TensorConverterCalculator::GetContract(CalculatorContract* cc) {
|
|||
absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
|
||||
cc->SetOffset(TimestampDiff(0));
|
||||
|
||||
MP_RETURN_IF_ERROR(LoadOptions(cc));
|
||||
|
||||
#if !MEDIAPIPE_DISABLE_GPU
|
||||
if (cc->Inputs().HasTag(kGpuBufferTag)) {
|
||||
use_gpu_ = true;
|
||||
|
@ -218,6 +232,8 @@ absl::Status TensorConverterCalculator::Open(CalculatorContext* cc) {
|
|||
}
|
||||
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||
|
||||
MP_RETURN_IF_ERROR(LoadOptions(cc, use_gpu_));
|
||||
|
||||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
|
@ -436,7 +452,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
// Shader to convert GL Texture to Metal Buffer,
|
||||
// with normalization to either: [0,1] or [-1,1].
|
||||
const std::string shader_source = absl::Substitute(
|
||||
R"(
|
||||
R"glsl(
|
||||
#include <metal_stdlib>
|
||||
|
||||
using namespace metal;
|
||||
|
@ -455,7 +471,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
$3 // g & b channels
|
||||
$4 // alpha channel
|
||||
}
|
||||
)",
|
||||
)glsl",
|
||||
/*$0=*/
|
||||
output_range_.has_value()
|
||||
? absl::Substitute("pixel = pixel * half($0) + half($1);",
|
||||
|
@ -465,8 +481,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
/*$1=*/max_num_channels_,
|
||||
/*$2=*/flip_vertically_ ? "(in_tex.get_height() - 1 - gid.y)" : "gid.y",
|
||||
/*$3=*/
|
||||
single_channel ? "" : R"(out_buf[linear_index + 1] = pixel.y;
|
||||
out_buf[linear_index + 2] = pixel.z;)",
|
||||
single_channel ? "" : R"glsl(out_buf[linear_index + 1] = pixel.y;
|
||||
out_buf[linear_index + 2] = pixel.z;)glsl",
|
||||
/*$4=*/include_alpha ? "out_buf[linear_index + 3] = pixel.w;" : "");
|
||||
|
||||
NSString* library_source =
|
||||
|
@ -484,17 +500,17 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
RET_CHECK(to_buffer_program_ != nil) << "Couldn't create pipeline state " <<
|
||||
[[error localizedDescription] UTF8String];
|
||||
#elif MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
||||
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this, &include_alpha,
|
||||
MP_RETURN_IF_ERROR(
|
||||
gpu_helper_.RunInGlContext([this, &include_alpha,
|
||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
&input,
|
||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
&single_channel]()
|
||||
-> absl::Status {
|
||||
&single_channel]() -> absl::Status {
|
||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
// Shader to convert GL Texture to Shader Storage Buffer Object (SSBO),
|
||||
// with normalization to either: [0,1] or [-1,1].
|
||||
const std::string shader_source = absl::Substitute(
|
||||
R"( #version 310 es
|
||||
R"glsl( #version 310 es
|
||||
layout(local_size_x = $0, local_size_y = $0) in;
|
||||
layout(binding = 0) uniform sampler2D input_texture;
|
||||
layout(std430, binding = 1) buffer Output {float elements[];} output_data;
|
||||
|
@ -508,19 +524,21 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
output_data.elements[linear_index + 0] = pixel.x; // r channel
|
||||
$5 // g & b channels
|
||||
$6 // alpha channel
|
||||
})",
|
||||
})glsl",
|
||||
/*$0=*/kWorkgroupSize, /*$1=*/input.width(), /*$2=*/input.height(),
|
||||
/*$3=*/
|
||||
output_range_.has_value()
|
||||
? absl::Substitute("pixel = pixel * float($0) + float($1);",
|
||||
? absl::Substitute(
|
||||
"pixel = pixel * float($0) + float($1);",
|
||||
(output_range_->second - output_range_->first),
|
||||
output_range_->first)
|
||||
: "",
|
||||
/*$4=*/flip_vertically_ ? "(width_height.y - 1 - gid.y)" : "gid.y",
|
||||
/*$5=*/
|
||||
single_channel ? ""
|
||||
: R"(output_data.elements[linear_index + 1] = pixel.y;
|
||||
output_data.elements[linear_index + 2] = pixel.z;)",
|
||||
single_channel
|
||||
? ""
|
||||
: R"glsl(output_data.elements[linear_index + 1] = pixel.y;
|
||||
output_data.elements[linear_index + 2] = pixel.z;)glsl",
|
||||
/*$6=*/
|
||||
include_alpha ? "output_data.elements[linear_index + 3] = pixel.w;"
|
||||
: "",
|
||||
|
@ -539,7 +557,7 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
#else
|
||||
// OpenGL ES 3.0 fragment shader Texture2d -> Texture2d conversion.
|
||||
const std::string shader_source = absl::Substitute(
|
||||
R"(
|
||||
R"glsl(
|
||||
#if __VERSION__ < 130
|
||||
#define in varying
|
||||
#endif // __VERSION__ < 130
|
||||
|
@ -565,19 +583,20 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
fragColor.r = pixel.r; // r channel
|
||||
$3 // g & b channels
|
||||
$4 // alpha channel
|
||||
})",
|
||||
})glsl",
|
||||
/*$0=*/single_channel ? "vec1" : "vec4",
|
||||
/*$1=*/
|
||||
flip_vertically_
|
||||
? "vec2(sample_coordinate.x, 1.0 - sample_coordinate.y);"
|
||||
: "sample_coordinate;",
|
||||
/*$2=*/output_range_.has_value()
|
||||
? absl::Substitute("pixel = pixel * float($0) + float($1);",
|
||||
? absl::Substitute(
|
||||
"pixel = pixel * float($0) + float($1);",
|
||||
(output_range_->second - output_range_->first),
|
||||
output_range_->first)
|
||||
: "",
|
||||
/*$3=*/single_channel ? "" : R"(fragColor.g = pixel.g;
|
||||
fragColor.b = pixel.b;)",
|
||||
/*$3=*/single_channel ? "" : R"glsl(fragColor.g = pixel.g;
|
||||
fragColor.b = pixel.b;)glsl",
|
||||
/*$4=*/
|
||||
include_alpha ? "fragColor.a = pixel.a;"
|
||||
: (single_channel ? "" : "fragColor.a = 1.0;"));
|
||||
|
@ -592,8 +611,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
};
|
||||
// shader program and params
|
||||
mediapipe::GlhCreateProgram(
|
||||
mediapipe::kBasicVertexShader, shader_source.c_str(), NUM_ATTRIBUTES,
|
||||
&attr_name[0], attr_location, &to_tex2d_program_);
|
||||
mediapipe::kBasicVertexShader, shader_source.c_str(),
|
||||
NUM_ATTRIBUTES, &attr_name[0], attr_location, &to_tex2d_program_);
|
||||
RET_CHECK(to_tex2d_program_) << "Problem initializing the program.";
|
||||
glUseProgram(to_tex2d_program_);
|
||||
glUniform1i(glGetUniformLocation(to_tex2d_program_, "frame"), 1);
|
||||
|
@ -607,7 +626,8 @@ absl::Status TensorConverterCalculator::InitGpu(CalculatorContext* cc) {
|
|||
return absl::OkStatus();
|
||||
}
|
||||
|
||||
absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) {
|
||||
absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc,
|
||||
bool use_gpu) {
|
||||
// Get calculator options specified in the graph.
|
||||
const auto& options =
|
||||
cc->Options<::mediapipe::TensorConverterCalculatorOptions>();
|
||||
|
@ -635,7 +655,7 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) {
|
|||
}
|
||||
|
||||
// Get y-flip mode.
|
||||
ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options));
|
||||
ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options, use_gpu));
|
||||
|
||||
// Get row_major_matrix mode.
|
||||
row_major_matrix_ = options.row_major_matrix();
|
||||
|
|
|
@ -44,12 +44,14 @@ message TensorConverterCalculatorOptions {
|
|||
// with a coordinate system where the origin is at the bottom-left corner
|
||||
// (e.g., in OpenGL) whereas the ML model expects an image with a top-left
|
||||
// origin.
|
||||
// Prefer gpu_origin over this field.
|
||||
// Prefer gpu_origin over this field when using GPU input images.
|
||||
optional bool flip_vertically = 2 [default = false];
|
||||
|
||||
// Determines when the input image should be flipped vertically.
|
||||
// Determines when the input GPU image should be flipped vertically.
|
||||
// See GpuOrigin.Mode for more information.
|
||||
// Affects only IMAGE_GPU inputs.
|
||||
// If unset, falls back to flip_vertically for backwards compatibility.
|
||||
// Cannot set both gpu_origin and flip_vertically.
|
||||
optional GpuOrigin.Mode gpu_origin = 10;
|
||||
|
||||
// Controls how many channels of the input image get passed through to the
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
|
@ -19,6 +20,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/status/status.h"
|
||||
#include "absl/strings/substitute.h"
|
||||
#include "mediapipe/framework/calculator_framework.h"
|
||||
#include "mediapipe/framework/calculator_runner.h"
|
||||
|
@ -45,6 +47,7 @@ constexpr char kTransposeOptionsString[] =
|
|||
} // namespace
|
||||
|
||||
using RandomEngine = std::mt19937_64;
|
||||
using ::testing::HasSubstr;
|
||||
const uint32_t kSeed = 1234;
|
||||
const int kNumSizes = 8;
|
||||
const int sizes[kNumSizes][2] = {{1, 1}, {12, 1}, {1, 9}, {2, 2},
|
||||
|
@ -57,7 +60,7 @@ class TensorConverterCalculatorTest : public ::testing::Test {
|
|||
bool row_major_matrix = false) {
|
||||
RandomEngine random(kSeed);
|
||||
std::uniform_real_distribution<> uniform_dist(0, 1.0);
|
||||
auto matrix = ::absl::make_unique<Matrix>();
|
||||
auto matrix = std::make_unique<Matrix>();
|
||||
matrix->resize(num_rows, num_columns);
|
||||
if (row_major_matrix) {
|
||||
for (int y = 0; y < num_rows; ++y) {
|
||||
|
@ -105,7 +108,7 @@ TEST_F(TensorConverterCalculatorTest, RandomMatrixColMajor) {
|
|||
tool::AddVectorSink("tensor", &graph_config, &output_packets);
|
||||
|
||||
// Run the graph.
|
||||
graph_ = absl::make_unique<CalculatorGraph>();
|
||||
graph_ = std::make_unique<CalculatorGraph>();
|
||||
MP_ASSERT_OK(graph_->Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph_->StartRun({}));
|
||||
|
||||
|
@ -167,7 +170,7 @@ TEST_F(TensorConverterCalculatorTest, RandomMatrixRowMajor) {
|
|||
tool::AddVectorSink("tensor", &graph_config, &output_packets);
|
||||
|
||||
// Run the graph.
|
||||
graph_ = absl::make_unique<CalculatorGraph>();
|
||||
graph_ = std::make_unique<CalculatorGraph>();
|
||||
MP_ASSERT_OK(graph_->Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph_->StartRun({}));
|
||||
|
||||
|
@ -231,7 +234,7 @@ TEST_F(TensorConverterCalculatorTest, CustomDivAndSub) {
|
|||
// Run the graph.
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
|
||||
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
|
||||
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
|
||||
mat.at<uint8_t>(0, 0) = 200;
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
|
@ -285,7 +288,7 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
|
|||
// Run the graph.
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
|
||||
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
|
||||
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
|
||||
mat.at<uint8_t>(0, 0) = 200;
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
|
@ -341,7 +344,7 @@ TEST_F(TensorConverterCalculatorTest, FlipVertically) {
|
|||
// Run the graph.
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
|
||||
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
|
||||
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
|
||||
constexpr uint8_t kY0Value = 100;
|
||||
constexpr uint8_t kY1Value = 200;
|
||||
|
@ -372,7 +375,8 @@ TEST_F(TensorConverterCalculatorTest, FlipVertically) {
|
|||
MP_ASSERT_OK(graph.WaitUntilDone());
|
||||
}
|
||||
|
||||
TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) {
|
||||
TEST_F(TensorConverterCalculatorTest,
|
||||
CannotSpecifyBothFlipVerticallyAndGpuOrigin) {
|
||||
CalculatorGraph graph;
|
||||
CalculatorGraphConfig graph_config =
|
||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
|
@ -396,7 +400,46 @@ TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) {
|
|||
// Run the graph.
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
|
||||
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 1);
|
||||
MP_ASSERT_OK(graph.AddPacketToInputStream(
|
||||
"input_image", Adopt(input_image.release()).At(Timestamp(0))));
|
||||
|
||||
// Processing should fail as we specified both flip_vertically and gpu_origin.
|
||||
absl::Status status = graph.WaitUntilIdle();
|
||||
EXPECT_FALSE(status.ok());
|
||||
EXPECT_THAT(status.message(), HasSubstr("flip_vertically and gpu_origin"));
|
||||
EXPECT_EQ(output_packets.size(), 0);
|
||||
|
||||
// Fully close graph at end, otherwise calculator+tensors are destroyed
|
||||
// after calling WaitUntilDone().
|
||||
MP_ASSERT_OK(graph.CloseInputStream("input_image"));
|
||||
EXPECT_FALSE(graph.WaitUntilDone().ok());
|
||||
}
|
||||
|
||||
TEST_F(TensorConverterCalculatorTest, GpuOriginIsIgnoredWithCpuImage) {
|
||||
CalculatorGraph graph;
|
||||
CalculatorGraphConfig graph_config =
|
||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
|
||||
input_stream: "input_image"
|
||||
node {
|
||||
calculator: "TensorConverterCalculator"
|
||||
input_stream: "IMAGE:input_image"
|
||||
output_stream: "TENSORS:tensor"
|
||||
options {
|
||||
[mediapipe.TensorConverterCalculatorOptions.ext] {
|
||||
gpu_origin: CONVENTIONAL
|
||||
output_tensor_float_range { min: 0 max: 255 }
|
||||
}
|
||||
}
|
||||
}
|
||||
)pb");
|
||||
std::vector<Packet> output_packets;
|
||||
tool::AddVectorSink("tensor", &graph_config, &output_packets);
|
||||
|
||||
// Run the graph.
|
||||
MP_ASSERT_OK(graph.Initialize(graph_config));
|
||||
MP_ASSERT_OK(graph.StartRun({}));
|
||||
auto input_image = std::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
|
||||
cv::Mat mat = mediapipe::formats::MatView(input_image.get());
|
||||
constexpr uint8_t kY0Value = 100;
|
||||
constexpr uint8_t kY1Value = 200;
|
||||
|
|
Loading…
Reference in New Issue
Block a user