Add a GpuOrigin parameter to TensorConverterCalculator
The parameter supersedes flip_vertically. GpuOrigin is more general than flip_vertically because CONVENTIONAL works on both iOS (no flip) and Android (flip). If gpu_origin is not set, the calculator falls back to flip_vertically for backwards compatibility. Note that web demos actually use TOP_LEFT image orientation, so they shouldn't be flipped, but CONVENTIONAL still flips them. That's being discussed right now. PiperOrigin-RevId: 553400525
This commit is contained in:
parent
9325af0af3
commit
a0b91e4062
|
@ -620,6 +620,7 @@ mediapipe_proto_library(
|
||||||
deps = [
|
deps = [
|
||||||
"//mediapipe/framework:calculator_options_proto",
|
"//mediapipe/framework:calculator_options_proto",
|
||||||
"//mediapipe/framework:calculator_proto",
|
"//mediapipe/framework:calculator_proto",
|
||||||
|
"//mediapipe/gpu:gpu_origin_proto",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -649,7 +650,11 @@ cc_library(
|
||||||
"//mediapipe/framework/formats:matrix",
|
"//mediapipe/framework/formats:matrix",
|
||||||
"//mediapipe/framework/formats:tensor",
|
"//mediapipe/framework/formats:tensor",
|
||||||
"//mediapipe/framework/port:ret_check",
|
"//mediapipe/framework/port:ret_check",
|
||||||
|
"//mediapipe/framework/port:status",
|
||||||
|
"//mediapipe/framework/port:statusor",
|
||||||
|
"//mediapipe/gpu:gpu_origin_cc_proto",
|
||||||
"//mediapipe/util:resource_util",
|
"//mediapipe/util:resource_util",
|
||||||
|
"@com_google_absl//absl/strings:str_format",
|
||||||
] + select({
|
] + select({
|
||||||
"//mediapipe/gpu:disable_gpu": [],
|
"//mediapipe/gpu:disable_gpu": [],
|
||||||
"//conditions:default": ["tensor_converter_calculator_gpu_deps"],
|
"//conditions:default": ["tensor_converter_calculator_gpu_deps"],
|
||||||
|
|
|
@ -15,6 +15,9 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/status/status.h"
|
||||||
|
#include "absl/status/statusor.h"
|
||||||
|
#include "absl/strings/str_format.h"
|
||||||
#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
|
#include "mediapipe/calculators/tensor/tensor_converter_calculator.pb.h"
|
||||||
#include "mediapipe/framework/calculator_framework.h"
|
#include "mediapipe/framework/calculator_framework.h"
|
||||||
#include "mediapipe/framework/formats/image_frame.h"
|
#include "mediapipe/framework/formats/image_frame.h"
|
||||||
|
@ -22,7 +25,7 @@
|
||||||
#include "mediapipe/framework/formats/tensor.h"
|
#include "mediapipe/framework/formats/tensor.h"
|
||||||
#include "mediapipe/framework/port.h"
|
#include "mediapipe/framework/port.h"
|
||||||
#include "mediapipe/framework/port/ret_check.h"
|
#include "mediapipe/framework/port/ret_check.h"
|
||||||
#include "mediapipe/util/resource_util.h"
|
#include "mediapipe/gpu/gpu_origin.pb.h"
|
||||||
|
|
||||||
#if !MEDIAPIPE_DISABLE_GPU
|
#if !MEDIAPIPE_DISABLE_GPU
|
||||||
#include "mediapipe/gpu/gpu_buffer.h"
|
#include "mediapipe/gpu/gpu_buffer.h"
|
||||||
|
@ -43,12 +46,36 @@
|
||||||
#endif // !MEDIAPIPE_DISABLE_GPU
|
#endif // !MEDIAPIPE_DISABLE_GPU
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr int kWorkgroupSize = 8; // Block size for GPU shader.
|
constexpr int kWorkgroupSize = 8; // Block size for GPU shader.
|
||||||
// Commonly used to compute the number of blocks to launch in a kernel.
|
// Commonly used to compute the number of blocks to launch in a kernel.
|
||||||
int NumGroups(const int size, const int group_size) { // NOLINT
|
int NumGroups(const int size, const int group_size) { // NOLINT
|
||||||
return (size + group_size - 1) / group_size;
|
return (size + group_size - 1) / group_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decides whether the input image has to be flipped vertically.
//
// When `gpu_origin` is unset, the legacy `flip_vertically` option is returned
// unchanged. Otherwise the origin alone decides: TOP_LEFT never flips, while
// DEFAULT and CONVENTIONAL flip on every build except Apple ones.
// Returns InvalidArgumentError for an origin value the switch does not cover.
absl::StatusOr<bool> ShouldFlipVertically(
    const mediapipe::TensorConverterCalculatorOptions& options) {
#ifdef __APPLE__
  // CONVENTIONAL means TOP_LEFT on Metal.
  constexpr bool kConventionalNeedsFlip = false;
#else
  // CONVENTIONAL means BOTTOM_LEFT on OpenGL.
  constexpr bool kConventionalNeedsFlip = true;
#endif

  if (options.has_gpu_origin()) {
    const auto origin = options.gpu_origin();
    switch (origin) {
      case mediapipe::GpuOrigin::DEFAULT:
      case mediapipe::GpuOrigin::CONVENTIONAL:
        return kConventionalNeedsFlip;
      case mediapipe::GpuOrigin::TOP_LEFT:
        return false;
    }
    // Reached only for a value outside the cases listed above.
    return absl::InvalidArgumentError(
        absl::StrFormat("Unhandled GPU origin %i", origin));
  }

  // Backwards compatibility: no gpu_origin given, honor the old boolean.
  return options.flip_vertically();
}
|
||||||
|
|
||||||
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
|
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
|
||||||
RowMajorMatrixXf;
|
RowMajorMatrixXf;
|
||||||
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
|
typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>
|
||||||
|
@ -58,6 +85,7 @@ constexpr char kImageFrameTag[] = "IMAGE";
|
||||||
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
|
constexpr char kGpuBufferTag[] = "IMAGE_GPU";
|
||||||
constexpr char kTensorsTag[] = "TENSORS";
|
constexpr char kTensorsTag[] = "TENSORS";
|
||||||
constexpr char kMatrixTag[] = "MATRIX";
|
constexpr char kMatrixTag[] = "MATRIX";
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
|
@ -593,7 +621,7 @@ absl::Status TensorConverterCalculator::LoadOptions(CalculatorContext* cc) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get y-flip mode.
|
// Get y-flip mode.
|
||||||
flip_vertically_ = options.flip_vertically();
|
ASSIGN_OR_RETURN(flip_vertically_, ShouldFlipVertically(options));
|
||||||
|
|
||||||
// Get row_major_matrix mode.
|
// Get row_major_matrix mode.
|
||||||
row_major_matrix_ = options.row_major_matrix();
|
row_major_matrix_ = options.row_major_matrix();
|
||||||
|
|
|
@ -3,6 +3,7 @@ syntax = "proto2";
|
||||||
package mediapipe;
|
package mediapipe;
|
||||||
|
|
||||||
import "mediapipe/framework/calculator.proto";
|
import "mediapipe/framework/calculator.proto";
|
||||||
|
import "mediapipe/gpu/gpu_origin.proto";
|
||||||
|
|
||||||
// Full Example:
|
// Full Example:
|
||||||
//
|
//
|
||||||
|
@ -43,8 +44,14 @@ message TensorConverterCalculatorOptions {
|
||||||
// with a coordinate system where the origin is at the bottom-left corner
|
// with a coordinate system where the origin is at the bottom-left corner
|
||||||
// (e.g., in OpenGL) whereas the ML model expects an image with a top-left
|
// (e.g., in OpenGL) whereas the ML model expects an image with a top-left
|
||||||
// origin.
|
// origin.
|
||||||
|
// Prefer gpu_origin over this field.
|
||||||
optional bool flip_vertically = 2 [default = false];
|
optional bool flip_vertically = 2 [default = false];
|
||||||
|
|
||||||
|
// Determines when the input image should be flipped vertically.
|
||||||
|
// See GpuOrigin.Mode for more information.
|
||||||
|
// If unset, falls back to flip_vertically for backwards compatibility.
|
||||||
|
optional GpuOrigin.Mode gpu_origin = 10;
|
||||||
|
|
||||||
// Controls how many channels of the input image get passed through to the
|
// Controls how many channels of the input image get passed through to the
|
||||||
// tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
|
// tensor. Valid values are 1,3,4 only. Ignored for iOS GPU.
|
||||||
optional int32 max_num_channels = 3 [default = 3];
|
optional int32 max_num_channels = 3 [default = 3];
|
||||||
|
|
|
@ -259,8 +259,8 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
|
||||||
for (std::pair<float, float> range : range_values) {
|
for (std::pair<float, float> range : range_values) {
|
||||||
CalculatorGraph graph;
|
CalculatorGraph graph;
|
||||||
CalculatorGraphConfig graph_config =
|
CalculatorGraphConfig graph_config =
|
||||||
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
|
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(absl::Substitute(
|
||||||
absl::Substitute(R"(
|
R"pb(
|
||||||
input_stream: "input_image"
|
input_stream: "input_image"
|
||||||
node {
|
node {
|
||||||
calculator: "TensorConverterCalculator"
|
calculator: "TensorConverterCalculator"
|
||||||
|
@ -268,14 +268,11 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
|
||||||
output_stream: "TENSORS:tensor"
|
output_stream: "TENSORS:tensor"
|
||||||
options {
|
options {
|
||||||
[mediapipe.TensorConverterCalculatorOptions.ext] {
|
[mediapipe.TensorConverterCalculatorOptions.ext] {
|
||||||
output_tensor_float_range {
|
output_tensor_float_range { min: $0 max: $1 }
|
||||||
min: $0
|
|
||||||
max: $1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
)pb",
|
||||||
)",
|
|
||||||
/*$0=*/range.first,
|
/*$0=*/range.first,
|
||||||
/*$1=*/range.second));
|
/*$1=*/range.second));
|
||||||
std::vector<Packet> output_packets;
|
std::vector<Packet> output_packets;
|
||||||
|
@ -320,4 +317,113 @@ TEST_F(TensorConverterCalculatorTest, SetOutputRange) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that `flip_vertically: true` (with no gpu_origin set) swaps the two
// rows of a 1x2 GRAY8 image in the produced float tensor.
TEST_F(TensorConverterCalculatorTest, FlipVertically) {
  CalculatorGraph graph;
  CalculatorGraphConfig graph_config =
      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
        input_stream: "input_image"
        node {
          calculator: "TensorConverterCalculator"
          input_stream: "IMAGE:input_image"
          output_stream: "TENSORS:tensor"
          options {
            [mediapipe.TensorConverterCalculatorOptions.ext] {
              flip_vertically: true
              output_tensor_float_range { min: 0 max: 255 }
            }
          }
        }
      )pb");
  std::vector<Packet> output_packets;
  tool::AddVectorSink("tensor", &graph_config, &output_packets);

  // Run the graph.
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));
  auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
  cv::Mat mat = mediapipe::formats::MatView(input_image.get());
  constexpr uint8_t kY0Value = 100;
  constexpr uint8_t kY1Value = 200;
  mat.at<uint8_t>(0, 0) = kY0Value;
  mat.at<uint8_t>(1, 0) = kY1Value;  // Note: y, x!
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "input_image", Adopt(input_image.release()).At(Timestamp(0))));

  // Wait until the calculator finishes processing.
  MP_ASSERT_OK(graph.WaitUntilIdle());
  // Fatal check: output_packets[0] is indexed right below.
  ASSERT_THAT(output_packets.size(), Eq(1));

  // Get and process results.
  const std::vector<Tensor>& tensor_vec =
      output_packets[0].Get<std::vector<Tensor>>();
  // Fatal check: tensor_vec[0] is indexed right below.
  ASSERT_THAT(tensor_vec.size(), Eq(1));

  const Tensor* tensor = &tensor_vec[0];

  EXPECT_THAT(tensor->element_type(), Eq(Tensor::ElementType::kFloat32));
  // Keep the read view in a local so it outlives the reads through `dataf`.
  const auto read_view = tensor->GetCpuReadView();
  const float* dataf = read_view.buffer<float>();
  EXPECT_EQ(kY1Value, static_cast<int>(roundf(dataf[0])));  // Y0, Y1 flipped!
  EXPECT_EQ(kY0Value, static_cast<int>(roundf(dataf[1])));

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("input_image"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}
|
||||||
|
|
||||||
|
// Verifies that an explicit `gpu_origin: TOP_LEFT` wins over
// `flip_vertically: true`: the two rows of a 1x2 GRAY8 image must reach the
// float tensor in their original order.
TEST_F(TensorConverterCalculatorTest, GpuOriginOverridesFlipVertically) {
  CalculatorGraph graph;
  CalculatorGraphConfig graph_config =
      mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
        input_stream: "input_image"
        node {
          calculator: "TensorConverterCalculator"
          input_stream: "IMAGE:input_image"
          output_stream: "TENSORS:tensor"
          options {
            [mediapipe.TensorConverterCalculatorOptions.ext] {
              flip_vertically: true
              gpu_origin: TOP_LEFT
              output_tensor_float_range { min: 0 max: 255 }
            }
          }
        }
      )pb");
  std::vector<Packet> output_packets;
  tool::AddVectorSink("tensor", &graph_config, &output_packets);

  // Run the graph.
  MP_ASSERT_OK(graph.Initialize(graph_config));
  MP_ASSERT_OK(graph.StartRun({}));
  auto input_image = absl::make_unique<ImageFrame>(ImageFormat::GRAY8, 1, 2);
  cv::Mat mat = mediapipe::formats::MatView(input_image.get());
  constexpr uint8_t kY0Value = 100;
  constexpr uint8_t kY1Value = 200;
  mat.at<uint8_t>(0, 0) = kY0Value;
  mat.at<uint8_t>(1, 0) = kY1Value;  // Note: y, x!
  MP_ASSERT_OK(graph.AddPacketToInputStream(
      "input_image", Adopt(input_image.release()).At(Timestamp(0))));

  // Wait until the calculator finishes processing.
  MP_ASSERT_OK(graph.WaitUntilIdle());
  // Fatal check: output_packets[0] is indexed right below.
  ASSERT_THAT(output_packets.size(), Eq(1));

  // Get and process results.
  const std::vector<Tensor>& tensor_vec =
      output_packets[0].Get<std::vector<Tensor>>();
  // Fatal check: tensor_vec[0] is indexed right below.
  ASSERT_THAT(tensor_vec.size(), Eq(1));

  const Tensor* tensor = &tensor_vec[0];

  EXPECT_THAT(tensor->element_type(), Eq(Tensor::ElementType::kFloat32));
  // Keep the read view in a local so it outlives the reads through `dataf`.
  const auto read_view = tensor->GetCpuReadView();
  const float* dataf = read_view.buffer<float>();
  EXPECT_EQ(kY0Value, static_cast<int>(roundf(dataf[0])));  // Not flipped!
  EXPECT_EQ(kY1Value, static_cast<int>(roundf(dataf[1])));

  // Fully close graph at end, otherwise calculator+tensors are destroyed
  // after calling WaitUntilDone().
  MP_ASSERT_OK(graph.CloseInputStream("input_image"));
  MP_ASSERT_OK(graph.WaitUntilDone());
}
|
||||||
|
|
||||||
} // namespace mediapipe
|
} // namespace mediapipe
|
||||||
|
|
Loading…
Reference in New Issue
Block a user