Internal change

PiperOrigin-RevId: 518814155
This commit is contained in:
MediaPipe Team 2023-03-23 03:31:45 -07:00 committed by Copybara-Service
parent 58fa1e2ec3
commit 5998e96eed
11 changed files with 413 additions and 0 deletions

View File

@ -22,6 +22,7 @@ cc_library(
deps = [
":buffer",
"//mediapipe/framework/formats:frame_buffer",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
@ -37,6 +38,7 @@ cc_test(
deps = [
":frame_buffer_util",
"//mediapipe/framework/formats:frame_buffer",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:status",
],
@ -46,12 +48,14 @@ cc_library(
name = "buffer",
srcs = [
"buffer_common.cc",
"float_buffer.cc",
"gray_buffer.cc",
"rgb_buffer.cc",
"yuv_buffer.cc",
],
hdrs = [
"buffer_common.h",
"float_buffer.h",
"gray_buffer.h",
"rgb_buffer.h",
"yuv_buffer.h",
@ -61,6 +65,7 @@ cc_library(
"//mediapipe/util/frame_buffer/halide:gray_resize_halide",
"//mediapipe/util/frame_buffer/halide:gray_rotate_halide",
"//mediapipe/util/frame_buffer/halide:rgb_flip_halide",
"//mediapipe/util/frame_buffer/halide:rgb_float_halide",
"//mediapipe/util/frame_buffer/halide:rgb_gray_halide",
"//mediapipe/util/frame_buffer/halide:rgb_resize_halide",
"//mediapipe/util/frame_buffer/halide:rgb_rgb_halide",

View File

@ -0,0 +1,57 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/frame_buffer/float_buffer.h"
#include <memory>
#include <utility>
namespace mediapipe {
namespace frame_buffer {
// Wraps caller-provided memory. Nothing is allocated here and owned_buffer_
// keeps its default (null) value, so this instance does not own `data`.
FloatBuffer::FloatBuffer(float* data, int width, int height, int channels) {
  Initialize(data, width, height, channels);
}
// Allocates a value-initialized backing array of FloatSize(width, height,
// channels) floats, keeps ownership of it in owned_buffer_, and wraps it in an
// interleaved Halide buffer.
FloatBuffer::FloatBuffer(int width, int height, int channels)
    : owned_buffer_(
          std::make_unique<float[]>(FloatSize(width, height, channels))) {
  Initialize(owned_buffer_.get(), width, height, channels);
}
// Copy construction duplicates only the Halide view; the new object never
// owns the backing memory (owned_buffer_ is null).
FloatBuffer::FloatBuffer(const FloatBuffer& other)
    : owned_buffer_(nullptr), buffer_(other.buffer_) {}
// Move construction delegates to the move-assignment operator so that both
// move paths share a single implementation.
FloatBuffer::FloatBuffer(FloatBuffer&& other) {
  operator=(std::move(other));
}
// Copy assignment: rebinds this object's Halide view to `other`'s backing
// memory. owned_buffer_ is left untouched, so ownership is never shared.
FloatBuffer& FloatBuffer::operator=(const FloatBuffer& other) {
  if (this == &other) {
    // Self-assignment: nothing to do.
    return *this;
  }
  buffer_ = other.buffer_;
  return *this;
}
// Move assignment: takes over `other`'s backing allocation (if it owns one)
// together with its Halide view.
//
// Transferring owned_buffer_ is essential: without it the destination would
// keep viewing memory that is freed as soon as the moved-from source is
// destroyed. After the move `other` no longer owns the allocation; its
// Halide view is left as-is (a non-owning view of the same memory).
//
// Any allocation previously owned by *this is released by the unique_ptr
// assignment.
FloatBuffer& FloatBuffer::operator=(FloatBuffer&& other) {
  if (this != &other) {
    owned_buffer_ = std::move(other.owned_buffer_);
    buffer_ = other.buffer_;
  }
  return *this;
}
// Nothing to do explicitly: the members clean up after themselves
// (owned_buffer_ frees the backing array when it is non-null).
FloatBuffer::~FloatBuffer() = default;
void FloatBuffer::Initialize(float* data, int width, int height, int channels) {
buffer_ = Halide::Runtime::Buffer<float>::make_interleaved(data, width,
height, channels);
}
} // namespace frame_buffer
} // namespace mediapipe

View File

@ -0,0 +1,85 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_FRAME_BUFFER_FLOAT_BUFFER_H_
#define MEDIAPIPE_UTIL_FRAME_BUFFER_FLOAT_BUFFER_H_
#include "HalideBuffer.h"
#include "HalideRuntime.h"
namespace mediapipe {
namespace frame_buffer {
// FloatBuffer is a view over an interleaved floating-point image.
//
// Copying or moving a FloatBuffer is cheap: the backing memory is shared
// between instances and is never deep copied.
//
// FloatBuffer requires a minimum image width depending on the natural vector
// size of the platform, e.g., 16px. FloatBuffer itself does not validate
// this.
class FloatBuffer {
 public:
  // Number of float elements (not bytes) needed to store an image with the
  // given dimensions.
  static int FloatSize(int width, int height, int channels) {
    const int pixel_count = width * height;
    return pixel_count * channels;
  }

  // Creates a FloatBuffer viewing caller-provided memory with the given
  // dimensions.
  FloatBuffer(float* data, int width, int height, int channels);

  // Creates a FloatBuffer with the given dimensions.
  //
  // The backing memory is allocated and owned by this FloatBuffer.
  FloatBuffer(int width, int height, int channels);

  // Copy construction: the source keeps ownership of its backing buffer.
  FloatBuffer(const FloatBuffer& other);

  // Move construction: per the class contract, the source gives up ownership
  // of any backing buffer it owns.
  FloatBuffer(FloatBuffer&& other);

  // Copy and move assignment.
  FloatBuffer& operator=(const FloatBuffer& other);
  FloatBuffer& operator=(FloatBuffer&& other);

  ~FloatBuffer();

  // Gives up ownership of the owned backing buffer and returns the raw
  // pointer that was held (null when nothing was owned).
  float* Release() {
    return owned_buffer_.release();
  }

  // Read-only access to the underlying halide_buffer_t.
  const halide_buffer_t* buffer() const {
    return buffer_.raw_buffer();
  }

  // Mutable access to the underlying halide_buffer_t.
  halide_buffer_t* buffer() {
    return buffer_.raw_buffer();
  }

  // Image width (extent of dimension 0).
  int width() const {
    return buffer_.dim(0).extent();
  }

  // Image height (extent of dimension 1).
  int height() const {
    return buffer_.dim(1).extent();
  }

  // Channel count (extent of dimension 2).
  int channels() const {
    return buffer_.dim(2).extent();
  }

 private:
  // (Re)binds buffer_ to `data` with an interleaved layout.
  void Initialize(float* data, int width, int height, int channels);

  // Non-null if and only if this FloatBuffer owns its backing memory.
  std::unique_ptr<float[]> owned_buffer_;

  // Halide view of the backing memory; the layout is always
  // width x height x channel (interleaved).
  Halide::Runtime::Buffer<float> buffer_;
};
} // namespace frame_buffer
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_FRAME_BUFFER_FLOAT_BUFFER_H_

View File

@ -22,7 +22,9 @@
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "mediapipe/framework/formats/frame_buffer.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/status_macros.h"
#include "mediapipe/util/frame_buffer/float_buffer.h"
#include "mediapipe/util/frame_buffer/gray_buffer.h"
#include "mediapipe/util/frame_buffer/rgb_buffer.h"
#include "mediapipe/util/frame_buffer/yuv_buffer.h"
@ -50,6 +52,22 @@ bool IsSupportedYuvBuffer(const FrameBuffer& buffer) {
buffer.format() == FrameBuffer::Format::kYV21;
}
// Maps the format of `buffer` to its channel count. Only the interleaved
// single-planar formats (gray, RGB, RGBA) are accepted; any other format
// produces an InvalidArgumentError.
absl::StatusOr<int> NumberOfChannels(const FrameBuffer& buffer) {
  if (buffer.format() == FrameBuffer::Format::kGRAY) {
    return kGrayChannel;
  }
  if (buffer.format() == FrameBuffer::Format::kRGB) {
    return kRgbChannels;
  }
  if (buffer.format() == FrameBuffer::Format::kRGBA) {
    return kRgbaChannels;
  }
  return absl::InvalidArgumentError(
      absl::StrFormat("Unsupported buffer format: %i.", buffer.format()));
}
// Shared validation functions.
//------------------------------------------------------------------------------
@ -216,6 +234,25 @@ absl::Status ValidateConvertFormats(FrameBuffer::Format from_format,
}
}
// Checks that `tensor` can receive the float conversion of `buffer`:
//   * the tensor holds kFloat32 elements,
//   * it has four dimensions with a leading (batch) dimension of 1,
//   * its remaining dimensions are [height, width, channels] of `buffer`.
// Returns InvalidArgumentError on the first violated condition, or the error
// from NumberOfChannels() when the buffer format has no channel count.
absl::Status ValidateFloatTensorInputs(const FrameBuffer& buffer,
                                       const Tensor& tensor) {
  const bool is_float_tensor =
      tensor.element_type() == Tensor::ElementType::kFloat32;
  if (!is_float_tensor) {
    return absl::InvalidArgumentError(absl::StrFormat(
        "Tensor type %i is not supported.", tensor.element_type()));
  }

  const auto& shape = tensor.shape();
  const auto& dims = shape.dims;
  const bool is_single_batch = dims.size() == 4 && dims[0] == 1;
  if (!is_single_batch) {
    return absl::InvalidArgumentError("Expected tensor with batch size of 1.");
  }

  ASSIGN_OR_RETURN(int expected_channels, NumberOfChannels(buffer));
  // Tensor layout is [batch, height, width, channels].
  const bool same_dimensions = dims[2] == buffer.dimension().width &&
                               dims[1] == buffer.dimension().height &&
                               dims[3] == expected_channels;
  if (!same_dimensions) {
    return absl::InvalidArgumentError(
        "Input buffer and output tensor must have the same dimensions.");
  }
  return absl::OkStatus();
}
// Construct buffer helper functions.
//------------------------------------------------------------------------------
@ -380,6 +417,19 @@ absl::Status RotateRgb(const FrameBuffer& buffer, int angle,
: absl::UnknownError("Halide rgb[a] rotate operation failed.");
}
// Converts an RGB[A] `buffer` to floats, writing directly into the CPU
// storage of `tensor`. Each output value is `input * scale + offset`.
// Returns UnknownError when the Halide conversion reports a failure.
absl::Status ToFloatTensorRgb(const FrameBuffer& buffer, float scale,
                              float offset, Tensor& tensor) {
  ASSIGN_OR_RETURN(auto rgb_input, CreateRgbBuffer(buffer));
  ASSIGN_OR_RETURN(int num_channels, NumberOfChannels(buffer));

  // The write view must stay alive while the conversion writes through the
  // pointer obtained from it.
  auto write_view = tensor.GetCpuWriteView();
  float* tensor_data = write_view.buffer<float>();

  // Non-owning float view over the tensor's storage.
  FloatBuffer float_output(tensor_data, buffer.dimension().width,
                           buffer.dimension().height, num_channels);

  if (!rgb_input.ToFloat(scale, offset, &float_output)) {
    return absl::UnknownError("Halide rgb[a] to float conversion failed.");
  }
  return absl::OkStatus();
}
// Yuv transformation functions.
//------------------------------------------------------------------------------
@ -717,6 +767,18 @@ absl::Status Convert(const FrameBuffer& buffer, FrameBuffer* output_buffer) {
}
}
// Validates the inputs, then dispatches the float conversion based on the
// buffer format. Only FrameBuffer::Format::kRGB is dispatched here; every
// other format yields an InvalidArgumentError.
absl::Status ToFloatTensor(const FrameBuffer& buffer, float scale, float offset,
                           Tensor& tensor) {
  MP_RETURN_IF_ERROR(ValidateFloatTensorInputs(buffer, tensor));

  if (buffer.format() == FrameBuffer::Format::kRGB) {
    return ToFloatTensorRgb(buffer, scale, offset, tensor);
  }
  return absl::InvalidArgumentError(
      absl::StrFormat("Format %i is not supported.", buffer.format()));
}
int GetFrameBufferByteSize(FrameBuffer::Dimension dimension,
FrameBuffer::Format format) {
switch (format) {

View File

@ -21,6 +21,7 @@
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/framework/formats/frame_buffer.h"
#include "mediapipe/framework/formats/tensor.h"
namespace mediapipe {
namespace frame_buffer {
@ -98,6 +99,14 @@ absl::Status FlipVertically(const FrameBuffer& buffer,
// on the buffer and output_buffer dimensions.
absl::Status Convert(const FrameBuffer& buffer, FrameBuffer* output_buffer);
// Converts `buffer` into the provided float Tensor. Each value is converted to
// a float using:
// output = input * scale + offset
//
// `tensor` must hold kFloat32 elements and have the shape
// [1, height, width, channels] matching `buffer`; otherwise an
// InvalidArgumentError is returned.
//
// Note that only interleaved single-planar formats support this operation.
// NOTE(review): the implementation in frame_buffer_util.cc currently only
// dispatches FrameBuffer::Format::kRGB; other formats are rejected with an
// InvalidArgumentError.
absl::Status ToFloatTensor(const FrameBuffer& buffer, float scale, float offset,
Tensor& tensor);
// Miscellaneous Methods
// -----------------------------------------------------------------

View File

@ -19,6 +19,7 @@
#include <vector>
#include "mediapipe/framework/formats/frame_buffer.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status_macros.h"
@ -520,6 +521,27 @@ TEST(FrameBufferUtil, RgbaToRgbConversion) {
EXPECT_EQ(output_data[5], 100);
}
// Checks that ToFloatTensor() writes `input * scale + offset` for every
// channel of a 2x1 RGB buffer into a [1, height, width, 3] float tensor.
TEST(FrameBufferUtil, RgbToFloatTensor) {
  constexpr FrameBuffer::Dimension kBufferDimension = {.width = 2, .height = 1};
  constexpr float kScale = 0.1f, kOffset = 0.1f;
  uint8_t data[] = {1, 2, 3, 4, 5, 6};
  auto input = CreateFromRgbRawBuffer(data, kBufferDimension);
  Tensor output(
      Tensor::ElementType::kFloat32,
      Tensor::Shape{1, kBufferDimension.height, kBufferDimension.width, 3});

  MP_ASSERT_OK(ToFloatTensor(*input, kScale, kOffset, output));

  auto view = output.GetCpuReadView();
  const float* output_data = view.buffer<float>();
  // The expected values are the results of floating-point arithmetic, so
  // compare with EXPECT_FLOAT_EQ (ULP-based tolerance) rather than exact
  // equality, which may break under different rounding or FMA contraction.
  EXPECT_FLOAT_EQ(output_data[0], 0.2f);
  EXPECT_FLOAT_EQ(output_data[1], 0.3f);
  EXPECT_FLOAT_EQ(output_data[2], 0.4f);
  EXPECT_FLOAT_EQ(output_data[3], 0.5f);
  EXPECT_FLOAT_EQ(output_data[4], 0.6f);
  EXPECT_FLOAT_EQ(output_data[5], 0.7f);
}
TEST(FrameBufferUtil, RgbaCrop) {
constexpr FrameBuffer::Dimension kBufferDimension = {.width = 3, .height = 2},
kOutputDimension = {.width = 1, .height = 1};

View File

@ -62,6 +62,12 @@ halide_library(
generator_name = "rgb_rgb_generator",
)
# Converts interleaved uint8 RGB[A] data to float:
# dst = float(src) * scale + offset.
halide_library(
name = "rgb_float_halide",
srcs = ["rgb_float_generator.cc"],
generator_name = "rgb_float_generator",
)
# YUV operations:
halide_library(
name = "yuv_flip_halide",

View File

@ -0,0 +1,59 @@
// Copyright 2023 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "Halide.h"
namespace {
// Halide generator that converts a 3-dimensional uint8 image into a float
// image of the same shape, applying `value * scale + offset` to every element.
class RgbFloat : public Halide::Generator<RgbFloat> {
 public:
  // Pure variables indexing column, row and channel.
  Var x{"x"};
  Var y{"y"};
  Var c{"c"};

  // Source image (uint8, three dimensions).
  Input<Buffer<uint8_t, 3>> src_rgb{"src_rgb"};
  // Multiplicative factor applied to each source value.
  Input<float> scale{"scale"};
  // Additive term applied after scaling.
  Input<float> offset{"offset"};

  // Destination image (float, three dimensions).
  Output<Buffer<float, 3>> dst_float{"dst_float"};

  void generate();
  void schedule();
};
void RgbFloat::generate() {
dst_float(x, y, c) = Halide::cast<float>(src_rgb(x, y, c)) * scale + offset;
}
// Constrains both buffers to start at zero in every dimension and to use an
// interleaved layout: channel stride 1, x stride equal to the channel count.
// The row (y) stride is left unconstrained.
void RgbFloat::schedule() {
  constexpr int kDimensions = 3;

  // Source buffer constraints.
  for (int d = 0; d < kDimensions; ++d) {
    src_rgb.dim(d).set_min(0);
  }
  src_rgb.dim(0).set_stride(src_rgb.dim(2).extent());
  src_rgb.dim(2).set_stride(1);

  // Destination buffer constraints.
  for (int d = 0; d < kDimensions; ++d) {
    dst_float.dim(d).set_min(0);
  }
  dst_float.dim(0).set_stride(dst_float.dim(2).extent());
  dst_float.dim(2).set_stride(1);
}
} // namespace
// Registers RgbFloat as "rgb_float_generator", the generator_name referenced
// by the rgb_float_halide build rule.
HALIDE_REGISTER_GENERATOR(RgbFloat, rgb_float_generator)

View File

@ -17,8 +17,10 @@
#include <utility>
#include "mediapipe/util/frame_buffer/buffer_common.h"
#include "mediapipe/util/frame_buffer/float_buffer.h"
#include "mediapipe/util/frame_buffer/gray_buffer.h"
#include "mediapipe/util/frame_buffer/halide/rgb_flip_halide.h"
#include "mediapipe/util/frame_buffer/halide/rgb_float_halide.h"
#include "mediapipe/util/frame_buffer/halide/rgb_gray_halide.h"
#include "mediapipe/util/frame_buffer/halide/rgb_resize_halide.h"
#include "mediapipe/util/frame_buffer/halide/rgb_rgb_halide.h"
@ -122,6 +124,12 @@ bool RgbBuffer::Convert(RgbBuffer* output) {
return result == 0;
}
// Runs the rgb_float Halide pipeline from this buffer into `output`.
// Returns true when the pipeline reports success (a zero result code).
bool RgbBuffer::ToFloat(float scale, float offset, FloatBuffer* output) {
  return rgb_float_halide(buffer(), scale, offset, output->buffer()) == 0;
}
void RgbBuffer::Initialize(uint8_t* data, int width, int height, bool alpha) {
const int channels = alpha ? 4 : 3;
buffer_ = Halide::Runtime::Buffer<uint8_t>::make_interleaved(

View File

@ -19,6 +19,7 @@
#include "HalideBuffer.h"
#include "HalideRuntime.h"
#include "mediapipe/util/frame_buffer/float_buffer.h"
#include "mediapipe/util/frame_buffer/gray_buffer.h"
#include "mediapipe/util/frame_buffer/yuv_buffer.h"
@ -106,6 +107,9 @@ class RgbBuffer {
// Performs a rgb to rgba / rgba to rgb format conversion.
bool Convert(RgbBuffer* output);
// Performs a RGB to float conversion. Each value written to `output` is
// computed as `input * scale + offset`. Returns true if the underlying Halide
// pipeline succeeded, false otherwise.
bool ToFloat(float scale, float offset, FloatBuffer* output);
// Release ownership of the owned backing buffer.
uint8_t* Release() { return owned_buffer_.release(); }

View File

@ -14,11 +14,13 @@
#include "mediapipe/util/frame_buffer/rgb_buffer.h"
#include <cstdlib>
#include <utility>
#include "absl/log/log.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/util/frame_buffer/float_buffer.h"
#include "mediapipe/util/frame_buffer/gray_buffer.h"
#include "mediapipe/util/frame_buffer/yuv_buffer.h"
@ -87,6 +89,22 @@ bool CompareArray(const uint8_t* lhs_ptr, const uint8_t* rhs_ptr, int width,
return true;
}
// Compares two densely packed width x height float arrays element by element.
// Returns true when every pair of elements differs by at most 1e-6 in
// absolute value; returns false at the first pair that differs by more.
bool CompareArray(const float* lhs_ptr, const float* rhs_ptr, int width,
                  int height) {
  constexpr float kMaxAbsDifference = 1e-6;
  for (int row = 0; row < height; ++row) {
    const int row_start = row * width;
    for (int col = 0; col < width; ++col) {
      const int index = row_start + col;
      const float difference = std::abs(lhs_ptr[index] - rhs_ptr[index]);
      if (difference > kMaxAbsDifference) {
        return false;
      }
    }
  }
  return true;
}
// Returns true if the halide buffers of two input GrayBuffer are identical.
// Otherwise, returns false;
bool CompareBuffer(const GrayBuffer& lhs, const GrayBuffer& rhs) {
@ -129,6 +147,20 @@ bool CompareBuffer(const YuvBuffer& lhs, const YuvBuffer& rhs) {
lhs.height() / 2);
}
// Returns true if the two FloatBuffers have identical dimensions and every
// element, across all channels, matches within CompareArray()'s tolerance.
// Otherwise, returns false.
bool CompareBuffer(const FloatBuffer& lhs, const FloatBuffer& rhs) {
  if (lhs.width() != rhs.width() || lhs.height() != rhs.height() ||
      lhs.channels() != rhs.channels()) {
    return false;
  }
  // The const overload of FloatBuffer::buffer() is sufficient for read-only
  // access to the host pointer, so no const_cast is needed.
  const float* reference_ptr =
      reinterpret_cast<const float*>(lhs.buffer()->host);
  const float* converted_ptr =
      reinterpret_cast<const float*>(rhs.buffer()->host);
  // FloatBuffer data is interleaved, so each row holds width * channels
  // floats. Passing only the width would leave most of the buffer unchecked.
  // Assumes rows are densely packed, which is the layout produced by
  // make_interleaved() in FloatBuffer::Initialize().
  return CompareArray(reference_ptr, converted_ptr,
                      lhs.width() * lhs.channels(), lhs.height());
}
TEST(RgbBufferTest, Properties) {
RgbBuffer rgb(2, 8, false), rgba(2, 8, true);
EXPECT_EQ(2, rgb.width());
@ -601,6 +633,70 @@ TEST(RgbBufferTest, PaddedRgbaConvertRgb) {
RgbBuffer rgb_buffer = RgbBuffer(rgb_data, kWidth, kHeight, false);
EXPECT_TRUE(CompareBuffer(rgb_buffer, result));
}
// Converts a tightly packed 2x1 RGB image to float and checks the result
// against the expected `pixel * kScale + kOffset` values.
TEST(RgbBufferTest, RgbToFloat) {
  constexpr int kWidth = 2;
  constexpr int kHeight = 1;
  constexpr int kChannels = 3;
  constexpr float kScale = 0.01f;
  constexpr float kOffset = 0.5f;

  uint8_t rgb_data[] = {200, 100, 50, 100, 50, 20};
  RgbBuffer rgb_source(rgb_data, kWidth, kHeight, false);

  FloatBuffer actual(kWidth, kHeight, kChannels);
  ASSERT_TRUE(rgb_source.ToFloat(kScale, kOffset, &actual));

  float expected_data[] = {2.5f, 1.5f, 1.0f, 1.5f, 1.0f, 0.7f};
  FloatBuffer expected(expected_data, kWidth, kHeight, kChannels);
  EXPECT_TRUE(CompareBuffer(expected, actual));
}
// Converts the padded RGB fixture to float (scale 0.01, no offset) and checks
// the result against the expected 4x2x3 values.
TEST(RgbBufferTest, PaddedRgbToFloat) {
  constexpr int kWidth = 4;
  constexpr int kHeight = 2;
  constexpr int kChannels = 3;
  constexpr float kScale = 0.01f;
  constexpr float kOffset = 0.0f;

  RgbBuffer padded_source = GetPaddedRgbBuffer();

  FloatBuffer actual(kWidth, kHeight, kChannels);
  ASSERT_TRUE(padded_source.ToFloat(kScale, kOffset, &actual));

  // One row of pixels per line, three channels per pixel.
  float expected_data[] = {
      0.1f, 0.2f, 0.3f, 0.2f, 0.3f, 0.4f, 0.3f, 0.4f, 0.5f, 0.4f, 0.5f, 0.6f,
      0.2f, 0.4f, 0.6f, 0.4f, 0.6f, 0.8f, 0.6f, 0.8f, 1.0f, 0.8f, 1.0f, 1.2f};
  FloatBuffer expected(expected_data, kWidth, kHeight, kChannels);
  EXPECT_TRUE(CompareBuffer(expected, actual));
}
// Converts a tightly packed 2x1 RGBA image to float and checks the result
// (alpha included) against the expected `pixel * kScale + kOffset` values.
TEST(RgbBufferTest, RgbaToFloat) {
  constexpr int kWidth = 2;
  constexpr int kHeight = 1;
  constexpr int kChannels = 4;
  constexpr float kScale = 0.01f;
  constexpr float kOffset = 0.5f;

  uint8_t rgba_data[] = {200, 100, 50, 30, 100, 50, 20, 70};
  RgbBuffer rgba_source(rgba_data, kWidth, kHeight, true);

  FloatBuffer actual(kWidth, kHeight, kChannels);
  ASSERT_TRUE(rgba_source.ToFloat(kScale, kOffset, &actual));

  float expected_data[] = {2.5f, 1.5f, 1.0f, 0.8f, 1.5f, 1.0f, 0.7f, 1.2f};
  FloatBuffer expected(expected_data, kWidth, kHeight, kChannels);
  EXPECT_TRUE(CompareBuffer(expected, actual));
}
// Converts the padded RGBA fixture to float (scale 0.01, no offset) and checks
// the result against the expected 4x2x4 values.
TEST(RgbBufferTest, PaddedRgbaToFloat) {
  constexpr int kWidth = 4;
  constexpr int kHeight = 2;
  constexpr int kChannels = 4;
  constexpr float kScale = 0.01f;
  constexpr float kOffset = 0.0f;

  RgbBuffer padded_source = GetPaddedRgbaBuffer();

  FloatBuffer actual(kWidth, kHeight, kChannels);
  ASSERT_TRUE(padded_source.ToFloat(kScale, kOffset, &actual));

  // Two pixels per line, four channels per pixel.
  float expected_data[] = {0.1f, 0.2f, 0.3f, 2.55f, 0.2f, 0.3f, 0.4f, 2.55f,
                           0.3f, 0.4f, 0.5f, 2.55f, 0.4f, 0.5f, 0.6f, 2.55f,
                           0.2f, 0.4f, 0.6f, 2.55f, 0.4f, 0.6f, 0.8f, 2.55f,
                           0.6f, 0.8f, 1.0f, 2.55f, 0.8f, 1.0f, 1.2f, 2.55f};
  FloatBuffer expected(expected_data, kWidth, kHeight, kChannels);
  EXPECT_TRUE(CompareBuffer(expected, actual));
}
} // namespace
} // namespace frame_buffer
} // namespace mediapipe