Internal change

PiperOrigin-RevId: 523351901
2023-04-11 03:17:01 -07:00 · 2023-04-11 03:17:01 -07:00 · a448790300
commit a448790300
parent 9793654364
7 changed files with 344 additions and 4 deletions
--- a/mediapipe/calculators/tensor/BUILD
+++ b/mediapipe/calculators/tensor/BUILD
@ -988,6 +988,11 @@ cc_library(
    }) + select({
        "//mediapipe/framework/port:disable_opencv": [],
        "//conditions:default": [":image_to_tensor_converter_opencv"],
+    }) + select({
+        "//mediapipe/framework/port:enable_halide": [
+            ":image_to_tensor_converter_frame_buffer",
+        ],
+        "//conditions:default": [],
    }),
    alwayslink = 1,
 )
@ -1121,6 +1126,26 @@ cc_library(
    ],
 )

+# FrameBuffer-based CPU implementation of the image-to-tensor converter. It is
+# only pulled into the image-to-tensor calculator dependencies when the
+# "//mediapipe/framework/port:enable_halide" config setting matches.
+cc_library(
+    name = "image_to_tensor_converter_frame_buffer",
+    srcs = ["image_to_tensor_converter_frame_buffer.cc"],
+    hdrs = ["image_to_tensor_converter_frame_buffer.h"],
+    deps = [
+        ":image_to_tensor_converter",
+        ":image_to_tensor_utils",
+        "//mediapipe/framework:calculator_context",
+        "//mediapipe/framework/formats:frame_buffer",
+        "//mediapipe/framework/formats:image",
+        "//mediapipe/framework/formats:tensor",
+        "//mediapipe/framework/port:status",
+        "//mediapipe/gpu:frame_buffer_view",
+        "//mediapipe/util/frame_buffer:frame_buffer_util",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
+        "@com_google_absl//absl/strings:str_format",
+    ],
+)
+
 cc_library(
    name = "image_to_tensor_converter_gl_buffer",
    srcs = ["image_to_tensor_converter_gl_buffer.cc"],
--- a/mediapipe/calculators/tensor/image_to_tensor_calculator.cc
+++ b/mediapipe/calculators/tensor/image_to_tensor_calculator.cc
@ -34,6 +34,8 @@

 #if !MEDIAPIPE_DISABLE_OPENCV
 #include "mediapipe/calculators/tensor/image_to_tensor_converter_opencv.h"
+#elif MEDIAPIPE_ENABLE_HALIDE
+#include "mediapipe/calculators/tensor/image_to_tensor_converter_frame_buffer.h"
 #endif

 #if !MEDIAPIPE_DISABLE_GPU
@ -273,10 +275,19 @@ class ImageToTensorCalculator : public Node {
                         CreateOpenCvConverter(
                             cc, GetBorderMode(options_.border_mode()),
                             GetOutputTensorType(/*uses_gpu=*/false, params_)));
+// TODO: FrameBuffer-based converter needs to call GetGpuBuffer()
+// to get access to a FrameBuffer view. Investigate if GetGpuBuffer() can be
+// made available even with MEDIAPIPE_DISABLE_GPU set.
+#elif MEDIAPIPE_ENABLE_HALIDE
+        ASSIGN_OR_RETURN(cpu_converter_,
+                         CreateFrameBufferConverter(
+                             cc, GetBorderMode(options_.border_mode()),
+                             GetOutputTensorType(/*uses_gpu=*/false, params_)));
 #else
-        LOG(FATAL) << "Cannot create image to tensor opencv converter since "
-                      "MEDIAPIPE_DISABLE_OPENCV is defined.";
-#endif  // !MEDIAPIPE_DISABLE_OPENCV
+        LOG(FATAL) << "Cannot create image to tensor CPU converter since "
+                      "MEDIAPIPE_DISABLE_OPENCV is defined and "
+                      "MEDIAPIPE_ENABLE_HALIDE is not defined.";
+#endif  // !MEDIAPIPE_DISABLE_OPENCV
      }
    }
    return absl::OkStatus();
--- a/mediapipe/calculators/tensor/image_to_tensor_converter_frame_buffer.cc
+++ b/mediapipe/calculators/tensor/image_to_tensor_converter_frame_buffer.cc
@ -0,0 +1,246 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/calculators/tensor/image_to_tensor_converter_frame_buffer.h"
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_format.h"
+#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
+#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
+#include "mediapipe/framework/calculator_context.h"
+#include "mediapipe/framework/formats/frame_buffer.h"
+#include "mediapipe/framework/formats/image.h"
+#include "mediapipe/framework/formats/tensor.h"
+#include "mediapipe/framework/port/status_macros.h"
+#include "mediapipe/gpu/frame_buffer_view.h"
+#include "mediapipe/util/frame_buffer/frame_buffer_util.h"
+
+namespace mediapipe {
+
+namespace {
+
+// Converts from radians (clockwise) to degrees (counter-clockwise) in [0,360).
+//
+// The angle is negated (to flip the rotation direction), converted to degrees,
+// rounded to the nearest whole degree and normalized into [0, 360).
+int RadiansToDegrees(float radians) {
+  // Spelled out locally because M_PI is a non-standard <cmath> extension that
+  // is not available on every toolchain by default.
+  constexpr double kPi = 3.14159265358979323846;
+  // Reduce to (-360, 360) while still in floating point: converting a finite
+  // value that does not fit in an int to int is undefined behavior.
+  const double reduced = std::fmod(-radians * 180 / kPi, 360.0);
+  // Rounding may land exactly on +/-360, hence the extra integer modulo.
+  int degrees = static_cast<int>(std::round(reduced)) % 360;
+  if (degrees < 0) {
+    degrees += 360;
+  }
+  return degrees;
+}
+
+// FrameBuffer-based implementation of ImageToTensorConverter.
+//
+// Only regions of interest whose rotation rounds to a multiple of 90° are
+// supported; Convert() returns an Unimplemented error for anything else.
+class FrameBufferProcessor : public ImageToTensorConverter {
+ public:
+  // `tensor_type` selects the conversion path taken by Convert(): kUInt8
+  // writes directly into the output tensor, any other type goes through the
+  // float conversion. Explicit so that a Tensor::ElementType never converts
+  // implicitly into a converter object.
+  explicit FrameBufferProcessor(Tensor::ElementType tensor_type)
+      : tensor_type_(tensor_type) {}
+
+  // Extracts `roi` from `input` and writes the result into `output_tensor`.
+  // `tensor_buffer_offset` must be 0. For uint8 output, `range_min` and
+  // `range_max` must truncate to 0 and 255 respectively.
+  absl::Status Convert(const mediapipe::Image& input, const RotatedRect& roi,
+                       float range_min, float range_max,
+                       int tensor_buffer_offset,
+                       Tensor& output_tensor) override;
+
+ private:
+  // Checks that `output_shape` has 4 dimensions, batch size 1 and 3 channels.
+  absl::Status ValidateTensorShape(const Tensor::Shape& output_shape);
+  // Crops, rotates and resizes the input based on the provided
+  // region-of-interest.
+  absl::Status CropRotateResize90Degrees(
+      std::shared_ptr<const FrameBuffer> input, const RotatedRect& roi,
+      std::shared_ptr<FrameBuffer> output);
+  // Converts the input FrameBuffer to a float Tensor. Output tensor must have
+  // type kFloat32.
+  absl::Status ConvertToFloatTensor(
+      std::shared_ptr<const FrameBuffer> input_frame, float range_min,
+      float range_max, Tensor& output_tensor);
+
+  // Element type of the tensors produced by this converter.
+  Tensor::ElementType tensor_type_;
+
+  // Temporary buffers and their respective sizes. They are kept between calls
+  // and only reallocated when a larger size than the current one is needed.
+  std::unique_ptr<uint8_t[]> cropped_buffer_;
+  size_t cropped_buffer_size_ = 0;
+  std::unique_ptr<uint8_t[]> rotated_buffer_;
+  size_t rotated_buffer_size_ = 0;
+  std::unique_ptr<uint8_t[]> output_buffer_;
+  size_t output_buffer_size_ = 0;
+};
+
+// Converts the `roi` region of `input` into `output_tensor`.
+//
+// uint8 output is written straight into the tensor's CPU buffer; any other
+// tensor type is first rendered into an intermediate RGB buffer and then
+// converted to float using the [range_min, range_max] value range.
+absl::Status FrameBufferProcessor::Convert(const mediapipe::Image& input,
+                                           const RotatedRect& roi,
+                                           float range_min, float range_max,
+                                           int tensor_buffer_offset,
+                                           Tensor& output_tensor) {
+  // TODO: add support for non-zero tensor buffer offset.
+  RET_CHECK_EQ(tensor_buffer_offset, 0)
+      << "Non-zero tensor_buffer_offset input is not supported yet.";
+
+  const bool writes_uint8 = tensor_type_ == Tensor::ElementType::kUInt8;
+
+  // Range other than [0,255] is not supported for uint8 tensor outputs.
+  if (writes_uint8) {
+    RET_CHECK(static_cast<int>(range_min) == 0 &&
+              static_cast<int>(range_max) == 255);
+  }
+
+  auto input_frame = input.GetGpuBuffer().GetReadView<FrameBuffer>();
+  const auto& output_shape = output_tensor.shape();
+  MP_RETURN_IF_ERROR(ValidateTensorShape(output_shape));
+  FrameBuffer::Dimension output_dimension{/*width=*/output_shape.dims[2],
+                                          /*height=*/output_shape.dims[1]};
+
+  // Only multiples of 90° have an (optimized) implementation so far.
+  // TODO: add support for arbitrary rotations
+  if (RadiansToDegrees(roi.rotation) % 90 != 0) {
+    return absl::UnimplementedError(
+        "FrameBufferConverter doesn't yet support rotations that are not "
+        "multiples of 90°.");
+  }
+
+  if (writes_uint8) {
+    // Render directly into the tensor memory, no intermediate copy needed.
+    auto view = output_tensor.GetCpuWriteView();
+    uint8_t* data = view.buffer<uint8_t>();
+    auto output_frame =
+        frame_buffer::CreateFromRgbRawBuffer(data, output_dimension);
+    return CropRotateResize90Degrees(input_frame, roi, output_frame);
+  }
+
+  // Float output: render into a recycled RGB buffer (grown on demand), then
+  // convert that buffer into the tensor.
+  const size_t required_size = frame_buffer::GetFrameBufferByteSize(
+      output_dimension, FrameBuffer::Format::kRGB);
+  if (required_size > output_buffer_size_) {
+    output_buffer_ = std::make_unique<uint8_t[]>(required_size);
+    output_buffer_size_ = required_size;
+  }
+  auto output_frame = frame_buffer::CreateFromRgbRawBuffer(
+      output_buffer_.get(), output_dimension);
+  MP_RETURN_IF_ERROR(
+      CropRotateResize90Degrees(input_frame, roi, output_frame));
+  return ConvertToFloatTensor(output_frame, range_min, range_max,
+                              output_tensor);
+}
+
+// Verifies that `shape` has exactly four dimensions, a batch size (dims[0]) of
+// 1 and three channels (dims[3]). Returns a non-OK status from the first
+// failing check and OkStatus otherwise. dims[1] and dims[2] (read by Convert()
+// as height and width) are not checked here.
+absl::Status FrameBufferProcessor::ValidateTensorShape(
+    const Tensor::Shape& shape) {
+  // The size check must come first: the checks below index into dims.
+  RET_CHECK_EQ(shape.dims.size(), 4)
+      << "Wrong output dims size: " << shape.dims.size();
+  RET_CHECK_EQ(shape.dims[0], 1)
+      << "Handling batch dimension not equal to 1 is not implemented in this "
+         "converter.";
+  RET_CHECK_EQ(shape.dims[3], 3) << "Wrong output channel: " << shape.dims[3];
+  return absl::OkStatus();
+}
+
+// Crops the `roi` region out of `input`, resizes it, rotates it by the ROI
+// rotation (expected to be a multiple of 90°) and stores the result in
+// `output`, converting to the output format if it differs from the input one.
+//
+// The steps run as crop+resize -> rotate -> convert. Each step writes directly
+// into `output` when it is the last one required; otherwise it writes into a
+// recycled temporary buffer owned by this object.
+absl::Status FrameBufferProcessor::CropRotateResize90Degrees(
+    std::shared_ptr<const FrameBuffer> input, const RotatedRect& roi,
+    std::shared_ptr<FrameBuffer> output) {
+  int rotation_degrees = RadiansToDegrees(roi.rotation);
+  bool rotation_required = rotation_degrees != 0;
+  bool conversion_required = input->format() != output->format();
+
+  // First, crop and resize.
+  std::shared_ptr<FrameBuffer> cropped = output;
+  FrameBuffer::Dimension cropped_dims = output->dimension();
+  int left, right, top, bottom;
+  if (rotation_degrees % 180 != 0) {
+    // For 90°/270° the crop is taken before rotating, so the ROI's width and
+    // height are swapped relative to the output.
+    cropped_dims.Swap();
+    left = roi.center_x - roi.height / 2;
+    right = left + roi.height - 1;
+    top = roi.center_y - roi.width / 2;
+    bottom = top + roi.width - 1;
+  } else {
+    left = roi.center_x - roi.width / 2;
+    right = left + roi.width - 1;
+    top = roi.center_y - roi.height / 2;
+    bottom = top + roi.height - 1;
+  }
+  if (rotation_required || conversion_required) {
+    // Create temporary FrameBuffer from recycled buffer.
+    size_t cropped_buffer_size =
+        frame_buffer::GetFrameBufferByteSize(cropped_dims, input->format());
+    if (cropped_buffer_size > cropped_buffer_size_) {
+      cropped_buffer_ = std::make_unique<uint8_t[]>(cropped_buffer_size);
+      cropped_buffer_size_ = cropped_buffer_size;
+    }
+    ASSIGN_OR_RETURN(cropped,
+                     frame_buffer::CreateFromRawBuffer(
+                         cropped_buffer_.get(), cropped_dims, input->format()));
+  }
+  MP_RETURN_IF_ERROR(
+      frame_buffer::Crop(*input, left, top, right, bottom, cropped.get()));
+
+  // Then rotate if needed.
+  std::shared_ptr<FrameBuffer> rotated = output;
+  if (rotation_required) {
+    if (conversion_required) {
+      // Create temporary FrameBuffer from recycled buffer.
+      FrameBuffer::Dimension rotated_dims = output->dimension();
+      size_t rotated_buffer_size =
+          frame_buffer::GetFrameBufferByteSize(rotated_dims, cropped->format());
+      if (rotated_buffer_size > rotated_buffer_size_) {
+        rotated_buffer_ = std::make_unique<uint8_t[]>(rotated_buffer_size);
+        rotated_buffer_size_ = rotated_buffer_size;
+      }
+      // Assign to the outer `rotated`. Declaring a new variable here would
+      // shadow it, leaving the rotation to write input-format data straight
+      // into `output` and the temporary buffer unused.
+      ASSIGN_OR_RETURN(rotated, frame_buffer::CreateFromRawBuffer(
+                                    rotated_buffer_.get(), rotated_dims,
+                                    cropped->format()));
+    }
+    MP_RETURN_IF_ERROR(
+        frame_buffer::Rotate(*cropped, rotation_degrees, rotated.get()));
+  } else {
+    rotated = cropped;
+  }
+
+  // Then convert if needed.
+  if (conversion_required) {
+    return frame_buffer::Convert(*rotated, output.get());
+  }
+  return absl::OkStatus();
+}
+
+// Writes `input_frame` into `output_tensor` as floats, mapping the input image
+// value range [0, 255] onto [range_min, range_max]. Fails if the tensor's
+// element type is not kFloat32.
+absl::Status FrameBufferProcessor::ConvertToFloatTensor(
+    std::shared_ptr<const FrameBuffer> input_frame, float range_min,
+    float range_max, Tensor& output_tensor) {
+  RET_CHECK(output_tensor.element_type() == Tensor::ElementType::kFloat32);
+
+  // Value range of the source image.
+  constexpr float kSourceMin = 0.0f;
+  constexpr float kSourceMax = 255.0f;
+
+  // Scale/offset pair that maps the source range onto the requested range.
+  ASSIGN_OR_RETURN(auto range_mapping,
+                   GetValueRangeTransformation(kSourceMin, kSourceMax,
+                                               range_min, range_max));
+  return frame_buffer::ToFloatTensor(*input_frame, range_mapping.scale,
+                                     range_mapping.offset, output_tensor);
+}
+
+}  // namespace
+
+// Builds a FrameBufferProcessor for `tensor_type`.
+//
+// Only kUInt8 and kFloat32 tensors are accepted (InvalidArgument otherwise),
+// and BorderMode::kZero is rejected as Unimplemented. `cc` is currently not
+// used.
+absl::StatusOr<std::unique_ptr<ImageToTensorConverter>>
+CreateFrameBufferConverter(CalculatorContext* cc, BorderMode border_mode,
+                           Tensor::ElementType tensor_type) {
+  switch (tensor_type) {
+    case Tensor::ElementType::kUInt8:
+    case Tensor::ElementType::kFloat32:
+      break;  // Supported element types.
+    default:
+      return absl::InvalidArgumentError(
+          absl::StrFormat("Tensor type is currently not supported by "
+                          "FrameBufferProcessor, type: %d.",
+                          tensor_type));
+  }
+
+  // TODO: add support for BorderMode:kZero.
+  const bool zero_border_requested = border_mode == BorderMode::kZero;
+  if (zero_border_requested) {
+    return absl::UnimplementedError(
+        "BorderMode::kZero is not yet supported by FrameBufferProcessor");
+  }
+
+  return std::make_unique<FrameBufferProcessor>(tensor_type);
+}
+
+}  // namespace mediapipe
--- a/mediapipe/calculators/tensor/image_to_tensor_converter_frame_buffer.h
+++ b/mediapipe/calculators/tensor/image_to_tensor_converter_frame_buffer.h
@ -0,0 +1,34 @@
+// Copyright 2023 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_FRAME_BUFFER_H_
+#define MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_FRAME_BUFFER_H_
+
+#include <memory>
+
+#include "absl/status/statusor.h"
+#include "mediapipe/calculators/tensor/image_to_tensor_converter.h"
+#include "mediapipe/calculators/tensor/image_to_tensor_utils.h"
+#include "mediapipe/framework/calculator_context.h"
+
+namespace mediapipe {
+
+// Creates FrameBuffer-based image-to-tensor converter relying on Halide.
+//
+// `tensor_type` must be Tensor::ElementType::kUInt8 or kFloat32; any other
+// type yields an InvalidArgument error. BorderMode::kZero is not supported yet
+// and yields an Unimplemented error.
+absl::StatusOr<std::unique_ptr<ImageToTensorConverter>>
+CreateFrameBufferConverter(CalculatorContext* cc, BorderMode border_mode,
+                           Tensor::ElementType tensor_type);
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_CALCULATORS_TENSOR_IMAGE_TO_TENSOR_CONVERTER_FRAME_BUFFER_H_
--- a/mediapipe/framework/BUILD
+++ b/mediapipe/framework/BUILD
@ -969,6 +969,15 @@ cc_library(
    }) + select({
        "//conditions:default": [],
        "//mediapipe/framework/port:disable_opencv": ["MEDIAPIPE_DISABLE_OPENCV=1"],
+    }) + select({
+        "//conditions:default": [],
+        # TODO: Improve this. This only sets MEDIAPIPE_DISABLE_OPENCV as a "defines" Make
+        # value, not as a bazel "--define" variable, so it takes effect in C++ code but not
+        # in select() statements.
+        "//mediapipe/framework/port:enable_halide": [
+            "MEDIAPIPE_ENABLE_HALIDE=1",
+            "MEDIAPIPE_DISABLE_OPENCV=1",
+        ],
    }) + select({
        "//conditions:default": [],
        "//mediapipe/framework:disable_rtti_and_exceptions": [
--- a/mediapipe/framework/formats/image.h
+++ b/mediapipe/framework/formats/image.h
@ -111,12 +111,15 @@ class Image {
    return gpu_buffer_.internal_storage<mediapipe::GlTextureBuffer>();
  }
 #endif  // MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
+#endif  // !MEDIAPIPE_DISABLE_GPU
+
  // Get a GPU view. Automatically uploads from CPU if needed.
  const mediapipe::GpuBuffer GetGpuBuffer() const {
+#if !MEDIAPIPE_DISABLE_GPU
    if (use_gpu_ == false) ConvertToGpu();
+#endif  // !MEDIAPIPE_DISABLE_GPU
    return gpu_buffer_;
  }
-#endif  // !MEDIAPIPE_DISABLE_GPU

  // Returns image properties.
  int width() const;
--- a/mediapipe/framework/port/BUILD
+++ b/mediapipe/framework/port/BUILD
@ -222,6 +222,18 @@ config_setting(
    },
 )

+# Enables Halide when MEDIAPIPE_ENABLE_HALIDE is defined to 1 on the bazel command line.
+# This automatically disables OpenCV, i.e. it sets MEDIAPIPE_DISABLE_OPENCV to 1.
+# Note that this currently only applies to a select few calculators/framework components.
+# TODO: Improve this. This only sets MEDIAPIPE_DISABLE_OPENCV as a "defines" Make value,
+# not as a bazel "--define" variable, so it affects C++ code but not select() statements.
+config_setting(
+    name = "enable_halide",
+    define_values = {
+        "MEDIAPIPE_ENABLE_HALIDE": "1",
+    },
+)
+
 cc_library(
    name = "opencv_core",
    hdrs = ["opencv_core_inc.h"],