Automatic selection of the tensor's storage type by recording previously requested views.

PiperOrigin-RevId: 496466136
2022-12-19 12:36:07 -08:00 · 2022-12-19 12:36:07 -08:00 · ef3fa67bf4
commit ef3fa67bf4
parent 3e6cd5d2bf
5 changed files with 67 additions and 36 deletions
--- a/mediapipe/framework/formats/BUILD
+++ b/mediapipe/framework/formats/BUILD
@ -428,7 +428,10 @@ cc_library(
            "tensor.cc",
            "tensor_ahwb.cc",
        ],
-    hdrs = ["tensor.h"],
+    hdrs = [
+        "tensor.h",
+        "tensor_internal.h",
+    ],
    copts = select({
        "//mediapipe:apple": [
            "-x objective-c++",
@ -452,6 +455,7 @@ cc_library(
        ],
    }),
    deps = [
+        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/synchronization",
        "//mediapipe/framework:port",
--- a/mediapipe/framework/formats/tensor.cc
+++ b/mediapipe/framework/formats/tensor.cc
@ -246,10 +246,10 @@ Tensor::OpenGlTexture2dView::GetLayoutDimensions(const Tensor::Shape& shape,
      return Tensor::OpenGlTexture2dView::Layout::kAligned;
    }
  }
-  // The best performance of a compute shader can be achived with textures'
+  // The best performance of a compute shader can be achieved with textures'
  // width multiple of 256. Making minimum fixed width of 256 waste memory for
  // small tensors. The optimal balance memory-vs-performance is power of 2.
-  // The texture width and height are choosen to be closer to square.
+  // The texture width and height are chosen to be closer to square.
  float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
  w = 1 << static_cast<int>(power);
  int h = (num_pixels + w - 1) / w;
@ -326,7 +326,7 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
  auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
  AllocateOpenGlBuffer();
  if (!(valid_ & kValidOpenGlBuffer)) {
-    // If the call succeds then AHWB -> SSBO are synchronized so any usage of
+    // If the call succeeds then AHWB -> SSBO are synchronized so any usage of
    // the SSBO is correct after this call.
    if (!InsertAhwbToSsboFence()) {
      glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
@ -348,8 +348,10 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
  };
 }

-Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView() const {
+Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView(
+    uint64_t source_location_hash) const {
  auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
+  TrackAhwbUsage(source_location_hash);
  AllocateOpenGlBuffer();
  valid_ = kValidOpenGlBuffer;
  return {opengl_buffer_, std::move(lock), nullptr};
@ -385,6 +387,7 @@ void Tensor::Move(Tensor* src) {
  src->element_type_ = ElementType::kNone;  // Mark as invalidated.
  cpu_buffer_ = src->cpu_buffer_;
  src->cpu_buffer_ = nullptr;
+  ahwb_tracking_key_ = src->ahwb_tracking_key_;
 #if MEDIAPIPE_METAL_ENABLED
  device_ = src->device_;
  src->device_ = nil;
@ -589,8 +592,10 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const {
  return {cpu_buffer_, std::move(lock)};
 }

-Tensor::CpuWriteView Tensor::GetCpuWriteView() const {
+Tensor::CpuWriteView Tensor::GetCpuWriteView(
+    uint64_t source_location_hash) const {
  auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_);
+  TrackAhwbUsage(source_location_hash);
  AllocateCpuBuffer();
  valid_ = kValidCpu;
 #ifdef MEDIAPIPE_TENSOR_USE_AHWB
@ -620,24 +625,4 @@ void Tensor::AllocateCpuBuffer() const {
  }
 }

-void Tensor::SetPreferredStorageType(StorageType type) {
-#ifdef MEDIAPIPE_TENSOR_USE_AHWB
-  if (__builtin_available(android 26, *)) {
-    use_ahwb_ = type == StorageType::kAhwb;
-    VLOG(4) << "Tensor: use of AHardwareBuffer is "
-            << (use_ahwb_ ? "allowed" : "not allowed");
-  }
-#else
-  VLOG(4) << "Tensor: use of AHardwareBuffer is not allowed";
-#endif  // MEDIAPIPE_TENSOR_USE_AHWB
-}
-
-Tensor::StorageType Tensor::GetPreferredStorageType() {
-#ifdef MEDIAPIPE_TENSOR_USE_AHWB
-  return use_ahwb_ ? StorageType::kAhwb : StorageType::kDefault;
-#else
-  return StorageType::kDefault;
-#endif  // MEDIAPIPE_TENSOR_USE_AHWB
-}
-
 }  // namespace mediapipe
--- a/mediapipe/framework/formats/tensor.h
+++ b/mediapipe/framework/formats/tensor.h
@ -24,8 +24,9 @@
 #include <utility>
 #include <vector>

-#include "absl/memory/memory.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/synchronization/mutex.h"
+#include "mediapipe/framework/formats/tensor_internal.h"
 #include "mediapipe/framework/port.h"

 #if MEDIAPIPE_METAL_ENABLED
@ -48,6 +49,22 @@
 #include "mediapipe/gpu/gl_context.h"
 #endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30

+#if defined __has_builtin
+#if __has_builtin(__builtin_LINE)
+#define builtin_LINE __builtin_LINE
+#endif
+#if __has_builtin(__builtin_FILE)
+#define builtin_FILE __builtin_FILE
+#endif
+#endif
+
+#ifndef builtin_LINE
+#define builtin_LINE() 0
+#endif
+#ifndef builtin_FILE
+#define builtin_FILE() ""
+#endif
+
 namespace mediapipe {

 // Tensor is a container of multi-dimensional data that supports sharing the
@ -65,7 +82,7 @@ namespace mediapipe {
 // GLuint buffer = view.buffer();
 // Then the buffer can be bound to the GPU command buffer.
 // ...binding the buffer to the command buffer...
-// ...commiting command buffer and releasing the view...
+// ...committing command buffer and releasing the view...
 //
 // The following request for the CPU view will be blocked until the GPU view is
 // released and the GPU task is finished.
@ -161,7 +178,9 @@ class Tensor {
  using CpuReadView = CpuView<const void>;
  CpuReadView GetCpuReadView() const;
  using CpuWriteView = CpuView<void>;
-  CpuWriteView GetCpuWriteView() const;
+  CpuWriteView GetCpuWriteView(
+      uint64_t source_location_hash =
+          tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;

 #if MEDIAPIPE_METAL_ENABLED
  // TODO: id<MTLBuffer> vs. MtlBufferView.
@ -305,7 +324,9 @@ class Tensor {
  // A valid OpenGL context must be bound to the calling thread due to possible
  // GPU resource allocation.
  OpenGlBufferView GetOpenGlBufferReadView() const;
-  OpenGlBufferView GetOpenGlBufferWriteView() const;
+  OpenGlBufferView GetOpenGlBufferWriteView(
+      uint64_t source_location_hash =
+          tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
 #endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31

  const Shape& shape() const { return shape_; }
@ -410,7 +431,11 @@ class Tensor {
  void CreateEglSyncAndFd() const;
 #endif  // MEDIAPIPE_TENSOR_USE_AHWB
  // Use Ahwb for other views: OpenGL / CPU buffer.
-  static inline bool use_ahwb_ = false;
+  mutable bool use_ahwb_ = false;
+  mutable uint64_t ahwb_tracking_key_ = 0;
+  // TODO: Tracks all unique tensors, so the set can grow without bound. An
+  // LRU cache would make the memory use more predictable.
+  static inline absl::flat_hash_set<uint64_t> ahwb_usage_track_;
  // Expects the target SSBO to be already bound.
  bool AllocateAhwbMapToSsbo() const;
  bool InsertAhwbToSsboFence() const;
@ -419,6 +444,8 @@ class Tensor {
  void* MapAhwbToCpuRead() const;
  void* MapAhwbToCpuWrite() const;
  void MoveCpuOrSsboToAhwb() const;
+  // Sets the tracking key if unset; enables AHWB use if the key is marked.
+  void TrackAhwbUsage(uint64_t key) const;

 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  mutable std::shared_ptr<mediapipe::GlContext> gl_context_;
--- a/mediapipe/framework/formats/tensor_ahwb.cc
+++ b/mediapipe/framework/formats/tensor_ahwb.cc
@ -265,6 +265,10 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView(
 }

 bool Tensor::AllocateAHardwareBuffer(int size_alignment) const {
+  // Mark current tracking key as Ahwb-use.
+  ahwb_usage_track_.insert(ahwb_tracking_key_);
+  use_ahwb_ = true;
+
  if (__builtin_available(android 26, *)) {
    if (ahwb_ == nullptr) {
      AHardwareBuffer_Desc desc = {};
@ -447,6 +451,16 @@ void* Tensor::MapAhwbToCpuWrite() const {
  return nullptr;
 }

+void Tensor::TrackAhwbUsage(uint64_t source_location_hash) const {
+  if (ahwb_tracking_key_ == 0) {  // 0 is the member's initial value: no key yet.
+    ahwb_tracking_key_ = source_location_hash;
+    for (int dim : shape_.dims) {  // Fold every dimension into the key.
+      ahwb_tracking_key_ = tensor_internal::FnvHash64(ahwb_tracking_key_, dim);
+    }
+  }
+  use_ahwb_ = ahwb_usage_track_.contains(ahwb_tracking_key_);  // Key marked?
+}
+
 #else  // MEDIAPIPE_TENSOR_USE_AHWB

 bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
@ -455,6 +469,7 @@ void Tensor::MoveAhwbStuff(Tensor* src) {}
 void Tensor::ReleaseAhwbStuff() {}
 void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
 void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
+void Tensor::TrackAhwbUsage(uint64_t key) const {}

 #endif  // MEDIAPIPE_TENSOR_USE_AHWB

--- a/mediapipe/framework/formats/tensor_internal.h
+++ b/mediapipe/framework/formats/tensor_internal.h
@ -18,8 +18,6 @@
 #include <cstdint>
 #include <type_traits>

-#include "mediapipe/framework/tool/type_util.h"
-
 namespace mediapipe {

 // Generates unique view id at compile-time using FILE and LINE.
@ -41,10 +39,12 @@ namespace tensor_internal {
 // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
 constexpr uint64_t kFnvPrime = 0x00000100000001B3;
 constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
-constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
-  return (str[0] == 0) ? hash : FnvHash64(str + 1, (hash ^ str[0]) * kFnvPrime);
+constexpr uint64_t FnvHash64(uint64_t value1, uint64_t value2) {
+  return (value2 ^ value1) * kFnvPrime;
+}
+constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
+  return (str[0] == 0) ? hash : FnvHash64(str + 1, FnvHash64(hash, str[0]));
 }
-
 template <typename... Ts>
 struct TypeList {
  static constexpr std::size_t size{sizeof...(Ts)};