From ef3fa67bf423e2d1c2ffba2bab01cc1c7b5d2ba5 Mon Sep 17 00:00:00 2001
From: Nikolay Chirkov
Date: Mon, 19 Dec 2022 12:36:07 -0800
Subject: [PATCH] Automatic selection of the tensor's storage type by
 recording previously requested views.

PiperOrigin-RevId: 496466136
---
 mediapipe/framework/formats/BUILD             |  6 ++-
 mediapipe/framework/formats/tensor.cc         | 35 +++++-------------
 mediapipe/framework/formats/tensor.h          | 37 ++++++++++++++++---
 mediapipe/framework/formats/tensor_ahwb.cc    | 15 ++++++++
 mediapipe/framework/formats/tensor_internal.h | 10 ++---
 5 files changed, 67 insertions(+), 36 deletions(-)

diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD
index fdb698c48..fdd9b8909 100644
--- a/mediapipe/framework/formats/BUILD
+++ b/mediapipe/framework/formats/BUILD
@@ -428,7 +428,10 @@ cc_library(
         "tensor.cc",
         "tensor_ahwb.cc",
     ],
-    hdrs = ["tensor.h"],
+    hdrs = [
+        "tensor.h",
+        "tensor_internal.h",
+    ],
     copts = select({
         "//mediapipe:apple": [
             "-x objective-c++",
@@ -452,6 +455,7 @@ cc_library(
         ],
     }),
     deps = [
+        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/synchronization",
        "//mediapipe/framework:port",
diff --git a/mediapipe/framework/formats/tensor.cc b/mediapipe/framework/formats/tensor.cc
index fdafbff5c..3f11d368a 100644
--- a/mediapipe/framework/formats/tensor.cc
+++ b/mediapipe/framework/formats/tensor.cc
@@ -246,10 +246,10 @@ Tensor::OpenGlTexture2dView::GetLayoutDimensions(const Tensor::Shape& shape,
       return Tensor::OpenGlTexture2dView::Layout::kAligned;
     }
   }
-  // The best performance of a compute shader can be achived with textures'
+  // The best performance of a compute shader can be achieved with textures'
   // width multiple of 256. Making minimum fixed width of 256 waste memory for
   // small tensors. The optimal balance memory-vs-performance is power of 2.
-  // The texture width and height are choosen to be closer to square.
+  // The texture width and height are chosen to be closer to square.
   float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
   w = 1 << static_cast<int>(power);
   int h = (num_pixels + w - 1) / w;
@@ -326,7 +326,7 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
   auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
   AllocateOpenGlBuffer();
   if (!(valid_ & kValidOpenGlBuffer)) {
-    // If the call succeds then AHWB -> SSBO are synchronized so any usage of
+    // If the call succeeds then AHWB -> SSBO are synchronized so any usage of
     // the SSBO is correct after this call.
     if (!InsertAhwbToSsboFence()) {
       glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
@@ -348,8 +348,10 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
   };
 }
 
-Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView() const {
+Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView(
+    uint64_t source_location_hash) const {
   auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
+  TrackAhwbUsage(source_location_hash);
   AllocateOpenGlBuffer();
   valid_ = kValidOpenGlBuffer;
   return {opengl_buffer_, std::move(lock), nullptr};
@@ -385,6 +387,7 @@ void Tensor::Move(Tensor* src) {
   src->element_type_ = ElementType::kNone;  // Mark as invalidated.
   cpu_buffer_ = src->cpu_buffer_;
   src->cpu_buffer_ = nullptr;
+  ahwb_tracking_key_ = src->ahwb_tracking_key_;
 #if MEDIAPIPE_METAL_ENABLED
   device_ = src->device_;
   src->device_ = nil;
@@ -589,8 +592,10 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const {
   return {cpu_buffer_, std::move(lock)};
 }
 
-Tensor::CpuWriteView Tensor::GetCpuWriteView() const {
+Tensor::CpuWriteView Tensor::GetCpuWriteView(
+    uint64_t source_location_hash) const {
   auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_);
+  TrackAhwbUsage(source_location_hash);
   AllocateCpuBuffer();
   valid_ = kValidCpu;
 #ifdef MEDIAPIPE_TENSOR_USE_AHWB
@@ -620,24 +625,4 @@ void Tensor::AllocateCpuBuffer() const {
   }
 }
 
-void Tensor::SetPreferredStorageType(StorageType type) {
-#ifdef MEDIAPIPE_TENSOR_USE_AHWB
-  if (__builtin_available(android 26, *)) {
-    use_ahwb_ = type == StorageType::kAhwb;
-    VLOG(4) << "Tensor: use of AHardwareBuffer is "
-            << (use_ahwb_ ? "allowed" : "not allowed");
-  }
-#else
-  VLOG(4) << "Tensor: use of AHardwareBuffer is not allowed";
-#endif  // MEDIAPIPE_TENSOR_USE_AHWB
-}
-
-Tensor::StorageType Tensor::GetPreferredStorageType() {
-#ifdef MEDIAPIPE_TENSOR_USE_AHWB
-  return use_ahwb_ ? StorageType::kAhwb : StorageType::kDefault;
-#else
-  return StorageType::kDefault;
-#endif  // MEDIAPIPE_TENSOR_USE_AHWB
-}
-
 }  // namespace mediapipe
diff --git a/mediapipe/framework/formats/tensor.h b/mediapipe/framework/formats/tensor.h
index f5a99cde1..8a6f02e9d 100644
--- a/mediapipe/framework/formats/tensor.h
+++ b/mediapipe/framework/formats/tensor.h
@@ -24,8 +24,9 @@
 #include <utility>
 #include <vector>
 
-#include "absl/memory/memory.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/synchronization/mutex.h"
+#include "mediapipe/framework/formats/tensor_internal.h"
 #include "mediapipe/framework/port.h"
 
 #if MEDIAPIPE_METAL_ENABLED
@@ -48,6 +49,22 @@
 #include "mediapipe/gpu/gl_context.h"
 #endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
 
+#if defined __has_builtin
+#if __has_builtin(__builtin_LINE)
+#define builtin_LINE __builtin_LINE
+#endif
+#if __has_builtin(__builtin_FILE)
+#define builtin_FILE __builtin_FILE
+#endif
+#endif
+
+#ifndef builtin_LINE
+#define builtin_LINE() 0
+#endif
+#ifndef builtin_FILE
+#define builtin_FILE() ""
+#endif
+
 namespace mediapipe {
 
 // Tensor is a container of multi-dimensional data that supports sharing the
@@ -65,7 +82,7 @@ namespace mediapipe {
 // GLuint buffer = view.buffer();
 // Then the buffer can be bound to the GPU command buffer.
 // ...binding the buffer to the command buffer...
-// ...commiting command buffer and releasing the view...
+// ...committing command buffer and releasing the view...
 //
 // The following request for the CPU view will be blocked until the GPU view is
 // released and the GPU task is finished.
@@ -161,7 +178,9 @@ class Tensor {
   using CpuReadView = CpuView<const void>;
   CpuReadView GetCpuReadView() const;
   using CpuWriteView = CpuView<void>;
-  CpuWriteView GetCpuWriteView() const;
+  CpuWriteView GetCpuWriteView(
+      uint64_t source_location_hash =
+          tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
 
 #if MEDIAPIPE_METAL_ENABLED
   // TODO: id<MTLBuffer> vs. MtlBufferView.
@@ -305,7 +324,9 @@ class Tensor {
   // A valid OpenGL context must be bound to the calling thread due to possible
   // GPU resource allocation.
   OpenGlBufferView GetOpenGlBufferReadView() const;
-  OpenGlBufferView GetOpenGlBufferWriteView() const;
+  OpenGlBufferView GetOpenGlBufferWriteView(
+      uint64_t source_location_hash =
+          tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
 #endif  // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
 
   const Shape& shape() const { return shape_; }
@@ -410,7 +431,11 @@ class Tensor {
   void CreateEglSyncAndFd() const;
 #endif  // MEDIAPIPE_TENSOR_USE_AHWB
   // Use Ahwb for other views: OpenGL / CPU buffer.
-  static inline bool use_ahwb_ = false;
+  mutable bool use_ahwb_ = false;
+  mutable uint64_t ahwb_tracking_key_ = 0;
+  // TODO: Tracks all unique tensors. Can grow to a large number. An LRU
+  // eviction policy would be more predictable.
+  static inline absl::flat_hash_set<uint64_t> ahwb_usage_track_;
   // Expects the target SSBO to be already bound.
   bool AllocateAhwbMapToSsbo() const;
   bool InsertAhwbToSsboFence() const;
@@ -419,6 +444,8 @@ class Tensor {
   void* MapAhwbToCpuRead() const;
   void* MapAhwbToCpuWrite() const;
   void MoveCpuOrSsboToAhwb() const;
+  // Set the current tracking key; set "use ahwb" if the key is already marked.
+  void TrackAhwbUsage(uint64_t key) const;
 
 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
   mutable std::shared_ptr<mediapipe::GlContext> gl_context_;
diff --git a/mediapipe/framework/formats/tensor_ahwb.cc b/mediapipe/framework/formats/tensor_ahwb.cc
index 363c5efd0..466811be7 100644
--- a/mediapipe/framework/formats/tensor_ahwb.cc
+++ b/mediapipe/framework/formats/tensor_ahwb.cc
@@ -265,6 +265,10 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView(
 }
 
 bool Tensor::AllocateAHardwareBuffer(int size_alignment) const {
+  // Mark the current tracking key as using AHWB.
+  ahwb_usage_track_.insert(ahwb_tracking_key_);
+  use_ahwb_ = true;
+
   if (__builtin_available(android 26, *)) {
     if (ahwb_ == nullptr) {
       AHardwareBuffer_Desc desc = {};
@@ -447,6 +451,16 @@ void* Tensor::MapAhwbToCpuWrite() const {
   return nullptr;
 }
 
+void Tensor::TrackAhwbUsage(uint64_t source_location_hash) const {
+  if (ahwb_tracking_key_ == 0) {
+    ahwb_tracking_key_ = source_location_hash;
+    for (int dim : shape_.dims) {
+      ahwb_tracking_key_ = tensor_internal::FnvHash64(ahwb_tracking_key_, dim);
+    }
+  }
+  use_ahwb_ = ahwb_usage_track_.contains(ahwb_tracking_key_);
+}
+
 #else  // MEDIAPIPE_TENSOR_USE_AHWB
 
 bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
@@ -455,6 +469,7 @@ void Tensor::MoveAhwbStuff(Tensor* src) {}
 void Tensor::ReleaseAhwbStuff() {}
 void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
 void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
+void Tensor::TrackAhwbUsage(uint64_t key) const {}
 
 #endif  // MEDIAPIPE_TENSOR_USE_AHWB
diff --git a/mediapipe/framework/formats/tensor_internal.h b/mediapipe/framework/formats/tensor_internal.h
index 1231a991c..c223c5b1d 100644
--- a/mediapipe/framework/formats/tensor_internal.h
+++ b/mediapipe/framework/formats/tensor_internal.h
@@ -18,8 +18,6 @@
 #include <cstdint>
 #include <type_traits>
 
-#include "mediapipe/framework/tool/type_util.h"
-
 namespace mediapipe {
 
 // Generates unique view id at compile-time using FILE and LINE.
@@ -41,10 +39,12 @@ namespace tensor_internal {
 // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
 constexpr uint64_t kFnvPrime = 0x00000100000001B3;
 constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
-constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
-  return (str[0] == 0) ? hash : FnvHash64(str + 1, (hash ^ str[0]) * kFnvPrime);
+constexpr uint64_t FnvHash64(uint64_t value1, uint64_t value2) {
+  return (value2 ^ value1) * kFnvPrime;
+}
+constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
+  return (str[0] == 0) ? hash : FnvHash64(str + 1, FnvHash64(hash, str[0]));
 }
-
 template <typename... Ts>
 struct TypeList {
   static constexpr std::size_t size{sizeof...(Ts)};
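
The storage-type selection this patch introduces can be illustrated with a small
standalone sketch. It is not MediaPipe code: ahwb_usage_track and ShouldUseAhwb
are hypothetical stand-ins for the ahwb_usage_track_ set, TrackAhwbUsage(), and
AllocateAHardwareBuffer() paths added above, and __FILE__/__LINE__ stand in for
builtin_FILE()/builtin_LINE().

// Illustrative sketch only (not part of the patch): how the call-site hash and
// the tensor shape combine into the tracking key used above.
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

// FNV-1a helpers mirroring tensor_internal.h above.
constexpr uint64_t kFnvPrime = 0x00000100000001B3;
constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
constexpr uint64_t FnvHash64(uint64_t value1, uint64_t value2) {
  return (value2 ^ value1) * kFnvPrime;
}
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
  return (str[0] == 0) ? hash : FnvHash64(str + 1, FnvHash64(hash, str[0]));
}

// Keys of tensors that previously ended up backed by an AHardwareBuffer
// (hypothetical stand-in for the ahwb_usage_track_ member in the patch).
std::unordered_set<uint64_t> ahwb_usage_track;

// Mirrors TrackAhwbUsage(): fold the call-site hash with the shape dims and
// check whether this tensor was backed by AHWB on an earlier pass.
bool ShouldUseAhwb(uint64_t source_location_hash, const std::vector<int>& dims) {
  uint64_t key = source_location_hash;
  for (int dim : dims) key = FnvHash64(key, dim);
  return ahwb_usage_track.count(key) != 0;
}

int main() {
  // A write view is requested at this call site; __FILE__/__LINE__ seed the
  // hash the same way builtin_FILE()/builtin_LINE() do in the patch.
  const uint64_t call_site = FnvHash64(__FILE__, __LINE__);
  const std::vector<int> dims = {1, 224, 224, 3};

  // First pass: the key is unknown, so a default (CPU/SSBO) buffer is used.
  std::cout << "use AHWB, first pass: " << ShouldUseAhwb(call_site, dims) << "\n";

  // An AHardwareBuffer view is later requested for the same tensor, which
  // marks the key, as AllocateAHardwareBuffer() does in the patch.
  uint64_t key = call_site;
  for (int dim : dims) key = FnvHash64(key, dim);
  ahwb_usage_track.insert(key);

  // The next pass through the same call site with the same shape allocates
  // AHWB up front, avoiding a later CPU/SSBO -> AHWB copy.
  std::cout << "use AHWB, second pass: " << ShouldUseAhwb(call_site, dims) << "\n";
  return 0;
}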