Automatic selection of the tensor's storage type by recording previously requested views.

PiperOrigin-RevId: 496466136
This commit is contained in:
Nikolay Chirkov 2022-12-19 12:36:07 -08:00 committed by Copybara-Service
parent 3e6cd5d2bf
commit ef3fa67bf4
5 changed files with 67 additions and 36 deletions

View File

@ -428,7 +428,10 @@ cc_library(
"tensor.cc", "tensor.cc",
"tensor_ahwb.cc", "tensor_ahwb.cc",
], ],
hdrs = ["tensor.h"], hdrs = [
"tensor.h",
"tensor_internal.h",
],
copts = select({ copts = select({
"//mediapipe:apple": [ "//mediapipe:apple": [
"-x objective-c++", "-x objective-c++",
@ -452,6 +455,7 @@ cc_library(
], ],
}), }),
deps = [ deps = [
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/memory", "@com_google_absl//absl/memory",
"@com_google_absl//absl/synchronization", "@com_google_absl//absl/synchronization",
"//mediapipe/framework:port", "//mediapipe/framework:port",

View File

@ -246,10 +246,10 @@ Tensor::OpenGlTexture2dView::GetLayoutDimensions(const Tensor::Shape& shape,
return Tensor::OpenGlTexture2dView::Layout::kAligned; return Tensor::OpenGlTexture2dView::Layout::kAligned;
} }
} }
// The best performance of a compute shader can be achived with textures' // The best performance of a compute shader can be achieved with textures'
// width multiple of 256. Making a minimum fixed width of 256 wastes memory for // width multiple of 256. Making a minimum fixed width of 256 wastes memory for
// small tensors. The optimal memory-vs-performance balance is a power of 2. // small tensors. The optimal memory-vs-performance balance is a power of 2.
// The texture width and height are choosen to be closer to square. // The texture width and height are chosen to be closer to square.
float power = std::log2(std::sqrt(static_cast<float>(num_pixels))); float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
w = 1 << static_cast<int>(power); w = 1 << static_cast<int>(power);
int h = (num_pixels + w - 1) / w; int h = (num_pixels + w - 1) / w;
@ -326,7 +326,7 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_)); auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
AllocateOpenGlBuffer(); AllocateOpenGlBuffer();
if (!(valid_ & kValidOpenGlBuffer)) { if (!(valid_ & kValidOpenGlBuffer)) {
// If the call succeds then AHWB -> SSBO are synchronized so any usage of // If the call succeeds then AHWB -> SSBO are synchronized so any usage of
// the SSBO is correct after this call. // the SSBO is correct after this call.
if (!InsertAhwbToSsboFence()) { if (!InsertAhwbToSsboFence()) {
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_); glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
@ -348,8 +348,10 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
}; };
} }
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView() const { Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView(
uint64_t source_location_hash) const {
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_)); auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
TrackAhwbUsage(source_location_hash);
AllocateOpenGlBuffer(); AllocateOpenGlBuffer();
valid_ = kValidOpenGlBuffer; valid_ = kValidOpenGlBuffer;
return {opengl_buffer_, std::move(lock), nullptr}; return {opengl_buffer_, std::move(lock), nullptr};
@ -385,6 +387,7 @@ void Tensor::Move(Tensor* src) {
src->element_type_ = ElementType::kNone; // Mark as invalidated. src->element_type_ = ElementType::kNone; // Mark as invalidated.
cpu_buffer_ = src->cpu_buffer_; cpu_buffer_ = src->cpu_buffer_;
src->cpu_buffer_ = nullptr; src->cpu_buffer_ = nullptr;
ahwb_tracking_key_ = src->ahwb_tracking_key_;
#if MEDIAPIPE_METAL_ENABLED #if MEDIAPIPE_METAL_ENABLED
device_ = src->device_; device_ = src->device_;
src->device_ = nil; src->device_ = nil;
@ -589,8 +592,10 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const {
return {cpu_buffer_, std::move(lock)}; return {cpu_buffer_, std::move(lock)};
} }
Tensor::CpuWriteView Tensor::GetCpuWriteView() const { Tensor::CpuWriteView Tensor::GetCpuWriteView(
uint64_t source_location_hash) const {
auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_); auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_);
TrackAhwbUsage(source_location_hash);
AllocateCpuBuffer(); AllocateCpuBuffer();
valid_ = kValidCpu; valid_ = kValidCpu;
#ifdef MEDIAPIPE_TENSOR_USE_AHWB #ifdef MEDIAPIPE_TENSOR_USE_AHWB
@ -620,24 +625,4 @@ void Tensor::AllocateCpuBuffer() const {
} }
} }
void Tensor::SetPreferredStorageType(StorageType type) {
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
if (__builtin_available(android 26, *)) {
use_ahwb_ = type == StorageType::kAhwb;
VLOG(4) << "Tensor: use of AHardwareBuffer is "
<< (use_ahwb_ ? "allowed" : "not allowed");
}
#else
VLOG(4) << "Tensor: use of AHardwareBuffer is not allowed";
#endif // MEDIAPIPE_TENSOR_USE_AHWB
}
Tensor::StorageType Tensor::GetPreferredStorageType() {
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
return use_ahwb_ ? StorageType::kAhwb : StorageType::kDefault;
#else
return StorageType::kDefault;
#endif // MEDIAPIPE_TENSOR_USE_AHWB
}
} // namespace mediapipe } // namespace mediapipe

View File

@ -24,8 +24,9 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "absl/memory/memory.h" #include "absl/container/flat_hash_set.h"
#include "absl/synchronization/mutex.h" #include "absl/synchronization/mutex.h"
#include "mediapipe/framework/formats/tensor_internal.h"
#include "mediapipe/framework/port.h" #include "mediapipe/framework/port.h"
#if MEDIAPIPE_METAL_ENABLED #if MEDIAPIPE_METAL_ENABLED
@ -48,6 +49,22 @@
#include "mediapipe/gpu/gl_context.h" #include "mediapipe/gpu/gl_context.h"
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
#if defined __has_builtin
#if __has_builtin(__builtin_LINE)
#define builtin_LINE __builtin_LINE
#endif
#if __has_builtin(__builtin_FILE)
#define builtin_FILE __builtin_FILE
#endif
#endif
#ifndef builtin_LINE
#define builtin_LINE() 0
#endif
#ifndef builtin_FILE
#define builtin_FILE() ""
#endif
namespace mediapipe { namespace mediapipe {
// Tensor is a container of multi-dimensional data that supports sharing the // Tensor is a container of multi-dimensional data that supports sharing the
@ -65,7 +82,7 @@ namespace mediapipe {
// GLuint buffer = view.buffer(); // GLuint buffer = view.buffer();
// Then the buffer can be bound to the GPU command buffer. // Then the buffer can be bound to the GPU command buffer.
// ...binding the buffer to the command buffer... // ...binding the buffer to the command buffer...
// ...commiting command buffer and releasing the view... // ...committing command buffer and releasing the view...
// //
// The following request for the CPU view will be blocked until the GPU view is // The following request for the CPU view will be blocked until the GPU view is
// released and the GPU task is finished. // released and the GPU task is finished.
@ -161,7 +178,9 @@ class Tensor {
using CpuReadView = CpuView<const void>; using CpuReadView = CpuView<const void>;
CpuReadView GetCpuReadView() const; CpuReadView GetCpuReadView() const;
using CpuWriteView = CpuView<void>; using CpuWriteView = CpuView<void>;
CpuWriteView GetCpuWriteView() const; CpuWriteView GetCpuWriteView(
uint64_t source_location_hash =
tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
#if MEDIAPIPE_METAL_ENABLED #if MEDIAPIPE_METAL_ENABLED
// TODO: id<MTLBuffer> vs. MtlBufferView. // TODO: id<MTLBuffer> vs. MtlBufferView.
@ -305,7 +324,9 @@ class Tensor {
// A valid OpenGL context must be bound to the calling thread due to possible // A valid OpenGL context must be bound to the calling thread due to possible
// GPU resource allocation. // GPU resource allocation.
OpenGlBufferView GetOpenGlBufferReadView() const; OpenGlBufferView GetOpenGlBufferReadView() const;
OpenGlBufferView GetOpenGlBufferWriteView() const; OpenGlBufferView GetOpenGlBufferWriteView(
uint64_t source_location_hash =
tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31 #endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
const Shape& shape() const { return shape_; } const Shape& shape() const { return shape_; }
@ -410,7 +431,11 @@ class Tensor {
void CreateEglSyncAndFd() const; void CreateEglSyncAndFd() const;
#endif // MEDIAPIPE_TENSOR_USE_AHWB #endif // MEDIAPIPE_TENSOR_USE_AHWB
// Use Ahwb for other views: OpenGL / CPU buffer. // Use Ahwb for other views: OpenGL / CPU buffer.
static inline bool use_ahwb_ = false; mutable bool use_ahwb_ = false;
mutable uint64_t ahwb_tracking_key_ = 0;
// TODO: Tracks all unique tensors. Can grow to a large number; an LRU
// cache would be more predictable.
static inline absl::flat_hash_set<uint64_t> ahwb_usage_track_;
// Expects the target SSBO to be already bound. // Expects the target SSBO to be already bound.
bool AllocateAhwbMapToSsbo() const; bool AllocateAhwbMapToSsbo() const;
bool InsertAhwbToSsboFence() const; bool InsertAhwbToSsboFence() const;
@ -419,6 +444,8 @@ class Tensor {
void* MapAhwbToCpuRead() const; void* MapAhwbToCpuRead() const;
void* MapAhwbToCpuWrite() const; void* MapAhwbToCpuWrite() const;
void MoveCpuOrSsboToAhwb() const; void MoveCpuOrSsboToAhwb() const;
// Set current tracking key, set "use ahwb" if the key is already marked.
void TrackAhwbUsage(uint64_t key) const;
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
mutable std::shared_ptr<mediapipe::GlContext> gl_context_; mutable std::shared_ptr<mediapipe::GlContext> gl_context_;

View File

@ -265,6 +265,10 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView(
} }
bool Tensor::AllocateAHardwareBuffer(int size_alignment) const { bool Tensor::AllocateAHardwareBuffer(int size_alignment) const {
// Mark current tracking key as Ahwb-use.
ahwb_usage_track_.insert(ahwb_tracking_key_);
use_ahwb_ = true;
if (__builtin_available(android 26, *)) { if (__builtin_available(android 26, *)) {
if (ahwb_ == nullptr) { if (ahwb_ == nullptr) {
AHardwareBuffer_Desc desc = {}; AHardwareBuffer_Desc desc = {};
@ -447,6 +451,16 @@ void* Tensor::MapAhwbToCpuWrite() const {
return nullptr; return nullptr;
} }
void Tensor::TrackAhwbUsage(uint64_t source_location_hash) const {
if (ahwb_tracking_key_ == 0) {
ahwb_tracking_key_ = source_location_hash;
for (int dim : shape_.dims) {
ahwb_tracking_key_ = tensor_internal::FnvHash64(ahwb_tracking_key_, dim);
}
}
use_ahwb_ = ahwb_usage_track_.contains(ahwb_tracking_key_);
}
#else // MEDIAPIPE_TENSOR_USE_AHWB #else // MEDIAPIPE_TENSOR_USE_AHWB
bool Tensor::AllocateAhwbMapToSsbo() const { return false; } bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
@ -455,6 +469,7 @@ void Tensor::MoveAhwbStuff(Tensor* src) {}
void Tensor::ReleaseAhwbStuff() {} void Tensor::ReleaseAhwbStuff() {}
void* Tensor::MapAhwbToCpuRead() const { return nullptr; } void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
void* Tensor::MapAhwbToCpuWrite() const { return nullptr; } void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
void Tensor::TrackAhwbUsage(uint64_t key) const {}
#endif // MEDIAPIPE_TENSOR_USE_AHWB #endif // MEDIAPIPE_TENSOR_USE_AHWB

View File

@ -18,8 +18,6 @@
#include <cstdint> #include <cstdint>
#include <type_traits> #include <type_traits>
#include "mediapipe/framework/tool/type_util.h"
namespace mediapipe { namespace mediapipe {
// Generates unique view id at compile-time using FILE and LINE. // Generates unique view id at compile-time using FILE and LINE.
@ -41,10 +39,12 @@ namespace tensor_internal {
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
constexpr uint64_t kFnvPrime = 0x00000100000001B3; constexpr uint64_t kFnvPrime = 0x00000100000001B3;
constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325; constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) { constexpr uint64_t FnvHash64(uint64_t value1, uint64_t value2) {
return (str[0] == 0) ? hash : FnvHash64(str + 1, (hash ^ str[0]) * kFnvPrime); return (value2 ^ value1) * kFnvPrime;
}
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
return (str[0] == 0) ? hash : FnvHash64(str + 1, FnvHash64(hash, str[0]));
} }
template <typename... Ts> template <typename... Ts>
struct TypeList { struct TypeList {
static constexpr std::size_t size{sizeof...(Ts)}; static constexpr std::size_t size{sizeof...(Ts)};