Automatic selection of the tensor's storage type by recording previously requested views.
PiperOrigin-RevId: 496466136
This commit is contained in:
parent
3e6cd5d2bf
commit
ef3fa67bf4
|
@ -428,7 +428,10 @@ cc_library(
|
|||
"tensor.cc",
|
||||
"tensor_ahwb.cc",
|
||||
],
|
||||
hdrs = ["tensor.h"],
|
||||
hdrs = [
|
||||
"tensor.h",
|
||||
"tensor_internal.h",
|
||||
],
|
||||
copts = select({
|
||||
"//mediapipe:apple": [
|
||||
"-x objective-c++",
|
||||
|
@ -452,6 +455,7 @@ cc_library(
|
|||
],
|
||||
}),
|
||||
deps = [
|
||||
"@com_google_absl//absl/container:flat_hash_set",
|
||||
"@com_google_absl//absl/memory",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"//mediapipe/framework:port",
|
||||
|
|
|
@ -246,10 +246,10 @@ Tensor::OpenGlTexture2dView::GetLayoutDimensions(const Tensor::Shape& shape,
|
|||
return Tensor::OpenGlTexture2dView::Layout::kAligned;
|
||||
}
|
||||
}
|
||||
// The best performance of a compute shader can be achived with textures'
|
||||
// The best performance of a compute shader can be achieved with textures'
|
||||
// width multiple of 256. Making minimum fixed width of 256 waste memory for
|
||||
// small tensors. The optimal balance memory-vs-performance is power of 2.
|
||||
// The texture width and height are choosen to be closer to square.
|
||||
// The texture width and height are chosen to be closer to square.
|
||||
float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
|
||||
w = 1 << static_cast<int>(power);
|
||||
int h = (num_pixels + w - 1) / w;
|
||||
|
@ -326,7 +326,7 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
|
|||
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
|
||||
AllocateOpenGlBuffer();
|
||||
if (!(valid_ & kValidOpenGlBuffer)) {
|
||||
// If the call succeds then AHWB -> SSBO are synchronized so any usage of
|
||||
// If the call succeeds then AHWB -> SSBO are synchronized so any usage of
|
||||
// the SSBO is correct after this call.
|
||||
if (!InsertAhwbToSsboFence()) {
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
|
||||
|
@ -348,8 +348,10 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
|
|||
};
|
||||
}
|
||||
|
||||
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView() const {
|
||||
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView(
|
||||
uint64_t source_location_hash) const {
|
||||
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
|
||||
TrackAhwbUsage(source_location_hash);
|
||||
AllocateOpenGlBuffer();
|
||||
valid_ = kValidOpenGlBuffer;
|
||||
return {opengl_buffer_, std::move(lock), nullptr};
|
||||
|
@ -385,6 +387,7 @@ void Tensor::Move(Tensor* src) {
|
|||
src->element_type_ = ElementType::kNone; // Mark as invalidated.
|
||||
cpu_buffer_ = src->cpu_buffer_;
|
||||
src->cpu_buffer_ = nullptr;
|
||||
ahwb_tracking_key_ = src->ahwb_tracking_key_;
|
||||
#if MEDIAPIPE_METAL_ENABLED
|
||||
device_ = src->device_;
|
||||
src->device_ = nil;
|
||||
|
@ -589,8 +592,10 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const {
|
|||
return {cpu_buffer_, std::move(lock)};
|
||||
}
|
||||
|
||||
Tensor::CpuWriteView Tensor::GetCpuWriteView() const {
|
||||
Tensor::CpuWriteView Tensor::GetCpuWriteView(
|
||||
uint64_t source_location_hash) const {
|
||||
auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_);
|
||||
TrackAhwbUsage(source_location_hash);
|
||||
AllocateCpuBuffer();
|
||||
valid_ = kValidCpu;
|
||||
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
|
||||
|
@ -620,24 +625,4 @@ void Tensor::AllocateCpuBuffer() const {
|
|||
}
|
||||
}
|
||||
|
||||
// Records whether AHardwareBuffer-backed storage may be used.
//
// When MEDIAPIPE_TENSOR_USE_AHWB is defined and the availability check for
// Android API level 26 passes, `use_ahwb_` becomes true only for
// StorageType::kAhwb and the decision is logged at VLOG(4). If the
// availability check fails nothing is changed or logged. Without
// MEDIAPIPE_TENSOR_USE_AHWB the request is ignored and only the
// "not allowed" message is logged.
void Tensor::SetPreferredStorageType(StorageType type) {
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
  // __builtin_available is kept directly as the `if` condition.
  if (__builtin_available(android 26, *)) {
    const bool ahwb_requested = (type == StorageType::kAhwb);
    use_ahwb_ = ahwb_requested;
    VLOG(4) << "Tensor: use of AHardwareBuffer is "
            << (ahwb_requested ? "allowed" : "not allowed");
  }
#else
  VLOG(4) << "Tensor: use of AHardwareBuffer is not allowed";
#endif  // MEDIAPIPE_TENSOR_USE_AHWB
}
|
||||
|
||||
// Reports the currently preferred storage type: StorageType::kAhwb when
// MEDIAPIPE_TENSOR_USE_AHWB is defined and `use_ahwb_` is set, otherwise
// StorageType::kDefault.
Tensor::StorageType Tensor::GetPreferredStorageType() {
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
  if (use_ahwb_) {
    return StorageType::kAhwb;
  }
#endif  // MEDIAPIPE_TENSOR_USE_AHWB
  return StorageType::kDefault;
}
|
||||
|
||||
} // namespace mediapipe
|
||||
|
|
|
@ -24,8 +24,9 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "mediapipe/framework/formats/tensor_internal.h"
|
||||
#include "mediapipe/framework/port.h"
|
||||
|
||||
#if MEDIAPIPE_METAL_ENABLED
|
||||
|
@ -48,6 +49,22 @@
|
|||
#include "mediapipe/gpu/gl_context.h"
|
||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
||||
|
||||
// Call-site location helpers used as default arguments of the write-view
// getters. When the compiler advertises __builtin_LINE / __builtin_FILE via
// __has_builtin, the macros forward to those builtins; otherwise they expand
// to the constants 0 and "".
#ifdef __has_builtin
#if __has_builtin(__builtin_LINE)
#define builtin_LINE __builtin_LINE
#endif  // __has_builtin(__builtin_LINE)
#if __has_builtin(__builtin_FILE)
#define builtin_FILE __builtin_FILE
#endif  // __has_builtin(__builtin_FILE)
#endif  // __has_builtin

// Fallbacks for compilers without the builtins above.
#if !defined(builtin_LINE)
#define builtin_LINE() 0
#endif  // !defined(builtin_LINE)
#if !defined(builtin_FILE)
#define builtin_FILE() ""
#endif  // !defined(builtin_FILE)
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Tensor is a container of multi-dimensional data that supports sharing the
|
||||
|
@ -65,7 +82,7 @@ namespace mediapipe {
|
|||
// GLuint buffer = view.buffer();
|
||||
// Then the buffer can be bound to the GPU command buffer.
|
||||
// ...binding the buffer to the command buffer...
|
||||
// ...commiting command buffer and releasing the view...
|
||||
// ...committing command buffer and releasing the view...
|
||||
//
|
||||
// The following request for the CPU view will be blocked until the GPU view is
|
||||
// released and the GPU task is finished.
|
||||
|
@ -161,7 +178,9 @@ class Tensor {
|
|||
using CpuReadView = CpuView<const void>;
|
||||
CpuReadView GetCpuReadView() const;
|
||||
using CpuWriteView = CpuView<void>;
|
||||
CpuWriteView GetCpuWriteView() const;
|
||||
CpuWriteView GetCpuWriteView(
|
||||
uint64_t source_location_hash =
|
||||
tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
|
||||
|
||||
#if MEDIAPIPE_METAL_ENABLED
|
||||
// TODO: id<MTLBuffer> vs. MtlBufferView.
|
||||
|
@ -305,7 +324,9 @@ class Tensor {
|
|||
// A valid OpenGL context must be bound to the calling thread due to possible
|
||||
// GPU resource allocation.
|
||||
OpenGlBufferView GetOpenGlBufferReadView() const;
|
||||
OpenGlBufferView GetOpenGlBufferWriteView() const;
|
||||
OpenGlBufferView GetOpenGlBufferWriteView(
|
||||
uint64_t source_location_hash =
|
||||
tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
|
||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||
|
||||
const Shape& shape() const { return shape_; }
|
||||
|
@ -410,7 +431,11 @@ class Tensor {
|
|||
void CreateEglSyncAndFd() const;
|
||||
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
||||
// Use Ahwb for other views: OpenGL / CPU buffer.
|
||||
static inline bool use_ahwb_ = false;
|
||||
mutable bool use_ahwb_ = false;
|
||||
mutable uint64_t ahwb_tracking_key_ = 0;
|
||||
// TODO: Tracks all unique tensors, so the set can grow to a large number of
|
||||
// entries. An LRU cache would make its size more predictable.
|
||||
static inline absl::flat_hash_set<uint64_t> ahwb_usage_track_;
|
||||
// Expects the target SSBO to be already bound.
|
||||
bool AllocateAhwbMapToSsbo() const;
|
||||
bool InsertAhwbToSsboFence() const;
|
||||
|
@ -419,6 +444,8 @@ class Tensor {
|
|||
void* MapAhwbToCpuRead() const;
|
||||
void* MapAhwbToCpuWrite() const;
|
||||
void MoveCpuOrSsboToAhwb() const;
|
||||
// Set current tracking key, set "use ahwb" if the key is already marked.
|
||||
void TrackAhwbUsage(uint64_t key) const;
|
||||
|
||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
||||
mutable std::shared_ptr<mediapipe::GlContext> gl_context_;
|
||||
|
|
|
@ -265,6 +265,10 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView(
|
|||
}
|
||||
|
||||
bool Tensor::AllocateAHardwareBuffer(int size_alignment) const {
|
||||
// Mark current tracking key as Ahwb-use.
|
||||
ahwb_usage_track_.insert(ahwb_tracking_key_);
|
||||
use_ahwb_ = true;
|
||||
|
||||
if (__builtin_available(android 26, *)) {
|
||||
if (ahwb_ == nullptr) {
|
||||
AHardwareBuffer_Desc desc = {};
|
||||
|
@ -447,6 +451,16 @@ void* Tensor::MapAhwbToCpuWrite() const {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void Tensor::TrackAhwbUsage(uint64_t source_location_hash) const {
|
||||
if (ahwb_tracking_key_ == 0) {
|
||||
ahwb_tracking_key_ = source_location_hash;
|
||||
for (int dim : shape_.dims) {
|
||||
ahwb_tracking_key_ = tensor_internal::FnvHash64(ahwb_tracking_key_, dim);
|
||||
}
|
||||
}
|
||||
use_ahwb_ = ahwb_usage_track_.contains(ahwb_tracking_key_);
|
||||
}
|
||||
|
||||
#else // MEDIAPIPE_TENSOR_USE_AHWB
|
||||
|
||||
bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
|
||||
|
@ -455,6 +469,7 @@ void Tensor::MoveAhwbStuff(Tensor* src) {}
|
|||
// Stub definitions in the branch closed by the
// `#endif  // MEDIAPIPE_TENSOR_USE_AHWB` below: they do nothing, and the
// mapping functions return nullptr.
void Tensor::ReleaseAhwbStuff() {}
|
||||
void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
|
||||
void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
|
||||
void Tensor::TrackAhwbUsage(uint64_t key) const {}
|
||||
|
||||
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
||||
|
||||
|
|
|
@ -18,8 +18,6 @@
|
|||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
#include "mediapipe/framework/tool/type_util.h"
|
||||
|
||||
namespace mediapipe {
|
||||
|
||||
// Generates unique view id at compile-time using FILE and LINE.
|
||||
|
@ -41,10 +39,12 @@ namespace tensor_internal {
|
|||
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
||||
constexpr uint64_t kFnvPrime = 0x00000100000001B3;
|
||||
constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
|
||||
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
|
||||
return (str[0] == 0) ? hash : FnvHash64(str + 1, (hash ^ str[0]) * kFnvPrime);
|
||||
constexpr uint64_t FnvHash64(uint64_t value1, uint64_t value2) {
|
||||
return (value2 ^ value1) * kFnvPrime;
|
||||
}
|
||||
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
|
||||
return (str[0] == 0) ? hash : FnvHash64(str + 1, FnvHash64(hash, str[0]));
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
struct TypeList {
|
||||
static constexpr std::size_t size{sizeof...(Ts)};
|
||||
|
|
Loading…
Reference in New Issue
Block a user