Automatic selection of the tensor's storage type by recording previously requested views.
PiperOrigin-RevId: 496466136
This commit is contained in:
parent
3e6cd5d2bf
commit
ef3fa67bf4
|
@ -428,7 +428,10 @@ cc_library(
|
||||||
"tensor.cc",
|
"tensor.cc",
|
||||||
"tensor_ahwb.cc",
|
"tensor_ahwb.cc",
|
||||||
],
|
],
|
||||||
hdrs = ["tensor.h"],
|
hdrs = [
|
||||||
|
"tensor.h",
|
||||||
|
"tensor_internal.h",
|
||||||
|
],
|
||||||
copts = select({
|
copts = select({
|
||||||
"//mediapipe:apple": [
|
"//mediapipe:apple": [
|
||||||
"-x objective-c++",
|
"-x objective-c++",
|
||||||
|
@ -452,6 +455,7 @@ cc_library(
|
||||||
],
|
],
|
||||||
}),
|
}),
|
||||||
deps = [
|
deps = [
|
||||||
|
"@com_google_absl//absl/container:flat_hash_set",
|
||||||
"@com_google_absl//absl/memory",
|
"@com_google_absl//absl/memory",
|
||||||
"@com_google_absl//absl/synchronization",
|
"@com_google_absl//absl/synchronization",
|
||||||
"//mediapipe/framework:port",
|
"//mediapipe/framework:port",
|
||||||
|
|
|
@ -246,10 +246,10 @@ Tensor::OpenGlTexture2dView::GetLayoutDimensions(const Tensor::Shape& shape,
|
||||||
return Tensor::OpenGlTexture2dView::Layout::kAligned;
|
return Tensor::OpenGlTexture2dView::Layout::kAligned;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// The best performance of a compute shader can be achived with textures'
|
// The best performance of a compute shader can be achieved with textures'
|
||||||
// width multiple of 256. Making minimum fixed width of 256 waste memory for
|
// width multiple of 256. Making minimum fixed width of 256 waste memory for
|
||||||
// small tensors. The optimal balance memory-vs-performance is power of 2.
|
// small tensors. The optimal balance memory-vs-performance is power of 2.
|
||||||
// The texture width and height are choosen to be closer to square.
|
// The texture width and height are chosen to be closer to square.
|
||||||
float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
|
float power = std::log2(std::sqrt(static_cast<float>(num_pixels)));
|
||||||
w = 1 << static_cast<int>(power);
|
w = 1 << static_cast<int>(power);
|
||||||
int h = (num_pixels + w - 1) / w;
|
int h = (num_pixels + w - 1) / w;
|
||||||
|
@ -326,7 +326,7 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
|
||||||
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
|
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
|
||||||
AllocateOpenGlBuffer();
|
AllocateOpenGlBuffer();
|
||||||
if (!(valid_ & kValidOpenGlBuffer)) {
|
if (!(valid_ & kValidOpenGlBuffer)) {
|
||||||
// If the call succeds then AHWB -> SSBO are synchronized so any usage of
|
// If the call succeeds then AHWB -> SSBO are synchronized so any usage of
|
||||||
// the SSBO is correct after this call.
|
// the SSBO is correct after this call.
|
||||||
if (!InsertAhwbToSsboFence()) {
|
if (!InsertAhwbToSsboFence()) {
|
||||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
|
||||||
|
@ -348,8 +348,10 @@ Tensor::OpenGlBufferView Tensor::GetOpenGlBufferReadView() const {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView() const {
|
Tensor::OpenGlBufferView Tensor::GetOpenGlBufferWriteView(
|
||||||
|
uint64_t source_location_hash) const {
|
||||||
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
|
auto lock(absl::make_unique<absl::MutexLock>(&view_mutex_));
|
||||||
|
TrackAhwbUsage(source_location_hash);
|
||||||
AllocateOpenGlBuffer();
|
AllocateOpenGlBuffer();
|
||||||
valid_ = kValidOpenGlBuffer;
|
valid_ = kValidOpenGlBuffer;
|
||||||
return {opengl_buffer_, std::move(lock), nullptr};
|
return {opengl_buffer_, std::move(lock), nullptr};
|
||||||
|
@ -385,6 +387,7 @@ void Tensor::Move(Tensor* src) {
|
||||||
src->element_type_ = ElementType::kNone; // Mark as invalidated.
|
src->element_type_ = ElementType::kNone; // Mark as invalidated.
|
||||||
cpu_buffer_ = src->cpu_buffer_;
|
cpu_buffer_ = src->cpu_buffer_;
|
||||||
src->cpu_buffer_ = nullptr;
|
src->cpu_buffer_ = nullptr;
|
||||||
|
ahwb_tracking_key_ = src->ahwb_tracking_key_;
|
||||||
#if MEDIAPIPE_METAL_ENABLED
|
#if MEDIAPIPE_METAL_ENABLED
|
||||||
device_ = src->device_;
|
device_ = src->device_;
|
||||||
src->device_ = nil;
|
src->device_ = nil;
|
||||||
|
@ -589,8 +592,10 @@ Tensor::CpuReadView Tensor::GetCpuReadView() const {
|
||||||
return {cpu_buffer_, std::move(lock)};
|
return {cpu_buffer_, std::move(lock)};
|
||||||
}
|
}
|
||||||
|
|
||||||
Tensor::CpuWriteView Tensor::GetCpuWriteView() const {
|
Tensor::CpuWriteView Tensor::GetCpuWriteView(
|
||||||
|
uint64_t source_location_hash) const {
|
||||||
auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_);
|
auto lock = absl::make_unique<absl::MutexLock>(&view_mutex_);
|
||||||
|
TrackAhwbUsage(source_location_hash);
|
||||||
AllocateCpuBuffer();
|
AllocateCpuBuffer();
|
||||||
valid_ = kValidCpu;
|
valid_ = kValidCpu;
|
||||||
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
|
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
|
||||||
|
@ -620,24 +625,4 @@ void Tensor::AllocateCpuBuffer() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Tensor::SetPreferredStorageType(StorageType type) {
|
|
||||||
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
|
|
||||||
if (__builtin_available(android 26, *)) {
|
|
||||||
use_ahwb_ = type == StorageType::kAhwb;
|
|
||||||
VLOG(4) << "Tensor: use of AHardwareBuffer is "
|
|
||||||
<< (use_ahwb_ ? "allowed" : "not allowed");
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
VLOG(4) << "Tensor: use of AHardwareBuffer is not allowed";
|
|
||||||
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
|
||||||
}
|
|
||||||
|
|
||||||
Tensor::StorageType Tensor::GetPreferredStorageType() {
|
|
||||||
#ifdef MEDIAPIPE_TENSOR_USE_AHWB
|
|
||||||
return use_ahwb_ ? StorageType::kAhwb : StorageType::kDefault;
|
|
||||||
#else
|
|
||||||
return StorageType::kDefault;
|
|
||||||
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace mediapipe
|
} // namespace mediapipe
|
||||||
|
|
|
@ -24,8 +24,9 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "absl/memory/memory.h"
|
#include "absl/container/flat_hash_set.h"
|
||||||
#include "absl/synchronization/mutex.h"
|
#include "absl/synchronization/mutex.h"
|
||||||
|
#include "mediapipe/framework/formats/tensor_internal.h"
|
||||||
#include "mediapipe/framework/port.h"
|
#include "mediapipe/framework/port.h"
|
||||||
|
|
||||||
#if MEDIAPIPE_METAL_ENABLED
|
#if MEDIAPIPE_METAL_ENABLED
|
||||||
|
@ -48,6 +49,22 @@
|
||||||
#include "mediapipe/gpu/gl_context.h"
|
#include "mediapipe/gpu/gl_context.h"
|
||||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
||||||
|
|
||||||
|
// Portable wrappers around the compiler's caller-location builtins. They are
// used below as default arguments (FnvHash64(builtin_FILE(), builtin_LINE()))
// so that each view-request call site gets its own hash.
// When the compiler reports the builtin via __has_builtin, the wrapper is an
// object-like alias, so `builtin_LINE()` expands to `__builtin_LINE()`.
#if defined __has_builtin
|
||||||
|
#if __has_builtin(__builtin_LINE)
|
||||||
|
#define builtin_LINE __builtin_LINE
|
||||||
|
#endif
|
||||||
|
#if __has_builtin(__builtin_FILE)
|
||||||
|
#define builtin_FILE __builtin_FILE
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Fallbacks for compilers without the builtins: function-like macros that
// expand to constants (0 and ""). NOTE(review): with both fallbacks active
// every call site produces the same default hash, so call sites can then only
// be told apart by whatever else is mixed into the key -- confirm this
// degradation is acceptable.
#ifndef builtin_LINE
|
||||||
|
#define builtin_LINE() 0
|
||||||
|
#endif
|
||||||
|
#ifndef builtin_FILE
|
||||||
|
#define builtin_FILE() ""
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
|
|
||||||
// Tensor is a container of multi-dimensional data that supports sharing the
|
// Tensor is a container of multi-dimensional data that supports sharing the
|
||||||
|
@ -65,7 +82,7 @@ namespace mediapipe {
|
||||||
// GLuint buffer = view.buffer();
|
// GLuint buffer = view.buffer();
|
||||||
// Then the buffer can be bound to the GPU command buffer.
|
// Then the buffer can be bound to the GPU command buffer.
|
||||||
// ...binding the buffer to the command buffer...
|
// ...binding the buffer to the command buffer...
|
||||||
// ...commiting command buffer and releasing the view...
|
// ...committing command buffer and releasing the view...
|
||||||
//
|
//
|
||||||
// The following request for the CPU view will be blocked until the GPU view is
|
// The following request for the CPU view will be blocked until the GPU view is
|
||||||
// released and the GPU task is finished.
|
// released and the GPU task is finished.
|
||||||
|
@ -161,7 +178,9 @@ class Tensor {
|
||||||
using CpuReadView = CpuView<const void>;
|
using CpuReadView = CpuView<const void>;
|
||||||
CpuReadView GetCpuReadView() const;
|
CpuReadView GetCpuReadView() const;
|
||||||
using CpuWriteView = CpuView<void>;
|
using CpuWriteView = CpuView<void>;
|
||||||
CpuWriteView GetCpuWriteView() const;
|
CpuWriteView GetCpuWriteView(
|
||||||
|
uint64_t source_location_hash =
|
||||||
|
tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
|
||||||
|
|
||||||
#if MEDIAPIPE_METAL_ENABLED
|
#if MEDIAPIPE_METAL_ENABLED
|
||||||
// TODO: id<MTLBuffer> vs. MtlBufferView.
|
// TODO: id<MTLBuffer> vs. MtlBufferView.
|
||||||
|
@ -305,7 +324,9 @@ class Tensor {
|
||||||
// A valid OpenGL context must be bound to the calling thread due to possible
|
// A valid OpenGL context must be bound to the calling thread due to possible
|
||||||
// GPU resource allocation.
|
// GPU resource allocation.
|
||||||
OpenGlBufferView GetOpenGlBufferReadView() const;
|
OpenGlBufferView GetOpenGlBufferReadView() const;
|
||||||
OpenGlBufferView GetOpenGlBufferWriteView() const;
|
OpenGlBufferView GetOpenGlBufferWriteView(
|
||||||
|
uint64_t source_location_hash =
|
||||||
|
tensor_internal::FnvHash64(builtin_FILE(), builtin_LINE())) const;
|
||||||
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
#endif // MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_31
|
||||||
|
|
||||||
const Shape& shape() const { return shape_; }
|
const Shape& shape() const { return shape_; }
|
||||||
|
@ -410,7 +431,11 @@ class Tensor {
|
||||||
void CreateEglSyncAndFd() const;
|
void CreateEglSyncAndFd() const;
|
||||||
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
||||||
// Use Ahwb for other views: OpenGL / CPU buffer.
|
// Use Ahwb for other views: OpenGL / CPU buffer.
|
||||||
static inline bool use_ahwb_ = false;
|
mutable bool use_ahwb_ = false;
|
||||||
|
mutable uint64_t ahwb_tracking_key_ = 0;
|
||||||
|
// TODO: Tracks all unique tensors. Can grow to a large number. LRU
|
||||||
|
// eviction would make the memory use more predictable.
|
||||||
|
static inline absl::flat_hash_set<uint64_t> ahwb_usage_track_;
|
||||||
// Expects the target SSBO to be already bound.
|
// Expects the target SSBO to be already bound.
|
||||||
bool AllocateAhwbMapToSsbo() const;
|
bool AllocateAhwbMapToSsbo() const;
|
||||||
bool InsertAhwbToSsboFence() const;
|
bool InsertAhwbToSsboFence() const;
|
||||||
|
@ -419,6 +444,8 @@ class Tensor {
|
||||||
void* MapAhwbToCpuRead() const;
|
void* MapAhwbToCpuRead() const;
|
||||||
void* MapAhwbToCpuWrite() const;
|
void* MapAhwbToCpuWrite() const;
|
||||||
void MoveCpuOrSsboToAhwb() const;
|
void MoveCpuOrSsboToAhwb() const;
|
||||||
|
// Set current tracking key, set "use ahwb" if the key is already marked.
|
||||||
|
void TrackAhwbUsage(uint64_t key) const;
|
||||||
|
|
||||||
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
#if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
|
||||||
mutable std::shared_ptr<mediapipe::GlContext> gl_context_;
|
mutable std::shared_ptr<mediapipe::GlContext> gl_context_;
|
||||||
|
|
|
@ -265,6 +265,10 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferWriteView(
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Tensor::AllocateAHardwareBuffer(int size_alignment) const {
|
bool Tensor::AllocateAHardwareBuffer(int size_alignment) const {
|
||||||
|
// Mark current tracking key as Ahwb-use.
|
||||||
|
ahwb_usage_track_.insert(ahwb_tracking_key_);
|
||||||
|
use_ahwb_ = true;
|
||||||
|
|
||||||
if (__builtin_available(android 26, *)) {
|
if (__builtin_available(android 26, *)) {
|
||||||
if (ahwb_ == nullptr) {
|
if (ahwb_ == nullptr) {
|
||||||
AHardwareBuffer_Desc desc = {};
|
AHardwareBuffer_Desc desc = {};
|
||||||
|
@ -447,6 +451,16 @@ void* Tensor::MapAhwbToCpuWrite() const {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Establishes this tensor's tracking key (once) and sets use_ahwb_ according
// to whether that key has been recorded in ahwb_usage_track_.
// `source_location_hash` is the hash of the call site that requested the view.
// Elsewhere in this change, AllocateAHardwareBuffer() is what inserts the key
// into ahwb_usage_track_.
// NOTE(review): ahwb_usage_track_ is a static set shared by all tensors and no
// lock around it is visible here -- confirm callers serialize access.
void Tensor::TrackAhwbUsage(uint64_t source_location_hash) const {
|
||||||
|
// A key of 0 is treated as "not yet assigned"; the key is computed only on
// the first tracked request and reused afterwards.
if (ahwb_tracking_key_ == 0) {
|
||||||
|
ahwb_tracking_key_ = source_location_hash;
|
||||||
|
// Mix every dimension of the shape into the key, so the same call site
// with a different tensor shape yields a different key.
for (int dim : shape_.dims) {
|
||||||
|
ahwb_tracking_key_ = tensor_internal::FnvHash64(ahwb_tracking_key_, dim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Use AHWB storage only if this key was previously recorded in the set.
use_ahwb_ = ahwb_usage_track_.contains(ahwb_tracking_key_);
|
||||||
|
}
|
||||||
|
|
||||||
#else // MEDIAPIPE_TENSOR_USE_AHWB
|
#else // MEDIAPIPE_TENSOR_USE_AHWB
|
||||||
|
|
||||||
bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
|
bool Tensor::AllocateAhwbMapToSsbo() const { return false; }
|
||||||
|
@ -455,6 +469,7 @@ void Tensor::MoveAhwbStuff(Tensor* src) {}
|
||||||
void Tensor::ReleaseAhwbStuff() {}
|
void Tensor::ReleaseAhwbStuff() {}
|
||||||
void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
|
void* Tensor::MapAhwbToCpuRead() const { return nullptr; }
|
||||||
void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
|
void* Tensor::MapAhwbToCpuWrite() const { return nullptr; }
|
||||||
|
// No-op variant compiled in the #else branch of MEDIAPIPE_TENSOR_USE_AHWB:
// without AHWB support there is nothing to track, so `key` is ignored.
void Tensor::TrackAhwbUsage(uint64_t key) const {}
|
||||||
|
|
||||||
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
#endif // MEDIAPIPE_TENSOR_USE_AHWB
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,6 @@
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "mediapipe/framework/tool/type_util.h"
|
|
||||||
|
|
||||||
namespace mediapipe {
|
namespace mediapipe {
|
||||||
|
|
||||||
// Generates unique view id at compile-time using FILE and LINE.
|
// Generates unique view id at compile-time using FILE and LINE.
|
||||||
|
@ -41,10 +39,12 @@ namespace tensor_internal {
|
||||||
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
|
||||||
constexpr uint64_t kFnvPrime = 0x00000100000001B3;
|
constexpr uint64_t kFnvPrime = 0x00000100000001B3;
|
||||||
constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
|
constexpr uint64_t kFnvOffsetBias = 0xcbf29ce484222325;
|
||||||
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
|
// Single FNV-style mixing step: XOR the two values, then multiply by
// kFnvPrime (unsigned 64-bit arithmetic, so the product wraps).
constexpr uint64_t FnvHash64(uint64_t value1, uint64_t value2) {
|
||||||
return (str[0] == 0) ? hash : FnvHash64(str + 1, (hash ^ str[0]) * kFnvPrime);
|
return (value2 ^ value1) * kFnvPrime;
|
||||||
|
}
|
||||||
|
// Hashes a NUL-terminated string by folding each character into `hash` with
// the two-value overload above, recursing until the terminator is reached.
// `hash` is the running value; it defaults to kFnvOffsetBias and is returned
// unchanged for an empty string.
// NOTE(review): str[0] is a plain char converted to uint64_t; where char is
// signed, bytes >= 0x80 would sign-extend before mixing -- confirm intended.
constexpr uint64_t FnvHash64(const char* str, uint64_t hash = kFnvOffsetBias) {
|
||||||
|
return (str[0] == 0) ? hash : FnvHash64(str + 1, FnvHash64(hash, str[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename... Ts>
|
template <typename... Ts>
|
||||||
struct TypeList {
|
struct TypeList {
|
||||||
static constexpr std::size_t size{sizeof...(Ts)};
|
static constexpr std::size_t size{sizeof...(Ts)};
|
||||||
|
|
Loading…
Reference in New Issue
Block a user