Tensor: Move general CPU/SSBO tensor storage into Ahwb-backed CPU/SSBO storage.

PiperOrigin-RevId: 493710495
2022-12-07 14:31:02 -08:00 · 2022-12-07 14:31:02 -08:00 · 7faee517c4
commit 7faee517c4
parent ea74db86dd
2 changed files with 40 additions and 1 deletions
--- a/mediapipe/framework/formats/tensor.h
+++ b/mediapipe/framework/formats/tensor.h
@ -418,6 +418,7 @@ class Tensor {
  void ReleaseAhwbStuff();
  void* MapAhwbToCpuRead() const;
  void* MapAhwbToCpuWrite() const;
+  // Copies the tensor data from the SSBO or the CPU buffer into `ahwb_` and
+  // releases the source storage.
+  void MoveCpuOrSsboToAhwb() const;

 #if MEDIAPIPE_OPENGL_ES_VERSION >= MEDIAPIPE_OPENGL_ES_30
  mutable std::shared_ptr<mediapipe::GlContext> gl_context_;
--- a/mediapipe/framework/formats/tensor_ahwb.cc
+++ b/mediapipe/framework/formats/tensor_ahwb.cc
@ -215,10 +215,15 @@ Tensor::AHardwareBufferView Tensor::GetAHardwareBufferReadView() const {
  CHECK(ahwb_ || !(valid_ & kValidOpenGlBuffer))
      << "Interoperability bettween OpenGL buffer and AHardwareBuffer is not "
         "supported on targe system.";
+  bool transfer = !ahwb_;
  CHECK(AllocateAHardwareBuffer())
      << "AHardwareBuffer is not supported on the target system.";
  valid_ |= kValidAHardwareBuffer;
+  if (transfer) {
+    MoveCpuOrSsboToAhwb();
+  } else {
    if (valid_ & kValidOpenGlBuffer) CreateEglSyncAndFd();
+  }
  return {ahwb_,
          ssbo_written_,
          &fence_fd_,  // The FD is created for SSBO -> AHWB synchronization.
@ -303,6 +308,39 @@ bool Tensor::AllocateAhwbMapToSsbo() const {
  return false;
 }

+// Moves the tensor contents from SSBO or CPU storage into the AHWB-backed
+// memory and releases the source storage.
+//
+// `ahwb_` is locked for CPU writing and filled from whichever source is marked
+// in `valid_` (the OpenGL buffer is checked before the CPU buffer). After the
+// copy the source is released: the SSBO is deleted and `opengl_buffer_` /
+// `gl_context_` are reset, or `cpu_buffer_` is freed and nulled. Finally
+// `ahwb_` is unlocked. `valid_` itself is not modified here. Dies with
+// LOG(FATAL) when neither kValidOpenGlBuffer nor kValidCpu is set.
+//
+// NOTE(review): assumes `ahwb_` is already allocated and can hold bytes()
+// bytes, as in GetAHardwareBufferReadView() - confirm for any new caller.
+void Tensor::MoveCpuOrSsboToAhwb() const {
+  void* dest = nullptr;
+  if (__builtin_available(android 26, *)) {
+    // No fence to wait on (-1) and no sub-rectangle (nullptr) are passed.
+    const auto error = AHardwareBuffer_lock(
+        ahwb_, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1, nullptr, &dest);
+    CHECK(error == 0) << "AHardwareBuffer_lock " << error;
+  }
+  // The lock is skipped when the availability check above fails, which leaves
+  // `dest` null; fail loudly instead of copying into a null pointer.
+  CHECK(dest != nullptr) << "AHardwareBuffer is not mapped for CPU write.";
+  if (valid_ & kValidOpenGlBuffer) {
+    // NOTE(review): relies on Run() finishing the lambda before it returns,
+    // because the members the lambda uses are reset right after the call.
+    gl_context_->Run([this, dest]() {
+      glBindBuffer(GL_SHADER_STORAGE_BUFFER, opengl_buffer_);
+      const void* src = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, bytes(),
+                                         GL_MAP_READ_BIT);
+      // glMapBufferRange yields a null pointer when the mapping fails.
+      CHECK(src != nullptr) << "glMapBufferRange failed to map the SSBO.";
+      std::memcpy(dest, src, bytes());
+      glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+      // The data has been copied into the AHWB, so the SSBO is dropped.
+      glDeleteBuffers(1, &opengl_buffer_);
+    });
+    opengl_buffer_ = GL_INVALID_INDEX;
+    gl_context_ = nullptr;
+  } else if (valid_ & kValidCpu) {
+    std::memcpy(dest, cpu_buffer_, bytes());
+    // Free CPU memory because next time AHWB is mapped instead.
+    free(cpu_buffer_);
+    cpu_buffer_ = nullptr;
+  } else {
+    LOG(FATAL) << "Can't convert tensor with mask " << valid_ << " into AHWB.";
+  }
+  if (__builtin_available(android 26, *)) {
+    const auto error = AHardwareBuffer_unlock(ahwb_, nullptr);
+    CHECK(error == 0) << "AHardwareBuffer_unlock " << error;
+  }
+}
+
 // SSBO is created on top of AHWB. A fence is inserted into the GPU queue before
 // the GPU task that is going to read from the SSBO. When the writing into AHWB
 // is finished then the GPU reads from the SSBO.