Make cache writes optional in InferenceCalculatorAdvancedGL

Previously, caches were always written, and a failed write caused the graph to close abruptly. This prevented services with read-only access to the cache from using the calculator.

The calculator now lets services choose whether and how caches are written: skip writes entirely (NO_WRITE), attempt writes and log on failure (TRY_WRITE), or keep the old fail-on-error behavior (WRITE_OR_ERROR, the default).
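
As a usage sketch (not part of this commit): a service with read-only access to the cache directory could select NO_WRITE in its graph config. The calculator name, stream tags, model path, and graph shape below are illustrative assumptions; only the cache_writing_behavior field comes from this change.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Hypothetical graph: reuse GPU caches written elsewhere, never write them.
const mediapipe::CalculatorGraphConfig config =
    mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
      input_stream: "input_tensors"
      output_stream: "output_tensors"
      node {
        calculator: "InferenceCalculatorGlAdvanced"
        input_stream: "TENSORS:input_tensors"
        output_stream: "TENSORS:output_tensors"
        options {
          [mediapipe.InferenceCalculatorOptions.ext] {
            model_path: "model.tflite"  # illustrative
            delegate {
              gpu {
                use_advanced_gpu_api: true
                cache_writing_behavior: NO_WRITE
              }
            }
          }
        }
      }
    )pb");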

PiperOrigin-RevId: 561866791
MediaPipe Team, 2023-08-31 23:29:04 -07:00, committed by Copybara-Service
parent dea6ccba25
commit de0c7f2a30
3 changed files with 55 additions and 2 deletions


@@ -88,6 +88,20 @@ message InferenceCalculatorOptions {
       // serialized model is invalid or missing.
       optional string serialized_model_dir = 7;
 
+      enum CacheWritingBehavior {
+        // Do not write any caches.
+        NO_WRITE = 0;
+
+        // Try to write caches, log on failure.
+        TRY_WRITE = 1;
+
+        // Write caches or return an error if write fails.
+        WRITE_OR_ERROR = 2;
+      }
+      // Specifies how GPU caches are written to disk.
+      optional CacheWritingBehavior cache_writing_behavior = 10
+          [default = WRITE_OR_ERROR];
+
       // Unique token identifying the model. Used in conjunction with
       // "serialized_model_dir". It is the caller's responsibility to ensure
       // there is no clash of the tokens.
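
The WRITE_OR_ERROR default preserves the previous always-write behavior for graphs that never set the field. For completeness, a sketch of setting the new option through the generated C++ proto API (standard protobuf accessors; the wrapper function is hypothetical):

#include "mediapipe/calculators/tensor/inference_calculator.pb.h"

// Hypothetical helper: opt into best-effort cache writes, where a failed
// write is logged instead of closing the graph.
void UseBestEffortCacheWrites(mediapipe::InferenceCalculatorOptions& options) {
  options.mutable_delegate()->mutable_gpu()->set_cache_writing_behavior(
      mediapipe::InferenceCalculatorOptions::Delegate::Gpu::TRY_WRITE);
}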


@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <cstdint>
 #include <cstring>
 #include <memory>
 #include <string>
@@ -26,6 +27,7 @@
 #include "mediapipe/util/tflite/tflite_gpu_runner.h"
 
 #if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_CHROMIUMOS)
+#include "absl/log/absl_log.h"
 #include "mediapipe/framework/deps/file_path.h"
 #include "mediapipe/util/android/file/base/file.h"
 #include "mediapipe/util/android/file/base/filesystem.h"
@@ -68,14 +70,21 @@ class InferenceCalculatorGlAdvancedImpl
                 const mediapipe::InferenceCalculatorOptions::Delegate::Gpu&
                     gpu_delegate_options);
     absl::Status ReadGpuCaches(tflite::gpu::TFLiteGPURunner* gpu_runner) const;
-    absl::Status SaveGpuCaches(tflite::gpu::TFLiteGPURunner* gpu_runner) const;
+    // Writes caches to disk based on |cache_writing_behavior_|.
+    absl::Status SaveGpuCachesBasedOnBehavior(
+        tflite::gpu::TFLiteGPURunner* gpu_runner) const;
     bool UseSerializedModel() const { return use_serialized_model_; }
 
    private:
+    // Writes caches to disk, returns error on failure.
+    absl::Status SaveGpuCaches(tflite::gpu::TFLiteGPURunner* gpu_runner) const;
+
     bool use_kernel_caching_ = false;
     std::string cached_kernel_filename_;
     bool use_serialized_model_ = false;
     std::string serialized_model_path_;
+    mediapipe::InferenceCalculatorOptions::Delegate::Gpu::CacheWritingBehavior
+        cache_writing_behavior_;
   };
 
   // Helper class that wraps everything related to GPU inference acceleration.
@@ -232,7 +241,8 @@ InferenceCalculatorGlAdvancedImpl::GpuInferenceRunner::InitTFLiteGPURunner(
   MP_RETURN_IF_ERROR(
       on_disk_cache_helper_.ReadGpuCaches(tflite_gpu_runner_.get()));
   MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
-  return on_disk_cache_helper_.SaveGpuCaches(tflite_gpu_runner_.get());
+  return on_disk_cache_helper_.SaveGpuCachesBasedOnBehavior(
+      tflite_gpu_runner_.get());
 }
 
 #if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_CHROMIUMOS)
@@ -261,9 +271,36 @@ absl::Status InferenceCalculatorGlAdvancedImpl::OnDiskCacheHelper::Init(
         mediapipe::file::JoinPath(gpu_delegate_options.serialized_model_dir(),
                                   gpu_delegate_options.model_token());
   }
+  cache_writing_behavior_ = gpu_delegate_options.has_cache_writing_behavior()
+                                ? gpu_delegate_options.cache_writing_behavior()
+                                : mediapipe::InferenceCalculatorOptions::
+                                      Delegate::Gpu::WRITE_OR_ERROR;
   return absl::OkStatus();
 }
 
+absl::Status InferenceCalculatorGlAdvancedImpl::OnDiskCacheHelper::
+    SaveGpuCachesBasedOnBehavior(
+        tflite::gpu::TFLiteGPURunner* gpu_runner) const {
+  switch (cache_writing_behavior_) {
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::NO_WRITE:
+      return absl::OkStatus();
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::TRY_WRITE: {
+      auto status = SaveGpuCaches(gpu_runner);
+      if (!status.ok()) {
+        ABSL_LOG_FIRST_N(WARNING, 1) << "Failed to save gpu caches: " << status;
+      }
+      return absl::OkStatus();
+    }
+    case mediapipe::InferenceCalculatorOptions::Delegate::Gpu::WRITE_OR_ERROR:
+      return SaveGpuCaches(gpu_runner);
+    default:
+      ABSL_LOG_FIRST_N(ERROR, 1)
+          << "Unknown cache writing behavior: "
+          << static_cast<uint32_t>(cache_writing_behavior_);
+      return absl::InvalidArgumentError("Unknown cache writing behavior.");
+  }
+}
+
 absl::Status
 InferenceCalculatorGlAdvancedImpl::OnDiskCacheHelper::SaveGpuCaches(
     tflite::gpu::TFLiteGPURunner* gpu_runner) const {
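
To make the three contracts concrete, here is a standalone sketch (not from the commit) with the cache writer injected as a callback; it mirrors the switch above so the error-swallowing TRY_WRITE branch is easy to exercise in isolation:

#include <functional>
#include <iostream>

#include "absl/status/status.h"

// Illustrative enum and function names; only the dispatch logic mirrors the
// calculator code above.
enum class CacheWritingBehavior { kNoWrite, kTryWrite, kWriteOrError };

absl::Status SaveBasedOnBehavior(
    CacheWritingBehavior behavior,
    const std::function<absl::Status()>& save_caches) {
  switch (behavior) {
    case CacheWritingBehavior::kNoWrite:
      return absl::OkStatus();  // Never touch the cache directory.
    case CacheWritingBehavior::kTryWrite: {
      const absl::Status status = save_caches();
      if (!status.ok()) {
        // Best effort: report the failure but keep the graph running.
        std::cerr << "Failed to save gpu caches: " << status << "\n";
      }
      return absl::OkStatus();
    }
    case CacheWritingBehavior::kWriteOrError:
      return save_caches();  // Propagate the write failure to the caller.
  }
  return absl::InvalidArgumentError("Unknown cache writing behavior.");
}

For example, SaveBasedOnBehavior(CacheWritingBehavior::kTryWrite, [] { return absl::PermissionDeniedError("read-only cache"); }) returns OkStatus() while reporting the failure, which is exactly the property that lets read-only services keep running.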


@@ -122,6 +122,8 @@ describe('TaskRunner', () => {
       allowPrecisionLoss: true,
       cachedKernelPath: undefined,
       serializedModelDir: undefined,
+      cacheWritingBehavior: InferenceCalculatorOptions.Delegate.Gpu
+                                .CacheWritingBehavior.WRITE_OR_ERROR,
       modelToken: undefined,
       usage: InferenceCalculatorOptions.Delegate.Gpu.InferenceUsage
                  .SUSTAINED_SPEED,