// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
// Full Example:
//
// node {
//   calculator: "TfLiteInferenceCalculator"
//   input_stream: "TENSOR_IN:image_tensors"
//   output_stream: "TENSOR_OUT:result_tensors"
//   options {
//     [mediapipe.TfLiteInferenceCalculatorOptions.ext] {
//       model_path: "model.tflite"
//       delegate { gpu {} }
//     }
//   }
// }
//
message TfLiteInferenceCalculatorOptions {
  extend mediapipe.CalculatorOptions {
    optional TfLiteInferenceCalculatorOptions ext = 233867213;
  }

  message Delegate {
    // Default inference provided by tflite.
    message TfLite {}
    // Delegate to run GPU inference.
    // (Can use OpenGL, OpenCL, or Metal, depending on the device.)
    message Gpu {
      // Experimental, Android/Linux only. Use the TFLite GPU delegate API2
      // for NN inference.
      // example:
      //   delegate: { gpu { use_advanced_gpu_api: true } }
      optional bool use_advanced_gpu_api = 1 [default = false];

      // This option is valid for the TFLite GPU delegate API2 only.
      // Choose any of the available APIs to force inference to run on it.
      enum Api {
        ANY = 0;
        OPENGL = 1;
        OPENCL = 2;
      }
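      // For example, forcing the OpenCL backend under the API2 path might
      // look like this (a sketch; the values are illustrative only):
      //   delegate: { gpu { use_advanced_gpu_api: true api: OPENCL } }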
      optional Api api = 4 [default = ANY];

      // This option is valid for the TFLite GPU delegate API2 only.
      // Set to true to use 16-bit float precision. If maximum precision is
      // needed, set to false to force 32-bit float calculations.
      optional bool allow_precision_loss = 3 [default = true];

      // Load a pre-compiled serialized binary cache to accelerate the init
      // process. Only available for the OpenCL delegate on Android.
      // Kernel caching will only be enabled if this path is set.
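      // For example, enabling kernel caching might look like this (the path
      // below is illustrative only):
      //   delegate: { gpu { cached_kernel_path: "/data/local/tmp/" } }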
      optional string cached_kernel_path = 2;

      // Encapsulated compilation/runtime tradeoffs.
      enum InferenceUsage {
        UNSPECIFIED = 0;
        // InferenceRunner will be used only once. Therefore, it is important
        // to minimize bootstrap time as well.
        FAST_SINGLE_ANSWER = 1;
        // Prefer maximizing throughput. The same inference runner will be
        // used repeatedly on different inputs.
        SUSTAINED_SPEED = 2;
      }
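      // For example, a graph that runs the model only once might prefer
      // (a sketch; illustrative only):
      //   delegate: { gpu { usage: FAST_SINGLE_ANSWER } }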
      optional InferenceUsage usage = 5 [default = SUSTAINED_SPEED];
    }

    // Android only.
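    // For example, enabling NNAPI compilation caching might look like this
    // (the directory and token below are illustrative only):
    //   delegate: { nnapi { cache_dir: "/data/local/tmp"
    //                       model_token: "model_v1" } }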
    message Nnapi {
      // Directory to store the compilation cache. If unspecified, NNAPI will
      // not try caching the compilation.
      optional string cache_dir = 1;
      // Unique token identifying the model. It is the caller's responsibility
      // to ensure there is no clash of the tokens. If unspecified, NNAPI will
      // not try caching the compilation.
      optional string model_token = 2;
    }
    message Xnnpack {
      // Number of threads for the XNNPACK delegate. (By default, the
      // calculator tries to choose an optimal number of threads depending on
      // the device.)
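      // For example, pinning XNNPACK to four threads might look like this
      // (the thread count is illustrative only):
      //   delegate: { xnnpack { num_threads: 4 } }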
      optional int32 num_threads = 1 [default = -1];
    }

    oneof delegate {
      TfLite tflite = 1;
      Gpu gpu = 2;
      Nnapi nnapi = 3;
      Xnnpack xnnpack = 4;
    }
  }

  // Path to the TF Lite model (ex: /path/to/modelname.tflite).
  // On mobile, this is generally just modelname.tflite.
  optional string model_path = 1;

  // Whether the TF Lite GPU or CPU backend should be used. Effective only
  // when input tensors are on CPU. For input tensors on GPU, the GPU backend
  // is always used.
  // DEPRECATED: configure "delegate" instead.
  optional bool use_gpu = 2 [deprecated = true, default = false];

  // Android only. When true, an NNAPI delegate will be used for inference.
  // If NNAPI is not available, then the default CPU delegate will be used
  // automatically.
  // DEPRECATED: configure "delegate" instead.
  optional bool use_nnapi = 3 [deprecated = true, default = false];

  // The number of threads available to the interpreter. Effective only when
  // input tensors are on CPU and 'use_gpu' is false.
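  // For example (the thread count below is illustrative only):
  //   cpu_num_thread: 4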
  optional int32 cpu_num_thread = 4 [default = -1];

  // TfLite delegate to run inference.
  // If not specified, and any input or output is on GPU (i.e., using the
  // TENSORS_GPU tag), the TFLite GPU delegate is used (as if "gpu {}" were
  // specified); otherwise regular TFLite on CPU is used (as if "tflite {}"
  // were specified), except when building with Emscripten, where XNNPACK is
  // used.
  // NOTE: if "delegate" is specified, the deprecated use_gpu/use_nnapi
  // options are ignored ("delegate" takes precedence over the use_* options).
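  // For example, explicitly selecting the default TFLite CPU path might look
  // like this (a sketch, not a definitive configuration):
  //   delegate { tflite {} }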
  optional Delegate delegate = 5;
}