Add a custom op resolver for fused batch norm.

PiperOrigin-RevId: 560795170
2023-08-28 13:01:17 -07:00 · 2023-08-28 13:01:17 -07:00 · f56b8a13a3
commit f56b8a13a3
parent 442940cd55
5 changed files with 363 additions and 0 deletions
--- a/mediapipe/tasks/cc/core/BUILD
+++ b/mediapipe/tasks/cc/core/BUILD
@ -80,6 +80,7 @@ cc_library(
        "//mediapipe/tasks/cc/text/custom_ops/sentencepiece:sentencepiece_tokenizer_tflite",
        "//mediapipe/tasks/cc/text/language_detector/custom_ops:kmeans_embedding_lookup",
        "//mediapipe/tasks/cc/text/language_detector/custom_ops:ngram_hash",
        "//mediapipe/tasks/cc/vision/custom_ops:fused_batch_norm",
        "//mediapipe/util/tflite/operations:landmarks_to_transform_matrix",
        "//mediapipe/util/tflite/operations:max_pool_argmax",
        "//mediapipe/util/tflite/operations:max_unpooling",
--- a/mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.cc
+++ b/mediapipe/tasks/cc/core/mediapipe_builtin_op_resolver.cc
@ -19,6 +19,7 @@ limitations under the License.
 #include "mediapipe/tasks/cc/text/custom_ops/sentencepiece/sentencepiece_tokenizer_tflite.h"
 #include "mediapipe/tasks/cc/text/language_detector/custom_ops/kmeans_embedding_lookup.h"
 #include "mediapipe/tasks/cc/text/language_detector/custom_ops/ngram_hash.h"
 #include "mediapipe/tasks/cc/vision/custom_ops/fused_batch_norm.h"
 #include "mediapipe/util/tflite/operations/landmarks_to_transform_matrix.h"
 #include "mediapipe/util/tflite/operations/max_pool_argmax.h"
 #include "mediapipe/util/tflite/operations/max_unpooling.h"
@ -56,6 +57,8 @@ MediaPipeBuiltinOpResolver::MediaPipeBuiltinOpResolver() {
            mediapipe::tflite_operations::Register_SENTENCEPIECE_TOKENIZER());
  AddCustom("RaggedTensorToTensor",
            mediapipe::tflite_operations::Register_RAGGED_TENSOR_TO_TENSOR());
  AddCustom("FusedBatchNormV3",
            mediapipe::tflite_operations::Register_FusedBatchNorm());
 }
 }  // namespace core
 }  // namespace tasks
--- a/mediapipe/tasks/cc/vision/custom_ops/BUILD
+++ b/mediapipe/tasks/cc/vision/custom_ops/BUILD
@ -0,0 +1,35 @@
 # Copyright 2023 The MediaPipe Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 package(default_visibility = ["//mediapipe/tasks:internal"])
 licenses(["notice"])
 cc_library(
    name = "fused_batch_norm",
    srcs = ["fused_batch_norm.cc"],
    hdrs = ["fused_batch_norm.h"],
    visibility = [
        "//visibility:public",
    ],
    deps =
        [
            "@eigen_archive//:eigen3",
            "@org_tensorflow//tensorflow/lite:framework",
            "@org_tensorflow//tensorflow/lite/c:common",
            "@org_tensorflow//tensorflow/lite/core/c:private_common",
            "@org_tensorflow//tensorflow/lite/kernels:kernel_util",
            "@org_tensorflow//tensorflow/lite/kernels/internal:tensor",
        ],
 )
--- a/mediapipe/tasks/cc/vision/custom_ops/fused_batch_norm.cc
+++ b/mediapipe/tasks/cc/vision/custom_ops/fused_batch_norm.cc
@ -0,0 +1,296 @@
 /* Copyright 2023 The MediaPipe Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "mediapipe/tasks/cc/vision/custom_ops/fused_batch_norm.h"
 #include <stddef.h>
 #include "Eigen/Core"
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 namespace mediapipe::tflite_operations {
 namespace vision::batch_norm {
 namespace {
 using tflite::GetTensorData;
 constexpr int kInputIndex = 0;
 constexpr int kInputScaleIndex = 1;
 constexpr int kInputOffsetIndex = 2;
 constexpr int kInputEstimatedMeanIndex = 3;
 constexpr int kInputEstimatedVarIndex = 4;
 constexpr int kOutputIndex = 0;
 constexpr int kOutputBatchMeanIndex = 1;
 constexpr int kOutputBatchVarIndex = 2;
 constexpr int kOutputSavedMeanIndex = 3;
 constexpr int kOutputSavedVarIndex = 4;
 template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
 struct TTypes {
  // Rank-<NDIMS> tensor of scalar type T.
  typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>,
                           Eigen::Aligned>
      Tensor;
  // Rank-1 tensor (vector) of scalar type T.
  typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
                           Eigen::Aligned>
      Vec;
  typedef Eigen::TensorMap<
      Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>>
      ConstVec;
 };
 template <typename T, typename U>
 void FusedBarchNorm(TfLiteContext* context, TfLiteTensor* x_input,
                    TfLiteTensor* scale_input, TfLiteTensor* offset_input,
                    TfLiteTensor* running_mean_input,
                    TfLiteTensor* running_variance_input,
                    TfLiteTensor* y_output, TfLiteTensor* running_mean_output,
                    TfLiteTensor* running_var_output,
                    TfLiteTensor* saved_batch_mean_output,
                    TfLiteTensor* saved_batch_var_output,
                    U exponential_avg_factor, U epsilon) {
  const int batches = x_input->dims->data[0];
  const int height = x_input->dims->data[1];
  const int width = x_input->dims->data[2];
  const int depth = x_input->dims->data[3];
  Eigen::array<Eigen::DenseIndex, 4> x_dims = {batches, height, width, depth};
  Eigen::array<Eigen::DenseIndex, 1> depth_dims = {depth};
  const int rest_size = batches * height * width;
  typename TTypes<T, 4>::Tensor x(GetTensorData<T>(x_input), x_dims);
  typename TTypes<U>::ConstVec scale(GetTensorData<U>(scale_input), depth_dims);
  typename TTypes<U>::ConstVec offset(GetTensorData<U>(offset_input),
                                      depth_dims);
  typename TTypes<U>::ConstVec old_mean(GetTensorData<U>(running_mean_input),
                                        depth_dims);
  typename TTypes<U>::ConstVec old_variance(
      GetTensorData<U>(running_variance_input), depth_dims);
  typename TTypes<T, 4>::Tensor y(GetTensorData<T>(y_output), x_dims);
  typename TTypes<U>::Vec new_mean(GetTensorData<U>(running_mean_output),
                                   depth_dims);
  typename TTypes<U>::Vec new_variance(GetTensorData<U>(running_var_output),
                                       depth_dims);
  typename TTypes<U>::Vec saved_batch_mean(
      GetTensorData<U>(saved_batch_mean_output), depth_dims);
  typename TTypes<U>::Vec saved_batch_var(
      GetTensorData<U>(saved_batch_var_output), depth_dims);
  Eigen::DSizes<Eigen::Index, 2> rest_by_depth(rest_size, depth);
  Eigen::DSizes<Eigen::Index, 4> tensor_shape(batches, height, width, depth);
  Eigen::IndexList<Eigen::type2index<1>, Eigen::Index> one_by_depth;
  one_by_depth.set(1, depth);
  Eigen::IndexList<Eigen::type2index<0>> reduce_dims;
  Eigen::IndexList<Eigen::Index, Eigen::type2index<1>> bcast_spec;
  bcast_spec.set(0, rest_size);
  auto x_rest_by_depth = x.reshape(rest_by_depth).template cast<U>();
  const int rest_size_minus_one = (rest_size > 1) ? (rest_size - 1) : 1;
  U rest_size_inv = static_cast<U>(1.0f / static_cast<U>(rest_size));
  // This adjustment is for Bessel's correction
  U rest_size_adjust =
      static_cast<U>(rest_size) / static_cast<U>(rest_size_minus_one);
  Eigen::Tensor<U, 1, Eigen::RowMajor> batch_mean(depth);
  Eigen::Tensor<U, 1, Eigen::RowMajor> batch_variance(depth);
  batch_mean = (x_rest_by_depth.sum(reduce_dims) * rest_size_inv);
  auto x_centered =
      x_rest_by_depth - batch_mean.reshape(one_by_depth).broadcast(bcast_spec);
  batch_variance = x_centered.square().sum(reduce_dims) * rest_size_inv;
  auto scaling_factor = ((batch_variance + epsilon).rsqrt() * scale)
                            .eval()
                            .reshape(one_by_depth)
                            .broadcast(bcast_spec);
  auto x_scaled = x_centered * scaling_factor;
  auto x_shifted =
      (x_scaled + offset.reshape(one_by_depth).broadcast(bcast_spec))
          .template cast<T>();
  y.reshape(rest_by_depth) = x_shifted;
  if (exponential_avg_factor == U(1.0)) {
    saved_batch_var = batch_variance;
    saved_batch_mean = batch_mean;
    new_variance = batch_variance * rest_size_adjust;
    new_mean = batch_mean;
  } else {
    U one_minus_factor = U(1) - exponential_avg_factor;
    saved_batch_var = batch_variance;
    saved_batch_mean = batch_mean;
    new_variance = one_minus_factor * old_variance +
                   (exponential_avg_factor * rest_size_adjust) * batch_variance;
    new_mean =
        one_minus_factor * old_mean + exponential_avg_factor * batch_mean;
  }
 }
 }  // namespace
 // Initializes FusedBatchNorm object from serialized parameters.
 void* Initialize(TfLiteContext* /*context*/, const char* /*buffer*/,
                 size_t /*length*/) {
  return nullptr;
 }
 void Free(TfLiteContext* /*context*/, void* /*buffer*/) {}
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TF_LITE_ENSURE_EQ(context, tflite::NumInputs(node), 5);
  TF_LITE_ENSURE_EQ(context, tflite::NumOutputs(node), 6);
  TfLiteTensor* output = tflite::GetOutput(context, node, kOutputIndex);
  TF_LITE_ENSURE(context, output != nullptr);
  TfLiteTensor* batch_mean =
      tflite::GetOutput(context, node, kOutputBatchMeanIndex);
  TF_LITE_ENSURE(context, batch_mean != nullptr);
  TfLiteTensor* batch_var =
      tflite::GetOutput(context, node, kOutputBatchVarIndex);
  TF_LITE_ENSURE(context, batch_var != nullptr);
  TfLiteTensor* saved_mean =
      tflite::GetOutput(context, node, kOutputSavedMeanIndex);
  TF_LITE_ENSURE(context, saved_mean != nullptr);
  TfLiteTensor* saved_var =
      tflite::GetOutput(context, node, kOutputSavedVarIndex);
  TF_LITE_ENSURE(context, saved_var != nullptr);
  TfLiteTensor* dummy_reserve_space = tflite::GetOutput(context, node, 5);
  TF_LITE_ENSURE(context, dummy_reserve_space != nullptr);
  const TfLiteTensor* input = tflite::GetInput(context, node, kInputIndex);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* scale = tflite::GetInput(context, node, kInputScaleIndex);
  TF_LITE_ENSURE(context, scale != nullptr);
  const TfLiteTensor* offset =
      tflite::GetInput(context, node, kInputOffsetIndex);
  TF_LITE_ENSURE(context, offset != nullptr);
  const TfLiteTensor* estimated_mean =
      tflite::GetInput(context, node, kInputEstimatedMeanIndex);
  TF_LITE_ENSURE(context, estimated_mean != nullptr);
  const TfLiteTensor* estimated_var =
      tflite::GetInput(context, node, kInputEstimatedVarIndex);
  TF_LITE_ENSURE(context, estimated_var != nullptr);
  TF_LITE_ENSURE_EQ(context, tflite::NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, tflite::NumDimensions(scale), 1);
  TF_LITE_ENSURE_EQ(context, tflite::NumDimensions(offset), 1);
  TF_LITE_ENSURE_EQ(context, tflite::NumDimensions(estimated_mean), 1);
  TF_LITE_ENSURE_EQ(context, tflite::NumDimensions(estimated_var), 1);
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
  TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
  TF_LITE_ENSURE_EQ(context, scale->type, kTfLiteFloat32);
  TF_LITE_ENSURE_EQ(context, offset->type, kTfLiteFloat32);
  int batches = input->dims->data[0];
  int height = input->dims->data[1];
  int width = input->dims->data[2];
  int depth = input->dims->data[3];
  TfLiteIntArray* output_size = TfLiteIntArrayCreate(4);
  output_size->data[0] = batches;
  output_size->data[1] = height;
  output_size->data[2] = width;
  output_size->data[3] = depth;
  if (context->ResizeTensor(context, output, output_size) != kTfLiteOk) {
    return kTfLiteError;
  }
  TfLiteIntArray* batch_mean_size = TfLiteIntArrayCreate(1);
  batch_mean_size->data[0] = depth;
  if (context->ResizeTensor(context, batch_mean, batch_mean_size) !=
      kTfLiteOk) {
    return kTfLiteError;
  }
  TfLiteIntArray* batch_var_size = TfLiteIntArrayCreate(1);
  batch_var_size->data[0] = depth;
  if (context->ResizeTensor(context, batch_var, batch_var_size) != kTfLiteOk) {
    return kTfLiteError;
  }
  TfLiteIntArray* saved_mean_size = TfLiteIntArrayCreate(1);
  saved_mean_size->data[0] = depth;
  if (context->ResizeTensor(context, saved_mean, saved_mean_size) !=
      kTfLiteOk) {
    return kTfLiteError;
  }
  TfLiteIntArray* saved_var_size = TfLiteIntArrayCreate(1);
  saved_var_size->data[0] = depth;
  if (context->ResizeTensor(context, saved_var, saved_var_size) != kTfLiteOk) {
    return kTfLiteError;
  }
  TfLiteIntArray* dummy_reserve_size = TfLiteIntArrayCreate(1);
  dummy_reserve_size->data[0] = 1;
  if (context->ResizeTensor(context, dummy_reserve_space, dummy_reserve_size) !=
      kTfLiteOk) {
    return kTfLiteError;
  }
  return kTfLiteOk;
 }
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input = tflite::GetInput(context, node, kInputIndex);
  TF_LITE_ENSURE(context, input != nullptr);
  const TfLiteTensor* scale = tflite::GetInput(context, node, kInputScaleIndex);
  TF_LITE_ENSURE(context, scale != nullptr);
  const TfLiteTensor* offset =
      tflite::GetInput(context, node, kInputOffsetIndex);
  TF_LITE_ENSURE(context, offset != nullptr);
  const TfLiteTensor* estimated_mean =
      tflite::GetInput(context, node, kInputEstimatedMeanIndex);
  TF_LITE_ENSURE(context, estimated_mean != nullptr);
  const TfLiteTensor* estimated_var =
      tflite::GetInput(context, node, kInputEstimatedVarIndex);
  TF_LITE_ENSURE(context, estimated_var != nullptr);
  TfLiteTensor* output = tflite::GetOutput(context, node, kOutputIndex);
  TF_LITE_ENSURE(context, output != nullptr);
  TfLiteTensor* batch_mean =
      tflite::GetOutput(context, node, kOutputBatchMeanIndex);
  TF_LITE_ENSURE(context, batch_mean != nullptr);
  TfLiteTensor* batch_var =
      tflite::GetOutput(context, node, kOutputBatchVarIndex);
  TF_LITE_ENSURE(context, batch_var != nullptr);
  TfLiteTensor* saved_mean =
      tflite::GetOutput(context, node, kOutputSavedMeanIndex);
  TF_LITE_ENSURE(context, saved_mean != nullptr);
  TfLiteTensor* saved_var =
      tflite::GetOutput(context, node, kOutputSavedVarIndex);
  TF_LITE_ENSURE(context, saved_var != nullptr);
  FusedBarchNorm<float, float>(
      context, const_cast<TfLiteTensor*>(input),
      const_cast<TfLiteTensor*>(scale), const_cast<TfLiteTensor*>(offset),
      const_cast<TfLiteTensor*>(estimated_mean),
      const_cast<TfLiteTensor*>(estimated_var), output, batch_mean, batch_var,
      saved_mean, saved_var, /*exponential_avg_factor=*/0.001f,
      /*epsilon=*/0.001f);
  return kTfLiteOk;
 }
 }  // namespace vision::batch_norm
 TfLiteRegistration* Register_FusedBatchNorm() {
  static TfLiteRegistration r = {
      vision::batch_norm::Initialize, vision::batch_norm::Free,
      vision::batch_norm::Prepare, vision::batch_norm::Eval};
  return &r;
 }
 }  // namespace mediapipe::tflite_operations
--- a/mediapipe/tasks/cc/vision/custom_ops/fused_batch_norm.h
+++ b/mediapipe/tasks/cc/vision/custom_ops/fused_batch_norm.h
@ -0,0 +1,28 @@
 /* Copyright 2023 The MediaPipe Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef MEDIAPIPE_TASKS_CC_VISION_CUSTOM_OPS_FUSED_BATCH_NORM_H_
 #define MEDIAPIPE_TASKS_CC_VISION_CUSTOM_OPS_FUSED_BATCH_NORM_H_
 #include "tensorflow/lite/core/c/common.h"
 namespace mediapipe::tflite_operations {
 // The FusedBatchNorm op resolver is CPU-friendly only.
 TfLiteRegistration* Register_FusedBatchNorm();
 }  // namespace mediapipe::tflite_operations
 #endif  // MEDIAPIPE_TASKS_CC_VISION_CUSTOM_OPS_FUSED_BATCH_NORM_H_