Project import generated by Copybara.

GitOrigin-RevId: ec25bf2e416c3689477e82946fb69de2e53b9161
Parent: b48d72e43f
Commit: b544a314b3
@@ -1,3 +1,9 @@
+---
+name: "Build/Installation Issue"
+about: Use this template for build/installation issues
+labels: type:build/install
+
+---
 <em>Please make sure that this is a build/installation issue and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html) documentation before raising any issues.</em>
 
 **System information** (Please provide as much relevant information as possible)
.github/ISSUE_TEMPLATE/10-solution-issue.md (vendored, 6 changes)
@@ -1,3 +1,9 @@
+---
+name: "Solution Issue"
+about: Use this template for assistance with a specific mediapipe solution, such as "Pose" or "Iris", including inference model usage/training, solution-specific calculators, etc.
+labels: type:support
+
+---
 <em>Please make sure that this is a [solution](https://google.github.io/mediapipe/solutions/solutions.html) issue.</em>
 
 **System information** (Please provide as much relevant information as possible)
@@ -1,3 +1,9 @@
+---
+name: "Documentation Issue"
+about: Use this template for documentation related issues
+labels: type:docs
+
+---
 Thank you for submitting a MediaPipe documentation issue.
 The MediaPipe docs are open source! To get involved, read the documentation Contributor Guide
 ## URL(s) with the issue:
@@ -1,3 +1,9 @@
+---
+name: "Bug Issue"
+about: Use this template for reporting a bug
+labels: type:bug
+
+---
 <em>Please make sure that this is a bug and also refer to the [troubleshooting](https://google.github.io/mediapipe/getting_started/troubleshooting.html), FAQ documentation before raising any issues.</em>
 
 **System information** (Please provide as much relevant information as possible)
@@ -1,3 +1,9 @@
+---
+name: "Feature Request"
+about: Use this template for raising a feature request
+labels: type:feature
+
+---
 <em>Please make sure that this is a feature request.</em>
 
 **System information** (Please provide as much relevant information as possible)
@@ -1,3 +1,9 @@
+---
+name: "Other Issue"
+about: Use this template for any other non-support related issues.
+labels: type:others
+
+---
 This template is for miscellaneous issues not covered by the other issue categories
 
 For questions on how to work with MediaPipe, or support for problems that are not verified bugs in MediaPipe, please go to [StackOverflow](https://stackoverflow.com/questions/tagged/mediapipe) and [Slack](https://mediapipe.page.link/joinslack) communities.
WORKSPACE (20 changes)
@@ -242,6 +242,20 @@ http_archive(
     url = "https://github.com/opencv/opencv/releases/download/3.2.0/opencv-3.2.0-ios-framework.zip",
 )
 
+http_archive(
+    name = "stblib",
+    strip_prefix = "stb-b42009b3b9d4ca35bc703f5310eedc74f584be58",
+    sha256 = "13a99ad430e930907f5611325ec384168a958bf7610e63e60e2fd8e7b7379610",
+    urls = ["https://github.com/nothings/stb/archive/b42009b3b9d4ca35bc703f5310eedc74f584be58.tar.gz"],
+    build_file = "@//third_party:stblib.BUILD",
+    patches = [
+        "@//third_party:stb_image_impl.diff"
+    ],
+    patch_args = [
+        "-p1",
+    ],
+)
+
 # You may run setup_android.sh to install Android SDK and NDK.
 android_ndk_repository(
     name = "androidndk",
@@ -369,9 +383,9 @@ http_archive(
 )
 
 # Tensorflow repo should always go after the other external dependencies.
-# 2021-05-27
-_TENSORFLOW_GIT_COMMIT = "d6bfcdb0926173dbb7aa02ceba5aae6250b8aaa6"
-_TENSORFLOW_SHA256 = "ec40e1462239d8783d02f76a43412c8f80bac71ea20e41e1b7729b990aad6923"
+# 2021-06-07
+_TENSORFLOW_GIT_COMMIT = "700533808e6016dc458bb2eeecfca4babfc482ec"
+_TENSORFLOW_SHA256 = "b6edd7f4039bfc19f3e77594ecff558ba620091d0dc48181484b3d9085026126"
 http_archive(
     name = "org_tensorflow",
     urls = [
@@ -262,7 +262,7 @@ specified, appear as literal values in the `node_options` field of the
   output_stream: "TENSORS:main_model_output"
   node_options: {
     [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
-      model_path: "mediapipe/models/active_speaker_detection/audio_visual_model.tflite"
+      model_path: "mediapipe/models/detection_model.tflite"
     }
   }
 }
@@ -272,14 +272,13 @@ The `node_options` field accepts the proto3 syntax. Alternatively, calculator
 options can be specified in the `options` field using proto2 syntax.
 
 ```
-node: {
-  calculator: "IntervalFilterCalculator"
+node {
+  calculator: "TfLiteInferenceCalculator"
+  input_stream: "TENSORS:main_model_input"
+  output_stream: "TENSORS:main_model_output"
   node_options: {
-    [type.googleapis.com/mediapipe.IntervalFilterCalculatorOptions] {
-      intervals {
-        start_us: 20000
-        end_us: 40000
-      }
+    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
+      model_path: "mediapipe/models/detection_model.tflite"
     }
   }
 }
@@ -287,13 +286,26 @@ options can be specified in the `options` field using proto2 syntax.
 
 Not all calculators accept calculator options. In order to accept options, a
 calculator will normally define a new protobuf message type to represent its
-options, such as `IntervalFilterCalculatorOptions`. The calculator will then
+options, such as `PacketClonerCalculatorOptions`. The calculator will then
 read that protobuf message in its `CalculatorBase::Open` method, and possibly
-also in the `CalculatorBase::GetContract` function or its
+also in its `CalculatorBase::GetContract` function or its
 `CalculatorBase::Process` method. Normally, the new protobuf message type will
 be defined as a protobuf schema using a ".proto" file and a
 `mediapipe_proto_library()` build rule.
 
+```
+mediapipe_proto_library(
+    name = "packet_cloner_calculator_proto",
+    srcs = ["packet_cloner_calculator.proto"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_options_proto",
+        "//mediapipe/framework:calculator_proto",
+    ],
+)
+```
+
 
 ## Example calculator
 
 This section discusses the implementation of `PacketClonerCalculator`, which
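For context, a minimal editorial sketch (not part of this commit) of how a calculator typically consumes such options in `Open()`. The generated options header name and the stub structure are assumptions for illustration.

```
// Editorial sketch: reading calculator options in Open(). The framework
// parses the graph's node_options/options field and exposes the message
// through the calculator context.
#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

class MyOptionedCalculator : public CalculatorBase {
 public:
  static absl::Status GetContract(CalculatorContract* cc) {
    return absl::OkStatus();
  }

  absl::Status Open(CalculatorContext* cc) override {
    // `PacketClonerCalculatorOptions` here stands in for whatever options
    // message the calculator's .proto defines (assumed generated header).
    const auto& options = cc->Options<PacketClonerCalculatorOptions>();
    (void)options;  // Configure calculator behavior from `options` fields.
    return absl::OkStatus();
  }

  absl::Status Process(CalculatorContext* cc) override {
    return absl::OkStatus();
  }
};
REGISTER_CALCULATOR(MyOptionedCalculator);

}  // namespace mediapipe
```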
@@ -284,6 +284,6 @@ on how to build MediaPipe examples.
 *   Google AI Blog:
     [Background Features in Google Meet, Powered by Web ML](https://ai.googleblog.com/2020/10/background-features-in-google-meet.html)
 *   [ML Kit Selfie Segmentation API](https://developers.google.com/ml-kit/vision/selfie-segmentation)
-*   [Models and model cards](./models.md#selfie_segmentation)
+*   [Models and model cards](./models.md#selfie-segmentation)
 *   [Web demo](https://code.mediapipe.dev/codepen/selfie_segmentation)
 *   [Python Colab](https://mediapipe.page.link/selfie_segmentation_py_colab)
@@ -28,6 +28,10 @@ typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedRect>>
     EndLoopNormalizedRectCalculator;
 REGISTER_CALCULATOR(EndLoopNormalizedRectCalculator);
 
+typedef EndLoopCalculator<std::vector<::mediapipe::LandmarkList>>
+    EndLoopLandmarkListVectorCalculator;
+REGISTER_CALCULATOR(EndLoopLandmarkListVectorCalculator);
+
 typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedLandmarkList>>
     EndLoopNormalizedLandmarkListVectorCalculator;
 REGISTER_CALCULATOR(EndLoopNormalizedLandmarkListVectorCalculator);
@@ -35,20 +35,28 @@ namespace api2 {
 
 namespace {
 
+int GetXnnpackDefaultNumThreads() {
+#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
+    defined(__EMSCRIPTEN_PTHREADS__)
+  constexpr int kMinNumThreadsByDefault = 1;
+  constexpr int kMaxNumThreadsByDefault = 4;
+  return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault,
+                    kMaxNumThreadsByDefault);
+#else
+  return 1;
+#endif  // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__
+}
+
 // Returns number of threads to configure XNNPACK delegate with.
-// (Equal to user provided value if specified. Otherwise, it returns number of
-// high cores (hard-coded to 1 for Emscripten without Threads extension))
+// Returns user provided value if specified. Otherwise, tries to choose optimal
+// number of threads depending on the device.
 int GetXnnpackNumThreads(const mediapipe::InferenceCalculatorOptions& opts) {
   static constexpr int kDefaultNumThreads = -1;
   if (opts.has_delegate() && opts.delegate().has_xnnpack() &&
       opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
     return opts.delegate().xnnpack().num_threads();
   }
-#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
-  return InferHigherCoreIds().size();
-#else
-  return 1;
-#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
+  return GetXnnpackDefaultNumThreads();
 }
 
 }  // namespace
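An editorial sketch (not part of this commit) of how such a thread count is typically handed to TFLite's XNNPACK delegate; it assumes the standard `tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h` C API, and `calculator_options` is a placeholder name.

```
// Hand the computed thread count to the XNNPACK delegate (illustrative).
TfLiteXNNPackDelegateOptions xnnpack_opts =
    TfLiteXNNPackDelegateOptionsDefault();
xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_options);
TfLiteDelegate* xnnpack_delegate = TfLiteXNNPackDelegateCreate(&xnnpack_opts);
// ... apply via Interpreter::ModifyGraphWithDelegate(xnnpack_delegate), then
// release with TfLiteXNNPackDelegateDelete() after the interpreter is gone.
```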
@@ -269,8 +269,8 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
       break;
     }
   }
-  MP_RETURN_IF_ERROR(
-      tflite_gpu_runner_->InitializeWithModel(model, op_resolver));
+  MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
+      model, op_resolver, /*allow_quant_ops=*/true));
 
   // Create and bind OpenGL buffers for outputs.
   // The buffers are created once and their ids are passed to calculator outputs
@@ -226,6 +226,10 @@ absl::Status InferenceCalculatorMetalImpl::LoadDelegate(CalculatorContext* cc) {
 
   // Configure and create the delegate.
   TFLGpuDelegateOptions options;
+  // `enable_quantization` enables the run of sparse models, i.e. models with a
+  // DENSIFY op preceding a DEQUANTIZE op. Both ops get removed from the
+  // execution graph after the tensor of the weights is read.
+  options.enable_quantization = true;
   options.allow_precision_loss = allow_precision_loss_;
   options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeDoNotWait;
   delegate_ =
@@ -763,9 +763,13 @@ out vec4 fragColor;
 #endif  // defined(GL_ES);
 
 void main() {
-  vec4 input_value = texture2D(input_texture, sample_coordinate);
   vec2 gid = sample_coordinate;
+#ifdef FLIP_Y_COORD
+  float y_coord = 1.0 - sample_coordinate.y;
+#else
+  float y_coord = sample_coordinate.y;
+#endif  // defined(FLIP_Y_COORD)
+  vec2 adjusted_coordinate = vec2(sample_coordinate.x, y_coord);
+  vec4 input_value = texture2D(input_texture, adjusted_coordinate);
 
   // Run activation function.
   // One and only one of FN_SOFTMAX,FN_SIGMOID,FN_NONE will be defined.
@@ -787,13 +791,6 @@ void main() {
   float new_mask_value = input_value.r;
 #endif  // FN_NONE
 
-#ifdef FLIP_Y_COORD
-  float y_coord = 1.0 - gid.y;
-#else
-  float y_coord = gid.y;
-#endif  // defined(FLIP_Y_COORD)
-  vec2 output_coordinate = vec2(gid.x, y_coord);
-
   vec4 out_value = vec4(new_mask_value, 0.0, 0.0, new_mask_value);
   fragColor = out_value;
 })";
@@ -128,9 +128,23 @@ struct GPUData {
 }  // namespace
 #endif  // MEDIAPIPE_TFLITE_GPU_SUPPORTED
 
+namespace {
+
+int GetXnnpackDefaultNumThreads() {
+#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
+    defined(__EMSCRIPTEN_PTHREADS__)
+  constexpr int kMinNumThreadsByDefault = 1;
+  constexpr int kMaxNumThreadsByDefault = 4;
+  return std::clamp(NumCPUCores() / 2, kMinNumThreadsByDefault,
+                    kMaxNumThreadsByDefault);
+#else
+  return 1;
+#endif  // MEDIAPIPE_ANDROID || MEDIAPIPE_IOS || __EMSCRIPTEN_PTHREADS__
+}
+
 // Returns number of threads to configure XNNPACK delegate with.
-// (Equal to user provided value if specified. Otherwise, it returns number of
-// high cores (hard-coded to 1 for Emscripten without Threads extension))
+// Returns user provided value if specified. Otherwise, tries to choose optimal
+// number of threads depending on the device.
 int GetXnnpackNumThreads(
     const mediapipe::TfLiteInferenceCalculatorOptions& opts) {
   static constexpr int kDefaultNumThreads = -1;
@@ -138,13 +152,11 @@ int GetXnnpackNumThreads(
       opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) {
     return opts.delegate().xnnpack().num_threads();
   }
-#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
-  return InferHigherCoreIds().size();
-#else
-  return 1;
-#endif  // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__
+  return GetXnnpackDefaultNumThreads();
 }
 
 }  // namespace
 
 // Calculator Header Section
 
 // Runs inference on the provided input TFLite tensors and TFLite model.
@@ -737,8 +749,8 @@ absl::Status TfLiteInferenceCalculator::InitTFLiteGPURunner(
       break;
     }
   }
-  MP_RETURN_IF_ERROR(
-      tflite_gpu_runner_->InitializeWithModel(model, *op_resolver_ptr));
+  MP_RETURN_IF_ERROR(tflite_gpu_runner_->InitializeWithModel(
+      model, *op_resolver_ptr, /*allow_quant_ops=*/true));
 
   // Allocate interpreter memory for cpu output.
   if (!gpu_output_) {
@@ -969,6 +981,10 @@ absl::Status TfLiteInferenceCalculator::LoadDelegate(CalculatorContext* cc) {
     const int kHalfSize = 2;  // sizeof(half)
     // Configure and create the delegate.
     TFLGpuDelegateOptions options;
+    // `enable_quantization` enables the run of sparse models, i.e. models with
+    // a DENSIFY op preceding a DEQUANTIZE op. Both ops get removed from the
+    // execution graph after the tensor of the weights is read.
+    options.enable_quantization = true;
    options.allow_precision_loss = allow_precision_loss_;
    options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypeActive;
    if (!delegate_)
@@ -32,11 +32,15 @@ typedef FilterCollectionCalculator<std::vector<::mediapipe::NormalizedRect>>
     FilterNormalizedRectCollectionCalculator;
 REGISTER_CALCULATOR(FilterNormalizedRectCollectionCalculator);
 
-typedef FilterCollectionCalculator<
-    std::vector<::mediapipe::NormalizedLandmarkList>>
+typedef FilterCollectionCalculator<std::vector<::mediapipe::LandmarkList>>
     FilterLandmarkListCollectionCalculator;
 REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator);
 
+typedef FilterCollectionCalculator<
+    std::vector<::mediapipe::NormalizedLandmarkList>>
+    FilterNormalizedLandmarkListCollectionCalculator;
+REGISTER_CALCULATOR(FilterNormalizedLandmarkListCollectionCalculator);
+
 typedef FilterCollectionCalculator<std::vector<::mediapipe::ClassificationList>>
     FilterClassificationListCollectionCalculator;
 REGISTER_CALCULATOR(FilterClassificationListCollectionCalculator);
@@ -37,6 +37,13 @@ constexpr char kFirstCropRect[] = "FIRST_CROP_RECT";
 // (configured through option us_to_first_rect). If provided, a non-zero integer
 // will allow the animated zoom to be used when the first detections arrive.
 constexpr char kAnimateZoom[] = "ANIMATE_ZOOM";
+// Can be used to control the maximum zoom; note that it is re-evaluated only
+// upon change of input resolution. A value of 100 disables zooming and is the
+// smallest allowed value. A value of 200 allows zooming such that a pixel of
+// the input may cover up to four times its original area. Note that
+// max_zoom_value_deg from options is always respected; MAX_ZOOM_PCT can only be
+// used to limit zooming further.
+constexpr char kMaxZoomFactorPercent[] = "MAX_ZOOM_FACTOR_PCT";
 // Field-of-view (degrees) of the camera's x-axis (width).
 // TODO: Parameterize FOV based on camera specs.
 constexpr float kFieldOfView = 60;
@@ -75,11 +82,16 @@ class ContentZoomingCalculator : public CalculatorBase {
                    int frame_height);
   // Saves state to a state-cache, if provided.
   absl::Status SaveState(mediapipe::CalculatorContext* cc) const;
+  // Returns the factor for maximum zoom based on options and the
+  // kMaxZoomFactorPercent input (if present).
+  double GetMaxZoomFactor(mediapipe::CalculatorContext* cc) const;
   // Initializes the calculator for the given frame size, creating path solvers
   // and resetting history like last measured values.
-  absl::Status InitializeState(int frame_width, int frame_height);
+  absl::Status InitializeState(mediapipe::CalculatorContext* cc,
+                               int frame_width, int frame_height);
   // Adjusts state to work with an updated frame size.
-  absl::Status UpdateForResolutionChange(int frame_width, int frame_height);
+  absl::Status UpdateForResolutionChange(mediapipe::CalculatorContext* cc,
+                                         int frame_width, int frame_height);
   // Returns true if we are animating to the first rect.
   bool IsAnimatingToFirstRect(const Timestamp& timestamp) const;
   // Builds the output rectangle when animating to the first rect.
@@ -136,6 +148,9 @@ absl::Status ContentZoomingCalculator::GetContract(
     return mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
            << "Input VIDEO or VIDEO_SIZE must be provided.";
   }
+  if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) {
+    cc->Inputs().Tag(kMaxZoomFactorPercent).Set<int>();
+  }
   if (cc->Inputs().HasTag(kSalientRegions)) {
     cc->Inputs().Tag(kSalientRegions).Set<DetectionSet>();
   }
@@ -330,7 +345,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState(
           ? cc->InputSidePackets().Tag(kStateCache).Get<StateCacheType*>()
           : nullptr;
   if (!state_cache || !state_cache->has_value()) {
-    return InitializeState(frame_width, frame_height);
+    return InitializeState(cc, frame_width, frame_height);
   }
 
   const ContentZoomingCalculatorState& state = state_cache->value();
@@ -350,7 +365,7 @@ absl::Status ContentZoomingCalculator::MaybeLoadState(
   last_measured_y_offset_ = state.last_measured_y_offset;
   MP_RETURN_IF_ERROR(UpdateAspectAndMax());
 
-  return UpdateForResolutionChange(frame_width, frame_height);
+  return UpdateForResolutionChange(cc, frame_width, frame_height);
 }
 
 absl::Status ContentZoomingCalculator::SaveState(
@@ -379,8 +394,20 @@ absl::Status ContentZoomingCalculator::SaveState(
   return absl::OkStatus();
 }
 
-absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
-                                                       int frame_height) {
+double ContentZoomingCalculator::GetMaxZoomFactor(
+    mediapipe::CalculatorContext* cc) const {
+  double max_zoom_value =
+      options_.max_zoom_value_deg() / static_cast<double>(kFieldOfView);
+  if (cc->Inputs().HasTag(kMaxZoomFactorPercent)) {
+    const double factor = std::max(
+        1.0, cc->Inputs().Tag(kMaxZoomFactorPercent).Get<int>() / 100.0);
+    max_zoom_value = std::max(max_zoom_value, 1.0 / factor);
+  }
+  return max_zoom_value;
+}
+
+absl::Status ContentZoomingCalculator::InitializeState(
+    mediapipe::CalculatorContext* cc, int frame_width, int frame_height) {
   frame_width_ = frame_width;
   frame_height_ = frame_height;
   path_solver_pan_ = std::make_unique<KinematicPathSolver>(
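To make the limit concrete, a worked example (editorial illustration, using kFieldOfView = 60 from the header and the numbers of the MaxZoomValueOverride test later in this commit):

```
// With MAX_ZOOM_FACTOR_PCT = 133 and max_zoom_value_deg = 30:
//   factor         = max(1.0, 133 / 100.0)        = 1.33
//   max_zoom_value = max(30 / 60.0, 1.0 / 1.33)   ~= 0.75
// so min_zoom_size = frame_height * 0.75; for a 480-pixel-high frame the
// crop cannot shrink below ~360 px. With 166 percent on a 720 px frame,
// max(0.5, 1.0 / 1.66) ~= 0.60, giving a floor of ~433 px.
```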
@@ -390,8 +417,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
       options_.kinematic_options_tilt(), 0, frame_height_,
       static_cast<float>(frame_height_) / kFieldOfView);
   MP_RETURN_IF_ERROR(UpdateAspectAndMax());
-  int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() /
-                                       static_cast<double>(kFieldOfView));
+  int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc);
   path_solver_zoom_ = std::make_unique<KinematicPathSolver>(
       options_.kinematic_options_zoom(), min_zoom_size,
       max_frame_value_ * frame_height_,
@@ -405,7 +431,7 @@ absl::Status ContentZoomingCalculator::InitializeState(int frame_width,
 }
 
 absl::Status ContentZoomingCalculator::UpdateForResolutionChange(
-    int frame_width, int frame_height) {
+    mediapipe::CalculatorContext* cc, int frame_width, int frame_height) {
   // Update state for change in input resolution.
   if (frame_width_ != frame_width || frame_height_ != frame_height) {
     double width_scale = frame_width / static_cast<double>(frame_width_);
@@ -419,8 +445,7 @@ absl::Status ContentZoomingCalculator::UpdateForResolutionChange(
     MP_RETURN_IF_ERROR(path_solver_pan_->UpdateMinMaxLocation(0, frame_width_));
     MP_RETURN_IF_ERROR(
        path_solver_tilt_->UpdateMinMaxLocation(0, frame_height_));
-    int min_zoom_size = frame_height_ * (options_.max_zoom_value_deg() /
-                                         static_cast<double>(kFieldOfView));
+    int min_zoom_size = frame_height_ * GetMaxZoomFactor(cc);
     MP_RETURN_IF_ERROR(path_solver_zoom_->UpdateMinMaxLocation(
         min_zoom_size, max_frame_value_ * frame_height_));
     MP_RETURN_IF_ERROR(path_solver_zoom_->UpdatePixelsPerDegree(
@@ -493,7 +518,8 @@ absl::Status ContentZoomingCalculator::Process(
     MP_RETURN_IF_ERROR(MaybeLoadState(cc, frame_width, frame_height));
     initialized_ = !options_.is_stateless();
   } else {
-    MP_RETURN_IF_ERROR(UpdateForResolutionChange(frame_width, frame_height));
+    MP_RETURN_IF_ERROR(
+        UpdateForResolutionChange(cc, frame_width, frame_height));
   }
 
   bool only_required_found = false;
@@ -150,6 +150,29 @@ const char kConfigE[] = R"(
   }
 )";
 
+const char kConfigF[] = R"(
+    calculator: "ContentZoomingCalculator"
+    input_stream: "VIDEO_SIZE:size"
+    input_stream: "DETECTIONS:detections"
+    input_stream: "MAX_ZOOM_FACTOR_PCT:max_zoom_factor_pct"
+    output_stream: "CROP_RECT:rect"
+    output_stream: "FIRST_CROP_RECT:first_rect"
+    options: {
+      [mediapipe.autoflip.ContentZoomingCalculatorOptions.ext]: {
+        max_zoom_value_deg: 0
+        kinematic_options_zoom {
+          min_motion_to_reframe: 1.2
+        }
+        kinematic_options_tilt {
+          min_motion_to_reframe: 1.2
+        }
+        kinematic_options_pan {
+          min_motion_to_reframe: 1.2
+        }
+      }
+    }
+  )";
+
 void CheckBorder(const StaticFeatures& static_features, int width, int height,
                  int top_border, int bottom_border) {
   ASSERT_EQ(2, static_features.border().size());
@@ -170,6 +193,7 @@ void CheckBorder(const StaticFeatures& static_features, int width, int height,
 
 struct AddDetectionFlags {
   std::optional<bool> animated_zoom;
+  std::optional<int> max_zoom_factor_percent;
 };
 
 void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
@@ -211,6 +235,14 @@ void AddDetectionFrameSize(const cv::Rect_<float>& position, const int64 time,
             mediapipe::MakePacket<bool>(flags.animated_zoom.value())
                 .At(Timestamp(time)));
   }
+
+  if (flags.max_zoom_factor_percent.has_value()) {
+    runner->MutableInputs()
+        ->Tag("MAX_ZOOM_FACTOR_PCT")
+        .packets.push_back(
+            mediapipe::MakePacket<int>(flags.max_zoom_factor_percent.value())
+                .At(Timestamp(time)));
+  }
 }
 
 void AddDetection(const cv::Rect_<float>& position, const int64 time,
@@ -259,6 +291,7 @@ TEST(ContentZoomingCalculatorTest, ZoomTest) {
   CheckBorder(static_features, 1000, 1000, 495, 395);
 }
 
+#if 0
 TEST(ContentZoomingCalculatorTest, ZoomTestFullPTZ) {
   auto runner = ::absl::make_unique<CalculatorRunner>(
       ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD));
@@ -694,8 +727,8 @@ TEST(ContentZoomingCalculatorTest, ResolutionChangeZoomingWithCache) {
   auto runner = ::absl::make_unique<CalculatorRunner>(config);
   runner->MutableSidePackets()->Tag("STATE_CACHE") = MakePacket<
       mediapipe::autoflip::ContentZoomingCalculatorStateCacheType*>(&cache);
-  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000, 1000,
-                        runner.get());
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1000,
+                        1000, runner.get());
   AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 2000000, 500, 500,
                         runner.get());
   MP_ASSERT_OK(runner->Run());
@@ -719,6 +752,36 @@ TEST(ContentZoomingCalculatorTest, MaxZoomValue) {
   CheckCropRect(500, 500, 916, 916, 0,
                 runner->Outputs().Tag("CROP_RECT").packets);
 }
+#endif
+
+TEST(ContentZoomingCalculatorTest, MaxZoomValueOverride) {
+  auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigF);
+  auto* options = config.mutable_options()->MutableExtension(
+      ContentZoomingCalculatorOptions::ext);
+  options->set_max_zoom_value_deg(30);
+  auto runner = ::absl::make_unique<CalculatorRunner>(config);
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 0, 640, 480,
+                        runner.get(), {.max_zoom_factor_percent = 133});
+  // Change resolution and allow more zoom, and give time to use the new limit.
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 1000000, 1280, 720,
+                        runner.get(), {.max_zoom_factor_percent = 166});
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 2000000, 1280, 720,
+                        runner.get(), {.max_zoom_factor_percent = 166});
+  // Switch back to a smaller resolution with a more limited zoom.
+  AddDetectionFrameSize(cv::Rect_<float>(.4, .4, .2, .2), 3000000, 640, 480,
+                        runner.get(), {.max_zoom_factor_percent = 133});
+  MP_ASSERT_OK(runner->Run());
+  // Max. 133% zoomed in means min. (100/133) ~ 75% of height left: ~360.
+  // Max. 166% zoomed in means min. (100/166) ~ 60% of height left: ~430.
+  CheckCropRect(320, 240, 480, 360, 0,
+                runner->Outputs().Tag("CROP_RECT").packets);
+  CheckCropRect(640, 360, 769, 433, 2,
+                runner->Outputs().Tag("CROP_RECT").packets);
+  CheckCropRect(320, 240, 480, 360, 3,
+                runner->Outputs().Tag("CROP_RECT").packets);
+}
+
+#if 0
 TEST(ContentZoomingCalculatorTest, MaxZoomOutValue) {
   auto config = ParseTextProtoOrDie<CalculatorGraphConfig::Node>(kConfigD);
   auto* options = config.mutable_options()->MutableExtension(
@@ -906,6 +969,7 @@ TEST(ContentZoomingCalculatorTest, ProvidesConstantFirstRect) {
     EXPECT_EQ(first_rect.height(), rect.height());
   }
 }
+#endif
 
 }  // namespace
 }  // namespace autoflip
@@ -154,10 +154,18 @@ absl::Status KinematicPathSolver::AddObservation(int position,
 
   // Time and position updates.
   double delta_t = (time_us - current_time_) / 1000000.0;
+  // Time since last state/prediction update, smoothed by
+  // mean_period_update_rate.
+  if (mean_delta_t_ < 0) {
+    mean_delta_t_ = delta_t;
+  } else {
+    mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
+                    delta_t * options_.mean_period_update_rate();
+  }
 
   // Observed velocity and then weighted update of this velocity.
   double observed_velocity = delta_degs / delta_t;
-  double update_rate = std::min(delta_t / options_.update_rate_seconds(),
+  double update_rate = std::min(mean_delta_t_ / options_.update_rate_seconds(),
                                 options_.max_update_rate());
   double updated_velocity = current_velocity_deg_per_s_ * (1 - update_rate) +
                             observed_velocity * update_rate;
@@ -174,16 +182,6 @@ absl::Status KinematicPathSolver::UpdatePrediction(const int64 time_us) {
   RET_CHECK(current_time_ < time_us)
       << "Prediction time added before a prior observation or prediction.";
 
-  // Time since last state/prediction update, smoothed by
-  // mean_period_update_rate.
-  double delta_t = (time_us - current_time_) / 1000000.0;
-  if (mean_delta_t_ < 0) {
-    mean_delta_t_ = delta_t;
-  } else {
-    mean_delta_t_ = mean_delta_t_ * (1 - options_.mean_period_update_rate()) +
-                    delta_t * options_.mean_period_update_rate();
-  }
-
   // Position update limited by min/max.
   double update_position_px =
       current_position_px_ +
@@ -337,6 +337,40 @@ TEST(KinematicPathSolverTest, PassDegPerPxChange) {
   EXPECT_EQ(state, 516);
 }
 
+TEST(KinematicPathSolverTest, NoTimestampSmoothing) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  options.set_mean_period_update_rate(1.0);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  int state;
+  MP_ASSERT_OK(solver.AddObservation(500, 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 600);
+  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 720);
+}
+
+TEST(KinematicPathSolverTest, TimestampSmoothing) {
+  KinematicOptions options;
+  options.set_min_motion_to_reframe(1.0);
+  options.set_update_rate(1.0);
+  options.set_max_velocity(6);
+  options.set_mean_period_update_rate(0.05);
+  KinematicPathSolver solver(options, 0, 1000, 1000.0 / kWidthFieldOfView);
+  int state;
+  MP_ASSERT_OK(solver.AddObservation(500, 0));
+  MP_ASSERT_OK(solver.AddObservation(1000, 1000000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 600);
+  MP_ASSERT_OK(solver.AddObservation(1000, 2200000));
+  MP_ASSERT_OK(solver.GetState(&state));
+  EXPECT_EQ(state, 701);
+}
+
 }  // namespace
 }  // namespace autoflip
 }  // namespace mediapipe
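Worked check of these expectations (editorial; assumes kWidthFieldOfView is 60, so the scale is 1000 px / 60 deg ~= 16.7 px/deg, with motion capped at max_velocity = 6 deg/s):

```
// t = 1.0 s: 6 deg/s * 1.0 s * 16.7 px/deg ~= 100 px  ->  500 + 100 = 600.
// Without smoothing (mean_period_update_rate = 1.0) the 1.2 s gap is taken
// at face value: 6 * 1.2 * 16.7 ~= 120 px  ->  600 + 120 = 720.
// With mean_period_update_rate = 0.05 the period is smoothed first:
// mean_dt = 0.95 * 1.0 + 0.05 * 1.2 = 1.01 s  ->  6 * 1.01 * 16.7 ~= 101 px
// ->  600 + 101 = 701.
```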
@@ -262,7 +262,7 @@ TEST(MathUtil, IntRound) {
 
   // A double-precision number has a 53-bit mantissa (52 fraction bits),
   // so the following value can be represented exactly.
-  int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+  int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
   EXPECT_EQ(mediapipe::MathUtil::Round<int64>(static_cast<double>(value64)),
             value64);
 }
@@ -369,7 +369,7 @@ class SafeCastTester {
     if (sizeof(FloatIn) >= 64) {
       // A double-precision number has a 53-bit mantissa (52 fraction bits),
       // so the following value can be represented exactly by a double.
-      int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+      int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
       const IntOut expected =
           (sizeof(IntOut) >= 64) ? static_cast<IntOut>(value64) : imax;
       EXPECT_EQ(
@@ -565,7 +565,7 @@ TEST(MathUtil, SafeCast) {
             -12345);
   EXPECT_EQ(mediapipe::MathUtil::SafeCast<int>(1E47), 2147483647);
   EXPECT_EQ(mediapipe::MathUtil::SafeCast<int>(-1E47),
-            GG_LONGLONG(-2147483648));
+            static_cast<int64_t>(-2147483648));
 }
 
 template <class FloatIn, class IntOut>
@@ -682,7 +682,7 @@ class SafeRoundTester {
     if (sizeof(FloatIn) >= 64) {
      // A double-precision number has a 53-bit mantissa (52 fraction bits),
      // so the following value can be represented exactly by a double.
-      int64 value64 = GG_ULONGLONG(0x1234567890abcd00);
+      int64 value64 = static_cast<int64_t>(0x1234567890abcd00);
       const IntOut expected =
           (sizeof(IntOut) >= 64) ? static_cast<IntOut>(value64) : imax;
       EXPECT_EQ(
@@ -873,7 +873,7 @@ TEST(MathUtil, SafeRound) {
             -12345);
   EXPECT_EQ(mediapipe::MathUtil::SafeRound<int>(1E47), 2147483647);
   EXPECT_EQ(mediapipe::MathUtil::SafeRound<int>(-1E47),
-            GG_LONGLONG(-2147483648));
+            static_cast<int64_t>(-2147483648));
 }
 
 }  // namespace
@@ -8,6 +8,7 @@ def mediapipe_cc_test(
         data = [],
         deps = [],
         size = None,
+        tags = [],
        timeout = None,
        additional_deps = DEFAULT_ADDITIONAL_TEST_DEPS,
        **kwargs):
@@ -641,14 +641,20 @@ cc_library(
 (resulting dependency list; the page does not preserve which six entries were added)
         "//mediapipe/framework:calculator_cc_proto",
         "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/deps:no_destructor",
        "//mediapipe/framework/formats:image_format_cc_proto",
        "//mediapipe/framework/formats:image_frame",
        "//mediapipe/framework/port:advanced_proto",
        "//mediapipe/framework/port:file_helpers",
        "//mediapipe/framework/port:gtest",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:ret_check",
        "//mediapipe/framework/port:status",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/memory",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
        "@stblib//:stb_image",
        "@stblib//:stb_image_write",
    ],
)
@@ -18,18 +18,27 @@
 (resulting include list; the page does not preserve which nine entries were added)
 #include <unistd.h>
 
 #include <memory>
 #include <string>
 
 #include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/status/status.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
 #include "absl/strings/substitute.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/deps/file_path.h"
 #include "mediapipe/framework/deps/no_destructor.h"
 #include "mediapipe/framework/formats/image_format.pb.h"
 #include "mediapipe/framework/port/advanced_proto_inc.h"
 #include "mediapipe/framework/port/file_helpers.h"
 #include "mediapipe/framework/port/logging.h"
 #include "mediapipe/framework/port/proto_ns.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status_macros.h"
 #include "stb_image.h"
 #include "stb_image_write.h"
 
 namespace mediapipe {
@@ -43,15 +52,14 @@ bool EqualWithTolerance(const T value1, const T value2, const T max_diff) {
 }
 
 template <typename T>
-bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
-                 const T max_color_diff, const T max_alpha_diff,
-                 const float max_avg_diff, std::string* error_message) {
+absl::Status CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
+                         const T max_color_diff, const T max_alpha_diff,
+                         const float max_avg_diff,
+                         std::unique_ptr<ImageFrame>& diff_image) {
   // Verify image byte depth matches expected byte depth.
   CHECK_EQ(sizeof(T), image1.ByteDepth());
   CHECK_EQ(sizeof(T), image2.ByteDepth());
 
-  const bool return_error = error_message != nullptr;
-
   const int width = image1.Width();
   const int height = image1.Height();
   const int channels1 = image1.NumberOfChannels();
@@ -68,57 +76,64 @@ bool CompareDiff(const ImageFrame& image1, const ImageFrame& image2,
   const int width_padding2 =
       image2.WidthStep() / image2.ByteDepth() - width * channels2;
 
+  diff_image = std::make_unique<ImageFrame>(image1.Format(), width, height);
+  T* pixel_diff = reinterpret_cast<T*>(diff_image->MutablePixelData());
+  const int width_padding_diff =
+      diff_image->WidthStep() / diff_image->ByteDepth() - width * channels1;
+
   float avg_diff = 0;
-  uint diff_count = 0;
+  uint total_count = 0;
+  int different_color_components = 0;
+  float max_color_diff_found = 0;
+  int different_alpha_components = 0;
+  float max_alpha_diff_found = 0;
   for (int row = 0; row < height; ++row) {
     for (int col = 0; col < width; ++col) {
       for (int channel = 0; channel < num_channels; ++channel) {
-        // Check local difference.
         const T max_diff = channel < 3 ? max_color_diff : max_alpha_diff;
         const T value1 = pixel1[channel];
         const T value2 = pixel2[channel];
-        if (!EqualWithTolerance<T>(value1, value2, max_diff)) {
-          // We cast uint8 to int using this type (and leave other values as-is)
-          // to avoid printing as a single char.
-          using TypeToPrint =
-              typename std::conditional<std::is_same<T, uint8>::value, int,
-                                        T>::type;
-          std::string error = absl::Substitute(
-              "images differ: row = $0 col = $1 channel = $2 : pixel1 = $3, "
-              "pixel2 = $4",
-              row, col, channel, static_cast<TypeToPrint>(value1),
-              static_cast<TypeToPrint>(value2));
-          if (return_error) {
-            *error_message = error;
-          } else {
-            LOG(ERROR) << error;
-          }
-          return false;
-        }
-        // Check global average difference.
         const float diff =
             std::abs(static_cast<float>(value1) - static_cast<float>(value2));
-        avg_diff += (diff - avg_diff) / ++diff_count;
+        if (channel < 3) {
+          different_color_components += diff > max_color_diff;
+          max_color_diff_found = std::max(max_color_diff_found, diff);
+          pixel_diff[channel] = diff;
+        } else {
+          different_alpha_components += diff > max_alpha_diff;
+          max_alpha_diff_found = std::max(max_alpha_diff_found, diff);
+          pixel_diff[channel] = 255;  // opaque to see color difference
+        }
+        // Check global average difference.
+        avg_diff += (diff - avg_diff) / ++total_count;
       }
       pixel1 += channels1;
       pixel2 += channels2;
+      pixel_diff += channels1;
     }
     pixel1 += width_padding1;
     pixel2 += width_padding2;
+    pixel_diff += width_padding_diff;
   }
 
-  if (avg_diff > max_avg_diff) {
-    std::string error =
-        absl::Substitute("images differ: avg pixel error = $0", avg_diff);
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  std::vector<std::string> errors;
+  if (different_color_components)
+    errors.push_back(absl::Substitute(
+        "$0 color components differences above limit of $1, max found was $2",
+        different_color_components, max_color_diff, max_color_diff_found));
+  if (different_alpha_components)
+    errors.push_back(absl::Substitute(
+        "$0 alpha components differences above limit of $1, max found was $2",
+        different_alpha_components, max_alpha_diff, max_alpha_diff_found));
+  if (avg_diff > max_avg_diff)
+    errors.push_back(
+        absl::Substitute("the average component difference is $0 (limit: $1)",
+                         avg_diff, max_avg_diff));
 
-  return true;
+  if (!errors.empty())
+    return absl::InternalError(
+        absl::StrCat("images differ: ", absl::StrJoin(errors, "; ")));
+  return absl::OkStatus();
 }
 
 #if defined(__linux__)
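A note on the running mean used above (editorial): the incremental update avoids accumulating a large sum and is algebraically identical to the plain arithmetic mean.

```
// avg_n = avg_{n-1} + (x_n - avg_{n-1}) / n  equals  (x_1 + ... + x_n) / n.
// Quick check with x = {1, 2, 3}:
//   n=1: 0 + (1 - 0)/1 = 1;  n=2: 1 + (2 - 1)/2 = 1.5;
//   n=3: 1.5 + (3 - 1.5)/3 = 2, the mean of 1, 2, 3.
```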
@@ -134,77 +149,32 @@ std::string GetBinaryDirectory() {
 
 }  // namespace
 
-bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
-                        const float max_color_diff, const float max_alpha_diff,
-                        const float max_avg_diff, std::string* error_message) {
-  const bool return_error = error_message != nullptr;
-
-  auto IsSupportedImageFormatComparison = [](const ImageFrame& image1,
-                                             const ImageFrame& image2) {
-    // Pairs of non-equal image formats that can be compared against each other.
-    static const mediapipe::NoDestructor<absl::flat_hash_set<
-        std::pair<ImageFormat::Format, ImageFormat::Format>>>
-        kCompatibleImageFormats({
-            {ImageFormat::SRGB, ImageFormat::SRGBA},
-            {ImageFormat::SRGB48, ImageFormat::SRGBA64},
-        });
-
-    auto* compatible_image_formats = kCompatibleImageFormats.get();
-
-    return image1.Format() == image2.Format() ||
-           compatible_image_formats->contains(
-               {image1.Format(), image2.Format()}) ||
-           compatible_image_formats->contains(
-               {image2.Format(), image1.Format()});
+absl::Status CompareImageFrames(const ImageFrame& image1,
+                                const ImageFrame& image2,
+                                const float max_color_diff,
+                                const float max_alpha_diff,
+                                const float max_avg_diff,
+                                std::unique_ptr<ImageFrame>& diff_image) {
+  auto IsSupportedImageFormatComparison = [](ImageFormat::Format one,
+                                             ImageFormat::Format two) {
+    auto both = std::minmax(one, two);
+    return one == two ||
+           both == std::minmax(ImageFormat::SRGB, ImageFormat::SRGBA) ||
+           both == std::minmax(ImageFormat::SRGB48, ImageFormat::SRGBA64);
   };
 
-  if (!IsSupportedImageFormatComparison(image1, image2)) {
-    std::string error = absl::Substitute(
-        "unsupported image format comparison; image1 = $0, image2 = $1",
-        image1.Format(), image2.Format());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  RET_CHECK(IsSupportedImageFormatComparison(image1.Format(), image2.Format()))
+      << "unsupported image format comparison; image1 = " << image1.Format()
+      << ", image2 = " << image2.Format();
 
-  if (image1.Width() != image2.Width()) {
-    std::string error =
-        absl::Substitute("image width mismatch: image1 = $0, image2 = $1",
-                         image1.Width(), image2.Width());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
-
-  if (image1.Height() != image2.Height()) {
-    std::string error =
-        absl::Substitute("image height mismatch: image1 = $0, image2 = $1",
-                         image1.Height(), image2.Height());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  // Cannot use RET_CHECK_EQ because pair is not printable.
+  RET_CHECK(std::make_pair(image1.Width(), image1.Height()) ==
+            std::make_pair(image2.Width(), image2.Height()))
+      << "image size mismatch: " << image1.Width() << "x" << image1.Height()
+      << " != " << image2.Width() << "x" << image2.Height();
 
-  if (image1.ByteDepth() != image2.ByteDepth()) {
-    std::string error =
-        absl::Substitute("image byte depth mismatch: image1 = $0, image2 = $1",
-                         image1.ByteDepth(), image2.ByteDepth());
-    if (return_error) {
-      *error_message = error;
-    } else {
-      LOG(ERROR) << error;
-    }
-    return false;
-  }
+  RET_CHECK_EQ(image1.ByteDepth(), image2.ByteDepth())
+      << "image byte depth mismatch";
 
   switch (image1.Format()) {
     case ImageFormat::GRAY8:
@@ -212,45 +182,87 @@ bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
     case ImageFormat::SRGBA:
     case ImageFormat::LAB8:
       return CompareDiff<uint8>(image1, image2, max_color_diff, max_alpha_diff,
-                                max_avg_diff, error_message);
+                                max_avg_diff, diff_image);
     case ImageFormat::GRAY16:
     case ImageFormat::SRGB48:
     case ImageFormat::SRGBA64:
       return CompareDiff<uint16>(image1, image2, max_color_diff, max_alpha_diff,
-                                 max_avg_diff, error_message);
+                                 max_avg_diff, diff_image);
     case ImageFormat::VEC32F1:
     case ImageFormat::VEC32F2:
       return CompareDiff<float>(image1, image2, max_color_diff, max_alpha_diff,
-                                max_avg_diff, error_message);
+                                max_avg_diff, diff_image);
     default:
       LOG(FATAL) << ImageFrame::InvalidFormatString(image1.Format());
   }
 }
 
+bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
+                        const float max_color_diff, const float max_alpha_diff,
+                        const float max_avg_diff, std::string* error_message) {
+  std::unique_ptr<ImageFrame> diff_image;
+  auto status = CompareImageFrames(image1, image2, max_color_diff,
+                                   max_alpha_diff, max_avg_diff, diff_image);
+  if (status.ok()) return true;
+  if (error_message) *error_message = std::string(status.message());
+  return false;
+}
+
 std::string GetTestRootDir() {
 #if defined(__ANDROID__)
   char path[1024];
   char* ptr = getcwd(path, sizeof(path));
   CHECK_EQ(ptr, path);
   return path;
 #else
-  return ::mediapipe::file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe");
+  return file::JoinPath(std::getenv("TEST_SRCDIR"), "mediapipe");
 #endif  // defined(__ANDROID__)
 }
 
+std::string GetTestOutputsDir() {
+  const char* output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
+  if (!output_dir) {
+    output_dir = "/tmp";
+  }
+  return output_dir;
+}
+
 std::string GetTestDataDir(const std::string& package_base_path) {
-#if defined(__ANDROID__)
-  std::string data_dir = GetTestRootDir();
-  std::string binary_dir = GetBinaryDirectory();
-  // In Mobile Harness, the cwd is "/" and the run dir is "/data/local/tmp".
-  if (data_dir == "/" && absl::StartsWith(binary_dir, "/data")) {
-    data_dir = binary_dir;
-  }
-  return ::mediapipe::file::JoinPath(data_dir, package_base_path, "testdata/");
-#else
-  return ::mediapipe::file::JoinPath(GetTestRootDir(), package_base_path,
-                                     "testdata/");
-#endif  // defined(__APPLE__)
+  return file::JoinPath(GetTestRootDir(), package_base_path, "testdata/");
 }
 
+std::string GetTestFilePath(absl::string_view relative_path) {
+  return file::JoinPath(GetTestRootDir(), relative_path);
+}
+
+absl::StatusOr<std::unique_ptr<ImageFrame>> LoadTestImage(
+    absl::string_view path, ImageFormat::Format format) {
+  std::string encoded;
+  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(path, &encoded));
+
+  // stbi_load determines the output pixel format based on the desired channels.
+  // 0 means "use whatever's in the file".
+  int desired_channels = format == ImageFormat::UNKNOWN ? 0
+                         : format == ImageFormat::SRGBA ? 4
+                         : format == ImageFormat::SRGB  ? 3
+                         : format == ImageFormat::GRAY8 ? 1
+                                                        : -1;
+  RET_CHECK(desired_channels >= 0)
+      << "unsupported output format requested: " << format;
+
+  int width, height, channels_in_file;
+  auto data = stbi_load_from_memory(reinterpret_cast<stbi_uc*>(encoded.data()),
+                                    encoded.size(), &width, &height,
+                                    &channels_in_file, desired_channels);
+  RET_CHECK(data) << "failed to decode image data from: " << path;
+
+  // If we didn't specify a desired format, it will be determined by what the
+  // file contains.
+  int output_channels = desired_channels ? desired_channels : channels_in_file;
+  if (format == ImageFormat::UNKNOWN) {
+    format = output_channels == 4   ? ImageFormat::SRGBA
+             : output_channels == 3 ? ImageFormat::SRGB
+             : output_channels == 1 ? ImageFormat::GRAY8
+                                    : ImageFormat::UNKNOWN;
+    RET_CHECK(format != ImageFormat::UNKNOWN)
+        << "unsupported number of channels: " << output_channels;
+  }
+
+  return absl::make_unique<ImageFrame>(
+      format, width, height, width * output_channels, data, stbi_image_free);
+}
+
 std::unique_ptr<ImageFrame> LoadTestPng(const std::string& path,
@@ -15,6 +15,7 @@
 #ifndef MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_
 #define MEDIAPIPE_FRAMEWORK_TEST_UTIL_H_
 
+#include "absl/status/statusor.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/formats/image_frame.h"
@@ -35,14 +36,29 @@ using mediapipe::CalculatorGraphConfig;
 // Note: Although max_color_diff and max_alpha_diff are floats, all uint8/uint16
 // values are exactly representable. (2^24 + 1 is the first non-representable
 // positive integral value.)
+absl::Status CompareImageFrames(const ImageFrame& image1,
+                                const ImageFrame& image2,
+                                const float max_color_diff,
+                                const float max_alpha_diff,
+                                const float max_avg_diff,
+                                std::unique_ptr<ImageFrame>& diff_image);
+
 bool CompareImageFrames(const ImageFrame& image1, const ImageFrame& image2,
                         const float max_color_diff, const float max_alpha_diff,
                         const float max_avg_diff = 1.0,
                         std::string* error_message = nullptr);
 
-// Returns the absolute path to the directory that contains test source code.
+// Returns the absolute path to the directory that contains test source code
+// (TEST_SRCDIR).
 std::string GetTestRootDir();
 
+// Returns the absolute path to a directory where tests can write outputs to
+// be sent to bazel (TEST_UNDECLARED_OUTPUTS_DIR or a fallback).
+std::string GetTestOutputsDir();
+
+// Returns the absolute path to a file within TEST_SRCDIR.
+std::string GetTestFilePath(absl::string_view relative_path);
+
 // Returns the absolute path to the contents of the package's "testdata"
 // directory.
 // This handles the different paths where test data ends up when using
@@ -52,6 +68,10 @@ std::string GetTestDataDir(const std::string& package_base_path);
 // Loads a binary graph from path. Returns true iff successful.
 bool LoadTestGraph(CalculatorGraphConfig* proto, const std::string& path);
 
+// Loads an image from path.
+absl::StatusOr<std::unique_ptr<ImageFrame>> LoadTestImage(
+    absl::string_view path, ImageFormat::Format format = ImageFormat::SRGBA);
+
 // Loads a PNG image from path using the given ImageFormat. Returns nullptr in
 // case of failure.
 std::unique_ptr<ImageFrame> LoadTestPng(
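An editorial sketch (not part of this commit) of combining these helpers in a test; the testdata file names are hypothetical, and `expected` would be loaded the same way as `actual`.

```
auto actual_or = LoadTestImage(
    GetTestFilePath("mediapipe/framework/testdata/actual.png"));
MP_ASSERT_OK(actual_or.status());
std::unique_ptr<ImageFrame> actual = std::move(actual_or).value();

std::unique_ptr<ImageFrame> diff_image;
absl::Status status = CompareImageFrames(*actual, *expected,
                                         /*max_color_diff=*/2.0f,
                                         /*max_alpha_diff=*/2.0f,
                                         /*max_avg_diff=*/1.0f, diff_image);
// On mismatch, diff_image can be written under GetTestOutputsDir() so bazel
// collects it as an undeclared test output for inspection.
MP_EXPECT_OK(status);
```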
@@ -75,26 +75,30 @@ absl::Status GlContext::CreateContextInternal(
   // TODO: Ensure this works with all options (in particular,
   // multithreading options, like the special-case combination of USE_PTHREADS
   // and OFFSCREEN_FRAMEBUFFER)
-  EM_ASM(let init_once = true; if (init_once) {
+  // clang-format off
+  EM_ASM(
+    let init_once = true;
+    if (init_once) {
     const cachedFindCanvasEventTarget = findCanvasEventTarget;
 
-    if (typeof cachedFindCanvasEventTarget != = 'function') {
-      if (typeof console != = 'undefined') {
-        console.error(
-            'Expected Emscripten global function ' +
-            '"findCanvasEventTarget" not found. WebGL context creation ' +
-            'may fail.');
+      if (typeof cachedFindCanvasEventTarget !== 'function') {
+        if (typeof console !== 'undefined') {
+          console.error('Expected Emscripten global function '
+              + '"findCanvasEventTarget" not found. WebGL context creation '
+              + 'may fail.');
       }
       return;
     }
 
     findCanvasEventTarget = function(target) {
       if (target == 0) {
         if (Module && Module.canvas) {
           return Module.canvas;
         } else if (Module && Module.canvasCssSelector) {
           return cachedFindCanvasEventTarget(Module.canvasCssSelector);
-        } else {
-          if (typeof console != = 'undefined') {
+        }
+      }
+      if (typeof console !== 'undefined') {
         console.warn('Module properties canvas and canvasCssSelector not ' +
                      'found during WebGL context creation.');
       }
@@ -102,15 +106,14 @@ absl::Status GlContext::CreateContextInternal(
       // cases it will not succeed, just in case the user does want to fall-
       // back.
       return cachedFindCanvasEventTarget(target);
+      }
     };  // NOLINT: Necessary semicolon.
     init_once = false;
-  });
+    }
+  );
+  // clang-format on
 
   // Note: below id parameter is only actually used if both Module.canvas and
   // Module.canvasCssSelector are undefined.
   EMSCRIPTEN_WEBGL_CONTEXT_HANDLE context_handle =
-      emscripten_webgl_create_context(0 /* id */, &attrs);
+      emscripten_webgl_create_context(nullptr, &attrs);
 
   // Check for failure
   if (context_handle <= 0) {
@@ -164,7 +164,9 @@ const GlTextureInfo& GlTextureInfoForGpuBufferFormat(GpuBufferFormat format,
 }
 
 auto iter = format_info->find(format);
-  CHECK(iter != format_info->end()) << "unsupported format";
+  CHECK(iter != format_info->end())
+      << "unsupported format: "
+      << static_cast<std::underlying_type_t<decltype(format)>>(format);
   const auto& planes = iter->second;
 #ifndef __APPLE__
   CHECK_EQ(planes.size(), 1)
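Why the cast (editorial illustration, not from the patch): a scoped enum such as GpuBufferFormat has no implicit conversion to int and no operator<<, so it must be cast to its underlying type before it can appear in a CHECK or LOG message.

```
// Minimal illustration with a hypothetical scoped enum.
enum class Example : uint32_t { kFoo = 7 };
Example e = Example::kFoo;
// LOG(INFO) << e;                                             // no overload
LOG(INFO) << static_cast<std::underlying_type_t<Example>>(e);  // prints 7
```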
@@ -69,6 +69,8 @@ node {
   options: {
     [mediapipe.InferenceCalculatorOptions.ext] {
       model_path: "mediapipe/modules/pose_detection/pose_detection.tflite"
+      #
+      delegate: { gpu { use_advanced_gpu_api: true } }
     }
   }
 }
@@ -85,7 +85,7 @@ ObjectDef GetSSBOObjectDef(int channels) {
 
 absl::Status TFLiteGPURunner::InitializeWithModel(
     const tflite::FlatBufferModel& flatbuffer,
-    const tflite::OpResolver& op_resolver) {
+    const tflite::OpResolver& op_resolver, bool allow_quant_ops) {
   // GraphFloat32 is created twice because, when OpenCL and OpenGL backends are
   // initialized, different backend-specific graph transformations happen
   // in-place. As GraphFloat32 is not copyable by design, we keep two copies of
@@ -94,10 +94,10 @@ absl::Status TFLiteGPURunner::InitializeWithModel(
   // in the end of the initialization stage.
   graph_gl_ = std::make_unique<GraphFloat32>();
   graph_cl_ = std::make_unique<GraphFloat32>();
-  MP_RETURN_IF_ERROR(
-      BuildFromFlatBuffer(flatbuffer, op_resolver, graph_gl_.get()));
-  MP_RETURN_IF_ERROR(
-      BuildFromFlatBuffer(flatbuffer, op_resolver, graph_cl_.get()));
+  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
+                                         graph_gl_.get(), allow_quant_ops));
+  MP_RETURN_IF_ERROR(BuildFromFlatBuffer(flatbuffer, op_resolver,
+                                         graph_cl_.get(), allow_quant_ops));
 
   for (const auto& input : graph_gl_->inputs()) {
     input_shapes_.push_back(input->tensor.shape);
@@ -54,7 +54,8 @@ class TFLiteGPURunner {
       : options_(options) {}
 
   absl::Status InitializeWithModel(const tflite::FlatBufferModel& flatbuffer,
-                                   const tflite::OpResolver& op_resolver);
+                                   const tflite::OpResolver& op_resolver,
+                                   bool allow_quant_ops = false);
 
   void ForceOpenGL() { opengl_is_forced_ = true; }
   void ForceOpenCL() { opencl_is_forced_ = true; }
third_party/stb_image_impl.diff (new file, vendored, 35 lines)
@@ -0,0 +1,35 @@
+From fed8c5b355e00b7cc7dd5abfebecf0338f2c2f24 Mon Sep 17 00:00:00 2001
+From: Camillo Lugaresi <camillol@google.com>
+Date: Fri, 4 Jun 2021 00:44:45 +0000
+Subject: impl files
+
+---
+ stb_image.c       | 4 ++++
+ stb_image_write.c | 4 ++++
+ 2 files changed, 8 insertions(+)
+ create mode 100644 stb_image.c
+ create mode 100644 stb_image_write.c
+
+diff --git a/stb_image.c b/stb_image.c
+new file mode 100644
+index 0000000..f88aaf6
+--- /dev/null
++++ b/stb_image.c
+@@ -0,0 +1,4 @@
++// By defining STB_IMAGE_IMPLEMENTATION the included header file will also
++// define the implementation.
++#define STB_IMAGE_IMPLEMENTATION
++#include "stb_image.h"
+diff --git a/stb_image_write.c b/stb_image_write.c
+new file mode 100644
+index 0000000..623d757
+--- /dev/null
++++ b/stb_image_write.c
+@@ -0,0 +1,4 @@
++// By defining STB_IMAGE_WRITE_IMPLEMENTATION the included header file will also
++// define the implementation.
++#define STB_IMAGE_WRITE_IMPLEMENTATION
++#include "stb_image_write.h"
+--
+2.32.0.rc1.229.g3e70b5a671-goog
third_party/stblib.BUILD (new file, vendored, 28 lines)
@@ -0,0 +1,28 @@
+# Description:
+#   Single-file C++ image decoding and encoding libraries
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])  # MIT license
+
+exports_files(["LICENSE"])
+
+cc_library(
+    name = "stb_image",
+    srcs = ["stb_image.c"],
+    hdrs = ["stb_image.h"],
+    copts = [
+        "-Wno-unused-function",
+        "$(STACK_FRAME_UNLIMITED)",
+    ],
+    includes = ["."],
+)
+
+cc_library(
+    name = "stb_image_write",
+    srcs = ["stb_image_write.c"],
+    hdrs = ["stb_image_write.h"],
+    includes = ["."],
+)