Project import generated by Copybara.

GitOrigin-RevId: d91373b4d4d10abef49cab410caa6aadf0875049
This commit is contained in:
MediaPipe Team 2019-12-06 15:33:11 -08:00 committed by mgyong
parent 137867d088
commit d16cc3be5b
137 changed files with 41548 additions and 31 deletions

View File

@ -19,6 +19,9 @@ build --incompatible_depset_is_not_iterable=false
# Sets the default Apple platform to macOS.
build --apple_platform_type=macos
# Allow debugging with XCODE
build --apple_generate_dsym
# Android configs.
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain

View File

@ -35,6 +35,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libopencv-highgui-dev \
libopencv-imgproc-dev \
libopencv-video-dev \
libopencv-calib3d-dev \
libopencv-features2d-dev \
software-properties-common && \
add-apt-repository -y ppa:openjdk-r/ppa && \
apt-get update && apt-get install -y openjdk-8-jdk && \

View File

@ -14,12 +14,13 @@
* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md)
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md)
* [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md)
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif)
![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif)
![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif)
![hair_segmentation](mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif)
![object_detection](mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif)
![object_tracking](mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif)
## Installation
Follow these [instructions](mediapipe/docs/install.md).

View File

@ -116,6 +116,13 @@ http_archive(
"https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
"https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
],
# Patch https://github.com/tensorflow/tensorflow/commit/e3a7bdbebb99352351a19e2e403136166aa52934
patches = [
"@//third_party:org_tensorflow_e3a7bdbebb99352351a19e2e403136166aa52934.diff"
],
patch_args = [
"-p1",
],
strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT,
sha256 = _TENSORFLOW_SHA256,
)
@ -123,8 +130,22 @@ http_archive(
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(tf_repo_name = "org_tensorflow")
http_archive(
name = "ceres_solver",
url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip",
patches = [
"@//third_party:ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff"
],
patch_args = [
"-p1",
],
strip_prefix = "ceres-solver-1.14.0",
sha256 = "5ba6d0db4e784621fda44a50c58bb23b0892684692f0c623e2063f9c19f192f1"
)
# Please run
# $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
# libopencv-calib3d-dev libopencv-features2d-dev \
# libopencv-imgproc-dev libopencv-video-dev
new_local_repository(
name = "linux_opencv",
@ -292,3 +313,4 @@ http_archive(
strip_prefix = "google-toolbox-for-mac-2.2.1",
build_file = "@//third_party:google_toolbox_for_mac.BUILD",
)

View File

@ -9,6 +9,7 @@
"mediapipe/examples/ios/facedetectiongpu/BUILD",
"mediapipe/examples/ios/handdetectiongpu/BUILD",
"mediapipe/examples/ios/handtrackinggpu/BUILD",
"mediapipe/examples/ios/multihandtrackinggpu/BUILD",
"mediapipe/examples/ios/objectdetectioncpu/BUILD",
"mediapipe/examples/ios/objectdetectiongpu/BUILD"
],
@ -18,6 +19,7 @@
"//mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp",
"//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp",
"//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp",
"//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp",
"//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp",
"//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp",
"//mediapipe/objc:mediapipe_framework_ios"
@ -84,6 +86,8 @@
"mediapipe/examples/ios/handdetectiongpu/Base.lproj",
"mediapipe/examples/ios/handtrackinggpu",
"mediapipe/examples/ios/handtrackinggpu/Base.lproj",
"mediapipe/examples/ios/multihandtrackinggpu",
"mediapipe/examples/ios/multihandtrackinggpu/Base.lproj",
"mediapipe/examples/ios/objectdetectioncpu",
"mediapipe/examples/ios/objectdetectioncpu/Base.lproj",
"mediapipe/examples/ios/objectdetectiongpu",

View File

@ -16,6 +16,7 @@
"mediapipe/examples/ios/facedetectiongpu",
"mediapipe/examples/ios/handdetectiongpu",
"mediapipe/examples/ios/handtrackinggpu",
"mediapipe/examples/ios/multihandtrackinggpu",
"mediapipe/examples/ios/objectdetectioncpu",
"mediapipe/examples/ios/objectdetectiongpu"
],

View File

@ -165,6 +165,7 @@ cc_library(
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)

View File

@ -13,11 +13,12 @@
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/logging.h"
namespace mediapipe {
// Attach the header from one stream to another stream.
// Attach the header from a stream or side input to another stream.
//
// The header stream (tag HEADER) must not have any packets in it.
//
@ -25,17 +26,53 @@ namespace mediapipe {
// calculator to not need a header or to accept a separate stream with
// a header, that would be more future proof.
//
// Example usage 1:
// node {
// calculator: "AddHeaderCalculator"
// input_stream: "DATA:audio"
// input_stream: "HEADER:audio_header"
// output_stream: "audio_with_header"
// }
//
// Example usage 2:
// node {
// calculator: "AddHeaderCalculator"
// input_stream: "DATA:audio"
// input_side_packet: "HEADER:audio_header"
// output_stream: "audio_with_header"
// }
//
class AddHeaderCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc) {
cc->Inputs().Tag("HEADER").SetNone();
bool has_side_input = false;
bool has_header_stream = false;
if (cc->InputSidePackets().HasTag("HEADER")) {
cc->InputSidePackets().Tag("HEADER").SetAny();
has_side_input = true;
}
if (cc->Inputs().HasTag("HEADER")) {
cc->Inputs().Tag("HEADER").SetNone();
has_header_stream = true;
}
if (has_side_input == has_header_stream) {
return mediapipe::InvalidArgumentError(
"Header must be provided via exactly one of side input and input "
"stream");
}
cc->Inputs().Tag("DATA").SetAny();
cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Tag("DATA"));
return ::mediapipe::OkStatus();
}
::mediapipe::Status Open(CalculatorContext* cc) override {
const Packet& header = cc->Inputs().Tag("HEADER").Header();
Packet header;
if (cc->InputSidePackets().HasTag("HEADER")) {
header = cc->InputSidePackets().Tag("HEADER");
}
if (cc->Inputs().HasTag("HEADER")) {
header = cc->Inputs().Tag("HEADER").Header();
}
if (!header.IsEmpty()) {
cc->Outputs().Index(0).SetHeader(header);
}

View File

@ -14,8 +14,10 @@
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/timestamp.h"
#include "mediapipe/framework/tool/validate_type.h"
@ -24,7 +26,7 @@ namespace mediapipe {
class AddHeaderCalculatorTest : public ::testing::Test {};
TEST_F(AddHeaderCalculatorTest, Works) {
TEST_F(AddHeaderCalculatorTest, HeaderStream) {
CalculatorGraphConfig::Node node;
node.set_calculator("AddHeaderCalculator");
node.add_input_stream("HEADER:header_stream");
@ -96,4 +98,62 @@ TEST_F(AddHeaderCalculatorTest, NoPacketsOnHeaderStream) {
ASSERT_FALSE(runner.Run().ok());
}
// Verifies that a header supplied via the HEADER input side packet is copied
// to the output stream header and that data packets pass through untouched.
TEST_F(AddHeaderCalculatorTest, InputSidePacket) {
  CalculatorGraphConfig::Node node;
  node.set_calculator("AddHeaderCalculator");
  node.add_input_stream("DATA:data_stream");
  node.add_output_stream("merged_stream");
  node.add_input_side_packet("HEADER:header");

  CalculatorRunner runner(node);

  // Provide the header through the side packet and feed five data packets.
  runner.MutableSidePackets()->Tag("HEADER") =
      Adopt(new std::string("my_header"));
  constexpr int kNumPackets = 5;
  for (int idx = 0; idx < kNumPackets; ++idx) {
    runner.MutableInputs()->Tag("DATA").packets.push_back(
        Adopt(new int(idx)).At(Timestamp(idx * 1000)));
  }

  // Run calculator.
  MP_ASSERT_OK(runner.Run());
  ASSERT_EQ(1, runner.Outputs().NumEntries());

  // The side-packet header must appear as the output stream header.
  EXPECT_EQ(std::string("my_header"),
            runner.Outputs().Index(0).header.Get<std::string>());

  // Every data packet passes through with value and timestamp preserved.
  const std::vector<Packet>& output_packets =
      runner.Outputs().Index(0).packets;
  ASSERT_EQ(kNumPackets, output_packets.size());
  for (int idx = 0; idx < kNumPackets; ++idx) {
    EXPECT_EQ(idx, output_packets[idx].Get<int>());
    EXPECT_EQ(Timestamp(idx * 1000), output_packets[idx].Timestamp());
  }
}
// Verifies the calculator rejects a graph that provides the header both as a
// side packet and as an input stream.
TEST_F(AddHeaderCalculatorTest, UsingBothSideInputAndStream) {
  CalculatorGraphConfig::Node node;
  node.set_calculator("AddHeaderCalculator");
  node.add_input_stream("HEADER:header_stream");
  node.add_input_stream("DATA:data_stream");
  node.add_output_stream("merged_stream");
  node.add_input_side_packet("HEADER:header");

  CalculatorRunner runner(node);

  // Set both headers and add 5 packets.
  // BUG FIX: the original set the side packet twice and never set the stream
  // header; the second assignment was dead code. Set one of each, matching
  // the comment and the test's name.
  runner.MutableSidePackets()->Tag("HEADER") =
      Adopt(new std::string("my_header"));
  runner.MutableInputs()->Tag("HEADER").header =
      Adopt(new std::string("my_header"));
  for (int i = 0; i < 5; ++i) {
    Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000));
    runner.MutableInputs()->Tag("DATA").packets.push_back(packet);
  }

  // Run should fail because header can only be provided one way.
  EXPECT_EQ(runner.Run().code(), ::mediapipe::InvalidArgumentError("").code());
}
} // namespace mediapipe

View File

@ -330,22 +330,27 @@ void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() {
return ::mediapipe::OkStatus();
}
LOG_IF(WARNING, frame_time_usec_ <
(cc->InputTimestamp() - last_packet_.Timestamp()).Value())
<< "Adding jitter is meaningless when upsampling.";
const int64 curr_diff =
(next_output_timestamp_ - cc->InputTimestamp()).Value();
const int64 last_diff =
(next_output_timestamp_ - last_packet_.Timestamp()).Value();
if (curr_diff * last_diff > 0) {
return ::mediapipe::OkStatus();
if (frame_time_usec_ <
(cc->InputTimestamp() - last_packet_.Timestamp()).Value()) {
LOG_FIRST_N(WARNING, 2)
<< "Adding jitter is not very useful when upsampling.";
}
while (true) {
const int64 last_diff =
(next_output_timestamp_ - last_packet_.Timestamp()).Value();
RET_CHECK_GT(last_diff, 0.0);
const int64 curr_diff =
(next_output_timestamp_ - cc->InputTimestamp()).Value();
if (curr_diff > 0.0) {
break;
}
OutputWithinLimits(cc, (std::abs(curr_diff) > last_diff
? last_packet_
: cc->Inputs().Get(input_data_id_).Value())
.At(next_output_timestamp_));
UpdateNextOutputTimestampWithJitter();
}
OutputWithinLimits(cc, (std::abs(curr_diff) > std::abs(last_diff)
? last_packet_
: cc->Inputs().Get(input_data_id_).Value())
.At(next_output_timestamp_));
UpdateNextOutputTimestampWithJitter();
return ::mediapipe::OkStatus();
}

View File

@ -501,8 +501,11 @@ void ImageCroppingCalculator::GetOutputDimensions(CalculatorContext* cc,
row_max = std::max(row_max, transformed_points_[i * 2 + 1]);
}
*dst_width = std::round((col_max - col_min) * src_width);
*dst_height = std::round((row_max - row_min) * src_height);
int width = static_cast<int>(std::round((col_max - col_min) * src_width));
int height = static_cast<int>(std::round((row_max - row_min) * src_height));
// Minimum output dimension 1x1 prevents creation of textures with 0x0.
*dst_width = std::max(1, width);
*dst_height = std::max(1, height);
}
} // namespace mediapipe

View File

@ -983,3 +983,31 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto",
],
)
cc_library(
name = "detections_to_timed_box_list_calculator",
srcs = ["detections_to_timed_box_list_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util/tracking:box_tracker",
],
alwayslink = 1,
)
cc_library(
name = "detection_unique_id_calculator",
srcs = ["detection_unique_id_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
],
alwayslink = 1,
)

View File

@ -0,0 +1,110 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/port/status.h"
namespace mediapipe {
namespace {

constexpr char kDetectionsTag[] = "DETECTIONS";
constexpr char kDetectionListTag[] = "DETECTION_LIST";

// Each detection processed by DetectionUniqueIdCalculator will be assigned a
// unique id that starts from 1. If a detection already has an id other than
// 0, the id will be overwritten.
// NOTE(review): this counter is a mutable global with no synchronization;
// multiple calculator instances running Process() concurrently could hand out
// duplicate ids — confirm single-threaded use or switch to std::atomic.
static int64 detection_id = 0;

// Returns the next id. BUG FIX: the return type is now int64 to match the
// counter; the previous `int` return silently truncated once the count
// exceeded INT_MAX.
inline int64 GetNextDetectionId() { return ++detection_id; }

}  // namespace
// Assigns a unique id to every detection flowing through.
// Note that the calculator consumes (takes ownership of) the input vector of
// Detection or DetectionList, so the input stream cannot be connected to any
// other calculator.
//
// Example config:
// node {
//   calculator: "DetectionUniqueIdCalculator"
//   input_stream: "DETECTIONS:detections"
//   output_stream: "DETECTIONS:output_detections"
// }
class DetectionUniqueIdCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    const bool has_list = cc->Inputs().HasTag(kDetectionListTag);
    const bool has_vector = cc->Inputs().HasTag(kDetectionsTag);
    RET_CHECK(has_list || has_vector)
        << "None of the input streams are provided.";
    // Each provided input stream requires the matching output stream and
    // carries the same payload type through unchanged.
    if (has_list) {
      RET_CHECK(cc->Outputs().HasTag(kDetectionListTag));
      cc->Inputs().Tag(kDetectionListTag).Set<DetectionList>();
      cc->Outputs().Tag(kDetectionListTag).Set<DetectionList>();
    }
    if (has_vector) {
      RET_CHECK(cc->Outputs().HasTag(kDetectionsTag));
      cc->Inputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
      cc->Outputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
    }
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Open(CalculatorContext* cc) override {
    // Output timestamp bound tracks the input exactly (no added latency).
    cc->SetOffset(::mediapipe::TimestampDiff(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(DetectionUniqueIdCalculator);
// Consumes each non-empty input packet, overwrites the detection_id of every
// detection it carries with a fresh monotonically increasing id, and re-emits
// the modified payload on the matching output tag at the input timestamp.
::mediapipe::Status DetectionUniqueIdCalculator::Process(
    CalculatorContext* cc) {
  if (cc->Inputs().HasTag(kDetectionListTag) &&
      !cc->Inputs().Tag(kDetectionListTag).IsEmpty()) {
    // Consume() transfers ownership of the packet payload; this only
    // succeeds when no other calculator holds a reference to the packet.
    auto result =
        cc->Inputs().Tag(kDetectionListTag).Value().Consume<DetectionList>();
    if (result.ok()) {
      auto detection_list = std::move(result).ValueOrDie();
      for (Detection& detection : *detection_list->mutable_detection()) {
        detection.set_detection_id(GetNextDetectionId());
      }
      cc->Outputs()
          .Tag(kDetectionListTag)
          .Add(detection_list.release(), cc->InputTimestamp());
    }
    // NOTE(review): if Consume() fails (packet shared with another consumer)
    // the input is dropped silently with no output — confirm intentional.
  }
  if (cc->Inputs().HasTag(kDetectionsTag) &&
      !cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
    // Same consume-mutate-reemit sequence for the flat vector form.
    auto result = cc->Inputs()
                      .Tag(kDetectionsTag)
                      .Value()
                      .Consume<std::vector<Detection>>();
    if (result.ok()) {
      auto detections = std::move(result).ValueOrDie();
      for (Detection& detection : *detections) {
        detection.set_detection_id(GetNextDetectionId());
      }
      cc->Outputs()
          .Tag(kDetectionsTag)
          .Add(detections.release(), cc->InputTimestamp());
    }
  }
  return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -39,6 +39,8 @@ constexpr char kKeypointLabel[] = "KEYPOINT";
// The ratio of detection label font height to the height of detection bounding
// box.
constexpr double kLabelToBoundingBoxRatio = 0.1;
// Preserve 2 decimal digits.
constexpr float kNumScoreDecimalDigitsMultipler = 100;
} // namespace
@ -235,18 +237,26 @@ void DetectionsToRenderDataCalculator::AddLabels(
std::string label_str = detection.label().empty()
? absl::StrCat(detection.label_id(i))
: detection.label(i);
const float rounded_score =
std::round(detection.score(i) * kNumScoreDecimalDigitsMultipler) /
kNumScoreDecimalDigitsMultipler;
std::string label_and_score =
absl::StrCat(label_str, options.text_delimiter(), detection.score(i),
absl::StrCat(label_str, options.text_delimiter(), rounded_score,
options.text_delimiter());
label_and_scores.push_back(label_and_score);
}
std::vector<std::string> labels;
if (options.render_detection_id()) {
const std::string detection_id_str =
absl::StrCat("Id: ", detection.detection_id());
labels.push_back(detection_id_str);
}
if (options.one_label_per_line()) {
labels.swap(label_and_scores);
labels.insert(labels.end(), label_and_scores.begin(),
label_and_scores.end());
} else {
labels.push_back(absl::StrJoin(label_and_scores, ""));
}
// Add the render annotations for "label(_id),score".
for (int i = 0; i < labels.size(); ++i) {
auto label = labels.at(i);

View File

@ -53,4 +53,7 @@ message DetectionsToRenderDataCalculatorOptions {
// instances of this calculator are present in the graph, this value
// should be unique among them.
optional string scene_class = 7 [default = "DETECTION"];
// If true, renders the detection id in the first line before the labels.
optional bool render_detection_id = 8 [default = false];
}

View File

@ -0,0 +1,110 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/tracking/box_tracker.h"
namespace mediapipe {
namespace {

// Input stream tags: a flat vector of Detections and/or a DetectionList.
constexpr char kDetectionsTag[] = "DETECTIONS";
constexpr char kDetectionListTag[] = "DETECTION_LIST";
// Output stream tag carrying the converted TimedBoxProtoList.
constexpr char kBoxesTag[] = "BOXES";

}  // namespace
// Converts Detection protos into a TimedBoxProtoList so detections can be
// handed to the box tracker.
//
// Please note that only Location Data formats of RELATIVE_BOUNDING_BOX are
// supported.
//
// Example config:
// node {
//   calculator: "DetectionsToTimedBoxListCalculator"
//   input_stream: "DETECTIONS:detections"
//   output_stream: "BOXES:boxes"
// }
class DetectionsToTimedBoxListCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    const bool has_list = cc->Inputs().HasTag(kDetectionListTag);
    const bool has_vector = cc->Inputs().HasTag(kDetectionsTag);
    RET_CHECK(has_list || has_vector)
        << "None of the input streams are provided.";
    if (has_list) {
      cc->Inputs().Tag(kDetectionListTag).Set<DetectionList>();
    }
    if (has_vector) {
      cc->Inputs().Tag(kDetectionsTag).Set<std::vector<Detection>>();
    }
    cc->Outputs().Tag(kBoxesTag).Set<TimedBoxProtoList>();
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Open(CalculatorContext* cc) override {
    // No added latency: the output timestamp bound tracks the input.
    cc->SetOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  // Fills *box from one detection; see definition for details.
  void ConvertDetectionToTimedBox(const Detection& detection,
                                  TimedBoxProto* box, CalculatorContext* cc);
};
REGISTER_CALCULATOR(DetectionsToTimedBoxListCalculator);
// Collects every detection from whichever input streams are connected,
// converts each into a timed box, and emits one TimedBoxProtoList per frame.
::mediapipe::Status DetectionsToTimedBoxListCalculator::Process(
    CalculatorContext* cc) {
  auto boxes = absl::make_unique<TimedBoxProtoList>();

  if (cc->Inputs().HasTag(kDetectionListTag)) {
    const auto& detection_list =
        cc->Inputs().Tag(kDetectionListTag).Get<DetectionList>();
    for (const auto& detection : detection_list.detection()) {
      ConvertDetectionToTimedBox(detection, boxes->add_box(), cc);
    }
  }
  if (cc->Inputs().HasTag(kDetectionsTag)) {
    const auto& detections =
        cc->Inputs().Tag(kDetectionsTag).Get<std::vector<Detection>>();
    for (const auto& detection : detections) {
      ConvertDetectionToTimedBox(detection, boxes->add_box(), cc);
    }
  }

  cc->Outputs().Tag(kBoxesTag).Add(boxes.release(), cc->InputTimestamp());
  return ::mediapipe::OkStatus();
}
// Copies the detection's relative bounding box into *box. The box id comes
// from the detection and the time from the current input packet.
// NOTE: assumes location_data holds a RELATIVE_BOUNDING_BOX (see class doc).
void DetectionsToTimedBoxListCalculator::ConvertDetectionToTimedBox(
    const Detection& detection, TimedBoxProto* box, CalculatorContext* cc) {
  const auto& rect = detection.location_data().relative_bounding_box();
  box->set_left(rect.xmin());
  box->set_top(rect.ymin());
  box->set_right(rect.xmin() + rect.width());
  box->set_bottom(rect.ymin() + rect.height());
  box->set_id(detection.detection_id());
  // InputTimestamp() is in microseconds; the box stores milliseconds.
  box->set_time_msec(cc->InputTimestamp().Microseconds() / 1000);
}
} // namespace mediapipe

View File

@ -37,6 +37,67 @@ proto_library(
deps = ["//mediapipe/framework:calculator_proto"],
)
proto_library(
name = "motion_analysis_calculator_proto",
srcs = ["motion_analysis_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util/tracking:motion_analysis_proto",
],
)
proto_library(
name = "flow_packager_calculator_proto",
srcs = ["flow_packager_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util/tracking:flow_packager_proto",
],
)
proto_library(
name = "box_tracker_calculator_proto",
srcs = ["box_tracker_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util/tracking:box_tracker_proto",
],
)
mediapipe_cc_proto_library(
name = "motion_analysis_calculator_cc_proto",
srcs = ["motion_analysis_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util/tracking:motion_analysis_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":motion_analysis_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "flow_packager_calculator_cc_proto",
srcs = ["flow_packager_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util/tracking:flow_packager_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":flow_packager_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "box_tracker_calculator_cc_proto",
srcs = ["box_tracker_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util/tracking:box_tracker_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":box_tracker_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "flow_to_image_calculator_cc_proto",
srcs = ["flow_to_image_calculator.proto"],
@ -131,6 +192,94 @@ cc_library(
alwayslink = 1,
)
cc_library(
name = "motion_analysis_calculator",
srcs = ["motion_analysis_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":motion_analysis_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util/tracking:camera_motion",
"//mediapipe/util/tracking:camera_motion_cc_proto",
"//mediapipe/util/tracking:frame_selection_cc_proto",
"//mediapipe/util/tracking:motion_analysis",
"//mediapipe/util/tracking:motion_estimation",
"//mediapipe/util/tracking:motion_models",
"//mediapipe/util/tracking:region_flow_cc_proto",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
cc_library(
name = "flow_packager_calculator",
srcs = ["flow_packager_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":flow_packager_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/util/tracking:camera_motion_cc_proto",
"//mediapipe/util/tracking:flow_packager",
"//mediapipe/util/tracking:region_flow_cc_proto",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
],
alwayslink = 1,
)
cc_library(
name = "box_tracker_calculator",
srcs = ["box_tracker_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":box_tracker_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/formats:image_frame_opencv",
"//mediapipe/framework/formats:video_stream_header", # fixdeps: keep -- required for exobazel build.
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/framework/tool:options_util",
"//mediapipe/util/tracking",
"//mediapipe/util/tracking:box_tracker",
"//mediapipe/util/tracking:tracking_visualization_utilities",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
cc_library(
name = "tracked_detection_manager_calculator",
srcs = ["tracked_detection_manager_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/formats:rect_cc_proto",
"//mediapipe/framework/port:status",
"//mediapipe/util/tracking",
"//mediapipe/util/tracking:box_tracker",
"//mediapipe/util/tracking:tracked_detection",
"//mediapipe/util/tracking:tracked_detection_manager",
"//mediapipe/util/tracking:tracking_visualization_utilities",
"@com_google_absl//absl/container:node_hash_map",
],
alwayslink = 1,
)
filegroup(
name = "test_videos",
srcs = [
@ -201,3 +350,64 @@ cc_test(
"//mediapipe/framework/port:parse_text_proto",
],
)
MEDIAPIPE_DEPS = [
"//mediapipe/calculators/video:box_tracker_calculator",
"//mediapipe/calculators/video:flow_packager_calculator",
"//mediapipe/calculators/video:motion_analysis_calculator",
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
]
mediapipe_binary_graph(
name = "parallel_tracker_binarypb",
graph = "testdata/parallel_tracker_graph.pbtxt",
output_name = "testdata/parallel_tracker.binarypb",
visibility = ["//visibility:public"],
deps = MEDIAPIPE_DEPS,
)
mediapipe_binary_graph(
name = "tracker_binarypb",
graph = "testdata/tracker_graph.pbtxt",
output_name = "testdata/tracker.binarypb",
visibility = ["//visibility:public"],
deps = MEDIAPIPE_DEPS,
)
cc_test(
name = "tracking_graph_test",
size = "small",
srcs = ["tracking_graph_test.cc"],
copts = ["-DPARALLEL_INVOKER_ACTIVE"] + select({
"//mediapipe:apple": [],
"//mediapipe:android": [],
"//conditions:default": [],
}),
data = [
":testdata/lenna.png",
":testdata/parallel_tracker.binarypb",
":testdata/tracker.binarypb",
],
deps = [
":box_tracker_calculator",
":box_tracker_calculator_cc_proto",
":flow_packager_calculator",
":motion_analysis_calculator",
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:packet",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:image_frame",
"//mediapipe/framework/port:advanced_proto",
"//mediapipe/framework/port:core_proto",
"//mediapipe/framework/port:file_helpers",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_highgui",
"//mediapipe/framework/port:status",
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"//mediapipe/util/tracking:tracking_cc_proto",
],
)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,55 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/tracking/box_tracker.proto";
// Options for BoxTrackerCalculator, registered as a CalculatorOptions
// extension.
message BoxTrackerCalculatorOptions {
  extend CalculatorOptions {
    optional BoxTrackerCalculatorOptions ext = 268767860;
  }

  // Underlying tracker configuration (see box_tracker.proto).
  optional BoxTrackerOptions tracker_options = 1;

  // Initial position to be tracked. Can also be supplied as side packet or
  // as input stream.
  optional TimedBoxProtoList initial_position = 2;

  // If set and VIZ stream is present, renders tracking data into the
  // visualization.
  optional bool visualize_tracking_data = 3 [default = false];

  // If set and VIZ stream is present, renders the box state
  // into the visualization.
  optional bool visualize_state = 4 [default = false];

  // If set and VIZ stream is present, renders the internal box state
  // into the visualization.
  optional bool visualize_internal_state = 5 [default = false];

  // Size of the track data cache during streaming mode. This allows to buffer
  // track_data's for fast forward tracking, i.e. any TimedBox received
  // via input stream START_POS can be tracked towards the current track head
  // (i.e. last received TrackingData). Measured in number of frames.
  // 0 disables the cache.
  optional int32 streaming_track_data_cache_size = 6 [default = 0];

  // Add a transition period of N frames to smooth the jump from original
  // tracking to reset start pos with motion compensation. The transition will
  // be a linear decay of original tracking result. 0 means no transition.
  optional int32 start_pos_transition_frames = 7 [default = 0];
}

View File

@ -0,0 +1,281 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>

#include <cstdlib>
#include <fstream>
#include <memory>

#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "mediapipe/calculators/video/flow_packager_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/flow_packager.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
using mediapipe::CameraMotion;
using mediapipe::FlowPackager;
using mediapipe::RegionFlowFeatureList;
using mediapipe::TrackingData;
using mediapipe::TrackingDataChunk;
// A calculator that packages input CameraMotion and RegionFlowFeatureList
// into a TrackingData and optionally writes TrackingDataChunks to file.
//
// Input stream:
// FLOW: Input region flow (proto RegionFlowFeatureList).
// CAMERA: Input camera stream (proto CameraMotion, optional).
//
// Input side packets:
// CACHE_DIR: Optional caching directory tracking files are written to.
//
// Output streams.
// TRACKING: Output tracking data (proto TrackingData, per frame
// optional).
// TRACKING_CHUNK: Output tracking chunks (proto TrackingDataChunk,
// per chunk, optional), output at the first timestamp
// of each chunk.
// COMPLETE: Optional output packet sent on PreStream to
// to signal downstream calculators that all data has been
// processed and calculator is closed. Can be used to indicate
// that all data as been written to CACHE_DIR.
// Packages per-frame tracking flow and optional camera motion into
// TrackingData, optionally aggregating frames into TrackingDataChunks that
// are emitted per chunk and/or written to a cache directory.
class FlowPackagerCalculator : public CalculatorBase {
 public:
  ~FlowPackagerCalculator() override = default;

  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;
  ::mediapipe::Status Close(CalculatorContext* cc) override;

  // Writes passed chunk to disk.
  void WriteChunk(const TrackingDataChunk& chunk) const;

  // Initializes next chunk for tracking beginning from last frame of
  // current chunk (chunking is designed with one frame overlap).
  void PrepareCurrentForNextChunk(TrackingDataChunk* chunk);

 private:
  FlowPackagerCalculatorOptions options_;

  // Caching options.
  bool use_caching_ = false;   // True iff the CACHE_DIR side packet is present.
  bool build_chunk_ = false;   // True iff caching or TRACKING_CHUNK is used.
  std::string cache_dir_;      // Directory chunk files are written to.

  int chunk_idx_ = -1;         // Current chunk index; -1 until lazily set
                               // from the first frame's timestamp in Process.
  TrackingDataChunk tracking_chunk_;  // Chunk currently being assembled.

  int frame_idx_ = 0;          // Number of frames processed so far.
  Timestamp prev_timestamp_;   // Timestamp of the previously processed frame.

  std::unique_ptr<FlowPackager> flow_packager_;
};

REGISTER_CALCULATOR(FlowPackagerCalculator);
// Declares stream and side-packet types. FLOW input is mandatory; all other
// streams and the CACHE_DIR side packet are optional.
::mediapipe::Status FlowPackagerCalculator::GetContract(
    CalculatorContract* cc) {
  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();
  auto& side_packets = cc->InputSidePackets();

  if (!inputs.HasTag("FLOW")) {
    return tool::StatusFail("No input flow was specified.");
  }
  inputs.Tag("FLOW").Set<RegionFlowFeatureList>();

  if (inputs.HasTag("CAMERA")) {
    inputs.Tag("CAMERA").Set<CameraMotion>();
  }
  if (outputs.HasTag("TRACKING")) {
    outputs.Tag("TRACKING").Set<TrackingData>();
  }
  if (outputs.HasTag("TRACKING_CHUNK")) {
    outputs.Tag("TRACKING_CHUNK").Set<TrackingDataChunk>();
  }
  if (outputs.HasTag("COMPLETE")) {
    outputs.Tag("COMPLETE").Set<bool>();
  }
  if (side_packets.HasTag("CACHE_DIR")) {
    side_packets.Tag("CACHE_DIR").Set<std::string>();
  }
  return ::mediapipe::OkStatus();
}
// Reads options, constructs the FlowPackager and resolves caching settings.
::mediapipe::Status FlowPackagerCalculator::Open(CalculatorContext* cc) {
  options_ = cc->Options<FlowPackagerCalculatorOptions>();
  flow_packager_.reset(new FlowPackager(options_.flow_packager_options()));

  const bool has_cache_dir = cc->InputSidePackets().HasTag("CACHE_DIR");
  use_caching_ = has_cache_dir;
  if (has_cache_dir) {
    cache_dir_ = cc->InputSidePackets().Tag("CACHE_DIR").Get<std::string>();
  }
  // Chunks are only assembled when they are cached or explicitly requested.
  build_chunk_ = has_cache_dir || cc->Outputs().HasTag("TRACKING_CHUNK");
  return ::mediapipe::OkStatus();
}
// Packs the incoming flow (and optional camera motion) of the current frame
// into TrackingData, appends it to the chunk under construction when chunking
// is enabled, and emits finished chunks and per-frame tracking data.
::mediapipe::Status FlowPackagerCalculator::Process(CalculatorContext* cc) {
  InputStream* flow_stream = &(cc->Inputs().Tag("FLOW"));
  const RegionFlowFeatureList& flow = flow_stream->Get<RegionFlowFeatureList>();
  const Timestamp timestamp = flow_stream->Value().Timestamp();
  // Camera motion is optional; PackFlow is called with nullptr if absent.
  const CameraMotion* camera_motion = nullptr;
  if (cc->Inputs().HasTag("CAMERA")) {
    InputStream* camera_stream = &(cc->Inputs().Tag("CAMERA"));
    camera_motion = &camera_stream->Get<CameraMotion>();
  }
  std::unique_ptr<TrackingData> tracking_data(new TrackingData());
  flow_packager_->PackFlow(flow, camera_motion, tracking_data.get());
  if (build_chunk_) {
    if (chunk_idx_ < 0) {  // Lazy init, determine first start.
      chunk_idx_ =
          timestamp.Value() / 1000 / options_.caching_chunk_size_msec();
      tracking_chunk_.set_first_chunk(true);
    }
    CHECK_GE(chunk_idx_, 0);
    // Append this frame's tracking data as the next item of the chunk.
    TrackingDataChunk::Item* item = tracking_chunk_.add_item();
    item->set_frame_idx(frame_idx_);
    item->set_timestamp_usec(timestamp.Value());
    if (frame_idx_ > 0) {
      item->set_prev_timestamp_usec(prev_timestamp_.Value());
    }
    if (cc->Outputs().HasTag("TRACKING")) {
      // Need to copy as output is requested.
      *item->mutable_tracking_data() = *tracking_data;
    } else {
      // No per-frame output requested; move instead of copying.
      item->mutable_tracking_data()->Swap(tracking_data.get());
    }
    // Close out the chunk once the frame timestamp reaches the next chunk
    // boundary (boundaries are multiples of caching_chunk_size_msec).
    const int next_chunk_msec =
        options_.caching_chunk_size_msec() * (chunk_idx_ + 1);
    if (timestamp.Value() / 1000 >= next_chunk_msec) {
      if (cc->Outputs().HasTag("TRACKING_CHUNK")) {
        // Chunks are emitted at the timestamp of their first frame.
        cc->Outputs()
            .Tag("TRACKING_CHUNK")
            .Add(new TrackingDataChunk(tracking_chunk_),
                 Timestamp(tracking_chunk_.item(0).timestamp_usec()));
      }
      if (use_caching_) {
        WriteChunk(tracking_chunk_);
      }
      PrepareCurrentForNextChunk(&tracking_chunk_);
    }
  }
  if (cc->Outputs().HasTag("TRACKING")) {
    cc->Outputs()
        .Tag("TRACKING")
        .Add(tracking_data.release(), flow_stream->Value().Timestamp());
  }
  prev_timestamp_ = timestamp;
  ++frame_idx_;
  return ::mediapipe::OkStatus();
}
// Flushes the final (possibly partial) chunk and signals completion on the
// optional COMPLETE stream.
::mediapipe::Status FlowPackagerCalculator::Close(CalculatorContext* cc) {
  const bool processed_any_frames = frame_idx_ > 0;
  if (processed_any_frames) {
    tracking_chunk_.set_last_chunk(true);
    if (cc->Outputs().HasTag("TRACKING_CHUNK")) {
      const Timestamp chunk_start(tracking_chunk_.item(0).timestamp_usec());
      cc->Outputs()
          .Tag("TRACKING_CHUNK")
          .Add(new TrackingDataChunk(tracking_chunk_), chunk_start);
    }
    if (use_caching_) {
      WriteChunk(tracking_chunk_);
    }
  }
  if (cc->Outputs().HasTag("COMPLETE")) {
    cc->Outputs().Tag("COMPLETE").Add(new bool(true), Timestamp::PreStream());
  }
  return ::mediapipe::OkStatus();
}
void FlowPackagerCalculator::WriteChunk(const TrackingDataChunk& chunk) const {
if (chunk.item_size() == 0) {
LOG(ERROR) << "Write chunk called with empty tracking data."
<< "This can only occur if the spacing between frames "
<< "is larger than the requested chunk size. Try increasing "
<< "the chunk size";
return;
}
auto format_runtime =
absl::ParsedFormat<'d'>::New(options_.cache_file_format());
std::string chunk_file;
if (format_runtime) {
chunk_file =
cache_dir_ + "/" + absl::StrFormat(*format_runtime, chunk_idx_);
} else {
LOG(ERROR) << "chache_file_format wrong. fall back to chunk_%04d.";
chunk_file = cache_dir_ + "/" + absl::StrFormat("chunk_%04d", chunk_idx_);
}
std::string data;
chunk.SerializeToString(&data);
const char* temp_filename = tempnam(cache_dir_.c_str(), nullptr);
std::ofstream out_file(temp_filename);
if (!out_file) {
LOG(ERROR) << "Could not open " << temp_filename;
} else {
out_file.write(data.data(), data.size());
}
if (rename(temp_filename, chunk_file.c_str()) != 0) {
LOG(ERROR) << "Failed to rename to " << chunk_file;
}
LOG(INFO) << "Wrote chunk : " << chunk_file;
}
// Resets |chunk| for the next chunk while carrying its last frame over as the
// new first item (chunks overlap by one frame by design), then advances the
// chunk index.
void FlowPackagerCalculator::PrepareCurrentForNextChunk(
    TrackingDataChunk* chunk) {
  CHECK(chunk);
  const int num_items = chunk->item_size();
  if (num_items == 0) {
    LOG(ERROR) << "Called with empty chunk. Unexpected.";
    return;
  }
  chunk->set_first_chunk(false);
  // Move the final item aside, wipe the chunk, then re-insert the item as the
  // seed of the next chunk.
  TrackingDataChunk::Item carry_over;
  carry_over.Swap(chunk->mutable_item(num_items - 1));
  chunk->Clear();
  chunk->add_item()->Swap(&carry_over);
  ++chunk_idx_;
}
} // namespace mediapipe

View File

@ -0,0 +1,36 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/tracking/flow_packager.proto";
// Options for FlowPackagerCalculator, which packages region flow and camera
// motion into TrackingData and optionally caches chunks to disk.
message FlowPackagerCalculatorOptions {
  extend CalculatorOptions {
    optional FlowPackagerCalculatorOptions ext = 271236147;
  }

  // Options forwarded to the underlying FlowPackager.
  optional mediapipe.FlowPackagerOptions flow_packager_options = 1;

  // Chunk size for caching files that are written to the externally specified
  // caching directory. Specified in msec.
  // Note that each chunk always contains at its end the first frame of the
  // next chunk (to enable forward tracking across chunk boundaries).
  optional int32 caching_chunk_size_msec = 2 [default = 2500];

  // printf-style format used to derive the cache filename from the chunk
  // index; must contain exactly one %d placeholder.
  optional string cache_file_format = 3 [default = "chunk_%04d"];
}

View File

@ -0,0 +1,988 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <fstream>
#include <memory>
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "mediapipe/calculators/video/motion_analysis_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/image_frame_opencv.h"
#include "mediapipe/framework/formats/video_stream_header.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/tracking/camera_motion.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/frame_selection.pb.h"
#include "mediapipe/util/tracking/motion_analysis.h"
#include "mediapipe/util/tracking/motion_estimation.h"
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
using mediapipe::AffineAdapter;
using mediapipe::CameraMotion;
using mediapipe::FrameSelectionResult;
using mediapipe::Homography;
using mediapipe::HomographyAdapter;
using mediapipe::LinearSimilarityModel;
using mediapipe::MixtureHomography;
using mediapipe::MixtureRowWeights;
using mediapipe::MotionAnalysis;
using mediapipe::ProjectViaFit;
using mediapipe::RegionFlowComputationOptions;
using mediapipe::RegionFlowFeatureList;
using mediapipe::SalientPointFrame;
using mediapipe::TranslationModel;
const char kOptionsTag[] = "OPTIONS";
// A calculator that performs motion analysis on an incoming video stream.
//
// Input streams: (at least one of them is required).
// VIDEO: The input video stream (ImageFrame, sRGB, sRGBA or GRAY8).
// SELECTION: Optional input stream to perform analysis only on selected
// frames. If present needs to contain camera motion
// and features.
//
// Input side packets:
// CSV_FILE: Read motion models as homographies from CSV file. Expected
// to be defined in the frame domain (un-normalized).
// Should store 9 floats per row.
// Specify number of homographies per frames via option
// meta_models_per_frame. For values > 1, MixtureHomographies
// are created, for value == 1, a single Homography is used.
// DOWNSAMPLE: Optionally specify downsampling factor via input side packet
// overriding value in the graph settings.
// Output streams (all are optional).
// FLOW: Sparse feature tracks in form of proto RegionFlowFeatureList.
// CAMERA: Camera motion as proto CameraMotion describing the per frame-
// pair motion. Has VideoHeader from input video.
// SALIENCY: Foreground saliency (objects moving different from the
// background) as proto SalientPointFrame.
// VIZ: Visualization stream as ImageFrame, sRGB, visualizing
// features and saliency (set via
// analysis_options().visualization_options())
// DENSE_FG: Dense foreground stream, describing per-pixel foreground-
// ness as confidence between 0 (background) and 255
// (foreground). Output is ImageFrame (GRAY8).
// VIDEO_OUT: Optional output stream when SELECTION is used. Output is input
// VIDEO at the selected frames. Required VIDEO to be present.
// GRAY_VIDEO_OUT: Optional output stream for downsampled, grayscale video.
// Requires VIDEO to be present and SELECTION to not be used.
// Performs motion analysis (feature tracking, camera motion estimation and
// optional saliency) on a video or pre-selected frame stream; see the stream
// documentation above for inputs and outputs.
class MotionAnalysisCalculator : public CalculatorBase {
  // TODO: Activate once leakr approval is ready.
  // typedef com::google::android::libraries::micro::proto::Data HomographyData;
 public:
  ~MotionAnalysisCalculator() override = default;

  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Open(CalculatorContext* cc) override;
  ::mediapipe::Status Process(CalculatorContext* cc) override;
  ::mediapipe::Status Close(CalculatorContext* cc) override;

 private:
  // Outputs results to Outputs() if MotionAnalysis buffered sufficient results.
  // Otherwise no-op. Set flush to true to force output of all buffered data.
  void OutputMotionAnalyzedFrames(bool flush, CalculatorContext* cc);

  // Lazy init function to be called on Process.
  ::mediapipe::Status InitOnProcess(InputStream* video_stream,
                                    InputStream* selection_stream);

  // Parses CSV file contents to homographies.
  bool ParseModelCSV(const std::string& contents,
                     std::deque<Homography>* homographies);

  // Turns list of 9-tuple floating values into set of homographies.
  bool HomographiesFromValues(const std::vector<float>& homog_values,
                              std::deque<Homography>* homographies);

  // Appends CameraMotions and features from homographies.
  // Set append_identity to true to add an identity transform to the beginning
  // of each list *in addition* to the motions derived from homographies.
  void AppendCameraMotionsFromHomographies(
      const std::deque<Homography>& homographies, bool append_identity,
      std::deque<CameraMotion>* camera_motions,
      std::deque<RegionFlowFeatureList>* features);

  // Helper function to subtract current metadata motion from features. Used
  // for hybrid estimation case.
  void SubtractMetaMotion(const CameraMotion& meta_motion,
                          RegionFlowFeatureList* features);

  // Inverse of above function to add back meta motion and replace
  // feature location with originals after estimation.
  void AddMetaMotion(const CameraMotion& meta_motion,
                     const RegionFlowFeatureList& meta_features,
                     RegionFlowFeatureList* features, CameraMotion* motion);

  MotionAnalysisCalculatorOptions options_;

  // Frame dimensions; -1 until initialized (from header or first packet).
  int frame_width_ = -1;
  int frame_height_ = -1;
  int frame_idx_ = 0;

  // Buffers incoming video frame packets (if visualization or VIDEO_OUT
  // output is requested).
  std::vector<Packet> packet_buffer_;

  // Buffers incoming timestamps until MotionAnalysis is ready to output via
  // above OutputMotionAnalyzedFrames.
  std::vector<Timestamp> timestamp_buffer_;

  // Input indicators for each stream.
  bool selection_input_ = false;
  bool video_input_ = false;

  // Output indicators for each stream.
  bool region_flow_feature_output_ = false;
  bool camera_motion_output_ = false;
  bool saliency_output_ = false;
  bool visualize_output_ = false;
  bool dense_foreground_output_ = false;
  bool video_output_ = false;
  bool grayscale_output_ = false;
  bool csv_file_input_ = false;

  // Indicates if saliency should be computed.
  bool with_saliency_ = false;

  // Set if hybrid meta analysis - see proto for details.
  bool hybrid_meta_analysis_ = false;

  // Concatenated motions for each selected frame. Used in case
  // hybrid estimation is requested to fallback to valid models.
  std::deque<CameraMotion> selected_motions_;

  // Normalized homographies from CSV file or metadata.
  std::deque<Homography> meta_homographies_;
  std::deque<CameraMotion> meta_motions_;
  std::deque<RegionFlowFeatureList> meta_features_;

  // Offset into above meta_motions_ and features_ when using
  // hybrid meta analysis.
  int hybrid_meta_offset_ = 0;

  std::unique_ptr<MotionAnalysis> motion_analysis_;
  std::unique_ptr<MixtureRowWeights> row_weights_;
};

REGISTER_CALCULATOR(MotionAnalysisCalculator);
// Declares stream and side-packet types. At least one of VIDEO / SELECTION
// must be connected; all outputs and side packets are optional.
::mediapipe::Status MotionAnalysisCalculator::GetContract(
    CalculatorContract* cc) {
  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();
  auto& side_packets = cc->InputSidePackets();

  if (inputs.HasTag("VIDEO")) {
    inputs.Tag("VIDEO").Set<ImageFrame>();
  }
  // Optional input stream from frame selection calculator.
  if (inputs.HasTag("SELECTION")) {
    inputs.Tag("SELECTION").Set<FrameSelectionResult>();
  }
  RET_CHECK(inputs.HasTag("VIDEO") || inputs.HasTag("SELECTION"))
      << "Either VIDEO, SELECTION must be specified.";

  if (outputs.HasTag("FLOW")) {
    outputs.Tag("FLOW").Set<RegionFlowFeatureList>();
  }
  if (outputs.HasTag("CAMERA")) {
    outputs.Tag("CAMERA").Set<CameraMotion>();
  }
  if (outputs.HasTag("SALIENCY")) {
    outputs.Tag("SALIENCY").Set<SalientPointFrame>();
  }
  if (outputs.HasTag("VIZ")) {
    outputs.Tag("VIZ").Set<ImageFrame>();
  }
  if (outputs.HasTag("DENSE_FG")) {
    outputs.Tag("DENSE_FG").Set<ImageFrame>();
  }
  if (outputs.HasTag("VIDEO_OUT")) {
    outputs.Tag("VIDEO_OUT").Set<ImageFrame>();
  }
  if (outputs.HasTag("GRAY_VIDEO_OUT")) {
    // We only output grayscale video if we're actually performing full region-
    // flow analysis on the video.
    RET_CHECK(inputs.HasTag("VIDEO") && !inputs.HasTag("SELECTION"));
    outputs.Tag("GRAY_VIDEO_OUT").Set<ImageFrame>();
  }

  if (side_packets.HasTag("CSV_FILE")) {
    side_packets.Tag("CSV_FILE").Set<std::string>();
  }
  if (side_packets.HasTag("DOWNSAMPLE")) {
    side_packets.Tag("DOWNSAMPLE").Set<float>();
  }
  if (side_packets.HasTag(kOptionsTag)) {
    side_packets.Tag(kOptionsTag).Set<CalculatorOptions>();
  }
  return ::mediapipe::OkStatus();
}
// Reads options, validates the requested stream combination, loads optional
// CSV metadata, and forwards the video header to all output streams.
::mediapipe::Status MotionAnalysisCalculator::Open(CalculatorContext* cc) {
  options_ =
      tool::RetrieveOptions(cc->Options<MotionAnalysisCalculatorOptions>(),
                            cc->InputSidePackets(), kOptionsTag);
  // Record which streams / side packets are connected.
  video_input_ = cc->Inputs().HasTag("VIDEO");
  selection_input_ = cc->Inputs().HasTag("SELECTION");
  region_flow_feature_output_ = cc->Outputs().HasTag("FLOW");
  camera_motion_output_ = cc->Outputs().HasTag("CAMERA");
  saliency_output_ = cc->Outputs().HasTag("SALIENCY");
  visualize_output_ = cc->Outputs().HasTag("VIZ");
  dense_foreground_output_ = cc->Outputs().HasTag("DENSE_FG");
  video_output_ = cc->Outputs().HasTag("VIDEO_OUT");
  grayscale_output_ = cc->Outputs().HasTag("GRAY_VIDEO_OUT");
  csv_file_input_ = cc->InputSidePackets().HasTag("CSV_FILE");
  hybrid_meta_analysis_ = options_.meta_analysis() ==
                          MotionAnalysisCalculatorOptions::META_ANALYSIS_HYBRID;
  if (video_output_) {
    RET_CHECK(selection_input_) << "VIDEO_OUT requires SELECTION input";
  }
  // Validate requested outputs against the selection analysis mode.
  if (selection_input_) {
    switch (options_.selection_analysis()) {
      case MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION:
        RET_CHECK(!visualize_output_)
            << "Visualization not supported for NO_ANALYSIS_USE_SELECTION";
        RET_CHECK(!dense_foreground_output_)
            << "Dense foreground not supported for NO_ANALYSIS_USE_SELECTION";
        RET_CHECK(!saliency_output_)
            << "Saliency output not supported for NO_ANALYSIS_USE_SELECTION";
        break;
      case MotionAnalysisCalculatorOptions::ANALYSIS_RECOMPUTE:
      case MotionAnalysisCalculatorOptions::ANALYSIS_WITH_SEED:
        RET_CHECK(video_input_) << "Need video input for feature tracking.";
        break;
      case MotionAnalysisCalculatorOptions::ANALYSIS_FROM_FEATURES:
        // Nothing to add here.
        break;
    }
  }
  if (visualize_output_ || dense_foreground_output_ || video_output_) {
    RET_CHECK(video_input_) << "Video input required.";
  }
  if (csv_file_input_) {
    RET_CHECK(!selection_input_)
        << "Can not use selection input with csv input.";
    if (!hybrid_meta_analysis_) {
      RET_CHECK(!saliency_output_ && !visualize_output_ &&
                !dense_foreground_output_ && !grayscale_output_)
          << "CSV file and meta input only supports flow and camera motion "
          << "output when using metadata only.";
    }
  }
  if (csv_file_input_) {
    // Read from file and parse.
    const std::string filename =
        cc->InputSidePackets().Tag("CSV_FILE").Get<std::string>();
    std::ifstream input_file(filename, std::ios::in);
    // Fail with a clear status instead of reading from a bad stream: on a
    // missing file tellg() returns -1 and the resize below would throw.
    RET_CHECK(input_file.is_open()) << "Could not open CSV file: " << filename;
    input_file.seekg(0, std::ios::end);
    const std::streamoff file_length = input_file.tellg();
    RET_CHECK(file_length >= 0) << "Could not read CSV file: " << filename;
    std::string file_contents;
    file_contents.resize(file_length);
    input_file.seekg(0, std::ios::beg);
    input_file.read(&file_contents[0], file_length);
    input_file.close();
    RET_CHECK(ParseModelCSV(file_contents, &meta_homographies_))
        << "Could not parse CSV file";
  }
  // Get video header from video or selection input if present.
  const VideoHeader* video_header = nullptr;
  if (video_input_ && !cc->Inputs().Tag("VIDEO").Header().IsEmpty()) {
    video_header = &(cc->Inputs().Tag("VIDEO").Header().Get<VideoHeader>());
  } else if (selection_input_ &&
             !cc->Inputs().Tag("SELECTION").Header().IsEmpty()) {
    video_header = &(cc->Inputs().Tag("SELECTION").Header().Get<VideoHeader>());
  } else {
    LOG(WARNING) << "No input video header found. Downstream calculators "
                    "expecting video headers are likely to fail.";
  }
  with_saliency_ = options_.analysis_options().compute_motion_saliency();
  // Force computation of saliency if requested as output.
  if (cc->Outputs().HasTag("SALIENCY")) {
    with_saliency_ = true;
    if (!options_.analysis_options().compute_motion_saliency()) {
      LOG(WARNING) << "Enable saliency computation. Set "
                   << "compute_motion_saliency to true to silence this "
                   << "warning.";
      options_.mutable_analysis_options()->set_compute_motion_saliency(true);
    }
  }
  if (options_.bypass_mode()) {
    cc->SetOffset(TimestampDiff(0));
  }
  // Input side packet overrides the downsample factor from the graph config.
  if (cc->InputSidePackets().HasTag("DOWNSAMPLE")) {
    options_.mutable_analysis_options()
        ->mutable_flow_options()
        ->set_downsample_factor(
            cc->InputSidePackets().Tag("DOWNSAMPLE").Get<float>());
  }
  // If no video header is provided, just return and initialize on the first
  // Process() call.
  if (video_header == nullptr) {
    return ::mediapipe::OkStatus();
  }
  ////////////// EARLY RETURN; ONLY HEADER OUTPUT SHOULD GO HERE ///////////////
  if (visualize_output_) {
    cc->Outputs().Tag("VIZ").SetHeader(Adopt(new VideoHeader(*video_header)));
  }
  if (video_output_) {
    cc->Outputs()
        .Tag("VIDEO_OUT")
        .SetHeader(Adopt(new VideoHeader(*video_header)));
  }
  if (cc->Outputs().HasTag("DENSE_FG")) {
    // Foreground masks are single channel.
    std::unique_ptr<VideoHeader> foreground_header(
        new VideoHeader(*video_header));
    foreground_header->format = ImageFormat::GRAY8;
    cc->Outputs().Tag("DENSE_FG").SetHeader(Adopt(foreground_header.release()));
  }
  if (cc->Outputs().HasTag("CAMERA")) {
    cc->Outputs().Tag("CAMERA").SetHeader(
        Adopt(new VideoHeader(*video_header)));
  }
  if (cc->Outputs().HasTag("SALIENCY")) {
    cc->Outputs()
        .Tag("SALIENCY")
        .SetHeader(Adopt(new VideoHeader(*video_header)));
  }
  return ::mediapipe::OkStatus();
}
// Per-frame entry point: either echoes precomputed metadata (CSV-only mode),
// passes through selection results (NO_ANALYSIS_USE_SELECTION), or feeds the
// frame/features into MotionAnalysis and outputs any completed results.
::mediapipe::Status MotionAnalysisCalculator::Process(CalculatorContext* cc) {
  if (options_.bypass_mode()) {
    return ::mediapipe::OkStatus();
  }
  InputStream* video_stream =
      video_input_ ? &(cc->Inputs().Tag("VIDEO")) : nullptr;
  InputStream* selection_stream =
      selection_input_ ? &(cc->Inputs().Tag("SELECTION")) : nullptr;
  // Checked on Open.
  CHECK(video_stream || selection_stream);
  // Lazy init.
  if (frame_width_ < 0 || frame_height_ < 0) {
    MP_RETURN_IF_ERROR(InitOnProcess(video_stream, selection_stream));
  }
  const Timestamp timestamp = cc->InputTimestamp();
  // Metadata-only mode: pop the precomputed motion/features parsed from the
  // CSV file and emit them directly; no analysis is performed.
  if ((csv_file_input_) && !hybrid_meta_analysis_) {
    if (camera_motion_output_) {
      RET_CHECK(!meta_motions_.empty()) << "Insufficient metadata.";
      CameraMotion output_motion = meta_motions_.front();
      meta_motions_.pop_front();
      output_motion.set_timestamp_usec(timestamp.Value());
      cc->Outputs().Tag("CAMERA").Add(new CameraMotion(output_motion),
                                      timestamp);
    }
    if (region_flow_feature_output_) {
      RET_CHECK(!meta_features_.empty()) << "Insufficient frames in CSV file";
      RegionFlowFeatureList output_features = meta_features_.front();
      meta_features_.pop_front();
      output_features.set_timestamp_usec(timestamp.Value());
      cc->Outputs().Tag("FLOW").Add(new RegionFlowFeatureList(output_features),
                                    timestamp);
    }
    ++frame_idx_;
    return ::mediapipe::OkStatus();
  }
  if (motion_analysis_ == nullptr) {
    // We do not need MotionAnalysis when using just metadata.
    motion_analysis_.reset(new MotionAnalysis(options_.analysis_options(),
                                              frame_width_, frame_height_));
  }
  std::unique_ptr<FrameSelectionResult> frame_selection_result;
  // Always use frame if selection is not activated.
  bool use_frame = !selection_input_;
  if (selection_input_) {
    CHECK(selection_stream);
    // Fill in timestamps we process.
    if (!selection_stream->Value().IsEmpty()) {
      ASSIGN_OR_RETURN(
          frame_selection_result,
          selection_stream->Value().ConsumeOrCopy<FrameSelectionResult>());
      use_frame = true;
      // Make sure both features and camera motion are present.
      RET_CHECK(frame_selection_result->has_camera_motion() &&
                frame_selection_result->has_features())
          << "Frame selection input error at: " << timestamp
          << " both camera motion and features need to be "
             "present in FrameSelectionResult. "
          << frame_selection_result->has_camera_motion() << " , "
          << frame_selection_result->has_features();
    }
  }
  // Pass-through mode: forward the selection results without re-analysis.
  if (selection_input_ && use_frame &&
      options_.selection_analysis() ==
          MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION) {
    // Output concatenated results, nothing to compute here.
    if (camera_motion_output_) {
      cc->Outputs().Tag("CAMERA").Add(
          frame_selection_result->release_camera_motion(), timestamp);
    }
    if (region_flow_feature_output_) {
      cc->Outputs().Tag("FLOW").Add(frame_selection_result->release_features(),
                                    timestamp);
    }
    if (video_output_) {
      cc->Outputs().Tag("VIDEO_OUT").AddPacket(video_stream->Value());
    }
    return ::mediapipe::OkStatus();
  }
  if (use_frame) {
    if (!selection_input_) {
      const cv::Mat input_view =
          formats::MatView(&video_stream->Get<ImageFrame>());
      if (hybrid_meta_analysis_) {
        // Seed with meta homography.
        RET_CHECK(hybrid_meta_offset_ < meta_motions_.size())
            << "Not enough metadata received for hybrid meta analysis";
        Homography initial_transform =
            meta_motions_[hybrid_meta_offset_].homography();
        // Helper removes the metadata motion from tracked features before
        // estimation; AddMetaMotion restores it on output.
        std::function<void(RegionFlowFeatureList*)> subtract_helper = std::bind(
            &MotionAnalysisCalculator::SubtractMetaMotion, this,
            meta_motions_[hybrid_meta_offset_], std::placeholders::_1);
        // Keep original features before modification around.
        motion_analysis_->AddFrameGeneric(
            input_view, timestamp.Value(), initial_transform, nullptr, nullptr,
            &subtract_helper, &meta_features_[hybrid_meta_offset_]);
        ++hybrid_meta_offset_;
      } else {
        motion_analysis_->AddFrame(input_view, timestamp.Value());
      }
    } else {
      selected_motions_.push_back(frame_selection_result->camera_motion());
      switch (options_.selection_analysis()) {
        case MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION:
          // Handled by the early return above.
          return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC)
                 << "Should not reach this point!";
        case MotionAnalysisCalculatorOptions::ANALYSIS_FROM_FEATURES:
          motion_analysis_->AddFeatures(frame_selection_result->features());
          break;
        case MotionAnalysisCalculatorOptions::ANALYSIS_RECOMPUTE: {
          const cv::Mat input_view =
              formats::MatView(&video_stream->Get<ImageFrame>());
          motion_analysis_->AddFrame(input_view, timestamp.Value());
          break;
        }
        case MotionAnalysisCalculatorOptions::ANALYSIS_WITH_SEED: {
          Homography homography;
          CameraMotionToHomography(frame_selection_result->camera_motion(),
                                   &homography);
          const cv::Mat input_view =
              formats::MatView(&video_stream->Get<ImageFrame>());
          motion_analysis_->AddFrameGeneric(input_view, timestamp.Value(),
                                            homography, &homography);
          break;
        }
      }
    }
    timestamp_buffer_.push_back(timestamp);
    ++frame_idx_;
    VLOG_EVERY_N(0, 100) << "Analyzed frame " << frame_idx_;
    // Buffer input frames only if visualization is requested.
    if (visualize_output_ || video_output_) {
      packet_buffer_.push_back(video_stream->Value());
    }
    // If requested, output grayscale thumbnails
    if (grayscale_output_) {
      cv::Mat grayscale_mat = motion_analysis_->GetGrayscaleFrameFromResults();
      std::unique_ptr<ImageFrame> grayscale_image(new ImageFrame(
          ImageFormat::GRAY8, grayscale_mat.cols, grayscale_mat.rows));
      cv::Mat image_frame_mat = formats::MatView(grayscale_image.get());
      grayscale_mat.copyTo(image_frame_mat);
      cc->Outputs()
          .Tag("GRAY_VIDEO_OUT")
          .Add(grayscale_image.release(), timestamp);
    }
    // Output other results, if we have any yet.
    OutputMotionAnalyzedFrames(false, cc);
  }
  return ::mediapipe::OkStatus();
}
// Flushes all remaining buffered results and sanity-checks that the CSV
// metadata was fully consumed.
::mediapipe::Status MotionAnalysisCalculator::Close(CalculatorContext* cc) {
  // Guard against empty videos (analysis never initialized).
  if (motion_analysis_ != nullptr) {
    OutputMotionAnalyzedFrames(true, cc);
  }
  if (csv_file_input_ && !meta_motions_.empty()) {
    LOG(ERROR) << "More motions than frames. Unexpected! Remainder: "
               << meta_motions_.size();
  }
  return ::mediapipe::OkStatus();
}
// Drains completed per-frame results (features, camera motion, saliency)
// from MotionAnalysis and forwards them to the requested output streams,
// then trims the timestamp/packet buffers accordingly. Set flush to true to
// force out all remaining buffered frames (used on Close).
void MotionAnalysisCalculator::OutputMotionAnalyzedFrames(
    bool flush, CalculatorContext* cc) {
  std::vector<std::unique_ptr<RegionFlowFeatureList>> features;
  std::vector<std::unique_ptr<CameraMotion>> camera_motions;
  std::vector<std::unique_ptr<SalientPointFrame>> saliency;
  const int buffer_size = timestamp_buffer_.size();
  const int num_results = motion_analysis_->GetResults(
      flush, &features, &camera_motions, with_saliency_ ? &saliency : nullptr);
  CHECK_LE(num_results, buffer_size);
  if (num_results == 0) {
    return;
  }
  for (int k = 0; k < num_results; ++k) {
    // Region flow features and camera motion for this frame.
    auto& feature_list = features[k];
    auto& camera_motion = camera_motions[k];
    const Timestamp timestamp = timestamp_buffer_[k];
    // Prefer the more stable of recomputed vs. selection-provided motion.
    if (selection_input_ && options_.hybrid_selection_camera()) {
      if (camera_motion->type() > selected_motions_.front().type()) {
        // Composited type is more stable.
        camera_motion->Swap(&selected_motions_.front());
      }
      selected_motions_.pop_front();
    }
    if (hybrid_meta_analysis_) {
      // Add back the metadata motion that was subtracted before estimation.
      AddMetaMotion(meta_motions_.front(), meta_features_.front(),
                    feature_list.get(), camera_motion.get());
      meta_motions_.pop_front();
      meta_features_.pop_front();
    }
    // Video frame for visualization.
    std::unique_ptr<ImageFrame> visualization_frame;
    cv::Mat visualization;
    if (visualize_output_) {
      // Initialize visualization frame with original frame.
      visualization_frame.reset(new ImageFrame());
      visualization_frame->CopyFrom(packet_buffer_[k].Get<ImageFrame>(), 16);
      visualization = formats::MatView(visualization_frame.get());
      motion_analysis_->RenderResults(
          *feature_list, *camera_motion,
          with_saliency_ ? saliency[k].get() : nullptr, &visualization);
      cc->Outputs().Tag("VIZ").Add(visualization_frame.release(), timestamp);
    }
    // Output dense foreground mask.
    if (dense_foreground_output_) {
      std::unique_ptr<ImageFrame> foreground_frame(
          new ImageFrame(ImageFormat::GRAY8, frame_width_, frame_height_));
      cv::Mat foreground = formats::MatView(foreground_frame.get());
      motion_analysis_->ComputeDenseForeground(*feature_list, *camera_motion,
                                               &foreground);
      cc->Outputs().Tag("DENSE_FG").Add(foreground_frame.release(), timestamp);
    }
    // Output flow features if requested.
    if (region_flow_feature_output_) {
      cc->Outputs().Tag("FLOW").Add(feature_list.release(), timestamp);
    }
    // Output camera motion.
    if (camera_motion_output_) {
      cc->Outputs().Tag("CAMERA").Add(camera_motion.release(), timestamp);
    }
    if (video_output_) {
      cc->Outputs().Tag("VIDEO_OUT").AddPacket(packet_buffer_[k]);
    }
    // Output saliency.
    if (saliency_output_) {
      cc->Outputs().Tag("SALIENCY").Add(saliency[k].release(), timestamp);
    }
  }
  if (hybrid_meta_analysis_) {
    // Consumed results shift the pending-metadata offset back.
    hybrid_meta_offset_ -= num_results;
    CHECK_GE(hybrid_meta_offset_, 0);
  }
  // Drop the consumed entries from the buffers.
  timestamp_buffer_.erase(timestamp_buffer_.begin(),
                          timestamp_buffer_.begin() + num_results);
  if (visualize_output_ || video_output_) {
    packet_buffer_.erase(packet_buffer_.begin(),
                         packet_buffer_.begin() + num_results);
  }
}
// Performs lazy initialization on the first Process() call.
//
// Exactly one of |video_stream| or |selection_stream| must be non-null:
//  - With VIDEO input, the analysis frame dimensions are taken from the first
//    ImageFrame, and the RegionFlowComputation image format is overridden to
//    match the actual channel layout of the stream if the configured format
//    disagrees with it.
//  - With SELECTION input, dimensions are taken from the CameraMotion stored
//    in the incoming FrameSelectionResult.
// Additionally converts any homographies parsed from CSV metadata into
// CameraMotions, and enables IRLS weight filtering for hybrid meta analysis.
::mediapipe::Status MotionAnalysisCalculator::InitOnProcess(
    InputStream* video_stream, InputStream* selection_stream) {
  if (video_stream) {
    frame_width_ = video_stream->Get<ImageFrame>().Width();
    frame_height_ = video_stream->Get<ImageFrame>().Height();

    // Ensure image options are set correctly.
    auto* region_options =
        options_.mutable_analysis_options()->mutable_flow_options();

    // Use two possible formats to account for different channel orders.
    RegionFlowComputationOptions::ImageFormat image_format;
    RegionFlowComputationOptions::ImageFormat image_format2;
    switch (video_stream->Get<ImageFrame>().Format()) {
      case ImageFormat::GRAY8:
        image_format = image_format2 =
            RegionFlowComputationOptions::FORMAT_GRAYSCALE;
        break;
      case ImageFormat::SRGB:
        image_format = RegionFlowComputationOptions::FORMAT_RGB;
        image_format2 = RegionFlowComputationOptions::FORMAT_BGR;
        break;
      case ImageFormat::SRGBA:
        image_format = RegionFlowComputationOptions::FORMAT_RGBA;
        image_format2 = RegionFlowComputationOptions::FORMAT_BGRA;
        break;
      default:
        RET_CHECK(false) << "Unsupported image format.";
    }
    if (region_options->image_format() != image_format &&
        region_options->image_format() != image_format2) {
      LOG(WARNING) << "Requested image format in RegionFlowComputation "
                   << "does not match video stream format. Overriding.";
      region_options->set_image_format(image_format);
    }

    // Account for downsampling mode INPUT_SIZE. In this case we are handed
    // already downsampled frames but the resulting CameraMotion should
    // be computed on higher resolution as specified by the downsample scale.
    if (region_options->downsample_mode() ==
        RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE) {
      const float scale = region_options->downsample_factor();
      frame_width_ = static_cast<int>(std::round(frame_width_ * scale));
      frame_height_ = static_cast<int>(std::round(frame_height_ * scale));
    }
  } else if (selection_stream) {
    const auto& camera_motion =
        selection_stream->Get<FrameSelectionResult>().camera_motion();
    frame_width_ = camera_motion.frame_width();
    frame_height_ = camera_motion.frame_height();
  } else {
    LOG(FATAL) << "Either VIDEO or SELECTION stream need to be specified.";
  }

  // Filled by CSV file parsing.
  if (!meta_homographies_.empty()) {
    CHECK(csv_file_input_);
    AppendCameraMotionsFromHomographies(meta_homographies_,
                                        true,  // append identity.
                                        &meta_motions_, &meta_features_);
    meta_homographies_.clear();
  }

  // Filter weights before using for hybrid mode.
  if (hybrid_meta_analysis_) {
    auto* motion_options =
        options_.mutable_analysis_options()->mutable_motion_options();
    motion_options->set_filter_initialized_irls_weights(true);
  }

  return ::mediapipe::OkStatus();
}
bool MotionAnalysisCalculator::ParseModelCSV(
const std::string& contents, std::deque<Homography>* homographies) {
std::vector<absl::string_view> values =
absl::StrSplit(contents, absl::ByAnyChar(",\n"));
// Trim off any empty lines.
while (values.back().empty()) {
values.pop_back();
}
// Convert to float.
std::vector<float> homog_values;
homog_values.reserve(values.size());
for (const auto& value : values) {
double value_64f;
if (!absl::SimpleAtod(value, &value_64f)) {
LOG(ERROR) << "Not a double, expected!";
return false;
}
homog_values.push_back(value_64f);
}
return HomographiesFromValues(homog_values, homographies);
}
bool MotionAnalysisCalculator::HomographiesFromValues(
const std::vector<float>& homog_values,
std::deque<Homography>* homographies) {
CHECK(homographies);
// Obvious constants are obvious :D
constexpr int kHomographyValues = 9;
if (homog_values.size() % kHomographyValues != 0) {
LOG(ERROR) << "Contents not a multiple of " << kHomographyValues;
return false;
}
for (int k = 0; k < homog_values.size(); k += kHomographyValues) {
std::vector<double> h_vals(kHomographyValues);
for (int l = 0; l < kHomographyValues; ++l) {
h_vals[l] = homog_values[k + l];
}
// Normalize last entry to 1.
if (h_vals[kHomographyValues - 1] == 0) {
LOG(ERROR) << "Degenerate homography, last entry is zero";
return false;
}
const double scale = 1.0f / h_vals[kHomographyValues - 1];
for (int l = 0; l < kHomographyValues; ++l) {
h_vals[l] *= scale;
}
Homography h = HomographyAdapter::FromDoublePointer(h_vals.data(), false);
homographies->push_back(h);
}
if (homographies->size() % options_.meta_models_per_frame() != 0) {
LOG(ERROR) << "Total homographies not a multiple of specified models "
<< "per frame.";
return false;
}
return true;
}
// Subtracts the metadata-supplied camera motion |meta_motion| from the flow
// of every feature in |features| (replacing feature locations with their
// transformed positions), then clamps the transformed features back to the
// frame domain and zeroes the IRLS weight of features whose residual flow is
// too large to be explained by the metadata motion (outliers).
void MotionAnalysisCalculator::SubtractMetaMotion(
    const CameraMotion& meta_motion, RegionFlowFeatureList* features) {
  // Prefer the mixture homography (per-row models, e.g. rolling shutter)
  // when present; it requires the precomputed row weights.
  if (meta_motion.mixture_homography().model_size() > 0) {
    CHECK(row_weights_ != nullptr);
    RegionFlowFeatureListViaTransform(meta_motion.mixture_homography(),
                                      features, -1.0f,
                                      1.0f,   // subtract transformed.
                                      true,   // replace feature loc.
                                      row_weights_.get());
  } else {
    RegionFlowFeatureListViaTransform(meta_motion.homography(), features, -1.0f,
                                      1.0f,   // subtract transformed.
                                      true);  // replace feature loc.
  }

  // Clamp transformed features to domain and handle outliers.
  const float domain_diam =
      hypot(features->frame_width(), features->frame_height());
  const float motion_mag = meta_motion.average_magnitude();

  // Same irls fraction as used by MODEL_MIXTURE_HOMOGRAPHY scaling in
  // MotionEstimation.
  const float irls_fraction = options_.analysis_options()
                                  .motion_options()
                                  .irls_mixture_fraction_scale() *
                              options_.analysis_options()
                                  .motion_options()
                                  .irls_motion_magnitude_fraction();
  // Allow larger residuals when the metadata motion itself is large.
  float err_scale = std::max(1.0f, motion_mag * irls_fraction);
  const float max_err =
      options_.meta_outlier_domain_ratio() * domain_diam * err_scale;
  const float max_err_sq = max_err * max_err;

  for (auto& feature : *features->mutable_feature()) {
    feature.set_x(
        std::max(0.0f, std::min(features->frame_width() - 1.0f, feature.x())));
    feature.set_y(
        std::max(0.0f, std::min(features->frame_height() - 1.0f, feature.y())));

    // Label anything with large residual motion an outlier.
    if (FeatureFlow(feature).Norm2() > max_err_sq) {
      feature.set_irls_weight(0.0f);
    }
  }
}
// Restores the original feature geometry from |meta_features| into
// |features| and composes the metadata motion back onto |motion| after
// residual estimation.
void MotionAnalysisCalculator::AddMetaMotion(
    const CameraMotion& meta_motion, const RegionFlowFeatureList& meta_features,
    RegionFlowFeatureList* features, CameraMotion* motion) {
  // Restore old feature location.
  CHECK_EQ(meta_features.feature_size(), features->feature_size());
  const int num_features = meta_features.feature_size();
  for (int idx = 0; idx < num_features; ++idx) {
    const auto& source = meta_features.feature(idx);
    auto* target = features->mutable_feature(idx);
    target->set_x(source.x());
    target->set_y(source.y());
    target->set_dx(source.dx());
    target->set_dy(source.dy());
  }

  // Composite camera motion.
  *motion = ComposeCameraMotion(*motion, meta_motion);

  // Restore type from metadata, i.e. do not declare motions as invalid.
  motion->set_type(meta_motion.type());
  motion->set_match_frame(-1);
}
// Converts metadata homographies into CameraMotions and placeholder feature
// lists, appending one entry per frame to |camera_motions| and |features|.
//
// Consecutive groups of options_.meta_models_per_frame() homographies form
// one frame; each model is inverted (ModelInvert) before being stored.
// Groups with more than one model become a MixtureHomography. If
// |append_identity| is set, an identity motion (with an empty feature list)
// is prepended for the first frame.
void MotionAnalysisCalculator::AppendCameraMotionsFromHomographies(
    const std::deque<Homography>& homographies, bool append_identity,
    std::deque<CameraMotion>* camera_motions,
    std::deque<RegionFlowFeatureList>* features) {
  CHECK(camera_motions);
  CHECK(features);

  // Template identity motion at the current frame dimensions.
  CameraMotion identity;
  identity.set_frame_width(frame_width_);
  identity.set_frame_height(frame_height_);
  *identity.mutable_translation() = TranslationModel();
  *identity.mutable_linear_similarity() = LinearSimilarityModel();
  *identity.mutable_homography() = Homography();
  identity.set_type(CameraMotion::VALID);
  identity.set_match_frame(0);

  // Placeholder feature list; no visual features were actually tracked.
  RegionFlowFeatureList empty_list;
  empty_list.set_long_tracks(true);
  empty_list.set_match_frame(-1);
  empty_list.set_frame_width(frame_width_);
  empty_list.set_frame_height(frame_height_);

  if (append_identity) {
    camera_motions->push_back(identity);
    features->push_back(empty_list);
  }

  const int models_per_frame = options_.meta_models_per_frame();
  CHECK_GT(models_per_frame, 0) << "At least one model per frame is needed";
  CHECK_EQ(0, homographies.size() % models_per_frame);
  const int num_frames = homographies.size() / models_per_frame;

  // Heuristic sigma, similar to what we use for rolling shutter removal.
  const float mixture_sigma = 1.0f / models_per_frame;
  if (row_weights_ == nullptr) {
    row_weights_.reset(new MixtureRowWeights(frame_height_,
                                             frame_height_ / 10,  // 10% margin
                                             mixture_sigma * frame_height_,
                                             1.0f, models_per_frame));
  }

  for (int f = 0; f < num_frames; ++f) {
    // Gather and invert this frame's models.
    MixtureHomography mix_homog;
    const int model_start = f * models_per_frame;
    for (int k = 0; k < models_per_frame; ++k) {
      const Homography& homog = homographies[model_start + k];
      *mix_homog.add_model() = ModelInvert(homog);
    }

    CameraMotion c = identity;
    c.set_match_frame(-1);

    if (mix_homog.model_size() > 1) {
      *c.mutable_mixture_homography() = mix_homog;
      c.set_mixture_row_sigma(mixture_sigma);
      for (int k = 0; k < models_per_frame; ++k) {
        c.add_mixture_inlier_coverage(1.0f);
      }
      *c.add_mixture_homography_spectrum() = mix_homog;
      c.set_rolling_shutter_motion_index(0);
      // Project the mixture down to a single homography.
      *c.mutable_homography() = ProjectViaFit<Homography>(
          mix_homog, frame_width_, frame_height_, row_weights_.get());
    } else {
      // Guaranteed to exist because we checked models_per_frame > 0 above.
      *c.mutable_homography() = mix_homog.model(0);
    }

    // Project remaining motions down.
    *c.mutable_linear_similarity() = ProjectViaFit<LinearSimilarityModel>(
        c.homography(), frame_width_, frame_height_);
    *c.mutable_translation() = ProjectViaFit<TranslationModel>(
        c.homography(), frame_width_, frame_height_);
    c.set_average_magnitude(
        std::hypot(c.translation().dx(), c.translation().dy()));

    camera_motions->push_back(c);
    features->push_back(empty_list);
  }
}
} // namespace mediapipe

View File

@ -0,0 +1,111 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/tracking/motion_analysis.proto";
// Options for MotionAnalysisCalculator.
// Next tag: 10
message MotionAnalysisCalculatorOptions {
  extend CalculatorOptions {
    optional MotionAnalysisCalculatorOptions ext = 270698255;
  }

  // Options forwarded to the underlying motion analysis pipeline.
  optional mediapipe.MotionAnalysisOptions analysis_options = 1;

  // Determines how optional input SELECTION (if present) is used to compute
  // the final camera motion.
  enum SelectionAnalysis {
    // Recompute camera motion for selected frame neighbors.
    ANALYSIS_RECOMPUTE = 1;

    // Use composited camera motion and region flow from SELECTION input. No
    // tracking or re-computation is performed.
    // Note that in this case only CAMERA, FLOW and VIDEO_OUT tags are
    // supported as output.
    NO_ANALYSIS_USE_SELECTION = 2;

    // Recompute camera motion for selected frame neighbors using
    // features supplied by SELECTION input. No feature tracking is performed.
    ANALYSIS_FROM_FEATURES = 3;

    // Recomputes camera motion for selected frame neighbors but seeds
    // initial transform with camera motion from SELECTION input.
    ANALYSIS_WITH_SEED = 4;
  }
  optional SelectionAnalysis selection_analysis = 4
      [default = ANALYSIS_WITH_SEED];

  // If activated when SELECTION input is activated, will replace the computed
  // camera motion (for any of the ANALYSIS_* case above) with the one supplied
  // by the frame selection, in case the frame selection one is more stable.
  // For example, if recomputed camera motion is unstable but the one from
  // the selection result is stable, will use the stable result instead.
  optional bool hybrid_selection_camera = 5 [default = false];

  // Determines how optional input META is used to compute the final camera
  // motion.
  enum MetaAnalysis {
    // Uses metadata supplied motions as is.
    META_ANALYSIS_USE_META = 1;

    // Seeds visual tracking from metadata motions - estimates visual residual
    // motion and combines with metadata.
    META_ANALYSIS_HYBRID = 2;
  }
  optional MetaAnalysis meta_analysis = 8 [default = META_ANALYSIS_USE_META];

  // Determines number of homography models per frame stored in the CSV file
  // or the homography metadata in META.
  // For values > 1, MixtureHomographies are created.
  optional int32 meta_models_per_frame = 6 [default = 1];

  // Used for META_ANALYSIS_HYBRID. Rejects features whose flow deviates by
  // more than domain_ratio * image diagonal size from the ground truth
  // metadata motion.
  optional float meta_outlier_domain_ratio = 9 [default = 0.0015];

  // If true, the MotionAnalysisCalculator will skip all processing and emit no
  // packets on any output. This is useful for quickly creating different
  // versions of a MediaPipe graph without changing its structure, assuming that
  // downstream calculators can handle missing input packets.
  // TODO: Remove this hack. See b/36485206 for more details.
  optional bool bypass_mode = 7 [default = false];
}
// Taken from
// java/com/google/android/libraries/microvideo/proto/microvideo.proto to
// satisfy leakr requirements
// TODO: Remove and use above proto.
// Per-frame motion metadata (homographies and patch histograms) recorded at
// capture time.
message HomographyData {
  // For each frame, there are 12 homography matrices stored. Each matrix is
  // 3x3 (9 elements), so this field contains 12 x 3 x 3 float values,
  // serialized row-major: the first row of the first homography matrix is
  // followed by the second row of the first homography matrix, followed by
  // the third row of the first homography matrix, followed by the first row
  // of the second homography matrix, etc.
  repeated float motion_homography_data = 1 [packed = true];

  // Vector containing histogram counts for individual patches in the frame.
  repeated uint32 histogram_count_data = 2 [packed = true];

  // The width of the frame at the time metadata was sampled.
  optional int32 frame_width = 3;

  // The height of the frame at the time metadata was sampled.
  optional int32 frame_height = 4;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 247 KiB

View File

@ -0,0 +1,134 @@
# Desktop object-tracking graph: runs motion analysis on raw frames, packages
# camera motion and region flow into tracking data, and feeds two
# BoxTrackerCalculator instances -- one driven by START_POS box requests and
# one by random-access RA_TRACK queries.
input_stream: "image_cpu_frames"
input_stream: "start_pos"
input_stream: "ra_track"
num_threads: 4

# Computes per-frame camera motion and region flow features from the video.
node: {
  calculator: "MotionAnalysisCalculator"
  input_stream: "VIDEO:image_cpu_frames"
  output_stream: "CAMERA:camera_motion"
  output_stream: "FLOW:region_flow"
  options: {
    [mediapipe.MotionAnalysisCalculatorOptions.ext]: {
      analysis_options: {
        analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
        flow_options: {
          # Minimum block size for fast estimation; could potentially be
          # lowered to 50.
          fast_estimation_min_block_size: 100
          top_inlier_sets: 1
          frac_inlier_error_threshold: 3e-3
          downsample_mode: DOWNSAMPLE_NONE
          verification_distance: 5.0
          verify_long_feature_acceleration: true
          verify_long_feature_trigger_ratio: 0.1
          tracking_options: {
            max_features: 500
            adaptive_extraction_levels: 2
            min_eig_val_settings: {
              adaptive_lowest_quality_level: 2e-4
            }
            klt_tracker_implementation: KLT_OPENCV
          }
        }
        motion_options: {
          label_empty_frames_as_valid: false
        }
      }
    }
  }
}

# Condenses region flow and camera motion into compact TrackingData.
node: {
  calculator: "FlowPackagerCalculator"
  input_stream: "FLOW:region_flow"
  input_stream: "CAMERA:camera_motion"
  output_stream: "TRACKING:tracking_data"
  options: {
    [mediapipe.FlowPackagerCalculatorOptions.ext]: {
      flow_packager_options: {
        binary_tracking_data_support: false
      }
    }
  }
}

# Tracks boxes requested via START_POS. TRACKING and START_POS are placed in
# separate sync sets so tracking data and box requests are delivered
# independently.
node: {
  calculator: "BoxTrackerCalculator"
  input_stream: "TRACKING:tracking_data"
  input_stream: "START_POS:start_pos"
  output_stream: "BOXES:boxes"
  input_side_packet: "OPTIONS:calculator_options"
  input_stream_handler: {
    input_stream_handler: "SyncSetInputStreamHandler"
    options: {
      [mediapipe.SyncSetInputStreamHandlerOptions.ext]: {
        sync_set: {
          tag_index: "TRACKING"
        }
        sync_set: {
          tag_index: "START_POS"
        }
      }
    }
  }
  options: {
    [mediapipe.BoxTrackerCalculatorOptions.ext]: {
      tracker_options: {
        track_step_options: {
          track_object_and_camera: true
          tracking_degrees: TRACKING_DEGREE_OBJECT_PERSPECTIVE
          object_similarity_min_contd_inliers: 6
          inlier_spring_force: 0.0
          static_motion_temporal_ratio: 3e-2
        }
      }
      visualize_tracking_data: false
      streaming_track_data_cache_size: 100
    }
  }
}

# Second tracker instance serving random-access track queries (RA_TRACK).
node: {
  calculator: "BoxTrackerCalculator"
  input_stream: "TRACKING:tracking_data"
  input_stream: "RA_TRACK:ra_track"
  output_stream: "RA_BOXES:ra_boxes"
  input_side_packet: "OPTIONS:calculator_options"
  input_stream_handler: {
    input_stream_handler: "SyncSetInputStreamHandler"
    options: {
      [mediapipe.SyncSetInputStreamHandlerOptions.ext]: {
        sync_set: {
          tag_index: "TRACKING"
        }
        sync_set: {
          tag_index: "RA_TRACK"
        }
      }
    }
  }
  options: {
    [mediapipe.BoxTrackerCalculatorOptions.ext]: {
      tracker_options: {
        track_step_options: {
          track_object_and_camera: true
          tracking_degrees: TRACKING_DEGREE_OBJECT_PERSPECTIVE
          object_similarity_min_contd_inliers: 6
          inlier_spring_force: 0.0
          static_motion_temporal_ratio: 3e-2
        }
      }
      visualize_tracking_data: false
      streaming_track_data_cache_size: 100
    }
  }
}
View File

@ -0,0 +1,122 @@
# Mobile object-tracking graph: performs motion analysis on (pre-downsampled)
# frames, packages tracking data, and runs a single BoxTrackerCalculator that
# handles start/restart/cancel box requests and random-access track queries.
input_stream: "image_cpu_frames"
input_stream: "start_pos"
input_stream: "cancel_object_id"
input_stream: "ra_track"
input_stream: "restart_pos"
input_stream: "track_time"
num_threads: 4

# Computes per-frame camera motion and region flow features from the video.
node: {
  calculator: "MotionAnalysisCalculator"
  options: {
    [mediapipe.MotionAnalysisCalculatorOptions.ext]: {
      analysis_options: {
        analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
        flow_options: {
          # Minimum block size for fast estimation; could potentially be
          # lowered to 50.
          fast_estimation_min_block_size: 100
          top_inlier_sets: 1
          frac_inlier_error_threshold: 3e-3
          # For mobile application, downsample before input into graph
          # and use DOWNSAMPLE_TO_INPUT_SIZE and specify
          # downsampling_factor option or DOWNSAMPLE input_side_packet
          downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
          verification_distance: 5.0
          verify_long_feature_acceleration: true
          verify_long_feature_trigger_ratio: 0.1
          tracking_options: {
            max_features: 500
            corner_extraction_method: EXTRACTION_FAST
            adaptive_extraction_levels: 2
            min_eig_val_settings: {
              adaptive_lowest_quality_level: 2e-4
            }
            klt_tracker_implementation: KLT_OPENCV
          }
        }
      }
    }
  }
  # Drops packets if calculator cannot keep up with the input rate.
  input_stream_handler: {
    input_stream_handler: "FixedSizeInputStreamHandler"
  }
  input_stream: "VIDEO:image_cpu_frames"
  input_side_packet: "DOWNSAMPLE:analysis_downsample_factor"
  input_side_packet: "OPTIONS:calculator_options"
  output_stream: "CAMERA:camera_motion"
  output_stream: "FLOW:region_flow"
}

# Condenses region flow and camera motion into compact TrackingData.
node: {
  calculator: "FlowPackagerCalculator"
  input_stream: "FLOW:region_flow"
  input_stream: "CAMERA:camera_motion"
  output_stream: "TRACKING:tracking_data"
  options: {
    [mediapipe.FlowPackagerCalculatorOptions.ext]: {
      flow_packager_options: {
        binary_tracking_data_support: false
      }
    }
  }
}

# Tracks boxes over time. Each sync set below is delivered independently so
# tracking data and the various user request streams do not block each other.
node: {
  calculator: "BoxTrackerCalculator"
  input_side_packet: "OPTIONS:calculator_options"
  input_stream: "TRACKING:tracking_data"
  input_stream: "TRACK_TIME:track_time"
  input_stream: "START_POS:start_pos"
  input_stream: "RESTART_POS:restart_pos"
  input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
  input_stream: "RA_TRACK:ra_track"
  output_stream: "BOXES:boxes"
  output_stream: "RA_BOXES:ra_boxes"
  input_stream_handler: {
    input_stream_handler: "SyncSetInputStreamHandler"
    options: {
      [mediapipe.SyncSetInputStreamHandlerOptions.ext]: {
        sync_set: {
          tag_index: "TRACKING"
          tag_index: "TRACK_TIME"
        }
        sync_set: {
          tag_index: "START_POS"
        }
        sync_set: {
          tag_index: "RESTART_POS"
        }
        sync_set: {
          tag_index: "CANCEL_OBJECT_ID"
        }
        sync_set: {
          tag_index: "RA_TRACK"
        }
      }
    }
  }
  options: {
    [mediapipe.BoxTrackerCalculatorOptions.ext]: {
      tracker_options: {
        track_step_options: {
          track_object_and_camera: true
          tracking_degrees: TRACKING_DEGREE_OBJECT_ROTATION_SCALE
          inlier_spring_force: 0.0
          static_motion_temporal_ratio: 3e-2
          object_similarity_min_contd_inliers: 10
        }
      }
      visualize_tracking_data: false
      streaming_track_data_cache_size: 100
    }
  }
}

View File

@ -0,0 +1,319 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "absl/container/node_hash_map.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/tracking/box_tracker.h"
#include "mediapipe/util/tracking/tracked_detection.h"
#include "mediapipe/util/tracking/tracked_detection_manager.h"
#include "mediapipe/util/tracking/tracking.h"
namespace mediapipe {
namespace {
constexpr int kDetectionUpdateTimeOutMS = 5000;
constexpr char kDetectionsTag[] = "DETECTIONS";
constexpr char kDetectionBoxesTag[] = "DETECTION_BOXES";
constexpr char kDetectionListTag[] = "DETECTION_LIST";
constexpr char kTrackingBoxesTag[] = "TRACKING_BOXES";
constexpr char kCancelObjectIdTag[] = "CANCEL_OBJECT_ID";
// Appends the contents of |src| (taken by value) to the end of |dst|.
void MoveIds(std::vector<int>* dst, std::vector<int> src) {
  dst->reserve(dst->size() + src.size());
  for (int id : src) {
    dst->push_back(id);
  }
}
// Returns the current input packet's timestamp converted from microseconds
// to milliseconds (truncating integer division).
int64 GetInputTimestampMs(::mediapipe::CalculatorContext* cc) {
  return cc->InputTimestamp().Microseconds() / 1000;  // 1 ms = 1000 us.
}
// Converts a Mediapipe Detection Proto to a TrackedDetection class.
std::unique_ptr<TrackedDetection> GetTrackedDetectionFromDetection(
const Detection& detection, int64 timestamp) {
std::unique_ptr<TrackedDetection> tracked_detection =
absl::make_unique<TrackedDetection>(detection.detection_id(), timestamp);
const float top = detection.location_data().relative_bounding_box().ymin();
const float bottom =
detection.location_data().relative_bounding_box().ymin() +
detection.location_data().relative_bounding_box().height();
const float left = detection.location_data().relative_bounding_box().xmin();
const float right = detection.location_data().relative_bounding_box().xmin() +
detection.location_data().relative_bounding_box().width();
NormalizedRect bounding_box;
bounding_box.set_x_center((left + right) / 2.f);
bounding_box.set_y_center((top + bottom) / 2.f);
bounding_box.set_height(bottom - top);
bounding_box.set_width(right - left);
tracked_detection->set_bounding_box(bounding_box);
for (int i = 0; i < detection.label_size(); ++i) {
tracked_detection->AddLabel(detection.label(i), detection.score(i));
}
return tracked_detection;
}
// Converts a TrackedDetection class to a Mediapipe Detection Proto whose
// location is the axis-aligned bounding box of the (possibly rotated)
// tracked quad, carrying over the detection id and all label/score pairs.
Detection GetAxisAlignedDetectionFromTrackedDetection(
    const TrackedDetection& tracked_detection) {
  Detection detection;
  LocationData* location_data = detection.mutable_location_data();

  auto corners = tracked_detection.GetCorners();

  // Compute the axis-aligned hull of the four corners. The running maxima
  // must be seeded with lowest(): numeric_limits<float>::min() is the
  // smallest *positive* float, which would yield a wrong maximum whenever
  // all coordinates are negative.
  float x_min = std::numeric_limits<float>::max();
  float x_max = std::numeric_limits<float>::lowest();
  float y_min = std::numeric_limits<float>::max();
  float y_max = std::numeric_limits<float>::lowest();
  for (int i = 0; i < 4; ++i) {
    x_min = std::min(x_min, corners[i].x());
    x_max = std::max(x_max, corners[i].x());
    y_min = std::min(y_min, corners[i].y());
    y_max = std::max(y_max, corners[i].y());
  }
  location_data->set_format(LocationData::RELATIVE_BOUNDING_BOX);
  LocationData::RelativeBoundingBox* relative_bbox =
      location_data->mutable_relative_bounding_box();
  relative_bbox->set_xmin(x_min);
  relative_bbox->set_ymin(y_min);
  relative_bbox->set_width(x_max - x_min);
  relative_bbox->set_height(y_max - y_min);

  // Use previous id which is the id the object when it's first detected.
  if (tracked_detection.previous_id() > 0) {
    detection.set_detection_id(tracked_detection.previous_id());
  } else {
    detection.set_detection_id(tracked_detection.unique_id());
  }

  for (const auto& label_and_score : tracked_detection.label_to_score_map()) {
    detection.add_label(label_and_score.first);
    detection.add_score(label_and_score.second);
  }
  return detection;
}
} // namespace
// TrackedDetectionManagerCalculator accepts detections and tracking results at
// different frame rate for real time tracking of targets.
// Input:
// DETECTIONS: A vector<Detection> of newly detected targets.
// TRACKING_BOXES: A TimedBoxProtoList which contains a list of tracked boxes
// from previous detections.
//
// Output:
// CANCEL_OBJECT_ID: Ids of targets that are missing/lost such that it should
// be removed from tracking.
// DETECTIONS: List of detections that are being tracked.
// DETECTION_BOXES: List of bounding boxes of detections that are being
// tracked.
//
// Usage example:
// node {
// calculator: "TrackedDetectionManagerCalculator"
// input_stream: "DETECTIONS:detections"
// input_stream: "TRACKING_BOXES:boxes"
// output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
// output_stream: "DETECTIONS:output_detections"
// }
// Calculator implementation; stream semantics are described in the usage
// comment above.
class TrackedDetectionManagerCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc);

  ::mediapipe::Status Process(CalculatorContext* cc) override;

 private:
  // Adds new list of detections to |waiting_for_update_detections_|.
  void AddDetectionList(const DetectionList& detection_list,
                        CalculatorContext* cc);
  // Same as above, for a plain vector of detections.
  void AddDetections(const std::vector<Detection>& detections,
                     CalculatorContext* cc);

  // Manages existing and new detections.
  TrackedDetectionManager tracked_detection_manager_;

  // Set of detections that are not up to date yet, keyed by detection unique
  // id. These detections will be added to the detection manager until they
  // got updated from the box tracker.
  absl::node_hash_map<int, std::unique_ptr<TrackedDetection>>
      waiting_for_update_detections_;
};
REGISTER_CALCULATOR(TrackedDetectionManagerCalculator);
// Declares the packet types for every optional input and output stream.
::mediapipe::Status TrackedDetectionManagerCalculator::GetContract(
    CalculatorContract* cc) {
  auto& inputs = cc->Inputs();
  auto& outputs = cc->Outputs();

  if (inputs.HasTag(kDetectionsTag)) {
    inputs.Tag(kDetectionsTag).Set<std::vector<Detection>>();
  }
  if (inputs.HasTag(kDetectionListTag)) {
    inputs.Tag(kDetectionListTag).Set<DetectionList>();
  }
  if (inputs.HasTag(kTrackingBoxesTag)) {
    inputs.Tag(kTrackingBoxesTag).Set<TimedBoxProtoList>();
  }

  if (outputs.HasTag(kCancelObjectIdTag)) {
    outputs.Tag(kCancelObjectIdTag).Set<int>();
  }
  if (outputs.HasTag(kDetectionsTag)) {
    outputs.Tag(kDetectionsTag).Set<std::vector<Detection>>();
  }
  if (outputs.HasTag(kDetectionBoxesTag)) {
    outputs.Tag(kDetectionBoxesTag).Set<std::vector<NormalizedRect>>();
  }
  return ::mediapipe::OkStatus();
}
// Consumes tracker updates and new detections for the current timestamp.
//
// For TRACKING_BOXES input: promotes pending detections whose id matches a
// tracked box into the manager, updates box locations, prunes obsolete and
// out-of-view detections (emitting their ids on CANCEL_OBJECT_ID), and
// outputs the currently-synced detections and their bounding boxes.
// For DETECTIONS / DETECTION_LIST input: queues new detections until their
// first tracker update arrives.
::mediapipe::Status TrackedDetectionManagerCalculator::Process(
    CalculatorContext* cc) {
  // Note: use the declared tag constant (was a raw "TRACKING_BOXES" string)
  // for consistency with GetContract and the other tag checks below.
  if (cc->Inputs().HasTag(kTrackingBoxesTag) &&
      !cc->Inputs().Tag(kTrackingBoxesTag).IsEmpty()) {
    const TimedBoxProtoList& tracked_boxes =
        cc->Inputs().Tag(kTrackingBoxesTag).Get<TimedBoxProtoList>();

    // Collect all detections that are removed.
    auto removed_detection_ids = absl::make_unique<std::vector<int>>();
    for (const TimedBoxProto& tracked_box : tracked_boxes.box()) {
      NormalizedRect bounding_box;
      bounding_box.set_x_center((tracked_box.left() + tracked_box.right()) /
                                2.f);
      bounding_box.set_y_center((tracked_box.bottom() + tracked_box.top()) /
                                2.f);
      bounding_box.set_height(tracked_box.bottom() - tracked_box.top());
      bounding_box.set_width(tracked_box.right() - tracked_box.left());
      bounding_box.set_rotation(tracked_box.rotation());
      // First check if this box updates a detection that's waiting for
      // update from the tracker.
      auto waiting_for_update_detection_iter =
          waiting_for_update_detections_.find(tracked_box.id());
      if (waiting_for_update_detection_iter !=
          waiting_for_update_detections_.end()) {
        // Add the detection and remove duplicated detections.
        auto removed_ids = tracked_detection_manager_.AddDetection(
            std::move(waiting_for_update_detection_iter->second));
        MoveIds(removed_detection_ids.get(), std::move(removed_ids));
        waiting_for_update_detections_.erase(
            waiting_for_update_detection_iter);
      }
      auto removed_ids = tracked_detection_manager_.UpdateDetectionLocation(
          tracked_box.id(), bounding_box, tracked_box.time_msec());
      MoveIds(removed_detection_ids.get(), std::move(removed_ids));
    }

    // TODO: Should be handled automatically in detection manager.
    auto removed_ids = tracked_detection_manager_.RemoveObsoleteDetections(
        GetInputTimestampMs(cc) - kDetectionUpdateTimeOutMS);
    MoveIds(removed_detection_ids.get(), std::move(removed_ids));

    // TODO: Should be handled automatically in detection manager.
    removed_ids = tracked_detection_manager_.RemoveOutOfViewDetections();
    MoveIds(removed_detection_ids.get(), std::move(removed_ids));

    if (!removed_detection_ids->empty() &&
        cc->Outputs().HasTag(kCancelObjectIdTag)) {
      auto timestamp = cc->InputTimestamp();
      for (int box_id : *removed_detection_ids) {
        // The timestamp is incremented (by 1 us) because currently the box
        // tracker calculator only accepts one cancel object ID for any given
        // timestamp.
        cc->Outputs()
            .Tag(kCancelObjectIdTag)
            .AddPacket(mediapipe::MakePacket<int>(box_id).At(timestamp++));
      }
    }

    // Output detections and corresponding bounding boxes.
    const auto& all_detections =
        tracked_detection_manager_.GetAllTrackedDetections();
    auto output_detections = absl::make_unique<std::vector<Detection>>();
    auto output_boxes = absl::make_unique<std::vector<NormalizedRect>>();
    for (const auto& detection_ptr : all_detections) {
      const auto& detection = *detection_ptr.second;
      // Only output detections that are synced.
      if (detection.last_updated_timestamp() <
          cc->InputTimestamp().Microseconds() / 1000) {
        continue;
      }
      output_detections->emplace_back(
          GetAxisAlignedDetectionFromTrackedDetection(detection));
      output_boxes->emplace_back(detection.bounding_box());
    }
    if (cc->Outputs().HasTag(kDetectionsTag)) {
      cc->Outputs()
          .Tag(kDetectionsTag)
          .Add(output_detections.release(), cc->InputTimestamp());
    }
    if (cc->Outputs().HasTag(kDetectionBoxesTag)) {
      cc->Outputs()
          .Tag(kDetectionBoxesTag)
          .Add(output_boxes.release(), cc->InputTimestamp());
    }
  }

  if (cc->Inputs().HasTag(kDetectionsTag) &&
      !cc->Inputs().Tag(kDetectionsTag).IsEmpty()) {
    // Bind by const reference; Get() returns a reference, so copying the
    // whole vector here was unnecessary.
    const auto& detections =
        cc->Inputs().Tag(kDetectionsTag).Get<std::vector<Detection>>();
    AddDetections(detections, cc);
  }
  if (cc->Inputs().HasTag(kDetectionListTag) &&
      !cc->Inputs().Tag(kDetectionListTag).IsEmpty()) {
    const auto& detection_list =
        cc->Inputs().Tag(kDetectionListTag).Get<DetectionList>();
    AddDetectionList(detection_list, cc);
  }
  return ::mediapipe::OkStatus();
}
void TrackedDetectionManagerCalculator::AddDetectionList(
const DetectionList& detection_list, CalculatorContext* cc) {
for (const auto& detection : detection_list.detection()) {
// Convert from microseconds to milliseconds.
std::unique_ptr<TrackedDetection> new_detection =
GetTrackedDetectionFromDetection(
detection, cc->InputTimestamp().Microseconds() / 1000);
const int id = new_detection->unique_id();
waiting_for_update_detections_[id] = std::move(new_detection);
}
}
void TrackedDetectionManagerCalculator::AddDetections(
const std::vector<Detection>& detections, CalculatorContext* cc) {
for (const auto& detection : detections) {
// Convert from microseconds to milliseconds.
std::unique_ptr<TrackedDetection> new_detection =
GetTrackedDetectionFromDetection(
detection, cc->InputTimestamp().Microseconds() / 1000);
const int id = new_detection->unique_id();
waiting_for_update_detections_[id] = std::move(new_detection);
}
}
} // namespace mediapipe

View File

@ -0,0 +1,709 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "mediapipe/calculators/video/box_tracker_calculator.pb.h"
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/advanced_proto_inc.h"
#include "mediapipe/framework/port/file_helpers.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_highgui_inc.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
#include "mediapipe/util/tracking/tracking.pb.h"
#ifdef __APPLE__
#include <CoreFoundation/CoreFoundation.h>
#endif // defined(__APPLE__)
namespace mediapipe {
namespace {
using ::testing::FloatNear;
using ::testing::Test;
// Returns the directory that holds the test data files (binary graphs and
// test images). The location differs per platform.
std::string GetTestDir() {
#ifdef __APPLE__
  // On Apple platforms the test data is packaged inside the main bundle.
  char path[1024];
  CFURLRef bundle_url = CFBundleCopyBundleURL(CFBundleGetMainBundle());
  CFURLGetFileSystemRepresentation(
      bundle_url, true, reinterpret_cast<UInt8*>(path), sizeof(path));
  CFRelease(bundle_url);
  return ::mediapipe::file::JoinPath(path, "testdata");
#elif defined(__ANDROID__)
  // On Android the test data is deployed relative to the current working
  // directory of the test process.
  char path[1024];
  getcwd(path, sizeof(path));
  return ::mediapipe::file::JoinPath(path,
                                     "mediapipe/calculators/video/testdata");
#else
  return ::mediapipe::file::JoinPath(
      "./",
      // This should match the path of the output files
      // of the genrule() that generates test model files.
      "mediapipe/calculators/video/testdata");
#endif  // defined(__APPLE__)
}
bool LoadBinaryTestGraph(const std::string& graph_path,
CalculatorGraphConfig* config) {
std::ifstream ifs;
ifs.open(graph_path.c_str());
proto_ns::io::IstreamInputStream in_stream(&ifs);
bool success = config->ParseFromZeroCopyStream(&in_stream);
ifs.close();
if (!success) {
LOG(ERROR) << "could not parse test graph: " << graph_path;
}
return success;
}
// Fixture that runs the box-tracking graphs ("tracker.binarypb" and
// "parallel_tracker.binarypb") on a synthetic sequence produced by cropping
// shifted windows out of one source image, so the apparent motion of the
// tracked box is known exactly and can be asserted per frame.
class TrackingGraphTest : public Test {
 protected:
  TrackingGraphTest() {}
  void SetUp() override {
    test_dir_ = GetTestDir();
    const auto graph_path = file::JoinPath(test_dir_, "tracker.binarypb");
    ASSERT_TRUE(LoadBinaryTestGraph(graph_path, &config_));
    original_image_ = cv::imread(file::JoinPath(test_dir_, "lenna.png"));
    CreateInputFramesFromOriginalImage(kNumImages, kTranslationStep,
                                       &input_frames_packets_);
    const auto& first_input_img = input_frames_packets_[0].Get<ImageFrame>();
    const int img_width = first_input_img.Width();
    const int img_height = first_input_img.Height();
    // Express the per-frame pixel shift in normalized image coordinates,
    // which is the coordinate system TimedBoxProto uses.
    translation_step_x_ = kTranslationStep / static_cast<float>(img_width);
    translation_step_y_ = kTranslationStep / static_cast<float>(img_height);
    // Create a new config and packet dump vectors to store output.
    mediapipe::CalculatorGraphConfig config_copy = config_;
    mediapipe::tool::AddVectorSink("boxes", &config_copy, &output_packets_);
    mediapipe::tool::AddVectorSink("ra_boxes", &config_copy,
                                   &random_access_results_packets_);
    // Initialize graph
    MP_ASSERT_OK(graph_.Initialize(config_copy));
    // The parallel-processing variant of the graph shares the same output
    // sink vectors as the ordinary graph.
    const auto parallel_graph_path =
        file::JoinPath(test_dir_, "parallel_tracker.binarypb");
    CalculatorGraphConfig parallel_config;
    ASSERT_TRUE(LoadBinaryTestGraph(parallel_graph_path, &parallel_config));
    mediapipe::tool::AddVectorSink("boxes", &parallel_config, &output_packets_);
    mediapipe::tool::AddVectorSink("ra_boxes", &parallel_config,
                                   &random_access_results_packets_);
    MP_ASSERT_OK(parallel_graph_.Initialize(parallel_config));
  }
  // Builds `num_images` input frame packets by cropping windows of the
  // original image, each shifted by `translation_step` pixels.
  void CreateInputFramesFromOriginalImage(
      int num_images, int translation_step,
      std::vector<Packet>* input_frames_packets);
  void TearDown() override {
    output_packets_.clear();
    random_access_results_packets_.clear();
  }
  // Builds a TimedBoxProtoList with one box per entry of the vectors; ids are
  // assigned sequentially from 0 and time is taken from `timestamp`.
  std::unique_ptr<TimedBoxProtoList> MakeBoxList(
      const Timestamp& timestamp, const std::vector<bool>& is_quad_tracking,
      const std::vector<bool>& is_pnp_tracking,
      const std::vector<bool>& reacquisition) const;
  void RunGraphWithSidePacketsAndInputs(
      const std::map<std::string, mediapipe::Packet>& side_packets,
      const mediapipe::Packet& start_pos_packet);
  // Utility functions used to judge if a given quad or box is near to the
  // groundtruth location at a given frame.
  // Examines that box.reacquisition() equals `reacquisition`.
  // `frame` can be a float number to account for inter-frame interpolation.
  void ExpectBoxAtFrame(const TimedBoxProto& box, float frame,
                        bool reacquisition);
  // Examines that box.aspect_ratio() equals `aspect_ratio` if aspect_ratio
  // is positive.
  void ExpectQuadAtFrame(const TimedBoxProto& box, float frame,
                         float aspect_ratio, bool reacquisition);
  // Utility function to judge if two quads are near to each other.
  void ExpectQuadNear(const TimedBoxProto& box1, const TimedBoxProto& box2);
  // Builds a random-access tracking request: one (start, end) box pair per
  // entry of the timestamp vectors.
  std::unique_ptr<TimedBoxProtoList> CreateRandomAccessTrackingBoxList(
      const std::vector<Timestamp>& start_timestamps,
      const std::vector<Timestamp>& end_timestamps) const;
  CalculatorGraph graph_;
  CalculatorGraph parallel_graph_;
  CalculatorGraphConfig config_;
  std::string test_dir_;
  cv::Mat original_image_;
  std::vector<Packet> input_frames_packets_;
  std::vector<mediapipe::Packet> output_packets_;
  std::vector<mediapipe::Packet> random_access_results_packets_;
  float translation_step_x_;  // normalized translation step in x direction
  float translation_step_y_;  // normalized translation step in y direction
  // Initial tracked box: centered, covering half the image in each dimension.
  static constexpr float kInitialBoxHalfWidthNormalized = 0.25f;
  static constexpr float kInitialBoxHalfHeightNormalized = 0.25f;
  static constexpr float kImageAspectRatio = 1.0f;  // for lenna.png
  static constexpr float kInitialBoxLeft =
      0.5f - kInitialBoxHalfWidthNormalized;
  static constexpr float kInitialBoxRight =
      0.5f + kInitialBoxHalfWidthNormalized;
  static constexpr float kInitialBoxTop =
      0.5f - kInitialBoxHalfHeightNormalized;
  static constexpr float kInitialBoxBottom =
      0.5f + kInitialBoxHalfHeightNormalized;
  // Interval between consecutive input frames, in microseconds.
  static constexpr int kFrameIntervalUs = 30000;
  static constexpr int kNumImages = 8;
  // Each image is shifted to the right and bottom by kTranslationStep
  // pixels compared with the previous image.
  static constexpr int kTranslationStep = 10;
  static constexpr float kEqualityTolerance = 3e-4f;
};
// Verifies that an axis-aligned tracked box sits where the synthetic camera
// motion predicts it at (possibly fractional) frame index `frame`.
void TrackingGraphTest::ExpectBoxAtFrame(const TimedBoxProto& box, float frame,
                                         bool reacquisition) {
  // The crop window slides right/down, so the box appears to move left/up by
  // one normalized translation step per frame.
  const float shift_x = frame * translation_step_x_;
  const float shift_y = frame * translation_step_y_;
  EXPECT_EQ(box.reacquisition(), reacquisition);
  EXPECT_TRUE(box.has_rotation());
  EXPECT_THAT(box.rotation(), FloatNear(0, kEqualityTolerance));
  EXPECT_THAT(box.left(),
              FloatNear(kInitialBoxLeft - shift_x, kEqualityTolerance));
  EXPECT_THAT(box.top(),
              FloatNear(kInitialBoxTop - shift_y, kEqualityTolerance));
  EXPECT_THAT(box.bottom(),
              FloatNear(kInitialBoxBottom - shift_y, kEqualityTolerance));
  EXPECT_THAT(box.right(),
              FloatNear(kInitialBoxRight - shift_x, kEqualityTolerance));
}
// Verifies that a tracked quad sits where the synthetic camera motion
// predicts it at (possibly fractional) frame index `frame`, and that its
// aspect ratio matches `aspect_ratio` when the latter is positive.
void TrackingGraphTest::ExpectQuadAtFrame(const TimedBoxProto& box, float frame,
                                          float aspect_ratio,
                                          bool reacquisition) {
  EXPECT_TRUE(box.has_quad()) << "quad must exist!";
  if (aspect_ratio > 0) {
    EXPECT_TRUE(box.has_aspect_ratio());
    EXPECT_NEAR(box.aspect_ratio(), aspect_ratio, kEqualityTolerance);
  }
  EXPECT_EQ(box.reacquisition(), reacquisition);
  const auto& quad = box.quad();
  EXPECT_EQ(8, quad.vertices_size())
      << "quad has only " << box.quad().vertices_size() << " vertices";
  const float shift_x = frame * translation_step_x_;
  const float shift_y = frame * translation_step_y_;
  // Vertices are interleaved (x, y) pairs (see MakeBoxList); spot-check the
  // top-left corner, the bottom-left y, and the bottom-right x.
  EXPECT_THAT(quad.vertices(0),
              FloatNear(kInitialBoxLeft - shift_x, kEqualityTolerance));
  EXPECT_THAT(quad.vertices(1),
              FloatNear(kInitialBoxTop - shift_y, kEqualityTolerance));
  EXPECT_THAT(quad.vertices(3),
              FloatNear(kInitialBoxBottom - shift_y, kEqualityTolerance));
  EXPECT_THAT(quad.vertices(4),
              FloatNear(kInitialBoxRight - shift_x, kEqualityTolerance));
}
// Verifies that two quads have the full 8 vertices each and agree pairwise
// within kEqualityTolerance.
void TrackingGraphTest::ExpectQuadNear(const TimedBoxProto& box1,
                                       const TimedBoxProto& box2) {
  EXPECT_TRUE(box1.has_quad());
  EXPECT_TRUE(box2.has_quad());
  EXPECT_EQ(8, box1.quad().vertices_size())
      << "quad has only " << box1.quad().vertices_size() << " vertices";
  EXPECT_EQ(8, box2.quad().vertices_size())
      << "quad has only " << box2.quad().vertices_size() << " vertices";
  const int num_vertices = box1.quad().vertices_size();
  for (int i = 0; i < num_vertices; ++i) {
    EXPECT_NEAR(box1.quad().vertices(i), box2.quad().vertices(i),
                kEqualityTolerance);
  }
}
// Builds the start/restart box list handed to the tracker. One box is
// emitted per entry of the flag vectors; ids are assigned sequentially
// from 0 and all boxes share the same initial geometry.
std::unique_ptr<TimedBoxProtoList> TrackingGraphTest::MakeBoxList(
    const Timestamp& timestamp, const std::vector<bool>& is_quad_tracking,
    const std::vector<bool>& is_pnp_tracking,
    const std::vector<bool>& reacquisition) const {
  auto box_list = absl::make_unique<TimedBoxProtoList>();
  for (int i = 0; i < is_quad_tracking.size(); ++i) {
    TimedBoxProto* box = box_list->add_box();
    if (!is_quad_tracking[i]) {
      // Axis-aligned box: only the four edges are needed.
      box->set_left(kInitialBoxLeft);
      box->set_right(kInitialBoxRight);
      box->set_top(kInitialBoxTop);
      box->set_bottom(kInitialBoxBottom);
    } else {
      // Quad: vertices as interleaved (x, y) pairs, starting at the top-left
      // corner and going through bottom-left, bottom-right, top-right.
      auto* quad = box->mutable_quad();
      quad->add_vertices(kInitialBoxLeft);
      quad->add_vertices(kInitialBoxTop);
      quad->add_vertices(kInitialBoxLeft);
      quad->add_vertices(kInitialBoxBottom);
      quad->add_vertices(kInitialBoxRight);
      quad->add_vertices(kInitialBoxBottom);
      quad->add_vertices(kInitialBoxRight);
      quad->add_vertices(kInitialBoxTop);
      // Providing an aspect ratio selects perspective (PnP) quad tracking.
      if (is_pnp_tracking[i]) {
        box->set_aspect_ratio(kImageAspectRatio);
      }
    }
    box->set_id(i);
    // Timestamp.Value() is in microseconds; the proto wants milliseconds.
    box->set_time_msec(timestamp.Value() / 1000);
    box->set_reacquisition(reacquisition[i]);
  }
  return box_list;
}
void TrackingGraphTest::CreateInputFramesFromOriginalImage(
int num_images, int translation_step,
std::vector<Packet>* input_frames_packets) {
const int crop_width = original_image_.cols - num_images * translation_step;
const int crop_height = original_image_.rows - num_images * translation_step;
for (int i = 0; i < num_images; ++i) {
cv::Rect roi(i * translation_step, i * translation_step, crop_width,
crop_height);
cv::Mat cropped_img = cv::Mat(original_image_, roi);
auto cropped_image_frame = absl::make_unique<ImageFrame>(
ImageFormat::SRGB, crop_width, crop_height, cropped_img.step[0],
cropped_img.data, ImageFrame::PixelDataDeleter::kNone);
Timestamp curr_timestamp = Timestamp(i * kFrameIntervalUs);
Packet image_packet =
Adopt(cropped_image_frame.release()).At(curr_timestamp);
input_frames_packets->push_back(image_packet);
}
}
// Runs graph_ to completion: starts it with `side_packets`, seeds the
// tracker via the "start_pos" stream with `start_pos_packet`, then feeds
// every cached input frame, waiting for the graph to go idle after each
// frame so outputs are produced deterministically before the next input.
void TrackingGraphTest::RunGraphWithSidePacketsAndInputs(
    const std::map<std::string, mediapipe::Packet>& side_packets,
    const mediapipe::Packet& start_pos_packet) {
  // Start running the graph.
  MP_EXPECT_OK(graph_.StartRun(side_packets));
  MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_pos_packet));
  // Iterate by const reference to avoid copying each Packet per iteration.
  for (const auto& frame_packet : input_frames_packets_) {
    MP_EXPECT_OK(
        graph_.AddPacketToInputStream("image_cpu_frames", frame_packet));
    MP_EXPECT_OK(graph_.WaitUntilIdle());
  }
  MP_EXPECT_OK(graph_.CloseAllInputStreams());
  MP_EXPECT_OK(graph_.WaitUntilDone());
}
// Builds a random-access tracking request list: for each i, a consecutive
// (start, end) pair of boxes at start_timestamps[i] / end_timestamps[i],
// each configured for perspective quad tracking without reacquisition.
std::unique_ptr<TimedBoxProtoList>
TrackingGraphTest::CreateRandomAccessTrackingBoxList(
    const std::vector<Timestamp>& start_timestamps,
    const std::vector<Timestamp>& end_timestamps) const {
  CHECK_EQ(start_timestamps.size(), end_timestamps.size());
  auto ra_boxes = absl::make_unique<TimedBoxProtoList>();
  const std::vector<bool> kQuad{true};
  const std::vector<bool> kPnp{true};
  const std::vector<bool> kNoReacquisition{false};
  for (int i = 0; i < start_timestamps.size(); ++i) {
    *ra_boxes->add_box() =
        MakeBoxList(start_timestamps[i], kQuad, kPnp, kNoReacquisition)
            ->box(0);
    *ra_boxes->add_box() =
        MakeBoxList(end_timestamps[i], kQuad, kPnp, kNoReacquisition)->box(0);
  }
  return ra_boxes;
}
// Tracks a single axis-aligned box (4DoF similarity tracking) across the
// synthetic sequence and checks that every output frame reports the box at
// the position predicted by the known camera motion.
TEST_F(TrackingGraphTest, BasicBoxTrackingSanityCheck) {
  // Create input side packets.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets.insert(std::make_pair("analysis_downsample_factor",
                                     mediapipe::MakePacket<float>(1.0f)));
  side_packets.insert(std::make_pair(
      "calculator_options",
      mediapipe::MakePacket<CalculatorOptions>(CalculatorOptions())));
  // Run the graph with input side packets, start_pos, and input image frames.
  Timestamp start_box_time = input_frames_packets_[0].Timestamp();
  // is_quad_tracking is used to indicate whether to track quad for each
  // individual box.
  std::vector<bool> is_quad_tracking{false};
  // is_pnp_tracking is used to indicate whether to use perspective transform
  // to track quad.
  std::vector<bool> is_pnp_tracking{false};
  // is_reacquisition is used to indicate whether to enable reacquisition for
  // the box.
  std::vector<bool> is_reacquisition{false};
  auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking,
                                    is_pnp_tracking, is_reacquisition);
  Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time);
  RunGraphWithSidePacketsAndInputs(side_packets, start_pos_packet);
  // Expect one output box list per input frame; at frame i the box should
  // have moved by i translation steps.
  EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
  for (int i = 0; i < output_packets_.size(); ++i) {
    const TimedBoxProtoList& boxes =
        output_packets_[i].Get<TimedBoxProtoList>();
    EXPECT_EQ(is_quad_tracking.size(), boxes.box_size());
    ExpectBoxAtFrame(boxes.box(0), i, false);
  }
}
// Tracks three boxes with different degrees of freedom simultaneously
// (8DoF homography quad, 6DoF perspective quad, 4DoF box) and checks each
// against the known synthetic motion per frame.
TEST_F(TrackingGraphTest, BasicQuadTrackingSanityCheck) {
  // Create input side packets.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets.insert(std::make_pair("analysis_downsample_factor",
                                     mediapipe::MakePacket<float>(1.0f)));
  CalculatorOptions calculator_options;
  calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
      ->mutable_tracker_options()
      ->mutable_track_step_options()
      ->set_tracking_degrees(
          TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
  side_packets.insert(std::make_pair(
      "calculator_options",
      mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
  Timestamp start_box_time = input_frames_packets_[0].Timestamp();
  // Box id 0 uses quad tracking with 8DoF homography transform.
  // Box id 1 uses quad tracking with 6DoF perspective transform.
  // Box id 2 uses box tracking with 4DoF similarity transform.
  std::vector<bool> is_quad_tracking{true, true, false};
  std::vector<bool> is_pnp_tracking{false, true, false};
  std::vector<bool> is_reacquisition{true, false, true};
  auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking,
                                    is_pnp_tracking, is_reacquisition);
  Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time);
  RunGraphWithSidePacketsAndInputs(side_packets, start_pos_packet);
  EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
  for (int i = 0; i < output_packets_.size(); ++i) {
    const TimedBoxProtoList& boxes =
        output_packets_[i].Get<TimedBoxProtoList>();
    EXPECT_EQ(is_quad_tracking.size(), boxes.box_size());
    for (int j = 0; j < boxes.box_size(); ++j) {
      const TimedBoxProto& box = boxes.box(j);
      // Boxes may arrive in any order; dispatch on the box id, not j.
      if (is_quad_tracking[box.id()]) {
        ExpectQuadAtFrame(box, i,
                          is_pnp_tracking[box.id()] ? kImageAspectRatio : -1.0f,
                          is_reacquisition[box.id()]);
      } else {
        ExpectBoxAtFrame(box, i, is_reacquisition[box.id()]);
      }
    }
  }
}
// Verifies random-access (out-of-order) tracking: a forward request from
// frame 0 to frame 2 must match the normal streaming tracking result, and a
// reverse request must land back at the start position. Both the ordinary
// and the parallel graph variants are exercised.
TEST_F(TrackingGraphTest, TestRandomAccessTrackingResults) {
  // Create input side packets.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets.insert(std::make_pair("analysis_downsample_factor",
                                     mediapipe::MakePacket<float>(1.0f)));
  CalculatorOptions calculator_options;
  calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
      ->mutable_tracker_options()
      ->mutable_track_step_options()
      ->set_tracking_degrees(
          TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
  side_packets.insert(std::make_pair(
      "calculator_options",
      mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
  ASSERT_GT(input_frames_packets_.size(), 2);  // at least 3 frames
  ASSERT_TRUE(input_frames_packets_[2].Timestamp() -
                  input_frames_packets_[1].Timestamp() >
              TimestampDiff(1000));
  constexpr int start_frame = 0;
  Timestamp start_box_time = input_frames_packets_[start_frame].Timestamp();
  auto start_box_list =
      MakeBoxList(start_box_time, std::vector<bool>{true},
                  std::vector<bool>{true}, std::vector<bool>{false});
  constexpr int end_frame = 2;
  Timestamp end_box_time = input_frames_packets_[end_frame].Timestamp();
  // Also test reverse random access tracking.
  // This offset of 1ms simulates the case where the start query timestamp
  // is not any existing frame timestamp. In reality, it's highly encouraged
  // to have the start query timestamp be aligned with frame timestamp.
  constexpr int reverse_start_frame = 1;
  Timestamp reverse_start_box_time =
      input_frames_packets_[reverse_start_frame].Timestamp() + 1000;
  auto ra_boxes = CreateRandomAccessTrackingBoxList(
      {start_box_time, reverse_start_box_time}, {end_box_time, start_box_time});
  Packet ra_packet = Adopt(ra_boxes.release()).At(start_box_time);
  Packet start_packet = Adopt(start_box_list.release()).At(start_box_time);
  // Start running the ordinary graph, verify random access produces the same
  // result as normal tracking.
  MP_EXPECT_OK(graph_.StartRun(side_packets));
  MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_packet));
  for (auto frame_packet : input_frames_packets_) {
    MP_EXPECT_OK(
        graph_.AddPacketToInputStream("image_cpu_frames", frame_packet));
    Packet track_time_packet = Adopt(new int(0)).At(frame_packet.Timestamp());
    MP_EXPECT_OK(
        graph_.AddPacketToInputStream("track_time", track_time_packet));
    MP_EXPECT_OK(graph_.WaitUntilIdle());
  }
  MP_EXPECT_OK(graph_.AddPacketToInputStream("ra_track", ra_packet));
  MP_EXPECT_OK(graph_.CloseAllInputStreams());
  MP_EXPECT_OK(graph_.WaitUntilDone());
  EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
  const TimedBoxProtoList tracking_result =
      output_packets_[end_frame].Get<TimedBoxProtoList>();
  EXPECT_EQ(1, tracking_result.box_size());
  // Should have 1 random access packet.
  EXPECT_EQ(1, random_access_results_packets_.size());
  const TimedBoxProtoList& ra_result =
      random_access_results_packets_[0].Get<TimedBoxProtoList>();
  // Two box tracking results. One for comparison with normal tracking. The
  // other for reverse random access tracking.
  EXPECT_EQ(2, ra_result.box_size());
  // Check that random access tracking has the same result as normal tracking.
  ExpectQuadNear(tracking_result.box(0), ra_result.box(0));
  ExpectQuadAtFrame(ra_result.box(0), end_frame - start_frame,
                    kImageAspectRatio, false);
  ExpectQuadAtFrame(ra_result.box(1), start_frame - reverse_start_frame - 1,
                    kImageAspectRatio, false);
  // Clear output and ra result packet vector before testing the parallel
  // graph.
  TearDown();
  // Start running the parallel graph, verify random access produces the same
  // result as normal tracking.
  MP_EXPECT_OK(parallel_graph_.StartRun(side_packets));
  MP_EXPECT_OK(
      parallel_graph_.AddPacketToInputStream("start_pos", start_packet));
  for (auto frame_packet : input_frames_packets_) {
    MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("image_cpu_frames",
                                                        frame_packet));
    MP_EXPECT_OK(parallel_graph_.WaitUntilIdle());
  }
  MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("ra_track", ra_packet));
  MP_EXPECT_OK(parallel_graph_.CloseAllInputStreams());
  MP_EXPECT_OK(parallel_graph_.WaitUntilDone());
  EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
  const TimedBoxProtoList parallel_tracking_result =
      output_packets_[end_frame].Get<TimedBoxProtoList>();
  EXPECT_EQ(1, parallel_tracking_result.box_size());
  // Should have only 1 random access packet.
  EXPECT_EQ(1, random_access_results_packets_.size());
  const TimedBoxProtoList& parallel_ra_result =
      random_access_results_packets_[0].Get<TimedBoxProtoList>();
  EXPECT_EQ(2, parallel_ra_result.box_size());
  // Check that random access tracking has the same result as normal tracking.
  ExpectQuadNear(parallel_tracking_result.box(0), parallel_ra_result.box(0));
  ExpectQuadAtFrame(parallel_ra_result.box(0), end_frame - start_frame,
                    kImageAspectRatio, false);
  ExpectQuadAtFrame(parallel_ra_result.box(1),
                    start_frame - reverse_start_frame - 1, kImageAspectRatio,
                    false);
}
// Tests what happens when random access request timestamps are
// outside of cache: with a cache one frame smaller than the sequence, every
// request below touches an uncached timestamp and must yield no result.
TEST_F(TrackingGraphTest, TestRandomAccessTrackingTimestamps) {
  // Create input side packets.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets.insert(std::make_pair("analysis_downsample_factor",
                                     mediapipe::MakePacket<float>(1.0f)));
  CalculatorOptions calculator_options;
  calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
      ->mutable_tracker_options()
      ->mutable_track_step_options()
      ->set_tracking_degrees(
          TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
  // We intentionally don't cache all frames, to see what happens when
  // random access tracking request time falls outside cache range.
  calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
      ->set_streaming_track_data_cache_size(input_frames_packets_.size() - 1);
  side_packets.insert(std::make_pair(
      "calculator_options",
      mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
  // Set up random access boxes. Each (start, end) pair below has at least
  // one endpoint a full second before/after the cached frame range.
  const int num_frames = input_frames_packets_.size();
  const int64 usec_in_sec = 1000000;
  std::vector<Timestamp> start_timestamps{
      input_frames_packets_[0].Timestamp() - usec_in_sec,               // forward
      input_frames_packets_[0].Timestamp(),                             // forward
      input_frames_packets_[1].Timestamp(),                             // forward
      input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec,  // fwd
      input_frames_packets_[0].Timestamp(),                             // backward
      input_frames_packets_[num_frames - 1].Timestamp(),                // backward
      input_frames_packets_[num_frames - 1].Timestamp(),                // backward
      input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec   // back
  };
  std::vector<Timestamp> end_timestamps{
      input_frames_packets_[num_frames - 1].Timestamp(),
      input_frames_packets_[num_frames - 1].Timestamp(),
      input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec,
      input_frames_packets_[num_frames - 1].Timestamp() + 2 * usec_in_sec,
      input_frames_packets_[0].Timestamp() - usec_in_sec,
      input_frames_packets_[0].Timestamp(),
      input_frames_packets_[0].Timestamp() - usec_in_sec,
      input_frames_packets_[1].Timestamp()};
  auto ra_boxes =
      CreateRandomAccessTrackingBoxList(start_timestamps, end_timestamps);
  Packet ra_packet =
      Adopt(ra_boxes.release()).At(input_frames_packets_[0].Timestamp());
  // Run the parallel graph and check that the outside-cache requests have no
  // results.
  MP_EXPECT_OK(parallel_graph_.StartRun(side_packets));
  for (auto frame_packet : input_frames_packets_) {
    MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("image_cpu_frames",
                                                        frame_packet));
    MP_EXPECT_OK(parallel_graph_.WaitUntilIdle());
  }
  MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("ra_track", ra_packet));
  MP_EXPECT_OK(parallel_graph_.CloseAllInputStreams());
  MP_EXPECT_OK(parallel_graph_.WaitUntilDone());
  // Should have 1 random access packet with 0 result boxes.
  EXPECT_EQ(1, random_access_results_packets_.size());
  const auto& ra_returned_boxes =
      random_access_results_packets_[0].Get<TimedBoxProtoList>();
  const int num_returned_ra_boxes = ra_returned_boxes.box_size();
  EXPECT_EQ(0, num_returned_ra_boxes);
}
// Verifies the smooth transition behavior when a box position is restarted
// via "restart_pos": over kTransitionFrames frames the reported position
// should blend linearly from the restarted (frame-0) position back onto the
// normally tracked trajectory. Also exercises mid-frame TRACK_TIME queries
// (interpolated outputs between frames).
TEST_F(TrackingGraphTest, TestTransitionFramesForReacquisition) {
  // Create input side packets.
  std::map<std::string, mediapipe::Packet> side_packets;
  side_packets.insert(std::make_pair("analysis_downsample_factor",
                                     mediapipe::MakePacket<float>(1.0f)));
  CalculatorOptions calculator_options;
  calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
      ->mutable_tracker_options()
      ->mutable_track_step_options()
      ->set_tracking_degrees(
          TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE);
  constexpr int kTransitionFrames = 3;
  calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext)
      ->set_start_pos_transition_frames(kTransitionFrames);
  side_packets.insert(std::make_pair(
      "calculator_options",
      mediapipe::MakePacket<CalculatorOptions>(calculator_options)));
  Timestamp start_box_time = input_frames_packets_[0].Timestamp();
  // Box id 0 uses quad tracking with 8DoF homography transform.
  // Box id 1 uses quad tracking with 6DoF perspective transform.
  // Box id 2 uses box tracking with 4DoF similarity transform.
  std::vector<bool> is_quad_tracking{true, true, false};
  std::vector<bool> is_pnp_tracking{false, true, false};
  std::vector<bool> is_reacquisition{true, true, true};
  auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking,
                                    is_pnp_tracking, is_reacquisition);
  Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time);
  // Setting box pos restart from initial position (frame 0's position).
  constexpr int kRestartFrame = 3;
  Timestamp restart_box_time = input_frames_packets_[kRestartFrame].Timestamp();
  auto restart_box_list = MakeBoxList(restart_box_time, is_quad_tracking,
                                      is_pnp_tracking, is_reacquisition);
  Packet restart_pos_packet =
      Adopt(restart_box_list.release()).At(restart_box_time);
  MP_EXPECT_OK(graph_.StartRun(side_packets));
  MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_pos_packet));
  for (int j = 0; j < input_frames_packets_.size(); ++j) {
    // Add TRACK_TIME stream queries in between 2 frames.
    if (j > 0) {
      Timestamp track_time = Timestamp((j - 0.5f) * kFrameIntervalUs);
      LOG(INFO) << track_time.Value();
      Packet track_time_packet = Adopt(new Timestamp).At(track_time);
      MP_EXPECT_OK(
          graph_.AddPacketToInputStream("track_time", track_time_packet));
    }
    MP_EXPECT_OK(graph_.AddPacketToInputStream("image_cpu_frames",
                                               input_frames_packets_[j]));
    Packet track_time_packet =
        Adopt(new int(0)).At(input_frames_packets_[j].Timestamp());
    MP_EXPECT_OK(
        graph_.AddPacketToInputStream("track_time", track_time_packet));
    MP_EXPECT_OK(graph_.WaitUntilIdle());
    if (j == kRestartFrame) {
      MP_EXPECT_OK(
          graph_.AddPacketToInputStream("restart_pos", restart_pos_packet));
    }
  }
  MP_EXPECT_OK(graph_.CloseAllInputStreams());
  MP_EXPECT_OK(graph_.WaitUntilDone());
  // One output per frame plus one per mid-frame query.
  EXPECT_EQ(input_frames_packets_.size() * 2 - 1, output_packets_.size());
  for (int i = 0; i < output_packets_.size(); ++i) {
    const TimedBoxProtoList& boxes =
        output_packets_[i].Get<TimedBoxProtoList>();
    EXPECT_EQ(is_quad_tracking.size(), boxes.box_size());
    // Outputs alternate between whole and half frames.
    float frame_id = i / 2.0f;
    float expected_frame_id;
    if (frame_id <= kRestartFrame) {
      // before transition
      expected_frame_id = frame_id;
    } else {
      float transition_frames = frame_id - kRestartFrame;
      if (transition_frames <= kTransitionFrames) {
        // transitioning: linear blend from the restart position (frame 0)
        // back onto the tracked trajectory.
        expected_frame_id =
            kRestartFrame -
            transition_frames / kTransitionFrames * kRestartFrame +
            transition_frames;
      } else {
        // after transition.
        expected_frame_id = transition_frames;
      }
    }
    for (int j = 0; j < boxes.box_size(); ++j) {
      const TimedBoxProto& box = boxes.box(j);
      if (is_quad_tracking[box.id()]) {
        ExpectQuadAtFrame(box, expected_frame_id,
                          is_pnp_tracking[box.id()] ? kImageAspectRatio : -1.0f,
                          is_reacquisition[box.id()]);
      } else {
        ExpectBoxAtFrame(box, expected_frame_id, is_reacquisition[box.id()]);
      }
    }
  }
}
// TODO: Add test for reacquisition.
} // namespace
} // namespace mediapipe

View File

@ -39,6 +39,11 @@ pipeline.
the same TFLite model in a CPU-based pipeline. This example highlights how
graphs can be easily adapted to run on CPU v.s. GPU.
### Object Detection and Tracking with GPU
[Object Detection and Tracking with GPU](./object_tracking_mobile_gpu.md) illustrates how to
use MediaPipe for object detection and tracking.
### Face Detection with GPU
[Face Detection with GPU](./face_detection_mobile_gpu.md) illustrates how to use

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

View File

@ -63,6 +63,7 @@ To build and run iOS apps:
```bash
$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
libopencv-calib3d-dev libopencv-features2d-dev \
libopencv-imgproc-dev libopencv-video-dev
```
@ -411,6 +412,7 @@ To build and run iOS apps:
```bash
username@DESKTOP-TMVLBJ1:~/mediapipe$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
libopencv-calib3d-dev libopencv-features2d-dev \
libopencv-imgproc-dev libopencv-video-dev
```

View File

@ -0,0 +1,475 @@
# Object Detection and Tracking
This doc focuses on the
[below example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt)
that performs object detection and tracking.
Note that object detection is using TensorFlow Lite on GPU while tracking is using CPU.
For overall context on object detection and tracking, please read this
[Google Developer Blog](https://mediapipe.page.link/objecttrackingblog).
![object_tracking_android_gpu_gif](images/mobile/object_tracking_android_gpu.gif)
## Android
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu)
To build and install the app:
```bash
bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu
```
Once the app is built, install it on Android device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/objecttrackinggpu.apk
```
## Graph
The object detection and tracking [main graph](#main-graph) internally utilizes a
[object detection subgraph](#object-detection-subgraph), a
[object tracking subgraph](#object-tracking-subgraph) and a
[renderer subgraph](#renderer-subgraph).
The subgraphs show up in the main graph visualization as nodes colored in
purple, and the subgraph itself can also be visualized just like a regular
graph. For more information on how to visualize a graph that includes subgraphs,
see the Visualizing Subgraphs section in the
[visualizer documentation](./visualizer.md).
### Main Graph
![object_detection_mobile_gpu_graph](images/mobile/object_tracking_mobile_gpu.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt)
```bash
# MediaPipe graph that performs object detection and tracking.
# Used in the examples in
# mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Resamples the images by specific frame rate. This calculator is used to
# control the frequency of subsequent calculators/subgraphs, e.g. less power
# consumption for expensive process.
node {
calculator: "PacketResamplerCalculator"
input_stream: "DATA:input_video"
output_stream: "DATA:throttled_input_video"
node_options: {
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
frame_rate: 0.5
}
}
}
# Subgraph that detects objects (see object_detection_gpu.pbtxt).
node {
calculator: "ObjectDetectionSubgraph"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:output_detections"
}
# Subgraph that tracks objects (see object_tracking.pbtxt).
node {
calculator: "ObjectTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "DETECTIONS:output_detections"
output_stream: "DETECTIONS:tracked_detections"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see renderer_gpu.pbtxt).
node {
calculator: "RendererSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "DETECTIONS:tracked_detections"
output_stream: "IMAGE:output_video"
}
```
### Object Detection Subgraph
![object_detection_gpu_subgraph](images/mobile/object_detection_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt)
```bash
# MediaPipe object detection subgraph.
type: "ObjectDetectionSubgraph"
input_stream: "IMAGE:input_video"
output_stream: "DETECTIONS:output_detections"
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS_GPU:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS_GPU:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
sigmoid_score: true
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
max_num_detections: 3
overlap_type: INTERSECTION_OVER_UNION
return_empty_detections: true
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
```
### Object Tracking Subgraph
Object tracking subgraph uses a box tracking subgraph which is a generic
tracking library that can be used for other use cases.
![object_tracking_subgraph](images/mobile/object_tracking_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt)
```bash
# MediaPipe object tracking subgraph.
type: "ObjectTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "DETECTIONS:new_detections"
output_stream: "DETECTIONS:tracked_detections"
# Assigns a unique id for each new detection.
node {
calculator: "DetectionUniqueIdCalculator"
input_stream: "DETECTIONS:new_detections"
output_stream: "DETECTIONS:detections_with_id"
}
# Converts detections to TimedBox protos which are used as initial location
# for tracking.
node {
calculator: "DetectionsToTimedBoxListCalculator"
input_stream: "DETECTIONS:detections_with_id"
output_stream: "BOXES:start_pos"
}
# Subgraph that tracks boxes (see box_tracking.pbtxt).
node {
calculator: "BoxTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "BOXES:start_pos"
input_stream: "CANCEL_ID:cancel_object_id"
output_stream: "BOXES:boxes"
}
# Manages newly detected objects and objects that are being tracked.
# It associates the duplicated detections and updates the locations of
# detections from tracking.
node: {
calculator: "TrackedDetectionManagerCalculator"
input_stream: "DETECTIONS:detections_with_id"
input_stream: "TRACKING_BOXES:boxes"
output_stream: "DETECTIONS:tracked_detections"
output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "TRACKING_BOXES"
}
sync_set {
tag_index: "DETECTIONS"
}
}
}
}
}
```
### Box Tracking Subgraph
![box_tracking_subgraph](images/mobile/box_tracking_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt)
```bash
# MediaPipe box tracking subgraph.
type: "BoxTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "BOXES:start_pos"
input_stream: "CANCEL_ID:cancel_object_id"
output_stream: "BOXES:boxes"
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:downscaled_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 240
output_height: 320
}
}
}
# Converts GPU buffer to ImageFrame for processing tracking.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "downscaled_input_video"
output_stream: "downscaled_input_video_cpu"
}
# Performs motion analysis on an incoming video stream.
node: {
calculator: "MotionAnalysisCalculator"
input_stream: "VIDEO:downscaled_input_video_cpu"
output_stream: "CAMERA:camera_motion"
output_stream: "FLOW:region_flow"
node_options: {
[type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: {
analysis_options {
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
flow_options {
fast_estimation_min_block_size: 100
top_inlier_sets: 1
frac_inlier_error_threshold: 3e-3
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
verification_distance: 5.0
verify_long_feature_acceleration: true
verify_long_feature_trigger_ratio: 0.1
tracking_options {
max_features: 500
adaptive_extraction_levels: 2
min_eig_val_settings {
adaptive_lowest_quality_level: 2e-4
}
klt_tracker_implementation: KLT_OPENCV
}
}
}
}
}
}
# Reads optical flow fields defined in
# mediapipe/framework/formats/motion/optical_flow_field.h,
# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized
# to 0-255.
node: {
calculator: "FlowPackagerCalculator"
input_stream: "FLOW:region_flow"
input_stream: "CAMERA:camera_motion"
output_stream: "TRACKING:tracking_data"
node_options: {
[type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: {
flow_packager_options: {
binary_tracking_data_support: false
}
}
}
}
# Tracks box positions over time.
node: {
calculator: "BoxTrackerCalculator"
input_stream: "TRACKING:tracking_data"
input_stream: "TRACK_TIME:input_video"
input_stream: "START_POS:start_pos"
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
input_stream_info: {
tag_index: "CANCEL_OBJECT_ID"
back_edge: true
}
output_stream: "BOXES:boxes"
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "TRACKING"
tag_index: "TRACK_TIME"
}
sync_set {
tag_index: "START_POS"
}
sync_set {
tag_index: "CANCEL_OBJECT_ID"
}
}
}
}
node_options: {
[type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: {
tracker_options: {
track_step_options {
track_object_and_camera: true
tracking_degrees: TRACKING_DEGREE_OBJECT_SCALE
inlier_spring_force: 0.0
static_motion_temporal_ratio: 3e-2
}
}
visualize_tracking_data: false
streaming_track_data_cache_size: 100
}
}
}
```
### Renderer Subgraph
![hand_renderer_gpu_subgraph.pbtxt](images/mobile/object_tracking_renderer_gpu_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt)
```bash
# MediaPipe object tracking rendering subgraph.
type: "RendererSubgraph"
input_stream: "IMAGE:input_image"
input_stream: "DETECTIONS:detections"
output_stream: "IMAGE:output_image"
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
render_detection_id: true
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "INPUT_FRAME_GPU:input_image"
input_stream: "detections_render_data"
output_stream: "OUTPUT_FRAME_GPU:output_image"
}
```

View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.objecttrackinggpu">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<!-- For MediaPipe -->
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -0,0 +1,81 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
# JNI shared library bundling the MediaPipe framework and all calculators
# needed by the tracking graph; loaded from Java via
# System.loadLibrary("mediapipe_jni").
cc_binary(
    name = "libmediapipe_jni.so",
    linkshared = 1,
    linkstatic = 1,
    deps = [
        "//mediapipe/graphs/tracking:mobile_calculators",
        "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
    ],
)

# Wraps the prebuilt .so so it can appear in android_library deps.
cc_library(
    name = "mediapipe_jni_lib",
    srcs = [":libmediapipe_jni.so"],
    alwayslink = 1,
)

# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
    name = "binary_graph",
    srcs = ["//mediapipe/graphs/tracking:mobile_gpu_binary_graph"],
    outs = ["objecttrackinggpu.binarypb"],
    cmd = "cp $< $@",
)

# App library: Java sources plus the graph binary and TFLite model/labelmap
# packaged as Android assets (assets_dir = "" keeps them at the asset root,
# matching the paths the graph's calculators expect).
android_library(
    name = "mediapipe_lib",
    srcs = glob(["*.java"]),
    assets = [
        ":binary_graph",
        "//mediapipe/models:ssdlite_object_detection.tflite",
        "//mediapipe/models:ssdlite_object_detection_labelmap.txt",
    ],
    assets_dir = "",
    manifest = "AndroidManifest.xml",
    resource_files = glob(["res/**"]),
    deps = [
        ":mediapipe_jni_lib",
        "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
        "//mediapipe/java/com/google/mediapipe/components:android_components",
        "//mediapipe/java/com/google/mediapipe/framework:android_framework",
        "//mediapipe/java/com/google/mediapipe/glutil",
        "//third_party:androidx_appcompat",
        "//third_party:androidx_constraint_layout",
        "//third_party:androidx_legacy_support_v4",
        "//third_party:androidx_recyclerview",
        "//third_party:opencv",
        "@androidx_concurrent_futures//jar",
        "@androidx_lifecycle//jar",
        "@com_google_guava_android//jar",
    ],
)

# Final installable APK target.
android_binary(
    name = "objecttrackinggpu",
    manifest = "AndroidManifest.xml",
    manifest_values = {"applicationId": "com.google.mediapipe.apps.objecttrackinggpu"},
    multidex = "native",
    deps = [
        ":mediapipe_lib",
    ],
)

View File

@ -0,0 +1,167 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.objecttrackinggpu;

import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.glutil.EglManager;

/**
 * Main activity of MediaPipe example apps.
 *
 * <p>Feeds back-camera preview frames into the {@code objecttrackinggpu.binarypb} MediaPipe graph
 * and renders the graph's annotated output frames onto a {@link SurfaceView}.
 */
public class MainActivity extends AppCompatActivity {
  private static final String TAG = "MainActivity";

  // Asset name of the compiled MediaPipe graph, and its input/output stream names.
  private static final String BINARY_GRAPH_NAME = "objecttrackinggpu.binarypb";
  private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
  private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
  private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;

  // Flips the camera-preview frames vertically before sending them into FrameProcessor to be
  // processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
  // This is needed because OpenGL represents images assuming the image origin is at the bottom-left
  // corner, whereas MediaPipe in general assumes the image origin is at top-left.
  private static final boolean FLIP_FRAMES_VERTICALLY = true;

  static {
    // Load all native libraries needed by the app.
    System.loadLibrary("mediapipe_jni");
    System.loadLibrary("opencv_java3");
  }

  // {@link SurfaceTexture} where the camera-preview frames can be accessed.
  private SurfaceTexture previewFrameTexture;
  // {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
  private SurfaceView previewDisplayView;
  // Creates and manages an {@link EGLContext}.
  private EglManager eglManager;
  // Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
  // frames onto a {@link Surface}.
  private FrameProcessor processor;
  // Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
  // consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
  private ExternalTextureConverter converter;
  // Handles camera access via the {@link CameraX} Jetpack support library.
  // NOTE(review): assigned only in startCamera(); assumed non-null by the time surfaceChanged()
  // fires, i.e. camera permission was granted before the preview surface appeared — confirm.
  private CameraXPreviewHelper cameraHelper;

  // Builds the preview display, wires up the MediaPipe graph processor, and requests camera
  // permission (the camera itself is started from onResume once permission is granted).
  @Override
  protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_main);

    previewDisplayView = new SurfaceView(this);
    setupPreviewDisplayView();

    // Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
    // binary graphs.
    AndroidAssetUtil.initializeNativeAssetManager(this);

    eglManager = new EglManager(null);
    processor =
        new FrameProcessor(
            this,
            eglManager.getNativeContext(),
            BINARY_GRAPH_NAME,
            INPUT_VIDEO_STREAM_NAME,
            OUTPUT_VIDEO_STREAM_NAME);
    processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);

    PermissionHelper.checkAndRequestCameraPermissions(this);
  }

  // Recreates the texture converter and (re)starts the camera each time the activity is resumed.
  @Override
  protected void onResume() {
    super.onResume();
    converter = new ExternalTextureConverter(eglManager.getContext());
    converter.setFlipY(FLIP_FRAMES_VERTICALLY);
    converter.setConsumer(processor);
    if (PermissionHelper.cameraPermissionsGranted(this)) {
      startCamera();
    }
  }

  // Releases the converter's GL resources; a fresh converter is created in onResume.
  @Override
  protected void onPause() {
    super.onPause();
    converter.close();
  }

  // Forwards the permission dialog result to PermissionHelper; onResume then starts the camera.
  @Override
  public void onRequestPermissionsResult(
      int requestCode, String[] permissions, int[] grantResults) {
    super.onRequestPermissionsResult(requestCode, permissions, grantResults);
    PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
  }

  // Adds the (initially hidden) preview SurfaceView to the layout and installs surface callbacks
  // that connect/disconnect the MediaPipe graph's video output to the surface.
  private void setupPreviewDisplayView() {
    previewDisplayView.setVisibility(View.GONE);
    ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
    viewGroup.addView(previewDisplayView);

    previewDisplayView
        .getHolder()
        .addCallback(
            new SurfaceHolder.Callback() {
              @Override
              public void surfaceCreated(SurfaceHolder holder) {
                processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
              }

              @Override
              public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
                // (Re-)Compute the ideal size of the camera-preview display (the area that the
                // camera-preview frames get rendered onto, potentially with scaling and rotation)
                // based on the size of the SurfaceView that contains the display.
                Size viewSize = new Size(width, height);
                Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);

                // Connect the converter to the camera-preview frames as its input (via
                // previewFrameTexture), and configure the output width and height as the computed
                // display size.
                converter.setSurfaceTextureAndAttachToGLContext(
                    previewFrameTexture, displaySize.getWidth(), displaySize.getHeight());
              }

              @Override
              public void surfaceDestroyed(SurfaceHolder holder) {
                processor.getVideoSurfaceOutput().setSurface(null);
              }
            });
  }

  // Opens the back camera via CameraX and reveals the preview display once frames are available.
  private void startCamera() {
    cameraHelper = new CameraXPreviewHelper();
    cameraHelper.setOnCameraStartedListener(
        surfaceTexture -> {
          previewFrameTexture = surfaceTexture;
          // Make the display view visible to start showing the preview. This triggers the
          // SurfaceHolder.Callback added to (the holder of) previewDisplayView.
          previewDisplayView.setVisibility(View.VISIBLE);
        });
    cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
  }
}

View File

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent">
<FrameLayout
android:id="@+id/preview_display_layout"
android:layout_width="fill_parent"
android:layout_height="fill_parent"
android:layout_weight="1">
<TextView
android:id="@+id/no_camera_access_view"
android:layout_height="fill_parent"
android:layout_width="fill_parent"
android:gravity="center"
android:text="@string/no_camera_access" />
</FrameLayout>
</androidx.constraintlayout.widget.ConstraintLayout>

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>

View File

@ -0,0 +1,4 @@
<resources>
<string name="app_name" translatable="false">Object Tracking GPU</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>

View File

@ -0,0 +1,11 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>

View File

@ -93,15 +93,15 @@ FILEPATTERN = "kinetics_700_%s_25fps_rgb_flow"
SPLITS = {
"train": {
"shards": 1000,
"examples": 541490
"examples": 541279
},
"validate": {
"shards": 100,
"examples": 34715
"examples": 34688
},
"test": {
"shards": 100,
"examples": 69321
"examples": 69278
},
"custom": {
"csv": None, # Add a CSV for your own data here.

View File

@ -541,5 +541,90 @@ TEST_F(CalculatorGraphEventLoopTest, WaitToAddPacketToInputStream) {
ASSERT_EQ(kNumInputPackets, output_packets_.size());
}
// Captures log messages during testing.
class TextMessageLogSink : public LogSink {
public:
std::vector<std::string> messages;
void Send(const LogEntry& entry) {
messages.push_back(std::string(entry.text_message()));
}
};
// Verifies that CalculatorGraph::UnthrottleSources does not run repeatedly
// in a "busy-loop" while the graph is throttled due to a graph-output stream.
TEST_F(CalculatorGraphEventLoopTest, UnthrottleSources) {
  // A trivial pass-through graph; max_queue_size bounds the input stream so
  // the graph can become throttled by its full output-stream queue.
  CalculatorGraphConfig graph_config;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(
      R"(
        node {
          calculator: "PassThroughCalculator"
          input_stream: "input_numbers"
          output_stream: "output_numbers"
        }
        input_stream: "input_numbers"
        output_stream: "output_numbers"
        num_threads: 2
        max_queue_size: 5
      )",
      &graph_config));
  // Must match max_queue_size above: the poller queue fills after exactly
  // this many packets.
  constexpr int kQueueSize = 5;

  // Initialize and start the mediapipe graph.
  CalculatorGraph graph;
  MP_ASSERT_OK(graph.Initialize(graph_config));
  graph.SetGraphInputStreamAddMode(
      CalculatorGraph::GraphInputStreamAddMode::ADD_IF_NOT_FULL);
  auto poller_status = graph.AddOutputStreamPoller("output_numbers");
  MP_ASSERT_OK(poller_status.status());
  mediapipe::OutputStreamPoller& poller = poller_status.ValueOrDie();
  poller.SetMaxQueueSize(kQueueSize);
  MP_ASSERT_OK(graph.StartRun({}));

  // Lambda that adds a packet to the calculator graph.
  auto add_packet = [&graph](std::string s, int i) {
    return graph.AddPacketToInputStream(s, MakePacket<int>(i).At(Timestamp(i)));
  };

  // Start capturing VLOG messages from the mediapipe::Scheduler.
  TextMessageLogSink log_listener;
  mediapipe::AddLogSink(&log_listener);
  SetVLOGLevel("scheduler", 3);

  // Add just enough packets to fill the output stream queue.
  std::vector<Packet> out_packets;
  for (int i = 0; i < kQueueSize; ++i) {
    MP_EXPECT_OK(add_packet("input_numbers", i));
    MP_EXPECT_OK(graph.WaitUntilIdle());
  }

  // The graph is throttled due to the full output stream.
  EXPECT_FALSE(add_packet("input_numbers", kQueueSize).ok());
  // CalculatorGraph::UnthrottleSources should be called just one time.
  // Sleep long enough that a busy-loop, if present, would emit many more
  // "HandleIdle: unthrottling" messages than the one or two expected.
  absl::SleepFor(absl::Milliseconds(100));

  // Read all packets from the output stream queue and close the graph.
  for (int i = 0; i < kQueueSize; ++i) {
    Packet packet;
    EXPECT_TRUE(poller.Next(&packet));
    out_packets.push_back(packet);
  }
  MP_EXPECT_OK(graph.CloseAllInputStreams());
  MP_EXPECT_OK(graph.WaitUntilDone());
  EXPECT_EQ(kQueueSize, out_packets.size());

  // Stop capturing VLOG messages.
  SetVLOGLevel("scheduler", 0);
  mediapipe::RemoveLogSink(&log_listener);

  // Count and validate the calls to UnthrottleSources.
  int loop_count = 0;
  for (auto& message : log_listener.messages) {
    loop_count += (message == "HandleIdle: unthrottling") ? 1 : 0;
  }
  // Exactly one unthrottle is expected; two is tolerated for benign
  // scheduling races, anything more indicates a busy-loop.
  EXPECT_GE(loop_count, 1);
  EXPECT_LE(loop_count, 2);
}
} // namespace
} // namespace mediapipe

View File

@ -241,7 +241,9 @@ cc_test(
proto_library(
name = "rect_proto",
srcs = ["rect.proto"],
visibility = ["//visibility:public"],
visibility = [
"//visibility:public",
],
)
mediapipe_cc_proto_library(
@ -253,10 +255,21 @@ mediapipe_cc_proto_library(
deps = [":rect_proto"],
)
# Java lite protos for rect.proto.
java_lite_proto_library(
    name = "rect_java_proto_lite",
    strict_deps = 0,
    visibility = [
        "//mediapipe:__subpackages__",
    ],
    # Fixed: previously depended on ":landmark_proto", which made this target
    # generate the Landmark Java protos instead of the Rect protos.
    deps = [":rect_proto"],
)
proto_library(
name = "landmark_proto",
srcs = ["landmark.proto"],
visibility = ["//visibility:public"],
visibility = [
"//visibility:public",
],
)
mediapipe_cc_proto_library(
@ -269,7 +282,9 @@ mediapipe_cc_proto_library(
java_lite_proto_library(
name = "landmark_java_proto_lite",
strict_deps = 0,
visibility = ["//mediapipe:__subpackages__"],
visibility = [
"//mediapipe:__subpackages__",
],
deps = [":landmark_proto"],
)

View File

@ -176,6 +176,7 @@ cc_library(
deps = [
"//mediapipe/framework:port",
"@com_github_glog_glog//:glog",
"@com_google_absl//absl/time",
],
)

View File

@ -15,6 +15,50 @@
#ifndef MEDIAPIPE_PORT_LOGGING_H_
#define MEDIAPIPE_PORT_LOGGING_H_
#include "absl/time/time.h"
#include "glog/logging.h"
namespace mediapipe {
using LogSeverity = google::LogSeverity;
const auto SetVLOGLevel = google::SetVLOGLevel;
// One captured log record: severity, timestamp, and message text, as
// delivered to a mediapipe::LogSink.
//
// NOTE: text_message() is an absl::string_view into a buffer owned by the
// logging library; it is only valid for the duration of LogSink::Send().
// Copy it (e.g. into a std::string) if it must outlive the callback.
class LogEntry {
 public:
  // Builds an entry from the raw glog callback arguments; `tm_time` is
  // interpreted in the local time zone.
  LogEntry(LogSeverity severity, const struct ::tm* tm_time,
           absl::string_view message)
      : severity_(severity),
        timestamp_(absl::FromTM(*tm_time, absl::LocalTimeZone())),
        text_message_(message) {}
  // Severity of the message (e.g. INFO, WARNING, ERROR).
  LogSeverity log_severity() const { return severity_; }
  // Time the message was logged, derived from the glog tm in local time.
  absl::Time timestamp() const { return timestamp_; }
  // Message text; non-owning — see class comment for lifetime.
  absl::string_view text_message() const { return text_message_; }

 private:
  LogSeverity severity_;
  absl::Time timestamp_;
  absl::string_view text_message_;  // Non-owning view; see class comment.
};
class LogSink : public google::LogSink {
public:
virtual ~LogSink() = default;
virtual void Send(const LogEntry& entry) = 0;
virtual void WaitTillSent() {}
private:
virtual void send(LogSeverity severity, const char* full_filename,
const char* base_filename, int line,
const struct ::tm* tm_time, const char* message,
size_t message_len) {
LogEntry log_entry(severity, tm_time,
absl::string_view(message, message_len));
Send(log_entry);
}
};
// Registers `destination` to receive all subsequently logged messages.
// The sink must stay alive until RemoveLogSink() is called on it.
inline void AddLogSink(LogSink* destination) {
  google::AddLogSink(destination);
}
// Unregisters a sink previously registered with AddLogSink().
inline void RemoveLogSink(LogSink* destination) {
  google::RemoveLogSink(destination);
}
} // namespace mediapipe
#endif // MEDIAPIPE_PORT_LOGGING_H_

View File

@ -0,0 +1,176 @@
# MediaPipe graph that performs LSTM based object detection on desktop with
# TensorFlow Lite on CPU.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph process only one
# frame at a time.
max_queue_size: 1
# Decodes an input video file into images and a video header.
node {
calculator: "OpenCvVideoDecoderCalculator"
input_side_packet: "INPUT_FILE_PATH:input_video_path"
output_stream: "VIDEO:input_video"
output_stream: "VIDEO_PRESTREAM:input_video_header"
}
# Transforms the input image on CPU to a 256x256 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE:input_video"
output_stream: "IMAGE:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 256
output_height: 256
}
}
}
# Converts the transformed input image on CPU into an image tensor as a
# TfLiteTensor. The zero_center option is set to true to normalize the
# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f].
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE:transformed_input_video"
output_stream: "TENSORS:image_tensor"
node_options: {
[type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
zero_center: true
use_quantized_tensors: true
}
}
}
# Joins the input image_tensor and previous lstm state tensors into a single
# output vector of tensors.
node {
calculator: "ConcatenateTfLiteTensorVectorCalculator"
input_stream: "image_tensor"
input_stream: "prev_lstm_tensors"
output_stream: "concatenated_tensors"
}
# Runs a TensorFlow Lite model on CPU that takes an image tensor and state
# tensors, then outputs a vector of tensors representing, for instance,
# detection boxes/keypoints and scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS:concatenated_tensors"
output_stream: "TENSORS:inference_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/lstdlite_object_detection.tflite"
}
}
}
# Splits up the output tensors into two streams [detection_boxes,
# detection_classes, detection_scores, num_detections], and [lstm_c, lstm_h].
node {
calculator: "SplitTfLiteTensorVectorCalculator"
input_stream: "inference_tensors"
output_stream: "detection_tensors"
output_stream: "lstm_tensors"
options {
[type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
ranges: { begin: 0 end: 4 }
ranges: { begin: 4 end: 6 }
}
}
}
# Waits for the LSTM state output from the TfLite inferencing to feed back in as
# an input tensor for the next frame.
node {
calculator: "PreviousLoopbackCalculator"
input_stream: "MAIN:transformed_input_video"
input_stream: "LOOP:lstm_tensors"
input_stream_info: { tag_index: "LOOP" back_edge: true }
output_stream: "PREV_LOOP:prev_lstm_tensors"
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the specification in the options, into a vector of detections. Each detection
# describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS:detection_tensors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 7
num_boxes: 40
num_coords: 4
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
min_score_threshold: 0.2
max_num_detections: 5
overlap_type: INTERSECTION_OVER_UNION
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/lstdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 2.0
color { r: 255 g: 255 b: 255 }
}
}
}
# Draws annotations and overlays them on top of the original image coming into
# the graph.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "INPUT_FRAME:input_video"
input_stream: "render_data"
output_stream: "OUTPUT_FRAME:output_video"
}
# Encodes the annotated images into a video file, adopting properties specified
# in the input video header, e.g., video framerate.
node {
calculator: "OpenCvVideoEncoderCalculator"
input_stream: "VIDEO:output_video"
input_stream: "VIDEO_PRESTREAM:input_video_header"
input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
node_options: {
[type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
codec: "avc1"
video_format: "mp4"
}
}
}

View File

@ -0,0 +1,39 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
cc_library(
name = "mobile_calculators",
deps = [
"//mediapipe/calculators/core:packet_resampler_calculator",
"//mediapipe/graphs/tracking/subgraphs:object_detection_gpu",
"//mediapipe/graphs/tracking/subgraphs:object_tracking",
"//mediapipe/graphs/tracking/subgraphs:renderer_gpu",
],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph",
graph = "object_detection_tracking_mobile_gpu.pbtxt",
output_name = "mobile_gpu.binarypb",
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,46 @@
# MediaPipe graph that performs object detection and tracking with TensorFlow
# Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/objecttrackinggpu
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Resamples the images to a specific frame rate. This calculator is used to
# control the frequency of subsequent calculators/subgraphs, e.g. to reduce
# the power consumption of expensive downstream processing.
node {
calculator: "PacketResamplerCalculator"
input_stream: "DATA:input_video"
output_stream: "DATA:throttled_input_video"
node_options: {
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
frame_rate: 0.5
}
}
}
# Subgraph that detects objects (see object_detection_gpu.pbtxt).
node {
calculator: "ObjectDetectionSubgraph"
input_stream: "IMAGE:throttled_input_video"
output_stream: "DETECTIONS:output_detections"
}
# Subgraph that tracks objects (see object_tracking.pbtxt).
node {
calculator: "ObjectTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "DETECTIONS:output_detections"
output_stream: "DETECTIONS:tracked_detections"
}
# Subgraph that renders annotations and overlays them on top of the input
# images (see renderer_gpu.pbtxt).
node {
calculator: "RendererSubgraph"
input_stream: "IMAGE:input_video"
input_stream: "DETECTIONS:tracked_detections"
output_stream: "IMAGE:output_video"
}

View File

@ -0,0 +1,76 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "box_tracking",
graph = "box_tracking.pbtxt",
register_as = "BoxTrackingSubgraph",
deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/video:box_tracker_calculator",
"//mediapipe/calculators/video:flow_packager_calculator",
"//mediapipe/calculators/video:motion_analysis_calculator",
"//mediapipe/framework/stream_handler:immediate_input_stream_handler",
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
],
)
mediapipe_simple_subgraph(
name = "object_tracking",
graph = "object_tracking.pbtxt",
register_as = "ObjectTrackingSubgraph",
deps = [
"//mediapipe/calculators/util:detection_unique_id_calculator",
"//mediapipe/calculators/util:detections_to_timed_box_list_calculator",
"//mediapipe/calculators/video:tracked_detection_manager_calculator",
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
"//mediapipe/graphs/tracking/subgraphs:box_tracking",
],
)
mediapipe_simple_subgraph(
name = "object_detection_gpu",
graph = "object_detection_gpu.pbtxt",
register_as = "ObjectDetectionSubgraph",
deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:ssd_anchors_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator",
"//mediapipe/calculators/util:detection_label_id_to_text_calculator",
"//mediapipe/calculators/util:non_max_suppression_calculator",
],
)
mediapipe_simple_subgraph(
name = "renderer_gpu",
graph = "renderer_gpu.pbtxt",
register_as = "RendererSubgraph",
deps = [
"//mediapipe/calculators/util:annotation_overlay_calculator",
"//mediapipe/calculators/util:detections_to_render_data_calculator",
"//mediapipe/calculators/util:rect_to_render_data_calculator",
],
)

View File

@ -0,0 +1,126 @@
# MediaPipe box tracking subgraph.
type: "BoxTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "BOXES:start_pos"
input_stream: "CANCEL_ID:cancel_object_id"
output_stream: "BOXES:boxes"
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:downscaled_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 240
output_height: 320
}
}
}
# Converts GPU buffer to ImageFrame for processing tracking.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "downscaled_input_video"
output_stream: "downscaled_input_video_cpu"
}
# Performs motion analysis on an incoming video stream.
node: {
calculator: "MotionAnalysisCalculator"
input_stream: "VIDEO:downscaled_input_video_cpu"
output_stream: "CAMERA:camera_motion"
output_stream: "FLOW:region_flow"
node_options: {
[type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: {
analysis_options {
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
flow_options {
fast_estimation_min_block_size: 100
top_inlier_sets: 1
frac_inlier_error_threshold: 3e-3
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
verification_distance: 5.0
verify_long_feature_acceleration: true
verify_long_feature_trigger_ratio: 0.1
tracking_options {
max_features: 500
adaptive_extraction_levels: 2
min_eig_val_settings {
adaptive_lowest_quality_level: 2e-4
}
klt_tracker_implementation: KLT_OPENCV
}
}
}
}
}
}
# Reads optical flow fields defined in
# mediapipe/framework/formats/motion/optical_flow_field.h,
# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized
# to 0-255.
node: {
calculator: "FlowPackagerCalculator"
input_stream: "FLOW:region_flow"
input_stream: "CAMERA:camera_motion"
output_stream: "TRACKING:tracking_data"
node_options: {
[type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: {
flow_packager_options: {
binary_tracking_data_support: false
}
}
}
}
# Tracks box positions over time.
node: {
calculator: "BoxTrackerCalculator"
input_stream: "TRACKING:tracking_data"
input_stream: "TRACK_TIME:input_video"
input_stream: "START_POS:start_pos"
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
input_stream_info: {
tag_index: "CANCEL_OBJECT_ID"
back_edge: true
}
output_stream: "BOXES:boxes"
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "TRACKING"
tag_index: "TRACK_TIME"
}
sync_set {
tag_index: "START_POS"
}
sync_set {
tag_index: "CANCEL_OBJECT_ID"
}
}
}
}
node_options: {
[type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: {
tracker_options: {
track_step_options {
track_object_and_camera: true
tracking_degrees: TRACKING_DEGREE_OBJECT_SCALE
inlier_spring_force: 0.0
static_motion_temporal_ratio: 3e-2
}
}
visualize_tracking_data: false
streaming_track_data_cache_size: 100
}
}
}

View File

@ -0,0 +1,128 @@
# MediaPipe object detection subgraph.
type: "ObjectDetectionSubgraph"
input_stream: "IMAGE:input_video"
output_stream: "DETECTIONS:output_detections"
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS_GPU:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS_GPU:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
sigmoid_score: true
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
max_num_detections: 3
overlap_type: INTERSECTION_OVER_UNION
return_empty_detections: true
}
}
}
# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option.
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}

View File

@ -0,0 +1,56 @@
# MediaPipe object tracking subgraph.
type: "ObjectTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "DETECTIONS:new_detections"
output_stream: "DETECTIONS:tracked_detections"
# Assigns a unique id to each new detection.
node {
calculator: "DetectionUniqueIdCalculator"
input_stream: "DETECTIONS:new_detections"
output_stream: "DETECTIONS:detections_with_id"
}
# Converts detections to TimedBox protos which are used as initial location
# for tracking.
node {
calculator: "DetectionsToTimedBoxListCalculator"
input_stream: "DETECTIONS:detections_with_id"
output_stream: "BOXES:start_pos"
}
# Subgraph that tracks boxes (see box_tracking.pbtxt).
node {
calculator: "BoxTrackingSubgraph"
input_stream: "VIDEO:input_video"
input_stream: "BOXES:start_pos"
input_stream: "CANCEL_ID:cancel_object_id"
output_stream: "BOXES:boxes"
}
# Manages newly detected objects and objects that are already being tracked.
# It associates duplicated detections and updates the locations of
# detections from tracking.
node: {
calculator: "TrackedDetectionManagerCalculator"
input_stream: "DETECTIONS:detections_with_id"
input_stream: "TRACKING_BOXES:boxes"
output_stream: "DETECTIONS:tracked_detections"
output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "TRACKING_BOXES"
}
sync_set {
tag_index: "DETECTIONS"
}
}
}
}
}

View File

@ -0,0 +1,29 @@
# MediaPipe object tracking rendering subgraph.
type: "RendererSubgraph"
input_stream: "IMAGE:input_image"
input_stream: "DETECTIONS:detections"
output_stream: "IMAGE:output_image"
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:detections"
output_stream: "RENDER_DATA:detections_render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
render_detection_id: true
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "INPUT_FRAME_GPU:input_image"
input_stream: "detections_render_data"
output_stream: "OUTPUT_FRAME_GPU:output_image"
}

View File

@ -50,4 +50,16 @@ public abstract class MediaPipeRunner extends Graph {
* Stops the running graph and releases the resource. Call this in Activity onDestroy callback.
*/
public abstract void release();
/**
 * Stops the running graph and releases its resources, giving up after a
 * timeout. Call this in the Activity onDestroy callback.
 *
 * <p>Like {@link #release()} but bounded by a timeout. The default
 * implementation simply delegates to {@link #release()}; subclasses can
 * override this method if they need to enforce the timeout while tearing
 * down their context.
 *
 * @param timeoutMillis maximum time, in milliseconds, to wait for resources
 *     to be released before giving up.
 */
public void release(long timeoutMillis) {
release();
}
}

View File

@ -0,0 +1,742 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
proto_library(
name = "tone_models_proto",
srcs = ["tone_models.proto"],
)
proto_library(
name = "tone_estimation_proto",
srcs = ["tone_estimation.proto"],
deps = [":tone_models_proto"],
)
proto_library(
name = "region_flow_computation_proto",
srcs = ["region_flow_computation.proto"],
deps = [
":tone_estimation_proto",
],
)
proto_library(
name = "motion_saliency_proto",
srcs = ["motion_saliency.proto"],
)
proto_library(
name = "motion_estimation_proto",
srcs = ["motion_estimation.proto"],
)
proto_library(
name = "motion_analysis_proto",
srcs = ["motion_analysis.proto"],
deps = [
":motion_estimation_proto",
":motion_saliency_proto",
":region_flow_computation_proto",
],
)
proto_library(
name = "region_flow_proto",
srcs = ["region_flow.proto"],
)
proto_library(
name = "motion_models_proto",
srcs = ["motion_models.proto"],
)
proto_library(
name = "camera_motion_proto",
srcs = ["camera_motion.proto"],
deps = [":motion_models_proto"],
)
proto_library(
name = "push_pull_filtering_proto",
srcs = ["push_pull_filtering.proto"],
)
proto_library(
name = "frame_selection_solution_evaluator_proto",
srcs = ["frame_selection_solution_evaluator.proto"],
)
proto_library(
name = "frame_selection_proto",
srcs = ["frame_selection.proto"],
deps = [
":camera_motion_proto",
":frame_selection_solution_evaluator_proto",
":region_flow_proto",
],
)
proto_library(
name = "flow_packager_proto",
srcs = ["flow_packager.proto"],
deps = [
":motion_models_proto",
":region_flow_proto",
],
)
proto_library(
name = "tracking_proto",
srcs = ["tracking.proto"],
deps = [
":motion_models_proto",
],
)
proto_library(
name = "box_tracker_proto",
srcs = ["box_tracker.proto"],
deps = [":tracking_proto"],
)
mediapipe_cc_proto_library(
name = "tone_models_cc_proto",
srcs = ["tone_models.proto"],
visibility = ["//visibility:public"],
deps = [":tone_models_proto"],
)
mediapipe_cc_proto_library(
name = "tone_estimation_cc_proto",
srcs = ["tone_estimation.proto"],
cc_deps = [":tone_models_cc_proto"],
visibility = ["//visibility:public"],
deps = [":tone_estimation_proto"],
)
mediapipe_cc_proto_library(
name = "region_flow_computation_cc_proto",
srcs = ["region_flow_computation.proto"],
cc_deps = [
":tone_estimation_cc_proto",
":tone_models_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":region_flow_computation_proto"],
)
mediapipe_cc_proto_library(
name = "motion_saliency_cc_proto",
srcs = ["motion_saliency.proto"],
visibility = ["//visibility:public"],
deps = [":motion_saliency_proto"],
)
mediapipe_cc_proto_library(
name = "motion_estimation_cc_proto",
srcs = ["motion_estimation.proto"],
visibility = ["//visibility:public"],
deps = [":motion_estimation_proto"],
)
mediapipe_cc_proto_library(
name = "motion_analysis_cc_proto",
srcs = ["motion_analysis.proto"],
cc_deps = [
":motion_estimation_cc_proto",
":motion_saliency_cc_proto",
":region_flow_computation_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":motion_analysis_proto"],
)
mediapipe_cc_proto_library(
name = "region_flow_cc_proto",
srcs = ["region_flow.proto"],
cc_deps = [":motion_models_cc_proto"],
visibility = ["//visibility:public"],
deps = [":region_flow_proto"],
)
mediapipe_cc_proto_library(
name = "motion_models_cc_proto",
srcs = ["motion_models.proto"],
visibility = ["//visibility:public"],
deps = [":motion_models_proto"],
)
mediapipe_cc_proto_library(
name = "camera_motion_cc_proto",
srcs = ["camera_motion.proto"],
cc_deps = [":motion_models_cc_proto"],
visibility = ["//visibility:public"],
deps = [":camera_motion_proto"],
)
mediapipe_cc_proto_library(
name = "push_pull_filtering_cc_proto",
srcs = ["push_pull_filtering.proto"],
visibility = ["//visibility:public"],
deps = [":push_pull_filtering_proto"],
)
mediapipe_cc_proto_library(
name = "frame_selection_solution_evaluator_cc_proto",
srcs = ["frame_selection_solution_evaluator.proto"],
visibility = ["//visibility:public"],
deps = [":frame_selection_solution_evaluator_proto"],
)
mediapipe_cc_proto_library(
name = "frame_selection_cc_proto",
srcs = ["frame_selection.proto"],
cc_deps = [
":camera_motion_cc_proto",
":frame_selection_solution_evaluator_cc_proto",
":region_flow_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":frame_selection_proto"],
)
mediapipe_cc_proto_library(
name = "flow_packager_cc_proto",
srcs = ["flow_packager.proto"],
cc_deps = [
":motion_models_cc_proto",
":region_flow_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":flow_packager_proto"],
)
mediapipe_cc_proto_library(
name = "tracking_cc_proto",
srcs = ["tracking.proto"],
cc_deps = [":motion_models_cc_proto"],
visibility = ["//visibility:public"],
deps = [":tracking_proto"],
)
mediapipe_cc_proto_library(
name = "box_tracker_cc_proto",
srcs = ["box_tracker.proto"],
cc_deps = [":tracking_cc_proto"],
visibility = ["//visibility:public"],
deps = [":box_tracker_proto"],
)
cc_library(
name = "motion_models",
srcs = ["motion_models.cc"],
hdrs = ["motion_models.h"],
deps = [
":camera_motion_cc_proto",
":motion_models_cc_proto",
":region_flow_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:singleton",
"//mediapipe/framework/port:vector",
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/strings:str_format",
"@eigen_archive//:eigen",
],
)
cc_library(
name = "motion_models_cv",
srcs = ["motion_models_cv.cc"],
hdrs = ["motion_models_cv.h"],
deps = [
":camera_motion_cc_proto",
":motion_models",
":motion_models_cc_proto",
"//mediapipe/framework/port:opencv_core",
],
)
cc_library(
name = "region_flow",
srcs = ["region_flow.cc"],
hdrs = ["region_flow.h"],
deps = [
":measure_time",
":motion_models",
":parallel_invoker",
":region_flow_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:vector",
"@com_google_absl//absl/strings",
],
)
cc_library(
name = "camera_motion",
srcs = ["camera_motion.cc"],
hdrs = ["camera_motion.h"],
deps = [
":camera_motion_cc_proto",
":motion_models",
":region_flow",
":region_flow_cc_proto",
"@com_google_absl//absl/strings:str_format",
],
)
PARALLEL_COPTS = ["-DPARALLEL_INVOKER_ACTIVE"] + select({
"//mediapipe:apple": [],
"//mediapipe:android": [],
"//conditions:default": [],
})
PARALLEL_LINKOPTS = []
cc_library(
name = "measure_time",
srcs = ["measure_time.cc"],
hdrs = ["measure_time.h"],
deps = [
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/synchronization",
"@com_google_absl//absl/time",
],
)
cc_library(
name = "parallel_invoker",
srcs = ["parallel_invoker.cc"],
hdrs = ["parallel_invoker.h"],
copts = PARALLEL_COPTS,
linkopts = PARALLEL_LINKOPTS,
deps = [
":parallel_invoker_forbid_mixed_active",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:threadpool",
"@com_google_absl//absl/synchronization",
],
)
cc_library(
name = "parallel_invoker_forbid_mixed_active",
srcs = ["parallel_invoker_forbid_mixed.cc"],
copts = PARALLEL_COPTS,
alwayslink = 1, # Forces all symbols to be included.
)
cc_library(
name = "image_util",
srcs = ["image_util.cc"],
hdrs = ["image_util.h"],
deps = [
":motion_models",
":motion_models_cc_proto",
":region_flow",
":region_flow_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:vector",
],
)
cc_library(
name = "streaming_buffer",
srcs = ["streaming_buffer.cc"],
hdrs = ["streaming_buffer.h"],
deps = [
"//mediapipe/framework/port:logging",
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:any",
],
)
cc_library(
name = "motion_estimation",
srcs = ["motion_estimation.cc"],
hdrs = ["motion_estimation.h"],
copts = PARALLEL_COPTS,
linkopts = PARALLEL_LINKOPTS,
deps = [
":camera_motion",
":camera_motion_cc_proto",
":measure_time",
":motion_estimation_cc_proto",
":motion_models",
":motion_models_cc_proto",
":parallel_invoker",
":region_flow",
":region_flow_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:vector",
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/strings",
"@eigen_archive//:eigen",
],
)
cc_library(
name = "motion_saliency",
srcs = ["motion_saliency.cc"],
hdrs = ["motion_saliency.h"],
deps = [
":camera_motion",
":measure_time",
":motion_saliency_cc_proto",
":region_flow",
":region_flow_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:vector",
],
)
cc_library(
name = "push_pull_filtering",
hdrs = ["push_pull_filtering.h"],
deps = [
":image_util",
":push_pull_filtering_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_highgui",
],
)
cc_library(
name = "tone_models",
srcs = ["tone_models.cc"],
hdrs = ["tone_models.h"],
deps = [
":tone_models_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:vector",
"@com_google_absl//absl/strings:str_format",
],
)
cc_library(
name = "tone_estimation",
srcs = ["tone_estimation.cc"],
hdrs = ["tone_estimation.h"],
deps = [
":motion_models_cc_proto",
":region_flow",
":region_flow_cc_proto",
":tone_estimation_cc_proto",
":tone_models",
":tone_models_cc_proto",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:vector",
],
)
# Region flow computation; built with the parallel-invoker copts/linkopts and
# OpenCV features2d/video for feature tracking.
cc_library(
    name = "region_flow_computation",
    srcs = ["region_flow_computation.cc"],
    hdrs = ["region_flow_computation.h"],
    copts = PARALLEL_COPTS,
    linkopts = PARALLEL_LINKOPTS,
    deps = [
        ":camera_motion_cc_proto",
        ":image_util",
        ":measure_time",
        ":motion_estimation",
        ":motion_estimation_cc_proto",
        ":motion_models",
        ":motion_models_cc_proto",
        ":parallel_invoker",
        ":region_flow",
        ":region_flow_cc_proto",
        ":region_flow_computation_cc_proto",
        ":tone_estimation",
        ":tone_estimation_cc_proto",
        ":tone_models",
        ":tone_models_cc_proto",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_features2d",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:opencv_video",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/container:node_hash_set",
        "@com_google_absl//absl/memory",
        "@eigen_archive//:eigen",
    ],
)

# Debug visualization of region flow (draws via OpenCV imgproc).
cc_library(
    name = "region_flow_visualization",
    srcs = ["region_flow_visualization.cc"],
    hdrs = ["region_flow_visualization.h"],
    deps = [
        ":measure_time",
        ":motion_models",
        ":parallel_invoker",
        ":region_flow",
        ":region_flow_cc_proto",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/strings",
    ],
)

# Top-level motion analysis; aggregates flow computation, motion estimation,
# saliency and visualization.
cc_library(
    name = "motion_analysis",
    srcs = ["motion_analysis.cc"],
    hdrs = ["motion_analysis.h"],
    deps = [
        ":camera_motion",
        ":camera_motion_cc_proto",
        ":image_util",
        ":measure_time",
        ":motion_analysis_cc_proto",
        ":motion_estimation",
        ":motion_estimation_cc_proto",
        ":motion_saliency",
        ":motion_saliency_cc_proto",
        ":push_pull_filtering",
        ":region_flow",
        ":region_flow_cc_proto",
        ":region_flow_computation",
        ":region_flow_computation_cc_proto",
        ":region_flow_visualization",
        ":streaming_buffer",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/strings:str_format",
    ],
)
# Packages camera motion + region flow into compact TrackingData chunks.
cc_library(
    name = "flow_packager",
    srcs = ["flow_packager.cc"],
    hdrs = ["flow_packager.h"],
    deps = [
        ":camera_motion",
        ":camera_motion_cc_proto",
        ":flow_packager_cc_proto",
        ":motion_estimation",
        ":motion_estimation_cc_proto",
        ":motion_models",
        ":motion_models_cc_proto",
        ":region_flow",
        ":region_flow_cc_proto",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/strings",
    ],
)

# Core tracking algorithms; needs OpenCV calib3d and the parallel-invoker
# build flags. alwayslink keeps registered symbols even if not referenced.
cc_library(
    name = "tracking",
    srcs = ["tracking.cc"],
    hdrs = ["tracking.h"],
    copts = PARALLEL_COPTS,
    linkopts = PARALLEL_LINKOPTS,
    deps = [
        ":flow_packager_cc_proto",
        ":measure_time",
        ":motion_models",
        ":motion_models_cc_proto",
        ":motion_models_cv",
        ":parallel_invoker",
        ":region_flow",
        ":tracking_cc_proto",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:opencv_calib3d",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/algorithm:container",
        "@com_google_absl//absl/memory",
        "@eigen_archive//:eigen",
    ],
    alwayslink = 1,
)

# Multi-threaded box tracking on cached TrackingData chunks
# (uses the framework threadpool and absl synchronization).
cc_library(
    name = "box_tracker",
    srcs = ["box_tracker.cc"],
    hdrs = ["box_tracker.h"],
    deps = [
        ":box_tracker_cc_proto",
        ":flow_packager_cc_proto",
        ":measure_time",
        ":tracking",
        ":tracking_cc_proto",
        "//mediapipe/framework/port:integral_types",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:threadpool",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:str_format",
        "@com_google_absl//absl/synchronization",
        "@com_google_absl//absl/time",
    ],
)

# Rendering helpers for tracked boxes / tracking state (OpenCV drawing).
cc_library(
    name = "tracking_visualization_utilities",
    srcs = ["tracking_visualization_utilities.cc"],
    hdrs = ["tracking_visualization_utilities.h"],
    deps = [
        ":box_tracker",
        ":box_tracker_cc_proto",
        ":flow_packager_cc_proto",
        ":tracking",
        ":tracking_cc_proto",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_imgproc",
        "@com_google_absl//absl/strings:str_format",
    ],
)
# Unit test for the parallel invoker; built with the same parallel flags
# as the library under test.
cc_test(
    name = "parallel_invoker_test",
    srcs = ["parallel_invoker_test.cc"],
    copts = PARALLEL_COPTS,
    linkopts = PARALLEL_LINKOPTS,
    deps = [
        ":parallel_invoker",
        "//mediapipe/framework/port:gtest_main",
        "@com_google_absl//absl/synchronization",
    ],
)

# Unit test for image_util.
cc_test(
    name = "image_util_test",
    srcs = [
        "image_util_test.cc",
    ],
    deps = [
        ":image_util",
        "//mediapipe/framework/port:gtest_main",
    ],
)

# Unit test for motion models; compares protos via message_matchers.
cc_test(
    name = "motion_models_test",
    srcs = ["motion_models_test.cc"],
    deps = [
        ":motion_estimation",
        ":motion_models",
        "//mediapipe/framework/deps:message_matchers",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:parse_text_proto",
        "//mediapipe/framework/port:vector",
    ],
)

# Region flow computation test; reads the bundled stabilize_test.png fixture.
cc_test(
    name = "region_flow_computation_test",
    srcs = ["region_flow_computation_test.cc"],
    copts = PARALLEL_COPTS,
    data = ["testdata/stabilize_test.png"],
    linkopts = PARALLEL_LINKOPTS,
    deps = [
        ":region_flow",
        ":region_flow_cc_proto",
        ":region_flow_computation",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/port:commandlineflags",
        "//mediapipe/framework/port:file_helpers",
        "//mediapipe/framework/port:gtest_main",
        "//mediapipe/framework/port:logging",
        "//mediapipe/framework/port:opencv_core",
        "//mediapipe/framework/port:opencv_highgui",
        "//mediapipe/framework/port:opencv_imgproc",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/time",
    ],
)

# Box tracker ground-truth test; reads the testdata/box_tracker chunk cache.
cc_test(
    name = "box_tracker_test",
    timeout = "short",
    srcs = ["box_tracker_test.cc"],
    data = glob(["testdata/box_tracker/*"]),
    deps = [
        ":box_tracker",
        "//mediapipe/framework/deps:file_path",
        "//mediapipe/framework/port:gtest_main",
    ],
)
# A single tracked detection (normalized rect + metadata).
cc_library(
    name = "tracked_detection",
    srcs = [
        "tracked_detection.cc",
    ],
    hdrs = [
        "tracked_detection.h",
    ],
    deps = [
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/port:vector",
        "@com_google_absl//absl/container:node_hash_map",
    ],
)

# Unit test for tracked_detection.
cc_test(
    name = "tracked_detection_test",
    srcs = [
        "tracked_detection_test.cc",
    ],
    deps = [
        ":tracked_detection",
        "//mediapipe/framework/port:gtest_main",
    ],
)

# Manages the set of currently tracked detections.
cc_library(
    name = "tracked_detection_manager",
    srcs = [
        "tracked_detection_manager.cc",
    ],
    hdrs = [
        "tracked_detection_manager.h",
    ],
    deps = [
        ":tracked_detection",
        "//mediapipe/framework/formats:rect_cc_proto",
        "@com_google_absl//absl/container:node_hash_map",
    ],
)

View File

@ -0,0 +1,2 @@
jianingwei
yegenzhi

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,372 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_
#define MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_
#include <inttypes.h>

#include <array>
#include <deque>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/strings/str_format.h"
#include "absl/synchronization/mutex.h"
#include "mediapipe/framework/port/threadpool.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
#include "mediapipe/util/tracking/flow_packager.pb.h"
#include "mediapipe/util/tracking/tracking.h"
#include "mediapipe/util/tracking/tracking.pb.h"
namespace mediapipe {
// Describes a rectangle box at an instance of time.
struct TimedBox {
  static const int kNumQuadVertices = 4;

  // Dimensions are in normalized coordinates [0, 1].
  float top = 0;
  float left = 0;
  float bottom = 0;
  float right = 0;

  // Rotation of box w.r.t. center in radians.
  float rotation = 0;

  // Timestamp of this box in milliseconds.
  int64 time_msec = 0;

  // Confidence of the tracked box in range [0, 1].
  float confidence = 0;

  // Vertices of the tracked quad; presumably kNumQuadVertices entries when
  // set — confirm with callers.
  std::vector<Vector2_f> quad_vertices;

  // Aspect ratio (width / height) for the tracked rectangle in physical space.
  float aspect_ratio = -1.0;

  // Whether we want this box to be potentially grouped with other boxes
  // to track together. This is useful for tracking small boxes that lie
  // on a plane. For example, when we detect a plane,
  // track the plane, then all boxes within the plane can share the same
  // homography transform.
  bool request_grouping = false;

  // Orders boxes chronologically by timestamp.
  bool operator<(const TimedBox& rhs) const {
    return time_msec < rhs.time_msec;
  }

  // Returns (1.0 - alpha) * lhs + alpha * rhs;
  static TimedBox Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha);

  // Returns lhs * alpha + rhs * beta;
  static TimedBox Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha,
                        double beta);

  // Human-readable summary of coordinates, rotation and timestamp.
  std::string ToString() const {
    return absl::StrFormat(
        "top: %.3f left: %.3f bottom: %.3f right: %.3f "
        "rot: %.3f t: %d",
        top, left, bottom, right, rotation, static_cast<int64_t>(time_msec));
  }

  // Returns corners of TimedBox in the requested domain.
  std::array<Vector2_f, 4> Corners(float width, float height) const;

  // Conversion to/from the TimedBoxProto wire format.
  static TimedBox FromProto(const TimedBoxProto& proto);
  TimedBoxProto ToProto() const;
};
// TimedBox augmented with internal tracking state.
struct InternalTimedBox : public TimedBox {
  InternalTimedBox() = default;

  // Convenience constructor.
  InternalTimedBox(const TimedBox& box, const MotionBoxState* state_)
      : TimedBox(box), state(state_) {}

  // Corresponding MotionBoxState for this TimedBox (shared, immutable).
  std::shared_ptr<const MotionBoxState> state;
};
// Initializes beginning tracking state from a TimedBox.
void MotionBoxStateFromTimedBox(const TimedBox& box, MotionBoxState* state);
// Retrieves box position and time from a tracking state.
void TimedBoxFromMotionBoxState(const MotionBoxState& state, TimedBox* box);
// Downgrade tracking degrees of TrackStepOptions to
// TRACKING_DEGREE_OBJECT_ROTATION_SCALE if originally
// specified TRACKING_DEGREE_OBJECT_PERSPECTIVE, but start pos
// does not contain quad or contains invalid quad.
template <typename T>
void ChangeTrackingDegreesBasedOnStartPos(
    const T& start_pos, TrackStepOptions* track_step_options) {
  const bool wants_perspective =
      track_step_options->tracking_degrees() ==
      TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE;
  // A usable quad must be present and carry exactly 8 coordinate values
  // (4 vertices x 2 coordinates).
  const bool has_valid_quad =
      start_pos.has_quad() && start_pos.quad().vertices_size() == 8;
  if (wants_perspective && !has_valid_quad) {
    track_step_options->set_tracking_degrees(
        TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE);
    VLOG(1) << "Originally specified TRACKING_DEGREE_OBJECT_PERSPECTIVE, but "
               "changed to TRACKING_DEGREE_OBJECT_ROTATION_SCALE";
  }
}
// Set of boxes for a particular checkpoint.
typedef std::deque<InternalTimedBox> PathSegment;
// Stores the PathSegment for each checkpoint time.
typedef std::map<int, PathSegment> Path;
// Returns box at specified time for a specific path segment.
// Returns true on success, otherwise box is left untouched.
// Optionally returns closest known MotionBoxState if state is not null.
bool TimedBoxAtTime(const PathSegment& segment, int64 time_msec, TimedBox* box,
MotionBoxState* state = nullptr);
// Tracks timed boxes from cached TrackingDataChunks created by
// FlowPackagerCalculator. For usage see accompanying test.
//
// Thread-safety: public methods synchronize internal state via path_mutex_
// and status_mutex_ (see GUARDED_BY annotations below); tracking itself runs
// asynchronously on tracking_workers_.
class BoxTracker {
 public:
  // Initializes a new BoxTracker to work on cached TrackingData from a chunk
  // directory.
  BoxTracker(const std::string& cache_dir, const BoxTrackerOptions& options);

  // Initializes a new BoxTracker to work on the passed TrackingDataChunks.
  // If copy_data is true, BoxTracker will retain its own copy of the data;
  // otherwise the passed pointer need to be valid for the lifetime of the
  // BoxTracker.
  BoxTracker(const std::vector<const TrackingDataChunk*>& tracking_data,
             bool copy_data, const BoxTrackerOptions& options);

  // Add single TrackingDataChunk. This chunk must be correctly aligned with
  // existing chunks. If chunk starting timestamp is larger than next valid
  // chunk timestamp, empty chunks will be added to fill the gap. If copy_data
  // is true, BoxTracker will retain its own copy of the data; otherwise the
  // passed pointers need to be valid for the lifetime of the BoxTracker.
  void AddTrackingDataChunk(const TrackingDataChunk* chunk, bool copy_data);

  // Add new TrackingDataChunks. This method allows to streamline BoxTracker
  // usage. Instead of collecting all tracking chunks and then calling the
  // constructor, it makes possible to pass only minimally required chunks to
  // constructor, and then append new chunks when available.
  // Caller is responsible to guarantee that all chunks that are necessary for
  // tracking are at the disposal of BoxTracker before calling NewBoxTrack, i.e.
  // min_msec and max_msec parameters must be in the range of chunks passed to
  // BoxTracker.
  // Chunks will be added in the order they are specified in tracking_data. Each
  // chunk must be correctly aligned with existing chunks. If chunk starting
  // timestamp is larger than next valid chunk timestamp, empty chunks will be
  // added to fill the gap. If copy_data is true, BoxTracker will retain its own
  // copy of the data; otherwise the passed pointers need to be valid for the
  // lifetime of the BoxTracker.
  void AddTrackingDataChunks(
      const std::vector<const TrackingDataChunk*>& tracking_data,
      bool copy_data);

  // Starts a new track for the specified box until tracking terminates or
  // until track length achieves max_length.
  // Does not block caller, returns immediately.
  // Note: Use positive integers for id, we reserve negative ones for debugging
  // and visualization purposes.
  void NewBoxTrack(const TimedBox& initial_pos, int id, int64 min_msec = 0,
                   int64 max_msec = std::numeric_limits<int64>::max());

  // Returns interval for which the state of the specified box is known.
  // (Returns -1, -1 if id is missing or no tracking has been done).
  std::pair<int64, int64> TrackInterval(int id);

  // Returns box position for requested box at specified time.
  // Returns false if no such box exists.
  // Optional, if record_path_states is enabled in options outputs the
  // corresponding states for the resulting TimedBox. Note, as TimedBoxes are
  // generally interpolated from the underlying data and blended across
  // checkpoints, the returned states are the closest (snapped) known tracking
  // states for the left and right checkpoint path. Therefore the size of states
  // is either two or one (if only one checkpoint exists).
  bool GetTimedPosition(int id, int64 time_msec, TimedBox* result,
                        std::vector<MotionBoxState>* states = nullptr);

  // Returns chunk index for specified time.
  int ChunkIdxFromTime(int64 msec) const {
    return msec / options_.caching_chunk_size_msec();
  }

  // Returns true if any tracking is ongoing for the specified id.
  bool IsTrackingOngoingForId(int id) LOCKS_EXCLUDED(status_mutex_);

  // Returns true if any tracking is ongoing.
  bool IsTrackingOngoing() LOCKS_EXCLUDED(status_mutex_);

  // Cancels all ongoing tracks. To avoid race conditions all NewBoxTrack's in
  // flight will also be canceled. Future NewBoxTrack's will be canceled.
  // NOTE: To resume execution, you have to call ResumeTracking() before
  // issuing more NewBoxTrack calls.
  void CancelAllOngoingTracks() LOCKS_EXCLUDED(status_mutex_);
  void ResumeTracking() LOCKS_EXCLUDED(status_mutex_);

  // Waits for all ongoing tracks to complete.
  // Optionally accepts a timeout in microseconds (== 0 for infinite wait).
  // Returns true on success, false if timeout is reached.
  // NOTE: If WaitForAllOngoingTracks timed out, CancelAllOngoingTracks() must
  // be called before destructing the BoxTracker object or dangeling running
  // threads might try to access invalid data.
  bool WaitForAllOngoingTracks(int timeout_us = 0)
      LOCKS_EXCLUDED(status_mutex_);

  // Debug function to obtain raw TrackingData closest to the specified
  // timestamp. This call will read from disk on every invocation so it is
  // expensive.
  // To not interfere with other tracking requests it is recommended that you
  // use a unique id here.
  // Returns true on success.
  bool GetTrackingData(int id, int64 request_time_msec,
                       TrackingData* tracking_data,
                       int* tracking_data_msec = nullptr);

 private:
  // Asynchronous implementation function for box tracking. Schedules forward
  // and backward tracking.
  void NewBoxTrackAsync(const TimedBox& initial_pos, int id, int64 min_msec,
                        int64 max_msec);

  // Chunk pointer paired with an ownership flag (true == caller owns).
  typedef std::pair<const TrackingDataChunk*, bool> AugmentedChunkPtr;

  // Attempts to read chunk at chunk_idx if it exists. Reads from cache
  // directory or from in memory cache.
  // Important: 2nd part of return value indicates if returned tracking data
  // will be owned by the caller (if true). In that case caller is responsible
  // for releasing the returned chunk.
  AugmentedChunkPtr ReadChunk(int id, int checkpoint, int chunk_idx);

  // Attempts to read specified chunk from caching directory. Blocks and waits
  // until chunk is available or internal time out is reached.
  // Returns nullptr if data could not be read.
  std::unique_ptr<TrackingDataChunk> ReadChunkFromCache(int id, int checkpoint,
                                                        int chunk_idx);

  // Waits with timeout for chunkfile to become available. Returns true on
  // success, false if waited till timeout or when canceled.
  bool WaitForChunkFile(int id, int checkpoint, const std::string& chunk_file)
      LOCKS_EXCLUDED(status_mutex_);

  // Determines closest index in passed TrackingDataChunk
  int ClosestFrameIndex(int64 msec, const TrackingDataChunk& chunk) const;

  // Adds new TimedBox to specified checkpoint with state.
  void AddBoxResult(const TimedBox& box, int id, int checkpoint,
                    const MotionBoxState& state);

  // Callback can only handle 5 args max.
  // Set own_data to true for args to assume ownership.
  struct TrackingImplArgs {
    TrackingImplArgs(AugmentedChunkPtr chunk_ptr,
                     const MotionBoxState& start_state_, int start_frame_,
                     int chunk_idx_, int id_, int checkpoint_, bool forward_,
                     bool first_call_, int64 min_msec_, int64 max_msec_)
        : start_state(start_state_),
          start_frame(start_frame_),
          chunk_idx(chunk_idx_),
          id(id_),
          checkpoint(checkpoint_),
          forward(forward_),
          first_call(first_call_),
          min_msec(min_msec_),
          max_msec(max_msec_) {
      // Take ownership only when the chunk was flagged as caller-owned.
      if (chunk_ptr.second) {
        chunk_data_buffer.reset(chunk_ptr.first);
      }
      chunk_data = chunk_ptr.first;
    }

    TrackingImplArgs(const TrackingImplArgs&) = default;

    // Storage for tracking data.
    std::shared_ptr<const TrackingDataChunk> chunk_data_buffer;

    // Pointer to the actual tracking data. Usually points to the buffer
    // but can also point to external data for performance reasons.
    const TrackingDataChunk* chunk_data;
    MotionBoxState start_state;
    int start_frame;
    int chunk_idx;
    int id;
    int checkpoint;
    bool forward = true;
    bool first_call = true;
    int64 min_msec;  // minimum timestamp to track to
    int64 max_msec;  // maximum timestamp to track to
  };

  // Actual tracking algorithm.
  void TrackingImpl(const TrackingImplArgs& args);

  // Ids are scheduled exclusively, run this method to acquire lock.
  // Returns false if id could not be scheduled (e.g. id got canceled during
  // waiting).
  bool WaitToScheduleId(int id) LOCKS_EXCLUDED(status_mutex_);

  // Signals end of scheduling phase. Requires status mutex to be held.
  void DoneSchedulingId(int id) EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);

  // Removes all checkpoints within vicinity of new checkpoint.
  void RemoveCloseCheckpoints(int id, int checkpoint)
      EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);

  // Removes specific checkpoint.
  void ClearCheckpoint(int id, int checkpoint)
      EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);

  // Terminates tracking for specific id and checkpoint.
  void CancelTracking(int id, int checkpoint)
      EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);

  // Implementation function for IsTrackingOngoing assuming mutex is already
  // held.
  bool IsTrackingOngoingMutexHeld() EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);

  // Captures tracking status for each checkpoint
  struct TrackStatus {
    // Indicates that all current tracking processes should be canceled.
    bool canceled = false;

    // Number of tracking requests than are currently ongoing.
    int tracks_ongoing = 0;
  };

 private:
  // Stores computed tracking paths_ for all boxes.
  std::unordered_map<int, Path> paths_ GUARDED_BY(path_mutex_);
  absl::Mutex path_mutex_;

  // For each id and each checkpoint stores current tracking status.
  std::unordered_map<int, std::map<int, TrackStatus>> track_status_
      GUARDED_BY(status_mutex_);

  // Keeps track which ids are currently processing in NewBoxTrack.
  std::unordered_map<int, bool> new_box_track_ GUARDED_BY(status_mutex_);
  absl::Mutex status_mutex_;
  bool canceling_ GUARDED_BY(status_mutex_) = false;

  // Use to signal changes to status_condvar_;
  absl::CondVar status_condvar_ GUARDED_BY(status_mutex_);

  BoxTrackerOptions options_;

  // Caching directory for TrackingData stored on disk.
  std::string cache_dir_;

  // Pointers to tracking data stored in memory.
  std::vector<const TrackingDataChunk*> tracking_data_;

  // Buffer for tracking data in case we retain a deep copy.
  std::vector<std::unique_ptr<TrackingDataChunk>> tracking_data_buffer_;

  // Workers that run the tracking algorithm.
  std::unique_ptr<ThreadPool> tracking_workers_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_

View File

@ -0,0 +1,87 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/tracking.proto";
option java_package = "com.google.mediapipe.tracking";
option java_outer_classname = "BoxTrackerProto";
// Configuration for BoxTracker (see box_tracker.h).
message BoxTrackerOptions {
  // Chunk size for caching files. Should be equal to those
  // written by the FlowPackagerCalculator.
  optional int32 caching_chunk_size_msec = 1 [default = 2500];

  // Chunk file format.
  optional string cache_file_format = 2 [default = "chunk_%04d"];

  // Number of simultaneous tracking requests.
  optional int32 num_tracking_workers = 3 [default = 8];

  // Maximum waiting time for next chunk, till function times out.
  optional int32 read_chunk_timeout_msec = 4 [default = 60000];

  // If set, box tracker will record the state for each computed TimedBox
  // across all paths.
  optional bool record_path_states = 5 [default = false];

  // Actual tracking options to be used for every step.
  optional TrackStepOptions track_step_options = 6;
}
// Next tag: 13
// Proto equivalent of struct TimedBox.
message TimedBoxProto {
  // Normalized coords - in [0, 1]
  optional float top = 1;
  optional float left = 2;
  optional float bottom = 3;
  optional float right = 4;

  // Rotation of box w.r.t. center in radians.
  optional float rotation = 7;

  // Optional quad representation of the box region.
  optional MotionBoxState.Quad quad = 9;

  // Timestamp of the box in milliseconds.
  optional int64 time_msec = 5 [default = 0];

  // Unique per object id to disambiguate boxes.
  optional int32 id = 6 [default = -1];

  // Confidence of box tracked in the range [0, 1], with 0 being least
  // confident, and 1 being most confident. A reasonable threshold is 0.5
  // to filter out unconfident boxes.
  optional float confidence = 8;

  // Aspect ratio (width / height) for the tracked rectangle in physical space.
  // If this field is provided, quad tracking will be performed using
  // 6 degrees of freedom perspective transform between physical rectangle and
  // frame quad. Otherwise, 8 degrees of freedom homography tracking between
  // adjacent frames will be used.
  optional float aspect_ratio = 10;

  // Whether or not to enable reacquisition functionality for this specific box.
  optional bool reacquisition = 11 [default = false];

  // Whether we want this box to be potentially grouped with other boxes
  // to track together. This is useful for tracking small boxes that lie
  // on a plane. For example, when we detect a plane,
  // track the plane, then all boxes within the plane can share the same
  // homography transform.
  optional bool request_grouping = 12 [default = false];
}
// A list of timed boxes, e.g. all boxes tracked for one frame or request.
message TimedBoxProtoList {
  repeated TimedBoxProto box = 1;
}

View File

@ -0,0 +1,117 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/box_tracker.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/port/gtest.h"
namespace mediapipe {
namespace {
constexpr double kWidth = 1280.0;
constexpr double kHeight = 720.0;
// Ground truth test; testing tracking accuracy and multi-thread load testing.
TEST(BoxTrackerTest, MovingBoxTest) {
  const std::string cache_dir =
      file::JoinPath("./", "/mediapipe/util/tracking/testdata/box_tracker");
  BoxTracker box_tracker(cache_dir, BoxTrackerOptions());

  // Ground truth positions of the overlay (linear in between).
  // @ 0: (50, 100)
  // @ 3000: (50, 400)
  // @ 6000: (500, 400)
  // @ 9000: (1000, 50)
  // @ 12000: (50, 100)
  // @ 15000: (1000, 400)
  //
  // size of overlay: 220 x 252
  std::vector<Vector2_d> positions{
      {50.0 / kWidth, 100.0 / kHeight},   {50.0 / kWidth, 400.0 / kHeight},
      {500.0 / kWidth, 400.0 / kHeight},  {1000.0 / kWidth, 50.0 / kHeight},
      {50.0 / kWidth, 100.0 / kHeight},   {1000.0 / kWidth, 400.0 / kHeight},
  };
  const Vector2_d overlay_sz(220.0 / kWidth, 252.0 / kHeight);

  // Start tracking from the second ground-truth keyframe (t = 3000 ms).
  TimedBox initial_pos;
  initial_pos.left = positions[1].x();
  initial_pos.top = positions[1].y();
  initial_pos.right = initial_pos.left + overlay_sz.x();
  initial_pos.bottom = initial_pos.top + overlay_sz.y();
  initial_pos.time_msec = 3000;

  // Test multithreading under load, ensure this does not crash or stall.
  box_tracker.NewBoxTrack(initial_pos, 0);

  // Cancel right after issuing.
  box_tracker.CancelAllOngoingTracks();

  // Should not be scheduled.
  box_tracker.NewBoxTrack(initial_pos, 0);
  EXPECT_FALSE(box_tracker.IsTrackingOngoing());
  box_tracker.ResumeTracking();
  box_tracker.NewBoxTrack(initial_pos, 0);

  // Two cancelations in a row should not block.
  box_tracker.CancelAllOngoingTracks();
  box_tracker.CancelAllOngoingTracks();
  box_tracker.ResumeTracking();

  // Start again for real this time.
  box_tracker.NewBoxTrack(initial_pos, 0);

  // Wait to terminate.
  box_tracker.WaitForAllOngoingTracks();

  // Check that tracking did not abort.
  EXPECT_EQ(0, box_tracker.TrackInterval(0).first);
  EXPECT_GT(box_tracker.TrackInterval(0).second, 15000);

  // Boxes match if timestamps agree exactly and every edge is within
  // kAccuracy in normalized coordinates.
  auto boxes_equal = [](const TimedBox& lhs, const TimedBox& rhs) {
    constexpr float kAccuracy = 0.015f;
    return (lhs.time_msec == rhs.time_msec &&
            std::abs(lhs.top - rhs.top) < kAccuracy &&
            std::abs(lhs.left - rhs.left) < kAccuracy &&
            std::abs(lhs.right - rhs.right) < kAccuracy &&
            std::abs(lhs.bottom - rhs.bottom) < kAccuracy);
  };

  // Sample the track roughly once per frame (33 ms) and compare against the
  // piecewise-linear ground truth.
  for (int k = 0; k < 15000; k += 33) {
    TimedBox box;
    EXPECT_TRUE(box_tracker.GetTimedPosition(0, k, &box));

    // One groundtruth position every 3s, linear in between.
    const int rect_pos = k / 3000;
    const int dt = k - rect_pos * 3000;
    const int next_rect_pos = dt == 0 ? rect_pos : (rect_pos + 1);

    // Blend to get initial position.
    const double alpha = dt / 3000.0;
    const Vector2_d gt_pos =
        (1.0 - alpha) * positions[rect_pos] + alpha * positions[next_rect_pos];

    TimedBox gt_box;
    gt_box.time_msec = k;
    gt_box.top = gt_pos.y();
    gt_box.left = gt_pos.x();
    gt_box.right = gt_box.left + overlay_sz.x();
    gt_box.bottom = gt_box.top + overlay_sz.y();
    EXPECT_TRUE(boxes_equal(gt_box, box));
  }
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,408 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/camera_motion.h"
#include <numeric>
#include "absl/strings/str_format.h"
#include "mediapipe/util/tracking/region_flow.h"
namespace mediapipe {
// Extracts the translation component of camera_motion into model.
// If no translation is present, model is reset to its cleared (identity)
// state.
void CameraMotionToTranslation(const CameraMotion& camera_motion,
                               TranslationModel* model) {
  if (!camera_motion.has_translation()) {
    model->Clear();
    return;
  }
  *model = camera_motion.translation();
}
void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion,
LinearSimilarityModel* model) {
if (camera_motion.has_linear_similarity()) {
model->CopyFrom(camera_motion.linear_similarity());
} else {
TranslationModel translation;
CameraMotionToTranslation(camera_motion, &translation);
model->CopyFrom(LinearSimilarityAdapter::Embed(translation));
}
}
void CameraMotionToAffine(const CameraMotion& camera_motion,
AffineModel* model) {
if (camera_motion.has_affine()) {
model->CopyFrom(camera_motion.affine());
} else {
LinearSimilarityModel similarity;
CameraMotionToLinearSimilarity(camera_motion, &similarity);
model->CopyFrom(AffineAdapter::Embed(similarity));
}
}
void CameraMotionToHomography(const CameraMotion& camera_motion,
Homography* model) {
if (camera_motion.has_homography()) {
model->CopyFrom(camera_motion.homography());
} else {
AffineModel affine;
CameraMotionToAffine(camera_motion, &affine);
model->CopyFrom(HomographyAdapter::Embed(affine));
}
}
void CameraMotionToMixtureHomography(const CameraMotion& camera_motion,
MixtureHomography* model) {
if (camera_motion.has_mixture_homography()) {
model->CopyFrom(camera_motion.mixture_homography());
} else {
Homography homography;
CameraMotionToHomography(camera_motion, &homography);
model->CopyFrom(MixtureHomographyAdapter::Embed(homography, 1));
}
}
// Composes two CameraMotions: each present motion model is composed via
// ModelCompose2(lhs, rhs), and quality/stability metrics are merged
// conservatively (max of errors/instability, min of inlier ratios).
// Both motions must refer to the same frame dimensions.
CameraMotion ComposeCameraMotion(const CameraMotion& lhs,
                                 const CameraMotion& rhs) {
  CHECK_EQ(lhs.frame_width(), rhs.frame_width());
  CHECK_EQ(lhs.frame_height(), rhs.frame_height());

  // Start from rhs so fields not explicitly merged below carry over from it.
  CameraMotion result = rhs;
  if (lhs.has_translation() || rhs.has_translation()) {
    *result.mutable_translation() =
        ModelCompose2(lhs.translation(), rhs.translation());
  }

  if (lhs.has_similarity() || rhs.has_similarity()) {
    *result.mutable_similarity() =
        ModelCompose2(lhs.similarity(), rhs.similarity());
  }

  if (lhs.has_linear_similarity() || rhs.has_linear_similarity()) {
    *result.mutable_linear_similarity() =
        ModelCompose2(lhs.linear_similarity(), rhs.linear_similarity());
  }

  if (lhs.has_affine() || rhs.has_affine()) {
    *result.mutable_affine() = ModelCompose2(lhs.affine(), rhs.affine());
  }

  if (lhs.has_homography() || rhs.has_homography()) {
    *result.mutable_homography() =
        ModelCompose2(lhs.homography(), rhs.homography());
  }

  if (rhs.has_mixture_homography()) {
    if (lhs.has_mixture_homography()) {
      LOG(ERROR) << "Mixture homographies are not closed under composition, "
                 << "Only rhs mixtures composed with lhs homographies "
                 << "are supported.";
    } else if (lhs.type() <= CameraMotion::UNSTABLE_SIM) {
      // We only composit base model when stability is sufficient.
      *result.mutable_mixture_homography() =
          MixtureHomographyAdapter::ComposeLeft(rhs.mixture_homography(),
                                                lhs.homography());
    }
  } else if (lhs.has_mixture_homography()) {
    LOG(ERROR) << "Only rhs mixtures supported.";
  }

  // Select max unstable type.
  result.set_type(std::max(lhs.type(), rhs.type()));
  result.set_average_magnitude(lhs.average_magnitude() +
                               rhs.average_magnitude());
  result.set_translation_variance(
      std::max(lhs.translation_variance(), rhs.translation_variance()));
  result.set_similarity_inlier_ratio(
      std::min(lhs.similarity_inlier_ratio(), rhs.similarity_inlier_ratio()));
  result.set_similarity_strict_inlier_ratio(
      std::min(lhs.similarity_strict_inlier_ratio(),
               rhs.similarity_strict_inlier_ratio()));
  result.set_average_homography_error(
      std::max(lhs.average_homography_error(), rhs.average_homography_error()));
  result.set_homography_inlier_coverage(std::min(
      lhs.homography_inlier_coverage(), rhs.homography_inlier_coverage()));
  result.set_homography_strict_inlier_coverage(
      std::min(lhs.homography_strict_inlier_coverage(),
               rhs.homography_strict_inlier_coverage()));

  // TODO: Overlay stuff.
  result.set_flags(lhs.flags() | rhs.flags());
  result.set_timestamp_usec(
      std::max(lhs.timestamp_usec(), rhs.timestamp_usec()));
  result.set_match_frame(lhs.match_frame() + rhs.match_frame());

  // TODO: Rest.
  return result;
}
// Returns a copy of |motion| in which every parametric model that is present
// has been replaced by its inverse. Mixture homographies are not closed under
// inversion; they are copied unchanged and an error is logged.
CameraMotion InvertCameraMotion(const CameraMotion& motion) {
  CameraMotion result = motion;
  if (motion.has_translation()) {
    *result.mutable_translation() = ModelInvert(motion.translation());
  }
  if (motion.has_similarity()) {
    *result.mutable_similarity() = ModelInvert(motion.similarity());
  }
  if (motion.has_linear_similarity()) {
    *result.mutable_linear_similarity() =
        ModelInvert(motion.linear_similarity());
  }
  if (motion.has_affine()) {
    *result.mutable_affine() = ModelInvert(motion.affine());
  }
  if (motion.has_homography()) {
    *result.mutable_homography() = ModelInvert(motion.homography());
  }
  if (motion.has_mixture_homography()) {
    LOG(ERROR) << "Mixture homographies are not closed under inversion.";
  }
  return result;
}
// Subtracts the estimated background (camera) motion from each feature's flow
// vector in place, leaving only residual foreground motion.
// If the first camera motion carries a mixture homography, mixtures are used
// for all frames, with row weights derived from the first feature list's
// frame height; otherwise per-frame homographies are used.
// camera_motions may be larger than feature_lists; extra motions are ignored.
void SubtractCameraMotionFromFeatures(
    const std::vector<CameraMotion>& camera_motions,
    std::vector<RegionFlowFeatureList*>* feature_lists) {
  CHECK(feature_lists != nullptr);
  CHECK_GE(camera_motions.size(), feature_lists->size());
  if (feature_lists->empty()) {
    return;
  }
  // Mixture usage is decided from the first frame only, and a single set of
  // row weights is shared across all frames below.
  // NOTE(review): this assumes all frames share the first frame's height --
  // confirm with callers.
  bool use_mixtures = camera_motions[0].has_mixture_homography();
  std::unique_ptr<MixtureRowWeights> row_weights;
  if (use_mixtures) {
    row_weights.reset(MixtureRowWeightsFromCameraMotion(
        camera_motions[0], (*feature_lists)[0]->frame_height()));
  }
  for (int k = 0; k < feature_lists->size(); ++k) {
    Homography background_model;
    MixtureHomography background_model_mixture;
    if (use_mixtures) {
      CameraMotionToMixtureHomography(camera_motions[k],
                                      &background_model_mixture);
    } else {
      CameraMotionToHomography(camera_motions[k], &background_model);
    }
    // Remove motion due to camera motion, leaving only foreground motion.
    for (auto& feature : *(*feature_lists)[k]->mutable_feature()) {
      // Background motion = displacement of the feature's location under the
      // background model.
      const Vector2_f background_motion =
          (!use_mixtures ? HomographyAdapter::TransformPoint(
                               background_model, FeatureLocation(feature))
                         : MixtureHomographyAdapter::TransformPoint(
                               background_model_mixture, *row_weights,
                               FeatureLocation(feature))) -
          FeatureLocation(feature);
      const Vector2_f object_motion = FeatureFlow(feature) - background_motion;
      feature.set_dx(object_motion.x());
      feature.set_dy(object_motion.y());
    }
  }
}
float ForegroundMotion(const CameraMotion& camera_motion,
const RegionFlowFeatureList& feature_list) {
if (camera_motion.has_mixture_homography()) {
LOG(WARNING) << "Mixture homographies are present but function is only "
<< "using homographies. Truncation error likely.";
}
Homography background_motion;
CameraMotionToHomography(camera_motion, &background_motion);
float foreground_motion = 0;
for (auto& feature : feature_list.feature()) {
const float error = (FeatureMatchLocation(feature) -
HomographyAdapter::TransformPoint(
background_motion, FeatureLocation(feature)))
.Norm();
foreground_motion += error;
}
if (feature_list.feature_size() > 0) {
foreground_motion *= 1.0f / feature_list.feature_size();
}
return foreground_motion;
}
// Seeds |camera_motion| with the per-frame metadata carried by
// |feature_list|. No motion models are set here.
void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list,
                                     CameraMotion* camera_motion) {
  // Frame geometry.
  camera_motion->set_frame_width(feature_list.frame_width());
  camera_motion->set_frame_height(feature_list.frame_height());
  // Temporal information.
  camera_motion->set_timestamp_usec(feature_list.timestamp_usec());
  camera_motion->set_match_frame(feature_list.match_frame());
  // Feature quality measures.
  camera_motion->set_blur_score(feature_list.blur_score());
  camera_motion->set_frac_long_features_rejected(
      feature_list.frac_long_features_rejected());
}
// Renders the bit flags of |camera_motion| as a '|'-terminated concatenation
// of flag names, in a fixed order. BLURRY_FRAME additionally reports the
// bluriness value. Flags without a string form here (e.g. CENTER_FRAME) are
// not rendered.
std::string CameraMotionFlagToString(const CameraMotion& camera_motion) {
  const int flags = camera_motion.flags();
  std::string text;
  if (flags & CameraMotion::FLAG_SHOT_BOUNDARY) {
    text += "SHOT_BOUNDARY|";
  }
  // Blurriness carries a value, so it is formatted separately.
  if (flags & CameraMotion::FLAG_BLURRY_FRAME) {
    text += absl::StrFormat("BLURRY_FRAME %.2f|", camera_motion.bluriness());
  }
  // Remaining value-less flags, in the original append order.
  static const struct {
    int flag;
    const char* name;
  } kFlagNames[] = {
      {CameraMotion::FLAG_MAJOR_OVERLAY, "MAJOR_OVERLAY|"},
      {CameraMotion::FLAG_SHARP_FRAME, "SHARP_FRAME|"},
      {CameraMotion::FLAG_SHOT_FADE, "SHOT_FADE|"},
      {CameraMotion::FLAG_DUPLICATED, "DUPLICATED|"},
  };
  for (const auto& entry : kFlagNames) {
    if (flags & entry.flag) {
      text += entry.name;
    }
  }
  return text;
}
std::string CameraMotionTypeToString(const CameraMotion& motion) {
switch (motion.type()) {
case CameraMotion::VALID:
return "VALID";
case CameraMotion::UNSTABLE_HOMOG:
return "UNSTABLE_HOMOG";
case CameraMotion::UNSTABLE_SIM:
return "UNSTABLE_SIM";
case CameraMotion::UNSTABLE:
return "UNSTABLE";
case CameraMotion::INVALID:
return "INVALID";
}
return "NEVER HAPPENS WITH CLANG";
}
// Returns inlier coverage in [0, 1]: the mean of the per-block mixture
// coverages when present (and not overridden by use_homography_coverage),
// otherwise the homography coverage. Returns 1.0f when no homography is
// available.
// NOTE(review): the header comment states 0 is returned when neither model is
// present, but this implementation returns 1.0f -- confirm intended fallback.
float InlierCoverage(const CameraMotion& camera_motion,
                     bool use_homography_coverage) {
  const int num_block_coverages = camera_motion.mixture_inlier_coverage_size();
  const bool use_mixture =
      num_block_coverages > 0 && !use_homography_coverage;
  if (use_mixture) {
    // Average the per-block coverages.
    const float sum =
        std::accumulate(camera_motion.mixture_inlier_coverage().begin(),
                        camera_motion.mixture_inlier_coverage().end(), 0.0f);
    return sum * (1.0f / num_block_coverages);
  }
  return camera_motion.has_homography()
             ? camera_motion.homography_inlier_coverage()
             : 1.0f;
}
// Computes a replacement motion for the first frame such that it equals the
// accumulated inverse of the motions over frames [1, N), enabling a perfect
// loop. A model type is only replaced if (a) the first motion has it, and
// (b) every subsequent frame estimated it stably; otherwise the first
// frame's original model is retained.
template <class CameraMotionContainer>
CameraMotion FirstCameraMotionForLooping(
    const CameraMotionContainer& camera_motions) {
  if (camera_motions.size() < 2) {
    LOG(ERROR) << "Not enough camera motions for refinement.";
    return CameraMotion();
  }
  CameraMotion loop_motion = camera_motions[0];
  // Only update motions present in first camera motion.
  bool has_translation = loop_motion.has_translation();
  bool has_similarity = loop_motion.has_linear_similarity();
  bool has_homography = loop_motion.has_homography();
  // Accumulators for the inverse motions; start at identity.
  TranslationModel translation;
  LinearSimilarityModel similarity;
  Homography homography;
  for (int i = 1; i < camera_motions.size(); ++i) {
    const CameraMotion& motion = camera_motions[i];
    if (motion.has_mixture_homography()) {
      // TODO: Implement
      LOG(WARNING) << "This function does not validly apply mixtures; "
                   << "which are currently not closed under composition. ";
    }
    // Demote availability based on this frame's stability: higher
    // degree-of-freedom models require every frame in the chain to be stable.
    switch (motion.type()) {
      case CameraMotion::INVALID:
        has_translation = false;
        has_similarity = false;
        has_homography = false;
        break;
      case CameraMotion::UNSTABLE:
        has_similarity = false;
        has_homography = false;
        break;
      case CameraMotion::UNSTABLE_SIM:
        has_homography = false;
        break;
      case CameraMotion::VALID:
      case CameraMotion::UNSTABLE_HOMOG:
        break;
      default:
        LOG(FATAL) << "Unknown CameraMotion::type.";
    }
    // Only accumulate motions which are valid for the entire chain, otherwise
    // keep the pre-initialized motions.
    // Note: relies on && short-circuiting so AccumulateInvertedModel is
    // skipped once a model type has been invalidated.
    has_translation =
        has_translation && motion.has_translation() &&
        AccumulateInvertedModel(motion.translation(), &translation);
    has_similarity =
        has_similarity && motion.has_linear_similarity() &&
        AccumulateInvertedModel(motion.linear_similarity(), &similarity);
    has_homography = has_homography && motion.has_homography() &&
                     AccumulateInvertedModel(motion.homography(), &homography);
  }
  if (has_translation) {
    *loop_motion.mutable_translation() = translation;
  }
  // Similarity and homography are only replaced if their accumulated result
  // is stably invertible.
  if (has_similarity && IsInverseStable(similarity)) {
    *loop_motion.mutable_linear_similarity() = similarity;
  }
  if (has_homography && IsInverseStable(homography)) {
    *loop_motion.mutable_homography() = homography;
  }
  VLOG(1) << "Looping camera motion refinement for "
          << " translation:" << (has_translation ? "successful" : "failed")
          << " similarity:" << (has_similarity ? "successful" : "failed")
          << " homography:" << (has_homography ? "successful" : "failed");
  return loop_motion;
}
// Explicit instantiations for the container types used by callers.
template CameraMotion FirstCameraMotionForLooping<std::vector<CameraMotion>>(
    const std::vector<CameraMotion>&);
template CameraMotion FirstCameraMotionForLooping<std::deque<CameraMotion>>(
    const std::deque<CameraMotion>&);
} // namespace mediapipe

View File

@ -0,0 +1,336 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
#define MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_
#include <vector>
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
// Helper functions to extract specific models from CameraMotion.
// Returned is always the requested model. In case, model is not present (i.e.
// has_<motion model> fails), the highest degree of freedom model
// (lower or equal to the requested model) that is present is embedded in the
// requested model.
// Presence of the model depends on which models were requested to be
// estimated (via MotionEstimationOptions, to initialize requested models to
// identity, use ResetMotionModels above). For example, assume linear similarity
// was not requested to be estimated, but affine was requested. If
// CameraMotionToLinearSimilarity is called, has_linear_similarity would be
// false and the function fall back returning a translation model.
void CameraMotionToTranslation(const CameraMotion& camera_motion,
TranslationModel* model);
void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion,
LinearSimilarityModel* model);
void CameraMotionToAffine(const CameraMotion& camera_motion,
AffineModel* model);
void CameraMotionToHomography(const CameraMotion& camera_motion,
Homography* homography);
void CameraMotionToMixtureHomography(const CameraMotion& camera_motion,
MixtureHomography* mixture);
// TODO: Under development ...
// Returns camera motion lhs * rhs. Initial camera motion is set to rhs
// before composition.
CameraMotion ComposeCameraMotion(const CameraMotion& lhs,
const CameraMotion& rhs);
// Inverts every motion model that is set in CameraMotion.
CameraMotion InvertCameraMotion(const CameraMotion& motion);
// Templated wrapper for above calls.
template <class Model>
Model CameraMotionToModel(const CameraMotion& camera_motion);
// Returns model from passed CameraMotion specified by unstable_type
// (which must name a type != VALID, CHECK-ed) and embeds it in the specified
// Model.
template <class Model>
Model UnstableCameraMotionToModel(const CameraMotion& camera_motion,
CameraMotion::Type unstable_type);
// Projects passed model to lower degree of freedom model (embedded in original
// type), as specified type. In case type is valid, function is effectively
// identity function.
// Only implemented for the following models:
// - Translation
// - LinearSimilarity
// - AffineModel
template <class Model>
Model ProjectToTypeModel(const Model& model, float frame_width,
float frame_height, CameraMotion::Type type);
// Subtract camera motion (specifically the highest degree of freedom model
// that has been estimated reliably) from feature lists. Operates on vectors
// for improved performance. Size of camera_motions can be larger than
// feature_lists, in this case last camera motions are ignored.
void SubtractCameraMotionFromFeatures(
const std::vector<CameraMotion>& camera_motions,
std::vector<RegionFlowFeatureList*>* feature_lists);
// Returns average motion magnitude after subtracting camera motion.
float ForegroundMotion(const CameraMotion& camera_motion,
const RegionFlowFeatureList& feature_list);
// Initializes a CameraMotion with its corresponding fields from a
// RegionFlowFeatureList.
void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list,
CameraMotion* camera_motion);
// Converts Camera motion flag to std::string.
std::string CameraMotionFlagToString(const CameraMotion& motion);
// Converts Camera motion type to std::string. Used instead of builtin proto
// function for mobile support.
std::string CameraMotionTypeToString(const CameraMotion& motion);
// Returns inlier coverage either based on mixture (if present, in this case
// return mean of block coverages) or else homography.
// If neither is present, returns 0 to signal insufficient inliers.
// If use_homography_coverage is set, uses homography even when mixture is
// present.
float InlierCoverage(const CameraMotion& camera_motion,
bool use_homography_coverage);
// Downsamples passed motion models temporally by specified downsample_scale,
// i.e. for models F_0, F_1, F_2, F_3, F_4 and downsample_scale of 2, models:
// F_0 * F_1, F_2 * F_3 and F_4 are returned.
// Optionally also performs downsampling of corresponding model_type returning
// the least unstable for each composition.
template <class Model>
void DownsampleMotionModels(
const std::vector<Model>& models,
const std::vector<CameraMotion::Type>* model_type, // optional.
int downsample_scale, std::vector<Model>* downsampled_models,
std::vector<CameraMotion::Type>* downsampled_types);
// Compatible subsampling method to above DownsampleMotionModels.
// Note, when downsampling for example:
// F_0, F_1, F_2, F_3, F_4 by factor 3 via above function, downsampled result
// will be F_0 * F_1 * F_2, F_3 * F_4
// so we would need to pick entities at F_2 and F_4.
// Template class Container must be SequenceContainer, like
// std::vector, std::deque.
template <class Container>
void SubsampleEntities(const Container& input, int downsample_scale,
Container* output);
// For perfect looping, this function computes the motion in the first frame
// to be the inverse of the accumulated motion from frame 1 to N.
// If a particular motion type is not available or not invertible at any
// frame pair, the original motion for that type is retained.
// Does not work if mixtures are present.
template <class CameraMotionContainer> // STL container of CameraMotion's
CameraMotion FirstCameraMotionForLooping(
const CameraMotionContainer& container);
// Template implementation functions.
// Extracts the highest reliable lower degree-of-freedom model implied by
// |unstable_type| and embeds it into the requested Model type.
// |unstable_type| must not be VALID (LOG(FATAL) otherwise).
template <class Model>
Model UnstableCameraMotionToModel(const CameraMotion& camera_motion,
                                  CameraMotion::Type unstable_type) {
  switch (unstable_type) {
    case CameraMotion::INVALID:
      return Model();  // Identity.
    case CameraMotion::UNSTABLE: {
      // Similarity unreliable -> fall back to translation.
      return ModelAdapter<Model>::Embed(
          CameraMotionToModel<TranslationModel>(camera_motion));
    }
    case CameraMotion::UNSTABLE_SIM: {
      // Homography unreliable -> fall back to linear similarity.
      return ModelAdapter<Model>::Embed(
          CameraMotionToModel<LinearSimilarityModel>(camera_motion));
    }
    case CameraMotion::UNSTABLE_HOMOG: {
      // Mixture unreliable -> fall back to homography.
      return ModelAdapter<Model>::Embed(
          CameraMotionToModel<Homography>(camera_motion));
    }
    case CameraMotion::VALID:
      LOG(FATAL) << "Specify a type != VALID";
      return Model();
  }
}
// Translation specialization: translation is already the lowest
// degree-of-freedom model, so only an INVALID motion degrades further
// (to identity).
template <>
inline TranslationModel ProjectToTypeModel(const TranslationModel& model,
                                           float frame_width,
                                           float frame_height,
                                           CameraMotion::Type type) {
  if (type == CameraMotion::INVALID) {
    return TranslationModel();  // Identity.
  }
  return model;
}
// LinearSimilarity specialization: degrades to identity for INVALID and to
// a translation (re-embedded as a similarity) for UNSTABLE; otherwise the
// model is returned unchanged.
template <>
inline LinearSimilarityModel ProjectToTypeModel(
    const LinearSimilarityModel& model, float frame_width, float frame_height,
    CameraMotion::Type type) {
  if (type == CameraMotion::INVALID) {
    return LinearSimilarityModel();  // Identity.
  }
  if (type == CameraMotion::UNSTABLE) {
    return LinearSimilarityAdapter::Embed(
        TranslationAdapter::ProjectFrom(model, frame_width, frame_height));
  }
  return model;
}
// Generic version: projects |model| down to the degree of freedom implied by
// |type| and embeds the result back into Model.
template <class Model>
Model ProjectToTypeModel(const Model& model, float frame_width,
                         float frame_height, CameraMotion::Type type) {
  if (type == CameraMotion::INVALID) {
    return Model();  // Identity.
  }
  if (type == CameraMotion::UNSTABLE) {
    return ModelAdapter<Model>::Embed(
        TranslationAdapter::ProjectFrom(model, frame_width, frame_height));
  }
  if (type == CameraMotion::UNSTABLE_SIM) {
    return ModelAdapter<Model>::Embed(LinearSimilarityAdapter::ProjectFrom(
        model, frame_width, frame_height));
  }
  // case UNSTABLE_HOMOG does not occur except for mixtures; VALID keeps the
  // model unchanged.
  return model;
}
// Mixtures cannot be projected to lower degree-of-freedom models; invoking
// this specialization is a programming error (LOG(FATAL)).
template <>
inline MixtureHomography ProjectToTypeModel(const MixtureHomography&, float,
                                            float, CameraMotion::Type) {
  LOG(FATAL) << "Projection not supported for mixtures.";
  return MixtureHomography();
}
// Temporally downsamples |models| by composing each run of
// |downsample_scale| consecutive models into one (see header for details).
// If |model_type| is supplied, also emits the least stable type within each
// window into |downsampled_types|.
template <class Model>
void DownsampleMotionModels(
    const std::vector<Model>& models,
    const std::vector<CameraMotion::Type>* model_type, int downsample_scale,
    std::vector<Model>* downsampled_models,
    std::vector<CameraMotion::Type>* downsampled_types) {
  CHECK(downsampled_models) << "Expecting output models.";
  if (model_type) {
    CHECK_EQ(models.size(), model_type->size());
    // The loop below writes to downsampled_types whenever model_type is
    // given; check explicitly instead of dereferencing null.
    CHECK(downsampled_types) << "Expecting output types.";
  }
  downsampled_models->clear();
  if (downsampled_types) {
    downsampled_types->clear();
  }
  const int num_models = models.size();
  for (int model_idx = 0; model_idx < num_models;
       model_idx += downsample_scale) {
    // Window covers [model_idx, last_idx], clipped at the end of the input.
    const int last_idx =
        std::min<int>(model_idx + downsample_scale, num_models) - 1;
    CameraMotion::Type sampled_type = CameraMotion::VALID;
    if (model_type) {
      // Get least stable model within downsample window (max operation).
      for (int i = model_idx; i <= last_idx; ++i) {
        sampled_type = std::max(sampled_type, model_type->at(i));
      }
      downsampled_types->push_back(sampled_type);
    }
    // Concatenate models; result is models[model_idx] * ... * models[last_idx].
    Model composed = models[last_idx];
    for (int i = last_idx - 1; i >= model_idx; --i) {
      composed = ModelCompose2(models[i], composed);
    }
    downsampled_models->push_back(composed);
  }
}
// Subsamples |input| by |downsample_factor|, keeping entries at indices
// downsample_factor - 1, 2 * downsample_factor - 1, ... If the input size is
// not a multiple of the factor, the last entry is appended as termination
// (compatible with DownsampleMotionModels above).
template <class Container>
void SubsampleEntities(const Container& input, int downsample_factor,
                       Container* output) {
  CHECK(output);
  output->clear();
  if (input.empty()) {
    return;
  }
  for (int idx = downsample_factor - 1; idx < input.size();
       idx += downsample_factor) {
    output->push_back(input[idx]);
  }
  // We need to add last constraint as termination.
  if (input.size() % downsample_factor != 0) {
    output->push_back(input.back());
  }
}
// CameraMotionToModel specializations: each delegates to the corresponding
// CameraMotionTo* function declared above, which embeds a lower
// degree-of-freedom model into the requested type if the requested model is
// absent.
template <>
inline TranslationModel CameraMotionToModel(const CameraMotion& camera_motion) {
  TranslationModel model;
  CameraMotionToTranslation(camera_motion, &model);
  return model;
}
template <>
inline LinearSimilarityModel CameraMotionToModel(
    const CameraMotion& camera_motion) {
  LinearSimilarityModel model;
  CameraMotionToLinearSimilarity(camera_motion, &model);
  return model;
}
template <>
inline AffineModel CameraMotionToModel(const CameraMotion& camera_motion) {
  AffineModel model;
  CameraMotionToAffine(camera_motion, &model);
  return model;
}
template <>
inline Homography CameraMotionToModel(const CameraMotion& camera_motion) {
  Homography model;
  CameraMotionToHomography(camera_motion, &model);
  return model;
}
template <>
inline MixtureHomography CameraMotionToModel(
    const CameraMotion& camera_motion) {
  MixtureHomography model;
  CameraMotionToMixtureHomography(camera_motion, &model);
  return model;
}
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_

View File

@ -0,0 +1,198 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Describes camera motion between two frames with various degree of freedom
// parametric motion models.
// In addition, stores features describing how reliable the estimated motion
// model is.
// Flags indicate several properties derived from the camera motion, e.g. if a
// frame is sharp, blurry or contains overlays.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/motion_models.proto";
// Next tag: 33
message CameraMotion {
// Background motion expressed in various models.
// These are per-frame pair motions (from current to previous frame).
// Models are expressed in the un-normalized domain frame_width x frame_height
// that is passed to MotionEstimation (stored below).
optional TranslationModel translation = 1;
optional SimilarityModel similarity = 2;
optional LinearSimilarityModel linear_similarity = 3;
optional AffineModel affine = 4;
optional Homography homography = 5;
optional MixtureHomography mixture_homography = 8;
// Frame dimensions camera motion was computed over.
optional float frame_width = 31;
optional float frame_height = 32;
// Mixture homographies computed w.r.t. exponentially increasing
// regularizers. Above mixture_homography member is selected from spectrum
// based on amount of rolling shutter present in the video.
repeated MixtureHomography mixture_homography_spectrum = 23;
// Relative row sigma w.r.t. frame_height for mixture models.
optional float mixture_row_sigma = 10;
// Average of all motion vector magnitudes (without accounting for any motion
// model), within 10th to 90th percentile (to remove outliers).
optional float average_magnitude = 24 [default = 0.0];
// Inlier-weighted variance of the translation model.
// Specified, w.r.t. unnormalized video domain that motion models
// are computed for.
optional float translation_variance = 25 [default = 0.0];
// Ratio of inliers w.r.t. regular and stricter thresholds. In [0, 1].
optional float similarity_inlier_ratio = 29 [default = 0];
optional float similarity_strict_inlier_ratio = 30 [default = 0];
// Average registration error of homography in pixels.
// Note: These two parameters default to zero in-case homographies have not
// been estimated.
optional float average_homography_error = 11;
// Fraction, in [0,1], of homography inliers.
optional float homography_inlier_coverage = 12;
// Same as above but with stricter threshold.
// (For details, see: MotionEstimationOptions::strict_coverage_scale).
// Coverage is designed to measure the amount of significant outliers,
// which can affect the validity of the estimated homography.
// However, it does not discount small outliers, which occur in case
// of small rolling shutter wobbles. For this a stricter version of coverage
// is needed, which is essential for computing the rolling_shutter_guess,
// i.e. the increase in coverage by using mixtures vs. homographies.
optional float homography_strict_inlier_coverage = 22;
// Per-block inlier fraction for mixtures.
repeated float mixture_inlier_coverage = 13;
// Set based on stability analysis indicating if frame is likely to originate
// from a rolling shutter camera. (-1 is used to indicate frame was not
// tested, e.g. due to mixture deemed unstable for analysis).
// Guess is a scalar indicating by how much the mixture models (suitable for
// rolling shutter distortions) increased inlier coverage compared to a single
// homography. For example a value, of 1.3 indicates, that the mixture models
// increased inlier coverage by 30%.
// If not -1, range is in [0, inf] (values slightly smaller than 1 are
// possible due to suppression of noisy feature tracks during estimation).
optional float rolling_shutter_guess = 14;
// Indicating if CameraMotion is deemed to originate from rolling
// shutter camera (index >= 0), and if so, denotes the index in the
// mixture_homography_spectrum, where higher indices correspond to heavier
// regularized motions. If motion is not deemed to originate from a rolling
// shutter camera, index is set to -1.
optional int32 rolling_shutter_motion_index = 16 [default = -1];
// List of overlay indices (cell locations in column major format) over domain
// of size overlay_domain x overlay_domain, where
// overlay_domain is set by MotionEstimation to
// MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size.
// Overlay analysis is performed over chunk of frames, as specified by
// MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting
// overlay indices being assigned to each frame of the chunk.
// Consequently it suffices to store the result only for the first frame
// of every chunk. Subsequent frames store a single negative index relative
// to the first chunk frame indicating where to locate the overlay indices.
// Specifically if for frame f, overlay_indices(0) == -2, overlay indices for
// corresponding chunk can be found at frame f - 2.
// For details about how overlay indices are used to flag a frame to contain
// an overlay, see MotionFilterOptions::OverlayOptions.
repeated int32 overlay_indices = 17;
optional int32 overlay_domain = 18 [default = 10];
// CameraMotion type indicates whether highest degree of freedom (DOF)
// model estimation was deemed stable, in which case CameraMotion::Type is set
// to VALID.
// If a model was deemed not stable (according to *StabilityBounds in
// MotionEstimationOptions), it is set to the lower dof type which was deemed
// stable.
enum Type {
VALID = 0; // All requested motion models estimated reliably.
UNSTABLE_HOMOG = 1; // Fallback to homographies, mixture unreliable.
UNSTABLE_SIM = 2; // Fallback to similarity model, homography
// unreliable.
UNSTABLE = 3; // Fallback to translation model, similarity
// unreliable, legacy naming.
INVALID = 4; // Identity model, translation unreliable.
}
optional Type type = 6 [default = VALID];
// If set, stores original type in case it was overridden (by filtering
// functions, etc.).
optional Type overridden_type = 15 [default = VALID];
// Set of optional *bit* flags set for various purposes.
enum Flags {
FLAG_SHOT_BOUNDARY = 1; // Set to indicate presence of a
// shot boundary.
FLAG_BLURRY_FRAME = 2;
FLAG_MAJOR_OVERLAY = 4;
FLAG_SHARP_FRAME = 8; // Set if frame is considered sharp
// in a neighborhood of frames.
FLAG_SINGULAR_ESTIMATION = 16; // Indicates that estimation resulted
// in singular optimization problem.
// Used internally by MotionEstimation.
// Indicates if shot boundary is part of a fade. If so, all frames of the
// fade will be labeled with the FLAG but only the begin and end of the fade
// will have the FLAG_SHOT_BOUNDARY set.
FLAG_SHOT_FADE = 32;
FLAG_DUPLICATED = 64; // Set if frame is exact duplicate of
// previous frame.
FLAG_CENTER_FRAME = 128; // Indicates this frame is at the
// center of the sequence. Currently
// used to constrain stabilizing crop
// transform.
}
optional int32 flags = 19 [default = 0];
// Same as in RegionFlowFeatureList (from region_flow.proto), measures blur
// as average cornerness over textured areas. As it depends on the image
// content, should only be used relative.
optional float blur_score = 20;
// Quantifies amount of blur. Specified as ratio w.r.t. sharpest matching
// frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest
// frame.
optional float bluriness = 21 [default = 0.0];
// Same as in RegionFlowFeatureList (from region_flow.proto). Stores fraction
// of long feature tracks that got rejected for this frame.
optional float frac_long_features_rejected = 26;
// Same as in RegionFlowFeatureList (from region_flow.proto).
// Timestamp in micro seconds of the underlying frame.
optional int64 timestamp_usec = 27 [default = 0];
// Same as in RegionFlowFeatureList (from region_flow.proto).
// Denotes frame that motion was computed w.r.t. to, locally to the current
// frame. Values < 0 indicate backward tracking, while values > 0 indicate
// forward tracking. For example, match_frame = -1, indicates tracking is
// from current to previous frame.
optional int32 match_frame = 28 [default = 0];
// Deprecated fields.
extensions 9;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,168 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_
#define MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_
#include <string>
#include <vector>
#include "absl/strings/string_view.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/util/tracking/flow_packager.pb.h"
#include "mediapipe/util/tracking/motion_estimation.pb.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
// Usage (output):
// FlowPackager flow_packager((FlowPackagerOptions()));
//
// // Input: Feature lists and optional camera motion
// vector<RegionFlowFeatureList> input_features; // Externally supplied.
// vector<CameraMotion> input_motions;
//
// const int num_frames = input_features.size();
//
// // Can encode to either TrackingContainerFormat or use protos.
// TrackingContainerFormat container;
// TrackingContainerProto proto;
//
// for (int f = 0; f < num_frames; ++f) {
// // Obtain tracking data.
// TrackingData track_data;
// flow_packager.PackFlow(input_features[f],
// input_motions[f],
// &track_data);
//
// // Encode tracking data.
// BinaryTrackingData binary_data;
// flow_packager.EncodeTrackingData(track_data,
// &binary_data);
//
// // Add to either container format or proto.
// // Container:
// TrackingContainer* track_data_encoded = container.add_track_data();
// flow_packager.BinaryTrackingDataToContainer(binary_data,
// track_data_encoded);
// // Proto:
// proto.add_track_data()->CopyFrom(binary_data);
// }
//
// // Write meta and term containers.
// flow_packager.FinalizeTrackingContainerFormat(&container);
// flow_packager.FinalizeTrackingProto(&proto);
//
// // Convert to binary std::string to stream out.
// std::string output;
// flow_packager.TrackingContainerFormatToBinary(container, &output);
// // OR:
// proto.SerializeToString(&output);
// Usage (input):
// std::string input;
// FlowPackager flow_packager((FlowPackagerOptions()));
// TrackingContainerFormat container;
// flow_packager.TrackingContainerFormatFromBinary(input, &container);
//
// // Obtain Track data.
// vector<TrackingData> tracking_data; // To be used by tracking.
// for (const auto& track_data_encoded : container.track_data()) {
// tracking_data.push_back(TrackingData());
// flow_packager.DecodeTrackingData(track_data_encoded,
// &tracking_data.back());
// }
//
// // Use tracking_data with Tracker.
class CameraMotion;
class RegionFlowFeatureList;
// Packs region flow features (and optional camera motion) into compact
// TrackingData, and converts tracking data to/from the binary container and
// proto representations. See the usage examples above.
class FlowPackager {
 public:
  explicit FlowPackager(const FlowPackagerOptions& options);
  FlowPackager(const FlowPackager&) = delete;
  FlowPackager& operator=(const FlowPackager&) = delete;
  // Packs feature_list and optional camera_motion into tracking_data.
  void PackFlow(const RegionFlowFeatureList& feature_list,
                const CameraMotion* camera_motion,  // optional.
                TrackingData* tracking_data) const;
  // Converts TrackingData to condensed binary representation.
  void EncodeTrackingData(const TrackingData& tracking_data,
                          BinaryTrackingData* binary_data) const;
  // Inverse of EncodeTrackingData.
  void DecodeTrackingData(const BinaryTrackingData& data,
                          TrackingData* tracking_data) const;
  // Wraps encoded tracking data in a TrackingContainer, and vice versa.
  void BinaryTrackingDataToContainer(const BinaryTrackingData& binary_data,
                                     TrackingContainer* container) const;
  void BinaryTrackingDataFromContainer(const TrackingContainer& container,
                                       BinaryTrackingData* binary_data) const;
  void DecodeMetaData(const TrackingContainer& data, MetaData* meta_data) const;
  // Fills in meta (first container) and termination data (last container).
  // Optionally, pass timestamps for each frame.
  void FinalizeTrackingContainerFormat(
      std::vector<uint32>* timestamps,  // optional, can be null.
      TrackingContainerFormat* container_format);
  void FinalizeTrackingContainerProto(
      std::vector<uint32>* timestamps,  // optional, can be null.
      TrackingContainerProto* proto);
  // Fast encode to binary representation.
  void TrackingContainerFormatToBinary(
      const TrackingContainerFormat& container_format, std::string* binary);
  // Fast decode from binary representation.
  void TrackingContainerFormatFromBinary(
      const std::string& binary, TrackingContainerFormat* container_format);
  // Checks whether tracking data can be encoded in high profile mode without
  // duplicating any features. This occurs if the horizontal distance between
  // two features is less than 64.
  bool CompatibleForEncodeWithoutDuplication(
      const TrackingData& tracking_data) const;
  // Helper function for test. Sorts according to scaled, integer based
  // lexicographical ordering.
  // Declared public for access by test.
  void SortRegionFlowFeatureList(float scale_x, float scale_y,
                                 RegionFlowFeatureList* feature_list) const;
  // Removes binary encoded container from std::string and parses it to
  // container. Returns header std::string of the parsed container. Useful for
  // random seek.
  std::string SplitContainerFromString(absl::string_view* binary_data,
                                       TrackingContainer* container);

 private:
  // Fills meta_data from per-frame timestamps (msecs) and encoded data sizes.
  void InitializeMetaData(int num_frames, const std::vector<uint32>& msecs,
                          const std::vector<int>& data_sizes,
                          MetaData* meta_data) const;
  // Serializes container to binary std::string and adds it to binary_data.
  void AddContainerToString(const TrackingContainer& container,
                            std::string* binary_data);

 private:
  FlowPackagerOptions options_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_

View File

@ -0,0 +1,319 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/motion_models.proto";
import "mediapipe/util/tracking/region_flow.proto";
// Messages encapsulating compressed and uncompressed TrackingData.
//
// Uncompressed tracking data can be aggregated via an TrackingDataChunk
// (e.g. to be cached to file per chunk). The whole chunk persists in memory
// after reading.
//
// Compressed tracking data can be aggregated as binary encoded TrackingData
// messages into two container formats (with support for random seeking):
// 1) TrackingContainerProto:
// Encoding using proto buffer wire format, using default proto
// serialization and de-serialization to binary string.
// The container uses the MetaData message to store the stream offsets and
// milliseconds for each frame of TrackingData. TrackingData itself is custom
// encoded to binary using FlowPackager::EncodeTrackingData and the resulting
// binary blob wrapped in a BinaryTrackingData message.
// 2) TrackingContainerFormat:
// Encoding without any dependencies to protobuffers, for clients without
// proto buffer support.
// Encoding is based on encoding binary blobs of data wrapped into repeated
// containers. The layout of a container is described by the message
// TrackingContainer and serialized to binary data as described below
// (without using proto encoding). Therefore, message TrackingContainer is
// mostly for documentation purposes than for direct use.
// The format is described by the proto message TrackingContainerFormat (used
// internally by FlowPackager) however serialization and de-serialization
// to binary string is performed using custom methods supplied by
// FlowPackager (TrackingContainerFormatToBinary and
// TrackingContainerFormatFromBinary).
// The format stores the MetaData first as above, although using custom
// encoding. TrackingData is encoded to binary as above using
// FlowPackager::EncodeTrackingData and the resulting binary blob is stored
// within a TrackingContainer.
// Next flag: 9
// Resolution-independent tracking data for a single frame.
message TrackingData {
  // Bit flags (OR'ed together into frame_flags below) describing how this
  // frame's data was encoded.
  enum FrameFlags {
    FLAG_PROFILE_BASELINE = 0;
    FLAG_PROFILE_HIGH = 1;
    FLAG_HIGH_FIDELITY_VECTORS = 2;
    FLAG_BACKGROUND_UNSTABLE = 4;  // Background model could not be estimated.
    FLAG_DUPLICATED = 8;           // Frame is duplicated, i.e. identical to
                                   // previous one.

    // Indicates the beginning of a new chunk. In this case the track_id's
    // are not compatible w.r.t. previous one.
    FLAG_CHUNK_BOUNDARY = 16;
  }

  optional int32 frame_flags = 1 [default = 0];

  // Tracking data is resolution independent specified w.r.t.
  // specified domain.
  optional int32 domain_width = 2;
  optional int32 domain_height = 3;

  // Aspect ratio (w/h) of the original frame tracking data was computed from.
  optional float frame_aspect = 6 [default = 1.0];

  optional Homography background_model = 4;

  // Stores num_elements vectors of motion data. (x,y) position encoded via
  // row_indices and col_starts, as compressed sparse column matrix storage
  // format:
  // (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29),
  // Vector data is stored as (dx, dy) position. Optionally we store the fitting
  // error and track id for each feature.
  message MotionData {
    optional int32 num_elements = 1;

    // #num_elements pairs (flow_x, flow_y) densely packed.
    repeated float vector_data = 2 [packed = true];

    // Stores corresponding track index for each feature. Features belonging
    // to the same track over time are assigned the same id.
    // NOTE: Due to size, tracking ids are never stored as compressed binary
    // tracking data.
    repeated int32 track_id = 3 [packed = true];

    // # num_elements row indices.
    repeated int32 row_indices = 4 [packed = true];

    // Start index in above array for each column (#domain_width + 1 entries).
    repeated int32 col_starts = 5 [packed = true];

    // Feature descriptors for num_elements feature points.
    repeated BinaryFeatureDescriptor feature_descriptors = 6;
  }

  optional MotionData motion_data = 5;

  // Total number of features in our analysis.
  optional uint32 global_feature_count = 7;

  // Average of all motion vector magnitudes (without accounting for any motion
  // model), within 10th to 90th percentile (to remove outliers).
  optional float average_motion_magnitude = 8;
}
// Aggregates uncompressed per-frame TrackingData items (e.g. to be cached to
// file per chunk); the whole chunk persists in memory after reading.
message TrackingDataChunk {
  // One frame of tracking data plus its position in the original stream.
  message Item {
    optional TrackingData tracking_data = 1;
    // Global frame index.
    optional int32 frame_idx = 2;
    // Corresponding timestamp.
    optional int64 timestamp_usec = 3;
    // Previous frame timestamp.
    optional int64 prev_timestamp_usec = 4;
  }
  repeated Item item = 1;
  // Set as marker for last chunk.
  optional bool last_chunk = 2 [default = false];
  // Set as marker for first chunk.
  optional bool first_chunk = 3 [default = false];
}
// TrackingData in compressed binary format. Obtainable via
// FlowPackager::EncodeTrackingData. Details of binary encode are below.
message BinaryTrackingData {  // TrackingContainer::header = "TRAK"
  // Custom binary encoding of a single TrackingData (format documented below).
  optional bytes data = 1;
}
// Detailed explanation of binary Tracking data encode (LITTLE ENDIAN encode!)
// TrackingData is stored in binary as a struct of the above fields and the
// compressed motion data in sparse column matrix storage format.
// (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29)
// Specifically, TrackingData is encoded as:
// { frame_flags : 32 bit int (from member)
// domain_width : 32 bit int (from member)
// domain_height : 32 bit int (from member)
// frame_aspect : 32 bit float (from member)
//
// background_model : 6 * 32 bit float (dx, dy, a, b, c, d of AffineModel)
// scale : 32 bit float (scale vectors are multiplied with)
// num_vectors : 32 bit int (from member num_elements)
//
// col_start_delta : (domain_width + 1) * 8 bit uint (col starts delta
// encoded)
// row_idx_size : 32 bit int (size of row_idx array <= num_vectors)
// row_idx : row_idx_size * 8 bit uint
// vector_size : 32 bit int (size of vector_data)
// vector_data : vector_size * [8 bit | 16 bit] int
// (depending on FLAG_HIGH_FIDELITY_VECTORS)
// }
//
// >> Baseline encode <<
// Scale is determined such that maximum vector value (maximum across x and y)
// is mapped to highest 8 bit or 16 bit SIGNED int
// (i.e. 7 or 15 bit resolution respectively).
// Vector values are multiplied by this scale (storing float in int with
// truncation) and (dx, dy) is packed as [dy | dx] into a 16bit or 32 bit word.
// Unpacking requires therefore dividing the vector values by scale.
//
// Column starts are delta compressed, that is, col_start_delta[i] stores
// col_starts(i) - col_starts(i - 1) from MotionData.
//
// Row indices are directly stored at 8 bit uints, that is row_idx_size ==
// num_vectors in this case.
//
//
// >> High profile encode <<
// Scale is determined as above but for maximum vector deltas (maximum across x
// and y of magnitude in difference between two adjacent vectors). Vector value
// deltas are multiplied by this scale before encoding.
//
// Encoding is more complex compared to baseline. Instead of vector value, delta
// vector values (difference in dx = ddx, difference in dy = ddy)
// are multiplied by scale and stored packed as [ddy | ddx] into a 16bit or
// 32bit word. Compression algorithm accounts for error accumulation, so
// unpacking should first add deltas in integer domain (for x and y separately)
// and then divide by scale to yield (an approximation) of the
// original vector value.
// Most importantly, not every vector value is stored, but only if the delta is
// above the FlowPackagerOptions::high_profile_reuse_threshold, in which case we
// advance to the next vector data. Otherwise the previous vector is used.
//
// The information whether to advance is stored for each vector in the
// highest bit of the row index (FlowPackagerOptions::ADVANCE_FLAG). Row
// indices are not stored as in the baseline profile directly, but as deltas
// (reset at the beginning of every column). As deltas are small it is often
// possible to store two deltas (if both are < 8) in a single byte. This is
// indicated by the second highest flag in the row index
// (FlowPackagerOptions::DOUBLE_INDEX_ENCODE). If set, row index stores
// [row_delta_1 | row_delta_2] in the lower 6 bit. Note, that the advance flag
// applies uniformly to both deltas in this case.
// Sidenote (edge case): Due to the use of the top 2 bits as flags,
// at times we cannot store the full row delta in the lower 6 bits.
// In this case the vector is duplicated (using the ADVANCE_FLAG)
// until the delta sum of duplicated vectors reaches the original delta.
// Consequently, the compressed vector field in high profile may contain a few
// vectors more than the original.
//
// Column starts are delta compressed as in baseline, but account for double
// index encodes. Therefore each column delta is reduced by the number of double
// index encodes occurring for this column. This has to be replicated on the
// decoding side, each delta needs to be increased by the number of double index
// encodes encountered during encoding.
// Stores offsets for random seek and time offsets for each frame of
// TrackingData. Stream offsets are specified relative w.r.t. end of metadata
// blob.
// Offsets specify start of the corresponding binary encoded TrackingContainer
// (for TrackingContainerFormat) or BinaryTrackingData proto (for
// TrackingContainerProto).
message MetaData {  // TrackingContainer::header = "META"
  // Number of TrackingData frames described by track_offsets below.
  optional fixed32 num_frames = 2;

  // Seek information for a single frame of TrackingData.
  message TrackOffset {
    optional fixed32 msec = 1;  // Time offset of the metadata in msec.
    optional fixed32 stream_offset = 2;  // Offset of TrackingContainer or
                                         // respectively BinaryTrackingData
                                         // in stream.
                                         // Specified w.r.t. end of the
                                         // Metadata.
  }
  repeated TrackOffset track_offsets = 3;
}
// TrackingContainer is self-describing container format to store arbitrary
// chunks of binary data. Each container is typed via its 4 character header,
// versioned via an int, and followed by the size of the binary data and the
// actual data. Designed for clients without availability of protobuffer
// support.
// Note: This message is mainly used for documentation purposes and uses custom
// encoding as specified by FlowPackager::TrackingContainerFormatToBinary.
// Default binary size of a TrackingContainer (DO NOT CHANGE!):
// header:  4 byte +
// version: 4 byte +
// size:    4 byte +
// data     #size
// SUM:     12 + #size.
message TrackingContainer {
  optional string header = 1;                  // 4 character header.
  optional fixed32 version = 2 [default = 1];  // Version information.
  optional fixed32 size = 3;  // Size of binary data held by container
  optional bytes data = 4;    // Binary data encoded.

  // DO NOT alter layout of TrackingContainer.
  // Use version to extend or alter encoded binary data.
}
// Container format for clients without proto support (written via
// FlowPackager::TrackingContainerFormatToBinary and read via
// FlowPackager::TrackingContainerFormatFromBinary).
// Proto here is intermediate format for documentation and internal use.
// Stores multiple TrackingContainers of different types.
// Meta data is stored first, to facilitate random seek (via stream offset
// positions) to arbitrary binary TrackingData. Termination container signals
// end of stream.
message TrackingContainerFormat {
  optional TrackingContainer meta_data = 1;  // Wraps binary meta data, via
                                             // custom encode.
  repeated TrackingContainer track_data = 2;  // Wraps BinaryTrackingData.

  // Add new TrackingContainers above before end of stream indicator.
  // Zero sized termination container with TrackingContainer::header = "TERM".
  optional TrackingContainer term_data = 3;
}
// Simplified proto format of above TrackingContainerFormat. Instead of using
// self-describing TrackingContainer's, we simply use the proto wire format for
// encoding and decoding (proto format is typed and versioned via ids).
message TrackingContainerProto {
  // Seek/time metadata for the frames in track_data below.
  optional MetaData meta_data = 1;
  // One custom-encoded TrackingData blob per frame.
  repeated BinaryTrackingData track_data = 2;
}
// Options controlling compression and encoding.
message FlowPackagerOptions {
  // Tracking data is resolution independent specified w.r.t.
  // specified domain. Only values <= 256 are supported if binary tracking data
  // is requested to be supported (see below).
  optional int32 domain_width = 1 [default = 256];
  optional int32 domain_height = 2 [default = 192];

  // Needs to be set for calls to FlowPackager::EncodeTrackingData. If encoding
  // is not required, can be set to false in which case a higher domain_width
  // can be used.
  optional bool binary_tracking_data_support = 6 [default = true];

  // If set, uses the delta-based high profile encode instead of baseline.
  optional bool use_high_profile = 3 [default = false];

  // If set uses 16 bit encode for vector data, in BinaryTrackingData,
  // otherwise only 8 bits are used.
  optional bool high_fidelity_16bit_encode = 4 [default = true];

  // In high profile encode, re-use previously encoded vector when absolute
  // difference to current vector is below threshold.
  optional float high_profile_reuse_threshold = 5 [default = 0.5];

  // High profile encoding flags (stored in the top bits of each row index).
  enum HighProfileEncoding {
    ADVANCE_FLAG = 0x80;         // Advance to the next vector datum.
    DOUBLE_INDEX_ENCODE = 0x40;  // Two row deltas packed into one byte.
    INDEX_MASK = 0x3F;           // Lower 6 bits hold the row index delta(s).
  }
}

View File

@ -0,0 +1,93 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/camera_motion.proto";
import "mediapipe/util/tracking/frame_selection_solution_evaluator.proto";
import "mediapipe/util/tracking/region_flow.proto";
// Stores selected timestamps and corresponding frame index.
message FrameSelectionTimestamp {
  optional int64 timestamp = 1;  // Timestamp of the selected frame.

  // Frame index of the selected frame in the initial video stream. If this
  // timestamp was manufactured, this will be the index of the initial frame.
  optional int32 frame_idx = 2;

  // If this timestamp was manufactured, the timestamp of the original frame.
  optional int64 processed_from_timestamp = 3 [default = -1];
}
// Stores the result of the frame selection, with composited features.
// Next index: 6
message FrameSelectionResult {
  optional int64 timestamp = 1;  // Timestamp of the selected frame.

  // Frame index of the selected frame in the initial video stream. If this
  // timestamp was manufactured, this will be the index of the initial frame.
  optional int32 frame_idx = 2;

  // CameraMotion from selected item to previous selected item.
  optional CameraMotion camera_motion = 3;

  // Features from selected item to previous selected item.
  optional RegionFlowFeatureList features = 4;

  // If this FrameSelectionResult was the result of processing a previous one,
  // the timestamp of the original frame.
  optional int64 processed_from_timestamp = 5 [default = -1];
}
// Next index: 7
// A single criterion driving which frames get selected.
message FrameSelectionCriterion {
  // Interval at which frames should be sampled; set to zero if sampling should
  // not be enforced (i.e. selection is performed w.r.t. other criteria).
  optional int32 sampling_rate = 1 [default = 0];

  // Bandwidth used during dynamic programming. The larger the bandwidth the
  // more accurate the result w.r.t. the specified sampling rate. Smaller
  // bandwidths bias the solution suboptimally to center around the mean
  // frame numbers of the sampling rate.
  // If in (0, 1), assumed to specify fraction of total number of input frames,
  // otherwise must be an integer.
  optional float bandwidth_frames = 2 [default = 50];

  // Search radius for dynamic programming (how many frames you are allowed to
  // search around the previous frame).
  optional int32 search_radius_frames = 3 [default = 1];

  // Allows one to specify custom solution selection criteria (i.e. different
  // way to choose the best row of the computed cost matrix).
  optional FrameSelectionSolutionEvaluatorType solution_evaluator = 4;

  // Outputs a fixed number of frames and automatically sets the appropriate
  // sampling rate. Set to 0 by default (i.e. not enabled).
  optional int32 max_output_frames = 5 [default = 0];
}
// Options for computing frame selection.
// TODO: Support multiple criteria if required. Currently uses only the
// first one.
message FrameSelectionOptions {
  // Selection criteria; currently only the first entry is used (see TODO).
  repeated FrameSelectionCriterion criterion = 1;

  // FrameSelection buffers incoming CameraMotions for specified chunk size
  // and creates cost matrices upon reaching the limit.
  // TODO: Implement if necessary (currently nothing is cleared upon
  // reaching the limit).
  optional int32 chunk_size = 2 [default = 100];
}

View File

@ -0,0 +1,27 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
// Extension point: options for custom solution evaluator implementations.
message FrameSelectionSolutionEvaluatorOptions {
  extensions 20000 to max;
}
// Specifies a solution evaluator by class name, with optional options.
message FrameSelectionSolutionEvaluatorType {
  // Class of type FrameSelectionSolution that computes the best row.
  optional string class_name = 1 [default = "FrameSelectionSolutionEvaluator"];
  // Options forwarded to the instantiated evaluator.
  optional FrameSelectionSolutionEvaluatorOptions options = 2;
}

View File

@ -0,0 +1,116 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/image_util.h"
#include <algorithm>
#include <vector>
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/util/tracking/region_flow.h"
namespace mediapipe {
// Returns the median of the per-pixel L1 color distance between img_1 and
// img_2. Both images must have the same dimensions and channel count.
float FrameDifferenceMedian(const cv::Mat& img_1, const cv::Mat& img_2) {
  CHECK(img_1.size() == img_2.size());
  CHECK_EQ(img_1.channels(), img_2.channels());

  const int num_channels = img_1.channels();
  const float channel_norm = 1.0f / num_channels;

  // One channel-averaged absolute difference per pixel.
  std::vector<float> per_pixel_diffs;
  per_pixel_diffs.reserve(img_1.cols * img_1.rows);

  for (int row = 0; row < img_1.rows; ++row) {
    const uint8* ptr_1 = img_1.ptr<uint8>(row);
    const uint8* ptr_2 = img_2.ptr<uint8>(row);
    const int row_elems = num_channels * img_1.cols;
    int idx = 0;
    while (idx < row_elems) {
      float diff_sum = 0.0f;
      for (int c = 0; c < num_channels; ++c, ++idx) {
        diff_sum += std::abs(static_cast<int>(ptr_1[idx]) -
                             static_cast<int>(ptr_2[idx]));
      }
      per_pixel_diffs.push_back(diff_sum * channel_norm);
    }
  }

  // Partial sort up to the middle element is sufficient for the median.
  auto median_it = per_pixel_diffs.begin() + per_pixel_diffs.size() / 2;
  std::nth_element(per_pixel_diffs.begin(), median_it, per_pixel_diffs.end());
  return *median_it;
}
// Fills `color_map` with `steps` entries of Matlab's jet color map, each
// component scaled to [0, 255].
void JetColoring(int steps, std::vector<Vector3_f>* color_map) {
  CHECK(color_map != nullptr);
  color_map->resize(steps);
  const float inv_steps = 1.0f / steps;
  for (int idx = 0; idx < steps; ++idx) {
    // Normalized position of this bin in [-1, 1].
    const float frac = 2.0f * (idx * inv_steps - 0.5f);
    if (frac < -0.8f) {
      (*color_map)[idx] = Vector3_f(0, 0, 0.6f + (frac + 1.0) * 2.0f) * 255.0f;
    } else if (frac >= -0.8f && frac < -0.25f) {
      (*color_map)[idx] = Vector3_f(0, (frac + 0.8f) * 1.82f, 1.0f) * 255.0f;
    } else if (frac >= -0.25f && frac < 0.25f) {
      (*color_map)[idx] =
          Vector3_f((frac + 0.25f) * 2.0f, 1.0f, 1.0 + (frac + 0.25f) * -2.0f) *
          255.0f;
    } else if (frac >= 0.25f && frac < 0.8f) {
      (*color_map)[idx] =
          Vector3_f(1.0f, 1.0f + (frac - 0.25f) * -1.81f, 0.0f) * 255.0f;
    } else if (frac >= 0.8f) {
      (*color_map)[idx] =
          Vector3_f(1.0f + (frac - 0.8f) * -2.0f, 0.0f, 0.0f) * 255.0f;
    } else {
      // All real-valued fracs are covered above; reaching here indicates a
      // non-finite value.
      LOG(ERROR) << "Out of bound value. Should not occur.";
    }
  }
}
// Draws each salient point with positive weight in `salient_points` as an
// ellipse into `image`; optionally also renders the (rotated) bounding box
// of each ellipse.
void RenderSaliency(const SalientPointFrame& salient_points,
                    const cv::Scalar& line_color, int line_thickness,
                    bool render_bounding_box, cv::Mat* image) {
  // Visualize salient points.
  for (const auto& point : salient_points.point()) {
    if (point.weight() > 0) {
      // Scale normalized coordinates to the pixel space of `image`.
      SalientPoint copy = point;
      ScaleSalientPoint(image->cols, image->rows, &copy);
      Vector2_f pt(copy.norm_point_x(), copy.norm_point_y());
      cv::ellipse(*image, cv::Point(pt.x(), pt.y()),
                  cv::Size(copy.norm_major(), copy.norm_minor()),
                  copy.angle() / M_PI * 180.0,  // rotation in degrees.
                  0,    // start angle
                  360,  // end angle
                  line_color, line_thickness);
      if (render_bounding_box) {
        // Compute the 4 corners of the ellipse's bounding box and connect
        // them with anti-aliased lines.
        std::vector<Vector2_f> ellipse_bounding_box;
        BoundingBoxFromEllipse(pt, copy.norm_major(), copy.norm_minor(),
                               copy.angle(), &ellipse_bounding_box);
        std::vector<cv::Point> corners;
        corners.reserve(4);
        for (const Vector2_f& corner : ellipse_bounding_box) {
          corners.emplace_back(cv::Point(corner.x(), corner.y()));
        }
        for (int k = 0; k < 4; ++k) {
          cv::line(*image, corners[k], corners[(k + 1) % 4], line_color,
                   line_thickness, CV_AA);
        }
      }
    }
  }
}

View File

@ -0,0 +1,192 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_
#define MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_
#include <vector>
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/motion_models.pb.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
// Returns median of the L1 color distance between img_1 and img_2.
float FrameDifferenceMedian(const cv::Mat& img_1, const cv::Mat& img_2);
// Matlab's jet color map (returned assuming RGB channel order in [0, 1]
// normalized intensity domain). For details: http://goo.gl/gmHKZ
// Returned as map with num_entries entries.
void JetColoring(int num_entries, std::vector<Vector3_f>* color_map);
// Draws a saliency point frame to a single frame.
// Optionally renders axis aligned bounding box for each SalientPointFrame.
void RenderSaliency(const SalientPointFrame& salient_points,
const cv::Scalar& line_color, int line_thickness,
bool render_bounding_box, cv::Mat* image);
// Templated CopyBorder methods for increased speed. In-place border copy
// for specified Mat of type T with channels. Passed matrix is assumed to be of
// full size, that is we copy the content at [border, cols - 2 * border] x
// [border, rows - 2 * border] to the full size.
template <typename T, int border, int channels>
void CopyMatBorder(cv::Mat* mat);
// Same as above for copying border only in X or Y
template <typename T, int border, int channels>
void CopyMatBorderX(cv::Mat* mat);
template <typename T, int border, int channels>
void CopyMatBorderY(cv::Mat* mat);
// In-place border fill for `mat` (assumed to be full size): replicates the
// interior region [border, cols - 2 * border) x [border, rows - 2 * border)
// outward into the border pixels by reflecting interior rows/columns, with
// reads clamped to the valid interior for small images.
template <typename T, int border, int channels>
void CopyMatBorder(cv::Mat* mat) {
  const int width = mat->cols - 2 * border;
  const int height = mat->rows - 2 * border;
  // Maximum values we clamp at to avoid going out of bound small images.
  const int max_w = width - 1;
  const int max_h = height - 1;
  // Top rows: destination row (border - 1 - r) mirrors interior row
  // (border + r), clamped via max_h.
  for (int r = 0; r < border; ++r) {
    const T* src_ptr =
        mat->ptr<T>(border + std::min(r, max_h)) + border * channels;
    T* dst_ptr = mat->ptr<T>(border - 1 - r);
    // Top left elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[std::min(max_w, border - 1 - i) * channels + j];
      }
    }
    // src and dst should point to same column from here.
    DCHECK_EQ(0, (src_ptr - dst_ptr) * sizeof(T) % mat->step[0]);
    // Top row copy.
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
    src_ptr += width * channels;  // Points one behind end.
    dst_ptr += width * channels;
    // Top right elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      if (i <= max_w) {
        src_ptr -= channels;
      }
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[j];
      }
    }
  }
  // Left and right border.
  for (int r = 0; r < height; ++r) {
    // Get pointers to left most and right most column within image.
    T* left_ptr = mat->ptr<T>(r + border) + border * channels;
    T* right_ptr = left_ptr + (width - 1) * channels;
    for (int i = 0; i < border; ++i) {
      for (int j = 0; j < channels; ++j) {
        left_ptr[-(i + 1) * channels + j] =
            left_ptr[std::min(max_w, i) * channels + j];
        right_ptr[(i + 1) * channels + j] =
            right_ptr[-std::min(max_w, i) * channels + j];
      }
    }
  }
  // Bottom rows: destination row (border + height + r) mirrors interior row
  // (border + height - 1 - r), clamped via max_h.
  for (int r = 0; r < border; ++r) {
    const T* src_ptr = mat->ptr<T>(border + height - 1 - std::min(r, max_h)) +
                       border * channels;
    T* dst_ptr = mat->ptr<T>(border + height + r);
    // Bottom left elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[(border - 1 - std::min(max_w, i)) * channels + j];
      }
    }
    // src and dst should point to same column from here.
    DCHECK_EQ(0, (dst_ptr - src_ptr) * sizeof(T) % mat->step[0]);
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
    src_ptr += width * channels;  // Points one behind the end.
    dst_ptr += width * channels;
    // Bottom right elems.
    for (int i = 0; i < border; ++i, dst_ptr += channels) {
      if (i <= max_w) {
        src_ptr -= channels;
      }
      for (int j = 0; j < channels; ++j) {
        dst_ptr[j] = src_ptr[j];
      }
    }
  }
}
// Same as CopyMatBorder, but only fills the left and right border columns
// (rows outside the interior are not touched).
template <typename T, int border, int channels>
void CopyMatBorderX(cv::Mat* mat) {
  const int width = mat->cols - 2 * border;
  const int height = mat->rows - 2 * border;
  // Maximum values we clamp at to avoid going out of bound small images.
  const int max_w = width - 1;
  // Left and right border.
  for (int r = 0; r < height; ++r) {
    // Pointers to the left-most and right-most interior column in this row.
    T* left_ptr = mat->ptr<T>(r + border) + border * channels;
    T* right_ptr = left_ptr + (width - 1) * channels;
    for (int i = 0; i < border; ++i) {
      for (int j = 0; j < channels; ++j) {
        // Reflect interior columns outward, clamped to the valid area.
        left_ptr[-(i + 1) * channels + j] =
            left_ptr[std::min(i, max_w) * channels + j];
        right_ptr[(i + 1) * channels + j] =
            right_ptr[-std::min(max_w, i) * channels + j];
      }
    }
  }
}
// Same as CopyMatBorder, but only fills the top and bottom border rows via
// row-wise memcpy of interior rows. Corner regions (outside the interior
// column span) are not touched.
template <typename T, int border, int channels>
void CopyMatBorderY(cv::Mat* mat) {
  const int width = mat->cols - 2 * border;
  const int height = mat->rows - 2 * border;
  // Maximum values we clamp at to avoid going out of bound small images.
  const int max_h = height - 1;
  // Top rows: row (border - 1 - r) mirrors interior row (border + r).
  for (int r = 0; r < border; ++r) {
    const T* src_ptr =
        mat->ptr<T>(border + std::min(max_h, r)) + border * channels;
    T* dst_ptr = mat->ptr<T>(border - 1 - r) + border * channels;
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
  }
  // Bottom rows: row (border + height + r) mirrors interior row
  // (border + height - 1 - r).
  for (int r = 0; r < border; ++r) {
    const T* src_ptr = mat->ptr<T>(border + height - 1 - std::min(max_h, r)) +
                       border * channels;
    T* dst_ptr = mat->ptr<T>(border + height + r) + border * channels;
    memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0]));
  }
}
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_

View File

@ -0,0 +1,93 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/image_util.h"
#include "mediapipe/framework/port/gtest.h"
namespace mediapipe {
// Helper: fills the interior of `full_size` (CV_32FC3) with per-pixel values
// derived from the column index, runs CopyMatBorder<float, border, 3>, and
// verifies both that the interior is untouched and that every border pixel
// holds the expected clamped/reflected value.
template <int border>
void TestCopyBorder(cv::Mat* full_size) {
  full_size->setTo(0);
  const int width = full_size->cols - 2 * border;
  // Fill interior with per-pixel value (x - border) + {0.1, 0.2, 0.3} per
  // channel, so each column is uniquely identifiable.
  for (int y = border; y < full_size->rows - border; ++y) {
    float* row_ptr = full_size->ptr<float>(y);
    for (int x = border; x < full_size->cols - border; ++x) {
      int value = x - border;
      row_ptr[3 * x] = value + 0.1f;
      row_ptr[3 * x + 1] = value + 0.2f;
      row_ptr[3 * x + 2] = value + 0.3f;
    }
  }
  CopyMatBorder<float, border, 3>(full_size);
  // Main part should not be modified.
  for (int y = border; y < full_size->rows - border; ++y) {
    float* row_ptr = full_size->ptr<float>(y);
    for (int x = border; x < full_size->cols - border; ++x) {
      int value = x - border;
      EXPECT_EQ(row_ptr[3 * x], value + 0.1f);
      EXPECT_EQ(row_ptr[3 * x + 1], value + 0.2f);
      EXPECT_EQ(row_ptr[3 * x + 2], value + 0.3f);
    }
  }
  // Check all pixels, including the border, against the expected
  // column-derived value.
  for (int y = 0; y < full_size->rows; ++y) {
    const float* row_ptr = full_size->ptr<float>(y);
    for (int x = 0; x < full_size->cols; ++x) {
      // Memory copy, expect exact floating point equality.
      int value = x - border;
      if (x < border) {
        // Cap to valid area.
        value = std::min(width - 1, (border - 1) - x);
      } else if (x >= full_size->cols - border) {
        // Last column minus distance from frame boundary, capped
        // to valid area.
        value = std::max(0, width - 1 - (x - full_size->cols + border));
      }
      EXPECT_EQ(row_ptr[3 * x], value + 0.1f);
      EXPECT_EQ(row_ptr[3 * x + 1], value + 0.2f);
      EXPECT_EQ(row_ptr[3 * x + 2], value + 0.3f);
    }
  }
}
// Border copy on a regular-sized 3-channel float frame, for border widths
// 1 through 5 (interior always much larger than the border).
TEST(PushPullFilteringTest, CopyBorder) {
  cv::Mat full_size(100, 50, CV_32FC3);
  TestCopyBorder<1>(&full_size);
  TestCopyBorder<2>(&full_size);
  TestCopyBorder<3>(&full_size);
  TestCopyBorder<4>(&full_size);
  TestCopyBorder<5>(&full_size);
}
// Degenerate case: frame dimension equals 2 * border + 1, i.e. the interior
// is a single row/column, so the capping logic in the border copy is hit.
TEST(PushPullFilteringTest, CopyBorderSmallFrame) {
  cv::Mat full_size(3, 3, CV_32FC3);
  TestCopyBorder<1>(&full_size);

  full_size.create(5, 5, CV_32FC3);
  TestCopyBorder<2>(&full_size);

  full_size.create(7, 7, CV_32FC3);
  TestCopyBorder<3>(&full_size);

  full_size.create(9, 9, CV_32FC3);
  TestCopyBorder<4>(&full_size);
}
} // namespace mediapipe

View File

@ -0,0 +1,22 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/measure_time.h"

// Global toggle read by the MEASURE_TIME macro (declared extern in
// measure_time.h). Enabled only when SET_FLAG_MEASURE_TIME is defined at
// build time, e.g. --copt=-DSET_FLAG_MEASURE_TIME.
#ifdef SET_FLAG_MEASURE_TIME
// If set to true, outputs time measurements to LOG(INFO).
bool flags_measure_time = true;
#else
// Disabled by default: MEASURE_TIME statements become no-ops.
bool flags_measure_time = false;
#endif

View File

@ -0,0 +1,169 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Helper class and macro to take time measurements within current scope.
// Takes time measurement within current scope. Outputs to LOG(INFO) if
// flag --measure_time is set or if build flag SET_FLAG_MEASURE_TIME is
// defined (add --copt=-DSET_FLAG_MEASURE_TIME to your build command).
// Additionally you can limit time measurements to specific files,
// via the flag
// --measure_time_filter="<comma separated list of substrings of file names>"
// Example:
// { // Scope to be measured
// MEASURE_TIME << "Some additional logging and answers : " << 42;
// ... // instructions.
// }
#ifndef MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_
#define MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_
#include <memory>
#include <sstream>
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/clock.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
// Defined in measure_time.cc; true when timing output is enabled at build
// time via SET_FLAG_MEASURE_TIME.
extern bool flags_measure_time;

namespace mediapipe {
// Forward declaration; full definition below.
class MeasureTimeFilter;
}  // namespace mediapipe

// Measures wall time of the current scope and logs it, together with any
// message streamed into the macro, when flags_measure_time is set.
#define MEASURE_TIME \
  MEASURE_TIME_PRE_IMPL(flags_measure_time, __LINE__, __FILE__)

// Add level of indirection to trigger __LINE__ macro expansion.
#define MEASURE_TIME_PRE_IMPL(show_output, line, file) \
  MEASURE_TIME_IMPL(show_output, line, file)

// Creates one static Accumulator per call site (intentionally never freed,
// so averages persist across invocations) and a ScopedWallTimer that logs
// on destruction; the trailing if-statement routes any `<< ...` message
// into the timer's buffering stream.
#define MEASURE_TIME_IMPL(show_output, line, file)                      \
  std::unique_ptr<mediapipe::ScopedWallTimer> scoped_wall_timer_##line; \
  const bool activated##line = show_output;                             \
  if (activated##line) {                                                \
    static mediapipe::ScopedWallTimer::Accumulator*                     \
        scoped_wall_timer_accum_##line =                                \
            new mediapipe::ScopedWallTimer::Accumulator;                \
    scoped_wall_timer_##line.reset(new mediapipe::ScopedWallTimer(      \
        file, line, show_output, scoped_wall_timer_accum_##line));      \
  }                                                                     \
  if (activated##line) /* NOLINT */                                     \
  scoped_wall_timer_##line->stream()
namespace mediapipe {
// Measures wall time from construction to destruction and logs the elapsed
// time plus a per-call-site running average (kept in an Accumulator shared
// across invocations). Instantiated via the MEASURE_TIME macro above.
class ScopedWallTimer {
 public:
  // Helper class for accumulating time across multiple calls to a scoped wall
  // timer. Thread-safe (except on ANDROID, for which Mutex is mocked out).
  class Accumulator {
   public:
    Accumulator() : accum_time_(0.0f), count_(0) {}
    Accumulator(const Accumulator&) = delete;
    Accumulator& operator=(const Accumulator&) = delete;

    // Accumulates passed_time into accumulators. Returns total time and number
    // of calls made.
    void Accumulate(double passed_time, double* accum_time, int* count) {
      absl::MutexLock lock(&mutex_);
      accum_time_ += passed_time;
      ++count_;
      *accum_time = accum_time_;
      *count = count_;
    }

   private:
    double accum_time_;  // Total accumulated time, in nanoseconds.
    int count_;          // Number of measurements accumulated.
    absl::Mutex mutex_;
  };

  // Creates a new ScopedWallTimer for current file and line. LogMessage is only
  // initialized if show_output is set to true.
  ScopedWallTimer(const char* file, int line, bool show_output,
                  Accumulator* accumulator)
      : file_(file),
        line_(line),
        show_output_(show_output),
        accumulator_(accumulator) {
    if (show_output_) {
      CHECK(accumulator_);
      // start_time_ is only sampled (and later read) when output is on.
      start_time_ = GetWallTime();
    }
  }

  ScopedWallTimer(const ScopedWallTimer&) = delete;
  ScopedWallTimer& operator=(const ScopedWallTimer&) = delete;

  // Destructor measures time and outputs to stream.
  ~ScopedWallTimer() {
    if (show_output_) {
      double passed_time = GetWallTime() - start_time_;
      double accum_time = 0.0;
      int count = 0;
      accumulator_->Accumulate(passed_time, &accum_time, &count);
      // Times are in nanoseconds; * 1e-6 converts to milliseconds.
      LOG(INFO) << stream_.str() << " TIMES: [Curr: " << passed_time * 1e-6
                << " ms, "
                << "Avg: " << accum_time * 1e-6 / std::max(1, count) << " ms, "
                << count << " calls]";
    }
  }

  // Stream callers use to append contextual text to the logged line.
  std::ostream& stream() { return stream_; }

 private:
  const char* file_;  // Call-site file (not currently printed).
  int line_;          // Call-site line (not currently printed).
  bool show_output_;

  // We need to buffer information passed via stream operator <<
  // While LogMessage is adequate for this, no good equivalent exists on
  // Android, so we employ a portable ostringstream for buffering.
  std::ostringstream stream_;

  int64 start_time_;  // Wall time at construction, in nanoseconds.

  Accumulator* accumulator_;  // Not owned (never deleted here).

  // Returns current wall time in nanoseconds.
  int64 GetWallTime() { return absl::GetCurrentTimeNanos(); }
};
// Substring filter over file names, used to restrict which call sites emit
// timing output. NOTE(review): the singleton returned by get() is
// default-constructed, so its pattern list is empty and Matches() always
// returns false; the string-taking constructor is currently unreachable.
class MeasureTimeFilter {
 public:
  // Returns the process-wide singleton instance (never destroyed).
  static const MeasureTimeFilter* get() {
    static MeasureTimeFilter instance;
    return &instance;
  }

  MeasureTimeFilter(const MeasureTimeFilter&) = delete;
  MeasureTimeFilter& operator=(const MeasureTimeFilter&) = delete;

  // True iff any configured pattern occurs as a substring of item.
  bool Matches(const std::string& item) const {
    for (auto pattern = match_items_.begin(); pattern != match_items_.end();
         ++pattern) {
      if (item.find(*pattern) != std::string::npos) {
        return true;
      }
    }
    return false;
  }

 private:
  explicit MeasureTimeFilter() {}

  // Builds the filter from a comma-separated list of substrings.
  explicit MeasureTimeFilter(const std::string& filter) {
    match_items_ = absl::StrSplit(filter, absl::ByChar(','));
  }

  std::vector<std::string> match_items_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_

View File

@ -0,0 +1,850 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/motion_analysis.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <deque>
#include <memory>
#include "absl/strings/str_format.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/camera_motion.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/image_util.h"
#include "mediapipe/util/tracking/measure_time.h"
#include "mediapipe/util/tracking/motion_saliency.pb.h"
#include "mediapipe/util/tracking/region_flow.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
#include "mediapipe/util/tracking/region_flow_computation.h"
#include "mediapipe/util/tracking/region_flow_computation.pb.h"
#include "mediapipe/util/tracking/region_flow_visualization.h"
namespace mediapipe {
// Builds the analysis pipeline for frames of frame_width x frame_height:
// applies the selected analysis policy to the options, then instantiates
// region flow computation, motion estimation, optional saliency, and the
// streaming buffer used to stage per-frame results.
MotionAnalysis::MotionAnalysis(const MotionAnalysisOptions& options,
                               int frame_width, int frame_height)
    : options_(options),
      frame_width_(frame_width),
      frame_height_(frame_height) {
  // Init options by policy.
  InitPolicyOptions();

  // Merge back in any overridden options (explicit user settings win over
  // policy defaults).
  options_.MergeFrom(options);

  region_flow_computation_.reset(new RegionFlowComputation(
      options_.flow_options(), frame_width_, frame_height_));

  motion_estimation_.reset(new MotionEstimation(options_.motion_options(),
                                                frame_width_, frame_height_));

  if (options_.compute_motion_saliency()) {
    motion_saliency_.reset(new MotionSaliency(options_.saliency_options(),
                                              frame_width_, frame_height_));

    // Compute overlap needed between clips if filtering or smoothing of
    // saliency is requested.
    if (options_.select_saliency_inliers()) {
      // 1.65 sigmas in each direction = 90% of variance is captured.
      overlap_size_ = std::max<int>(
          overlap_size_, options_.saliency_options().selection_frame_radius());
    }

    if (options_.filter_saliency()) {
      overlap_size_ = std::max<int>(
          overlap_size_,
          options_.saliency_options().filtering_sigma_time() * 1.65f);
    }
  }

  long_feature_stream_.reset(new LongFeatureStream);

  frame_num_ = 0;

  // Determine if feature descriptors need to be computed.
  // Required for irls smoothing, overlay detection and mixture homographies.
  const bool compute_mixtures =
      options_.motion_options().mix_homography_estimation() !=
      MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE;
  const bool use_spatial_bias =
      options_.motion_options().estimation_policy() ==
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS &&
      options_.motion_options().long_feature_bias_options().use_spatial_bias();
  compute_feature_descriptors_ =
      options_.post_irls_smoothing() ||
      options_.motion_options().overlay_detection() || compute_mixtures ||
      use_spatial_bias;

  if (compute_feature_descriptors_) {
    CHECK_EQ(RegionFlowComputationOptions::FORMAT_RGB,
             options_.flow_options().image_format())
        << "Feature descriptors only support RGB currently.";
    // Previous frame is cached so match descriptors can be computed against
    // it in AddFrameGeneric.
    prev_frame_.reset(new cv::Mat(frame_height_, frame_width_, CV_8UC3));
  }

  // Setup streaming buffer. By default we buffer features and motion.
  // If saliency is computed, also buffer saliency and filtered/smoothed
  // output_saliency.
  std::vector<TaggedType> data_config{
      TaggedPointerType<RegionFlowFeatureList>("features"),
      TaggedPointerType<CameraMotion>("motion")};

  std::vector<TaggedType> data_config_saliency = data_config;
  data_config_saliency.push_back(
      TaggedPointerType<SalientPointFrame>("saliency"));
  data_config_saliency.push_back(
      TaggedPointerType<SalientPointFrame>("output_saliency"));

  // Store twice the overlap.
  buffer_.reset(new StreamingBuffer(
      options_.compute_motion_saliency() ? data_config_saliency : data_config,
      2 * overlap_size_));
}
// Overwrites options_ with the preset implied by the selected analysis
// policy. Called from the constructor before explicit user options are
// merged back in, so user-set fields still take precedence.
void MotionAnalysis::InitPolicyOptions() {
  auto* flow_options = options_.mutable_flow_options();
  auto* tracking_options = flow_options->mutable_tracking_options();
  auto* motion_options = options_.mutable_motion_options();
  auto* feature_bias_options =
      motion_options->mutable_long_feature_bias_options();

  auto* translation_bounds =
      motion_options->mutable_stable_translation_bounds();
  auto* similarity_bounds = motion_options->mutable_stable_similarity_bounds();
  auto* homography_bounds = motion_options->mutable_stable_homography_bounds();

  switch (options_.analysis_policy()) {
    case MotionAnalysisOptions::ANALYSIS_POLICY_LEGACY:
      // Legacy: keep the proto defaults untouched.
      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_VIDEO:
      // Offline video: quality-oriented settings.
      // Long track settings. Temporally consistent.
      options_.set_estimation_clip_size(64);
      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);
      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);
      similarity_bounds->set_min_inlier_fraction(0.15f);
      homography_bounds->set_min_inlier_coverage(0.25f);
      flow_options->set_verify_long_features(false);

      // Better features.
      tracking_options->set_adaptive_features_levels(3);
      tracking_options->set_use_cv_tracking_algorithm(true);

      // Speed.
      flow_options->set_downsample_mode(
          RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS);
      motion_options->set_feature_density_normalization(true);
      motion_options->set_overlay_detection(true);
      motion_options->set_domain_limited_irls_scaling(true);

      motion_options->set_irls_weights_preinitialized(true);
      motion_options->mutable_irls_initialization()->set_activated(true);
      motion_options->mutable_long_feature_initialization()->set_activated(
          true);
      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_VIDEO_MOBILE:
      // On-device video: same long-track approach, tuned down for speed.
      // Long track settings. Temporally consistent.
      options_.set_estimation_clip_size(32);
      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS);
      motion_options->set_feature_density_normalization(true);
      motion_options->set_domain_limited_irls_scaling(true);
      motion_options->mutable_irls_initialization()->set_activated(true);
      motion_options->mutable_long_feature_initialization()->set_activated(
          true);

      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);
      similarity_bounds->set_inlier_threshold(2.0);
      similarity_bounds->set_min_inlier_fraction(0.15f);

      // Speed for mobile.
      tracking_options->set_tracking_window_size(8);
      tracking_options->set_tracking_iterations(8);
      tracking_options->set_fractional_tracking_distance(0.1);
      tracking_options->set_reuse_features_max_frame_distance(15);
      tracking_options->set_max_features(500);
      tracking_options->set_use_cv_tracking_algorithm(true);

      flow_options->set_downsample_mode(
          RegionFlowComputationOptions::DOWNSAMPLE_TO_MIN_SIZE);
      flow_options->set_round_downsample_factor(true);
      flow_options->set_downsampling_size(256);
      flow_options->set_pre_blur_sigma(0);
      flow_options->set_top_inlier_sets(1);
      flow_options->set_ransac_rounds_per_region(10);
      flow_options->mutable_visual_consistency_options()
          ->set_compute_consistency(false);
      flow_options->set_verify_long_features(false);

      // Avoid resetting features if a large amount is flagged.
      // In that case verify feature tracks to avoid pertubing the estimation.
      flow_options->set_verify_long_feature_acceleration(true);
      flow_options->set_verification_distance(5.0);
      flow_options->set_verify_long_feature_trigger_ratio(0.1);

      motion_options->set_use_exact_homography_estimation(false);
      motion_options->set_use_highest_accuracy_for_normal_equations(false);
      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_CAMERA_MOBILE:
      // Live camera on device: lowest latency (clip size 1).
      // Long track settings.
      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_IRLS_MASK);
      motion_options->set_feature_density_normalization(true);
      motion_options->set_domain_limited_irls_scaling(true);
      motion_options->mutable_irls_initialization()->set_activated(true);
      motion_options->mutable_irls_initialization()->set_rounds(50);

      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);
      similarity_bounds->set_inlier_threshold(2.0);
      similarity_bounds->set_min_inlier_fraction(0.15f);

      // Speed for mobile.
      tracking_options->set_tracking_window_size(8);
      tracking_options->set_tracking_iterations(8);
      tracking_options->set_fractional_tracking_distance(0.1);
      tracking_options->set_reuse_features_max_frame_distance(10);
      tracking_options->set_reuse_features_min_survived_frac(0.6);
      tracking_options->set_max_features(240);
      tracking_options->set_adaptive_tracking_distance(true);
      tracking_options->set_use_cv_tracking_algorithm(true);

      // Assumes downsampled input.
      flow_options->set_pre_blur_sigma(0);
      flow_options->set_top_inlier_sets(1);
      flow_options->set_ransac_rounds_per_region(10);
      flow_options->mutable_visual_consistency_options()
          ->set_compute_consistency(false);
      flow_options->set_verify_long_features(false);

      motion_options->set_use_exact_homography_estimation(false);
      motion_options->set_use_highest_accuracy_for_normal_equations(false);

      // Low latency.
      options_.set_estimation_clip_size(1);
      break;

    case MotionAnalysisOptions::ANALYSIS_POLICY_HYPERLAPSE:
      // Hyperlapse second pass: relaxed stability bounds, expects large
      // inter-frame motion; overlay detection already done in first pass.
      // Long track settings. Temporally consistent.
      options_.set_estimation_clip_size(64);
      tracking_options->set_internal_tracking_direction(
          TrackingOptions::FORWARD);
      tracking_options->set_tracking_policy(
          TrackingOptions::POLICY_LONG_TRACKS);
      feature_bias_options->set_use_spatial_bias(false);
      feature_bias_options->set_seed_priors_from_bias(true);

      // Relaxed stability bounds.
      translation_bounds->set_max_motion_stdev(0.15);
      translation_bounds->set_max_acceleration(60.0);
      translation_bounds->set_frac_max_motion_magnitude(0.25);
      similarity_bounds->set_min_inlier_fraction(0.02f);
      similarity_bounds->set_min_inliers(5);
      similarity_bounds->set_lower_scale(0.5);
      similarity_bounds->set_upper_scale(2.0);
      similarity_bounds->set_limit_rotation(0.5);
      homography_bounds->set_lower_scale(0.5);
      homography_bounds->set_upper_scale(2.0);
      homography_bounds->set_limit_rotation(0.5);
      homography_bounds->set_limit_perspective(0.004);
      homography_bounds->set_min_inlier_coverage(0.1);

      // Relax constraining of homographies.
      motion_options->set_lin_sim_inlier_threshold(0.02);

      // Anticipate much higher registration errors.
      motion_options->set_irls_motion_magnitude_fraction(0.2);

      flow_options->set_verify_long_features(false);

      // Better features.
      tracking_options->set_adaptive_features_levels(3);

      // Speed.
      tracking_options->set_use_cv_tracking_algorithm(true);
      flow_options->set_downsample_mode(
          RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE);

      // Less quality features.
      flow_options->set_absolute_inlier_error_threshold(4);
      flow_options->set_relative_inlier_error_threshold(0.2);

      motion_options->set_estimation_policy(
          MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS);
      motion_options->set_feature_density_normalization(true);
      // TODO: This needs to be activated and then tighten relaxed
      // thresholds above again.
      // motion_options->set_domain_limited_irls_scaling(true);

      motion_options->set_irls_weights_preinitialized(true);
      motion_options->mutable_irls_initialization()->set_activated(true);

      // More weight towards long feature tracks.
      motion_options->mutable_long_feature_initialization()->set_activated(
          true);
      motion_options->mutable_long_feature_initialization()
          ->set_min_length_percentile(0.975);
      motion_options->mutable_long_feature_initialization()
          ->set_upweight_multiplier(50);

      // 2nd pass, no need to do overlay again.
      motion_options->set_overlay_detection(false);
      break;
  }
}
// Adds a frame with no motion seed (identity homography). Returns false on
// region flow computation failure. See AddFrameWithSeed.
bool MotionAnalysis::AddFrame(const cv::Mat& frame, int64 timestamp_usec,
                              RegionFlowFeatureList* feature_list) {
  return AddFrameWithSeed(frame, timestamp_usec, Homography(), feature_list);
}
// Adds a frame with an initial transform used to seed tracking; delegates
// to AddFrameGeneric with no rejection transform, no external features and
// no feature-modification callback.
bool MotionAnalysis::AddFrameWithSeed(const cv::Mat& frame,
                                      int64 timestamp_usec,
                                      const Homography& initial_transform,
                                      RegionFlowFeatureList* feature_list) {
  return AddFrameGeneric(frame, timestamp_usec, initial_transform,
                         nullptr,  // rejection_transform
                         nullptr,  // external features
                         nullptr,  // feature modification function
                         feature_list);
}
// Core per-frame path: computes region flow for the frame, retrieves the
// feature list (with descriptors when needed), optionally merges external
// features, filters by a rejection transform, and buffers the result.
// Returns false if region flow computation or feature retrieval fails.
// output_feature_list (optional) receives a copy of the features BEFORE
// modify_features is applied.
bool MotionAnalysis::AddFrameGeneric(
    const cv::Mat& frame, int64 timestamp_usec,
    const Homography& initial_transform, const Homography* rejection_transform,
    const RegionFlowFeatureList* external_features,
    std::function<void(RegionFlowFeatureList*)>* modify_features,
    RegionFlowFeatureList* output_feature_list) {
  // Don't check input sizes here, RegionFlowComputation does that based
  // on its internal options.
  CHECK(feature_computation_) << "Calls to AddFrame* can NOT be mixed "
                              << "with AddFeatures";

  // Compute RegionFlow.
  {
    MEASURE_TIME << "CALL RegionFlowComputation::AddImage";
    if (!region_flow_computation_->AddImageWithSeed(frame, timestamp_usec,
                                                    initial_transform)) {
      LOG(ERROR) << "Error while computing region flow.";
      return false;
    }
  }

  // Buffer features.
  std::unique_ptr<RegionFlowFeatureList> feature_list;
  {
    MEASURE_TIME << "CALL RegionFlowComputation::RetrieveRegionFlowFeatureList";
    // Match descriptors need a previous frame, hence frame_num_ > 0.
    const bool compute_feature_match_descriptors =
        compute_feature_descriptors_ && frame_num_ > 0;

    // Determine which region flow should be computed in case multi frame
    // tracking has been requested.
    int max_track_index = 0;
    if (options_.flow_options().tracking_options().tracking_policy() ==
        TrackingOptions::POLICY_MULTI_FRAME) {
      max_track_index = std::min(
          options_.flow_options().tracking_options().multi_frames_to_track() -
              1,
          options_.track_index());
    }

    feature_list.reset(
        region_flow_computation_->RetrieveMultiRegionFlowFeatureList(
            std::min(std::max(0, frame_num_ - 1), max_track_index),
            compute_feature_descriptors_, compute_feature_match_descriptors,
            &frame,
            compute_feature_match_descriptors ? prev_frame_.get() : nullptr));

    if (feature_list == nullptr) {
      LOG(ERROR) << "Error retrieving feature list.";
      return false;
    }
  }

  if (external_features) {
    // Offset external track ids to keep them disjoint from internal ones.
    constexpr int kTrackIdShift = 1 << 20;
    for (const auto& external_feat : external_features->feature()) {
      auto* new_feat = feature_list->add_feature();
      *new_feat = external_feat;
      if (new_feat->track_id() >= 0) {
        new_feat->set_track_id(new_feat->track_id() + kTrackIdShift);
      }
    }
  }

  if (rejection_transform) {
    // Keep only features whose match agrees with the rejection transform
    // within rejection_transform_threshold.
    RegionFlowFeatureList tmp_list;
    tmp_list.mutable_feature()->Swap(feature_list->mutable_feature());
    for (const auto& feature : tmp_list.feature()) {
      const Vector2_f diff =
          TransformPoint(*rejection_transform, FeatureLocation(feature)) -
          FeatureMatchLocation(feature);
      if (diff.Norm() < options_.rejection_transform_threshold()) {
        *feature_list->add_feature() = feature;
      }
    }
  }

  if (output_feature_list) {
    *output_feature_list = *feature_list;
  }

  if (modify_features) {
    (*modify_features)(feature_list.get());
  }

  buffer_->EmplaceDatum("features", feature_list.release());

  // Store frame for next call.
  if (compute_feature_descriptors_) {
    frame.copyTo(*prev_frame_);
  }

  ++frame_num_;
  return true;
}
// Enqueues externally computed features directly, bypassing internal region
// flow computation. Disables the AddFrame* path for this instance.
void MotionAnalysis::AddFeatures(const RegionFlowFeatureList& features) {
  feature_computation_ = false;
  buffer_->EmplaceDatum("features", new RegionFlowFeatureList(features));
  ++frame_num_;
}
// Enqueues a precomputed (features, motion) pair, skipping both region flow
// and motion estimation. Requires features and motions to stay in lockstep,
// so it can not be mixed with the other Add* calls.
void MotionAnalysis::EnqueueFeaturesAndMotions(
    const RegionFlowFeatureList& features, const CameraMotion& motion) {
  feature_computation_ = false;
  CHECK(buffer_->HaveEqualSize({"motion", "features"}))
      << "Can not be mixed with other Add* calls";
  buffer_->EmplaceDatum("features", new RegionFlowFeatureList(features));
  buffer_->EmplaceDatum("motion", new CameraMotion(motion));
}
// Forwards to RegionFlowComputation: grayscale frame of the latest results.
cv::Mat MotionAnalysis::GetGrayscaleFrameFromResults() {
  return region_flow_computation_->GetGrayscaleFrameFromResults();
}
// Runs motion estimation (and optionally saliency) over newly buffered
// feature lists, then outputs finished frames. Unless flush is set, waits
// until at least estimation_clip_size() new frames are buffered and returns
// 0 otherwise. Returns the number of frames output.
// saliency must be non-null iff compute_motion_saliency() is set.
int MotionAnalysis::GetResults(
    bool flush, std::vector<std::unique_ptr<RegionFlowFeatureList>>* features,
    std::vector<std::unique_ptr<CameraMotion>>* camera_motion,
    std::vector<std::unique_ptr<SalientPointFrame>>* saliency) {
  MEASURE_TIME << "GetResults";
  const int num_features_lists = buffer_->BufferSize("features");
  // Frames past overlap_start_ have not been processed yet.
  const int num_new_feature_lists = num_features_lists - overlap_start_;
  CHECK_GE(num_new_feature_lists, 0);

  if (!flush && num_new_feature_lists < options_.estimation_clip_size()) {
    // Nothing to compute, return.
    return 0;
  }

  const bool compute_saliency = options_.compute_motion_saliency();
  CHECK_EQ(compute_saliency, saliency != nullptr)
      << "Computing saliency requires saliency output and vice versa";

  // Estimate motions for newly buffered RegionFlowFeatureLists, which also
  // computes IRLS feature weights for foreground estimation, if needed
  // (otherwise could be externally added).
  const int num_motions_to_compute =
      buffer_->BufferSize("features") - buffer_->BufferSize("motion");
  if (num_motions_to_compute > 0) {
    std::vector<CameraMotion> camera_motions;
    std::vector<RegionFlowFeatureList*> feature_lists;
    for (int k = overlap_start_; k < num_features_lists; ++k) {
      feature_lists.push_back(
          buffer_->GetMutableDatum<RegionFlowFeatureList>("features", k));
    }

    // TODO: Result should be vector of unique_ptr.
    motion_estimation_->EstimateMotionsParallel(
        options_.post_irls_smoothing(), &feature_lists, &camera_motions);

    // Add solution to buffer.
    for (const auto& motion : camera_motions) {
      buffer_->EmplaceDatum("motion", new CameraMotion(motion));
    }
  }

  CHECK(buffer_->HaveEqualSize({"features", "motion"}));

  if (compute_saliency) {
    ComputeSaliency();
  }

  return OutputResults(flush, features, camera_motion, saliency);
}
// Moves finished frames out of the streaming buffer into the output
// vectors, keeping overlap_size_ trailing frames buffered (unless flushing)
// so the next chunk can be filtered consistently. Frames inside the
// retained overlap are output as copies; all earlier frames transfer
// ownership. Returns the number of frames output.
int MotionAnalysis::OutputResults(
    bool flush, std::vector<std::unique_ptr<RegionFlowFeatureList>>* features,
    std::vector<std::unique_ptr<CameraMotion>>* camera_motion,
    std::vector<std::unique_ptr<SalientPointFrame>>* saliency) {
  const bool compute_saliency = options_.compute_motion_saliency();
  CHECK_EQ(compute_saliency, saliency != nullptr)
      << "Computing saliency requires saliency output and vice versa";
  CHECK(buffer_->HaveEqualSize({"features", "motion"}));

  // Discard prev. overlap (already output, just used for filtering here).
  buffer_->DiscardData(buffer_->AllTags(), prev_overlap_start_);
  prev_overlap_start_ = 0;

  // Output only frames not part of the overlap.
  const int num_output_frames =
      std::max(0, buffer_->MaxBufferSize() - (flush ? 0 : overlap_size_));

  if (features) {
    features->reserve(num_output_frames);
  }
  if (camera_motion) {
    camera_motion->reserve(num_output_frames);
  }
  if (saliency) {
    saliency->reserve(num_output_frames);
  }

  // Need to retain twice the overlap, for frames in that segment we need
  // to create a copy.
  const int new_overlap_start =
      std::max(0, num_output_frames - (flush ? 0 : overlap_size_));

  // Populate output.
  for (int k = 0; k < num_output_frames; ++k) {
    std::unique_ptr<RegionFlowFeatureList> out_features;
    std::unique_ptr<CameraMotion> out_motion;
    std::unique_ptr<SalientPointFrame> out_saliency;
    if (k >= new_overlap_start) {
      // Create copy.
      out_features.reset(new RegionFlowFeatureList(
          *buffer_->GetDatum<RegionFlowFeatureList>("features", k)));
      out_motion.reset(
          new CameraMotion(*buffer_->GetDatum<CameraMotion>("motion", k)));
    } else {
      // Release datum.
      out_features =
          buffer_->ReleaseDatum<RegionFlowFeatureList>("features", k);
      out_motion = buffer_->ReleaseDatum<CameraMotion>("motion", k);
    }

    // output_saliency is temporary so we never need to buffer it.
    if (compute_saliency) {
      out_saliency =
          buffer_->ReleaseDatum<SalientPointFrame>("output_saliency", k);
    }

    if (options_.subtract_camera_motion_from_features()) {
      std::vector<RegionFlowFeatureList*> feature_view{out_features.get()};
      SubtractCameraMotionFromFeatures({*out_motion}, &feature_view);
    }

    if (features != nullptr) {
      features->push_back(std::move(out_features));
    }
    if (camera_motion != nullptr) {
      camera_motion->push_back(std::move(out_motion));
    }
    if (saliency != nullptr) {
      saliency->push_back(std::move(out_saliency));
    }
  }

  // Reset for next chunk.
  prev_overlap_start_ = num_output_frames - new_overlap_start;
  CHECK_GE(prev_overlap_start_, 0);

  CHECK(buffer_->TruncateBuffer(flush));
  overlap_start_ = buffer_->MaxBufferSize();
  return num_output_frames;
}
// Renders debug visualizations into rendered_results (which must match the
// analysis frame dimensions): feature tracks, salient points, the
// blur-analysis region and a textual stats HUD, each gated by the
// corresponding visualization option. saliency may be null.
// Compiled to LOG(FATAL) when NO_RENDERING is defined.
void MotionAnalysis::RenderResults(const RegionFlowFeatureList& feature_list,
                                   const CameraMotion& motion,
                                   const SalientPointFrame* saliency,
                                   cv::Mat* rendered_results) {
#ifndef NO_RENDERING
  CHECK(rendered_results != nullptr);
  CHECK_EQ(frame_width_, rendered_results->cols);
  CHECK_EQ(frame_height_, rendered_results->rows);

  // Bind by reference: `auto` here copied the whole VisualizationOptions
  // proto on every rendered frame.
  const auto& viz_options = options_.visualization_options();

  // Visualize flow features if requested.
  if (viz_options.visualize_region_flow_features()) {
    cv::Scalar inlier_color(0, 255, 0);
    cv::Scalar outlier_color(255, 0, 0);
    if (feature_list.long_tracks()) {
      long_feature_stream_->AddFeatures(feature_list,
                                        true,   // Check connectivity.
                                        true);  // Purge non present ones.
      VisualizeLongFeatureStream(
          *long_feature_stream_, inlier_color, outlier_color,
          viz_options.min_long_feature_track(),
          viz_options.max_long_feature_points(), 1.0f, 1.0f, rendered_results);
    } else {
      VisualizeRegionFlowFeatures(feature_list, inlier_color, outlier_color,
                                  true, 1.0f, 1.0f, rendered_results);
    }
  }

  if (saliency != nullptr && viz_options.visualize_salient_points()) {
    static const cv::Scalar kColor(255, 0, 0);
    RenderSaliency(*saliency, kColor, viz_options.line_thickness(), false,
                   rendered_results);
  }

  if (viz_options.visualize_blur_analysis_region()) {
    VisualizeBlurAnalysisRegions(rendered_results);
  }

  if (viz_options.visualize_stats()) {
    // Output general stats.
    std::string hud_text = absl::StrFormat(
        "H-cvg %.2f | H-err %4.2f | Avg.t %3.1f | dx: %+2.1f dy: %+2.1f "
        "Feat# %4d | %s | ",
        motion.homography_inlier_coverage(), motion.average_homography_error(),
        motion.average_magnitude(), motion.translation().dx(),
        motion.translation().dy(), feature_list.feature_size(),
        CameraMotionTypeToString(motion));
    hud_text += CameraMotionFlagToString(motion);

    // Text size scales with frame width so the HUD stays readable.
    const float text_scale = frame_width_ * 5.e-4;
    cv::putText(*rendered_results, hud_text,
                cv::Point(0.02 * frame_width_, 0.975 * frame_height_),
                cv::FONT_HERSHEY_SIMPLEX, text_scale, cv::Scalar::all(255),
                text_scale * 3, CV_AA);

    // Timestamp (ms) in the top-right corner.
    cv::putText(*rendered_results,
                absl::StrFormat("%6d", motion.timestamp_usec() / 1000),
                cv::Point(0.9 * frame_width_, 0.05 * frame_height_),
                cv::FONT_HERSHEY_SIMPLEX, text_scale, cv::Scalar::all(255),
                text_scale * 3, CV_AA);
  }
#else
  LOG(FATAL) << "Code stripped out because of NO_RENDERING";
#endif
}
// Interpolates sparse per-feature foreground weights into a dense 8-bit
// mask (0..255) of the frame size using push-pull filtering. Lazily creates
// the push-pull filter on first use.
void MotionAnalysis::ComputeDenseForeground(
    const RegionFlowFeatureList& feature_list,
    const CameraMotion& camera_motion, cv::Mat* foreground_mask) {
  const auto& foreground_options = options_.foreground_options();
  if (foreground_push_pull_ == nullptr) {
    foreground_push_pull_.reset(
        new PushPullFilteringC1(cv::Size(frame_width_, frame_height_),
                                PushPullFilteringC1::BINOMIAL_5X5, false,
                                nullptr,    // Gaussian filter weights only.
                                nullptr,    // No mip map visualizer.
                                nullptr));  // No weight adjustment.
  }

  // Determine foreground weights for each features.
  std::vector<float> foreground_weights;
  ForegroundWeightsFromFeatures(
      feature_list, foreground_options.foreground_threshold(),
      foreground_options.foreground_gamma(),
      foreground_options.threshold_coverage_scaling() ? &camera_motion
                                                      : nullptr,
      &foreground_weights);

  // Setup push pull map (with border). Ensure constructor used the right type.
  CHECK(foreground_push_pull_->filter_type() ==
            PushPullFilteringC1::BINOMIAL_5X5 ||
        foreground_push_pull_->filter_type() ==
            PushPullFilteringC1::GAUSSIAN_5X5);
  // +4 in each dimension: 2-pixel border on each side for the 5x5 filters.
  cv::Mat foreground_map(frame_height_ + 4, frame_width_ + 4, CV_32FC2);

  // Collect locations and weights of features not marked as outliers.
  std::vector<Vector2_f> feature_locations;
  std::vector<cv::Vec<float, 1>> feature_irls;
  for (int feat_idx = 0; feat_idx < foreground_weights.size(); ++feat_idx) {
    // Skip marked outliers.
    if (foreground_weights[feat_idx] == 0) {
      continue;
    }
    feature_locations.push_back(
        FeatureLocation(feature_list.feature(feat_idx)));
    feature_irls.push_back(cv::Vec<float, 1>(foreground_weights[feat_idx]));
  }

  // Run push pull.
  foreground_push_pull_->PerformPushPull(feature_locations, feature_irls, 0.2,
                                         cv::Point2i(0, 0),
                                         0,        // Default read out level.
                                         nullptr,  // Uniform weights.
                                         nullptr,  // No bilateral term.
                                         &foreground_map);

  // Convert to grayscale output.
  foreground_mask->create(frame_height_, frame_width_, CV_8U);
  for (int i = 0; i < frame_height_; ++i) {
    const float* src_ptr = foreground_map.ptr<float>(i);
    uint8* dst_ptr = foreground_mask->ptr<uint8>(i);
    for (int j = 0; j < frame_width_; ++j) {
      // Result is in first channel (second is confidence).
      dst_ptr[j] =
          std::max(0, std::min(255, static_cast<int>(src_ptr[2 * j] * 255.0f)));
    }
  }
}
// Overlays the foreground mask onto *output via burn-in alpha compositing.
// Colors are taken from a jet color map if foreground_jet_coloring is set in
// the visualization options, otherwise plain green is used.
void MotionAnalysis::VisualizeDenseForeground(const cv::Mat& foreground_mask,
                                              cv::Mat* output) {
  CHECK(output != nullptr);
  CHECK(foreground_mask.size() == output->size());

  // Build the color lookup table mapping foreground magnitude to color.
  std::vector<Vector3_f> color_map;
  if (options_.visualization_options().foreground_jet_coloring()) {
    JetColoring(1000, &color_map);
  } else {
    color_map.resize(1000, Vector3_f(0, 255, 0));
  }

  // Clamps an int to the valid 8-bit pixel range [0, 255].
  auto to_pixel = [](int value) -> int {
    return std::max(0, std::min(255, value));
  };

  // Burn-in alpha compositing.
  const float alpha = 1.3f;
  for (int y = 0; y < frame_height_; ++y) {
    uint8* out_row = output->ptr<uint8>(y);
    const uint8* mask_row = foreground_mask.ptr<uint8>(y);
    for (int x = 0; x < frame_width_; ++x) {
      const float norm_foreground = mask_row[x] * (1.0 / 255.0f);
      const float foreground = norm_foreground * alpha;
      const float alpha_denom = 1.0f / (1.0f + foreground);
      const Vector3_f color =
          color_map[static_cast<int>(norm_foreground * (color_map.size() - 1))];
      // Composite each of the 3 interleaved channels in place.
      for (int c = 0; c < 3; ++c) {
        out_row[3 * x + c] =
            to_pixel((out_row[3 * x + c] + color[c] * foreground) *
                     alpha_denom);
      }
    }
  }
}
// Blends the blur-analysis mask over the input frame, dimming regions that
// are not used for blur analysis.
void MotionAnalysis::VisualizeBlurAnalysisRegions(cv::Mat* input_view) {
  CHECK(input_view != nullptr);

  // Corner response (minimum eigenvalue) computed on the grayscale frame.
  cv::Mat grayscale;
  cv::cvtColor(*input_view, grayscale, CV_RGB2GRAY);
  cv::Mat corner_response;
  cv::cornerMinEigenVal(grayscale, corner_response, 3);

  // Mask of pixels that participate in blur analysis.
  cv::Mat blur_mask;
  region_flow_computation_->ComputeBlurMask(*input_view, &corner_response,
                                            &blur_mask);

  // Replicate mask to 3 channels and blend 50/50 with the frame (with a
  // -128 offset to darken masked-out regions).
  cv::Mat blur_mask_rgb;
  cv::cvtColor(blur_mask, blur_mask_rgb, CV_GRAY2RGB);
  cv::addWeighted(*input_view, 0.5, blur_mask_rgb, 0.5, -128, *input_view);
}
// Computes motion saliency (likely moving-foreground regions) for every newly
// buffered frame, then stages filtered copies in the "output_saliency" buffer
// for output. The buffered originals are left unmodified; only the copies are
// inlier-selected / filtered.
void MotionAnalysis::ComputeSaliency() {
  MEASURE_TIME << "Saliency computation.";
  // Saliency must have been computed exactly up to the start of the overlap
  // region; we only append results for frames past it.
  CHECK_EQ(overlap_start_, buffer_->BufferSize("saliency"));
  const int num_features_lists = buffer_->BufferSize("features");
  // Compute saliency only for newly buffered RegionFlowFeatureLists.
  for (int k = overlap_start_; k < num_features_lists; ++k) {
    // Per-feature foreground weights; optionally scaled by the corresponding
    // CameraMotion when threshold_coverage_scaling is enabled.
    std::vector<float> foreground_weights;
    ForegroundWeightsFromFeatures(
        *buffer_->GetDatum<RegionFlowFeatureList>("features", k),
        options_.foreground_options().foreground_threshold(),
        options_.foreground_options().foreground_gamma(),
        options_.foreground_options().threshold_coverage_scaling()
            ? buffer_->GetDatum<CameraMotion>("motion", k)
            : nullptr,
        &foreground_weights);
    std::unique_ptr<SalientPointFrame> saliency(new SalientPointFrame());
    motion_saliency_->SaliencyFromFeatures(
        *buffer_->GetDatum<RegionFlowFeatureList>("features", k),
        &foreground_weights, saliency.get());
    buffer_->AddDatum("saliency", std::move(saliency));
  }
  CHECK(buffer_->HaveEqualSize({"features", "motion", "saliency"}));
  // Clear output saliency and copy from saliency. Copies (not the buffered
  // originals) receive the destructive filtering below.
  buffer_->DiscardDatum("output_saliency",
                        buffer_->BufferSize("output_saliency"));
  for (int k = 0; k < buffer_->BufferSize("saliency"); ++k) {
    std::unique_ptr<SalientPointFrame> copy(new SalientPointFrame());
    *copy = *buffer_->GetDatum<SalientPointFrame>("saliency", k);
    buffer_->AddDatum("output_saliency", std::move(copy));
  }
  // Create view.
  std::vector<SalientPointFrame*> saliency_view =
      buffer_->GetMutableDatumVector<SalientPointFrame>("output_saliency");
  // saliency_frames are filtered after this point and ready for output.
  // Keep only salient points with sufficient spatial and temporal support.
  if (options_.select_saliency_inliers()) {
    motion_saliency_->SelectSaliencyInliers(&saliency_view, false);
  }
  // Spatio-temporal smoothing, applied after inlier selection (if any).
  if (options_.filter_saliency()) {
    motion_saliency_->FilterMotionSaliency(&saliency_view);
  }
}
} // namespace mediapipe

View File

@ -0,0 +1,229 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Module for performing motion analysis on a video stream including computing
// locally filtered (robust) feature tracking, camera motion estimation, and
// dense foreground saliency estimation.
// Module buffers frames internally (using an adaptive overlap to achieve
// temporal consistency):
//
// Usage example:
//
// MotionAnalysisOptions options();
// Should always be a multiple of 8 for optimal parallel performance
// options.set_estimation_clip_size(16);
// MotionAnalysis motion_analysis(options, 960, 540);
// std::vector<cv::Mat> input_frames(N);
// // Define output vectors.
// std::vector<std::unique_ptr<RegionFlowFeatureList>> features;
// std::vector<std::unique_ptr<CameraMotion>> camera_motion;
// std::vector<std::unique_ptr<SalientPointFrame>> saliency;
// std::vector<cv::Mat> rendered_results; // Should be initialized with
// // frame.
//
// for (int k = 0; k < N; ++k) {
// motion_analysis.AddFrame(input_frames[k], 0);
// // Outputs results, if new ones are available.
// // Output will be all of the same lengths (Length returned by function).
// if (motion_analysis.GetResults(k + 1 == N, // Flush, force output.
// &features,
// &camera_motion,
// &saliency) > 0) { // Optional.
// // Optionally render at i'th frame.
// motion_analysis.RenderResults(*features[i],
// *camera_motion[i],
// saliency[i].get(),
// &rendered_results[i]);
//
// // Output results...
// }
// }
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_
#define MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_
#include <memory>
#include <string>
#include <vector>
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/motion_analysis.pb.h"
#include "mediapipe/util/tracking/motion_estimation.h"
#include "mediapipe/util/tracking/motion_estimation.pb.h"
#include "mediapipe/util/tracking/motion_saliency.h"
#include "mediapipe/util/tracking/push_pull_filtering.h"
#include "mediapipe/util/tracking/region_flow.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
#include "mediapipe/util/tracking/region_flow_computation.h"
#include "mediapipe/util/tracking/streaming_buffer.h"
namespace mediapipe {
typedef PushPullFiltering<1, FilterWeightMultiplierOne> PushPullFlowC1;
// Performs motion analysis (robust feature tracking, camera motion
// estimation, optional foreground saliency) on a stream of frames. Frames
// are buffered internally; see file-level comment for a usage example.
class MotionAnalysis {
 public:
  MotionAnalysis(const MotionAnalysisOptions& options, int frame_width,
                 int frame_height);
  ~MotionAnalysis() = default;

  MotionAnalysis(const MotionAnalysis&) = delete;
  MotionAnalysis& operator=(const MotionAnalysis&) = delete;

  // Call with every frame. Timestamp is optional (set to zero if not needed).
  // Optionally outputs list of features extracted from this frame.
  // Returns true on success.
  bool AddFrame(const cv::Mat& frame, int64 timestamp_usec,
                RegionFlowFeatureList* feature_list = nullptr);

  // Same as above, but uses specified initial transform to seed
  // feature locations.
  bool AddFrameWithSeed(const cv::Mat& frame, int64 timestamp_usec,
                        const Homography& initial_transform,
                        RegionFlowFeatureList* feature_list = nullptr);

  // Generic function to perform motion analysis on the passed frame,
  // with initial_transform used as seed.
  // Optionally accepts external_feature to be added to the computed ones,
  // and to reject all features that do not agree with the rejection transform
  // within a specified threshold (rejection_transform_threshold in options).
  // Also allows to modify feature locations before motion estimation by
  // supplying appropriate callback - this is invoked *after* the rejection
  // transform.
  // Returns list of features extracted from this frame, *before* any
  // modification is applied. To yield modified features, simply
  // apply modify_features function to returned result.
  bool AddFrameGeneric(
      const cv::Mat& frame, int64 timestamp_usec,
      const Homography& initial_transform,
      const Homography* rejection_transform = nullptr,
      const RegionFlowFeatureList* external_features = nullptr,
      std::function<void(RegionFlowFeatureList*)>* modify_features = nullptr,
      RegionFlowFeatureList* feature_list = nullptr);

  // Instead of tracking passed frames, uses result directly as supplied by
  // features. Can not be mixed with above AddFrame* calls.
  void AddFeatures(const RegionFlowFeatureList& features);

  // Instead of tracking and computing camera motions, simply adds precomputed
  // features and camera motions to the internal buffers. Can not be mixed
  // with above Add* calls.
  // This is useful for just computing saliency via GetResults.
  void EnqueueFeaturesAndMotions(const RegionFlowFeatureList& features,
                                 const CameraMotion& motion);

  // Returns motion results (features, camera motions and saliency, all
  // optional).
  // Call after every AddFrame for optimal performance.
  // Returns number of available results. Note, this call will often return
  // zero, and only return results (multiple in this case) when chunk
  // boundaries are reached. The actual number returned depends on various
  // smoothing settings for saliency and features.
  // Set flush to true, to force output of all results (e.g. when the end of
  // the video stream is reached).
  // Note: Passing a non-zero argument for saliency, requires
  // MotionAnalysisOptions::compute_motion_saliency to be set and
  // vice versa. (CHECKED)
  int GetResults(
      bool flush,  // Forces output.
      std::vector<std::unique_ptr<RegionFlowFeatureList>>* features = nullptr,
      std::vector<std::unique_ptr<CameraMotion>>* camera_motion = nullptr,
      std::vector<std::unique_ptr<SalientPointFrame>>* saliency = nullptr);

  // Exposes the grayscale image frame from the most recently created region
  // flow tracking data.
  cv::Mat GetGrayscaleFrameFromResults();

  // Renders features and saliency to rendered_results based on
  // VisualizationOptions onto pre-initialized rendered_results (in most cases
  // you want to create a copy of the input frame).
  // NOTE:
  // If features are requested to be rendered, this function should be
  // called serially with each frame, or wrong feature location might be
  // rendered.
  void RenderResults(const RegionFlowFeatureList& features,
                     const CameraMotion& camera_motion,
                     const SalientPointFrame* saliency,  // Optional.
                     cv::Mat* rendered_results);

  // Determines dense foreground mask from features.
  // Returns foreground mask as CV_8U image, indicating probability of
  // foreground.
  void ComputeDenseForeground(const RegionFlowFeatureList& feature_list,
                              const CameraMotion& camera_motion,
                              cv::Mat* foreground_mask);

  // Overlays foreground mask over output as green burn in, or with
  // jet_coloring (based on options).
  void VisualizeDenseForeground(const cv::Mat& foreground_mask,
                                cv::Mat* output);

  // Masks out regions from input that are not used for blur analysis.
  void VisualizeBlurAnalysisRegions(cv::Mat* input);

  // Number of frames/features added so far.
  int NumFrames() const { return frame_num_; }

 private:
  // Applies the pre-configured analysis policy (if any) to options_.
  void InitPolicyOptions();

  // Compute saliency from buffered features and motions.
  void ComputeSaliency();

  // Outputs computed results from the streaming buffer to the optional
  // output args. Also performs overlap handling.
  int OutputResults(
      bool flush,  // Forces output.
      std::vector<std::unique_ptr<RegionFlowFeatureList>>* features = nullptr,
      std::vector<std::unique_ptr<CameraMotion>>* camera_motion = nullptr,
      std::vector<std::unique_ptr<SalientPointFrame>>* saliency = nullptr);

  MotionAnalysisOptions options_;
  int frame_width_ = 0;
  int frame_height_ = 0;
  int frame_num_ = 0;

  // Internal objects for actual motion analysis.
  std::unique_ptr<RegionFlowComputation> region_flow_computation_;
  std::unique_ptr<MotionEstimation> motion_estimation_;
  std::unique_ptr<MotionSaliency> motion_saliency_;
  std::unique_ptr<PushPullFlowC1> foreground_push_pull_;

  // Used for visualization if long feature tracks are present.
  std::unique_ptr<LongFeatureStream> long_feature_stream_;
  std::unique_ptr<StreamingBuffer> buffer_;

  // Indicates where previous overlap in above buffers starts (earlier data is
  // just to improve smoothing).
  int prev_overlap_start_ = 0;

  // Indicates where actual overlap starts (data after this has not been
  // output).
  int overlap_start_ = 0;

  // Buffers previous frame.
  std::unique_ptr<cv::Mat> prev_frame_;
  bool compute_feature_descriptors_ = false;

  // Amount of overlap between clips. Determined from saliency smoothing
  // and filtering options.
  int overlap_size_ = 0;

  // NOTE(review): presumably false once features are supplied externally via
  // AddFeatures / EnqueueFeaturesAndMotions — confirm against the .cc.
  bool feature_computation_ = true;
};
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_

View File

@ -0,0 +1,154 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/util/tracking/motion_estimation.proto";
import "mediapipe/util/tracking/motion_saliency.proto";
import "mediapipe/util/tracking/region_flow_computation.proto";
// Settings for MotionAnalysis. This class computes sparse, locally consistent
// flow (referred to as region flow), camera motions, and foreground saliency
// (i.e. likely foreground objects moving different from the background).
// Next tag: 16
message MotionAnalysisOptions {
  // Pre-configured policies for MotionAnalysis.
  // For general use, it is recommended to select an appropriate policy
  // instead of customizing flow and motion options by hand.
  // Policies are being kept up to date with appropriate settings.
  enum AnalysisPolicy {
    // Default legacy options. Effectively a no-op.
    ANALYSIS_POLICY_LEGACY = 0;

    // Use for video.
    ANALYSIS_POLICY_VIDEO = 1;

    // Use for video on mobile.
    ANALYSIS_POLICY_VIDEO_MOBILE = 2;

    // Use if applied to camera stream on mobile, e.g.
    // low latency and high throughput.
    // ASSUMES DOWNSAMPLED INPUT, e.g. from GPU.
    ANALYSIS_POLICY_CAMERA_MOBILE = 3;

    // Use for sped up video / hyperlapse when adding frames with seeds
    // and rejection transforms. Mostly ups temporal consistency weights
    // and relaxes stability constraints.
    // Only recommended to be used as second pass after initial MotionAnalysis
    // and FrameSelection.
    ANALYSIS_POLICY_HYPERLAPSE = 4;
  }

  optional AnalysisPolicy analysis_policy = 14
      [default = ANALYSIS_POLICY_LEGACY];

  // Options for the actual motion stabilization
  // (in order of object usage).
  optional RegionFlowComputationOptions flow_options = 1;
  optional MotionEstimationOptions motion_options = 2;
  optional MotionSaliencyOptions saliency_options = 3;

  // Clip-size used for (parallelized) motion estimation.
  optional int32 estimation_clip_size = 4 [default = 16];

  // If set, camera motion is subtracted from features before output.
  // Effectively outputs, residual motion w.r.t. background.
  optional bool subtract_camera_motion_from_features = 5 [default = false];

  // If flow_options().tracking_options().tracking_policy() equals
  // POLICY_MULTI_FRAME, this flag indicates which RegionFlowFeatureList to
  // use. Specifically, for frame C, we use the motion from
  // C to C - 1 - track_index.
  optional int32 track_index = 6 [default = 0];

  // If set, compute motion saliency (regions of moving foreground).
  optional bool compute_motion_saliency = 7 [default = false];

  // Selects saliency inliers (only saliency locations with sufficient
  // spatial and temporal support are kept).
  // Only applied when compute_motion_saliency is set.
  optional bool select_saliency_inliers = 8 [default = true];

  // Performs spatio-temporal filtering of extracted foreground saliency. If
  // used with above selection of saliency inliers, filtering is performed
  // *after* inlier selection.
  // Only applied when compute_motion_saliency is set.
  optional bool filter_saliency = 9 [default = true];

  // If set, irls weights of motion estimation are spatio-temporally smoothed
  // after model estimation.
  optional bool post_irls_smoothing = 10 [default = false];

  // If a rejection_transform is passed to AddFrameGeneric, features that
  // do not agree with the transform within below threshold are removed.
  optional float rejection_transform_threshold = 13 [default = 20.0];

  // Adapts visualization for rendered_results when passed to GetResults.
  message VisualizationOptions {
    // Visualizes tracked region flow features, colored w.r.t. fitting error.
    optional bool visualize_region_flow_features = 1 [default = true];

    // Visualizes salient points. Only applicable if compute_motion_saliency
    // is set to true.
    optional bool visualize_salient_points = 2 [default = false];

    // Line thickness of ellipse when rendering salient points.
    optional int32 line_thickness = 5 [default = 4];

    // Instead of green burn in uses jet coloring to indicate magnitude of
    // foreground motion.
    optional bool foreground_jet_coloring = 3 [default = false];

    // If set, only keeps masks of pixels that is used for blur analysis,
    // rest is set to zero.
    optional bool visualize_blur_analysis_region = 4 [default = false];

    optional bool visualize_stats = 6 [default = true];

    // Only long feature tracks with specified minimum length are rendered.
    // Set to zero to consider all tracks.
    optional int32 min_long_feature_track = 7 [default = 0];

    // Only the last N points of a long feature track are rendered. Set to
    // zero to render all points.
    optional int32 max_long_feature_points = 8 [default = 0];
  }

  optional VisualizationOptions visualization_options = 11;

  // Describes how to compute foreground from features.
  message ForegroundOptions {
    // Indicates the *inverse* registration error (i.e. the irls weight)
    // that is deemed a complete inlier.
    // Weights in the interval [0, foreground_threshold] (corresponding to
    // pixel errors in the interval [1 / foreground_threshold, inf])
    // are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero
    // with weights below the threshold being assigned values > 0.
    // Therefore, larger values will increase amount of detected foreground
    // as well as noise.
    optional float foreground_threshold = 1 [default = 0.5];

    // By using foreground_gamma < 1.0 you can increase resolution of small
    // foreground motion at the expense of the resolution of large foreground
    // motions.
    optional float foreground_gamma = 2 [default = 1.0];

    // Threshold is scaled by coverage, i.e. for frames with large
    // registration error less foreground is visualized.
    optional bool threshold_coverage_scaling = 3 [default = true];
  }

  optional ForegroundOptions foreground_options = 12;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,758 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Fits several linear motion models to the tracked features obtained
// from RegionFlowComputation.
//
// --- Multi-threaded usage (parallel motion estimation over frames) ---
// assume input: vector<RegionFlowFeatureList*> feature_lists
// // Can be obtained after getting RegionFlowFrame from RegionFlowComputation
// // and executing GetRegionFlowFeatureList (region_flow.h)
// MotionEstimation motion_estimation(MotionEstimationOptions(),
// frame_width,
// frame_height);
// vector<CameraMotion> camera_motions;
// motion_estimation.EstimateMotionsParallel(false, // no IRLS smoothing.
// &feature_lists,
// &camera_motions);
// // RegionFlowFeatureList can be discarded or passed to Cropper.
//
//
// --- DEPRECATED, per-frame usage ---
// assume input: RegionFlowFrame* flow_frame // from RegionFlowComputation.
//
// // Initialize with standard options.
// MotionEstimation motion_estimation(MotionEstimationOptions(),
// frame_width,
// frame_height);
// CameraMotion estimated_motion;
// motion_estimation.EstimateMotion(flow_frame,
// NULL, // deprecated param.
// NULL, // deprecated param.
// &estimated_motion);
//
// // If features are not needed anymore flow_frame can be discarded now.
//
// // Pass motion models in estimated_motion onto MotionStabilization,
// // if stabilization is desired.
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
#define MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_
#include <algorithm>
#include <deque>
#include <list>
#include <memory>
#include <unordered_map>
#include <vector>
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/camera_motion.pb.h"
#include "mediapipe/util/tracking/motion_estimation.pb.h"
#include "mediapipe/util/tracking/motion_models.pb.h"
#include "mediapipe/util/tracking/region_flow.h"
namespace mediapipe {
class Homography;
class LinearSimilarityModel;
class MixtureHomography;
class MixtureRowWeights;
class RegionFlowFeature;
class RegionFlowFeatureList;
class RegionFlowFrame;
class EstimateMotionIRLSInvoker;
class InlierMask;
class IrlsInitializationInvoker;
// Thread local storage for pre-allocated memory.
class MotionEstimationThreadStorage;
class TrackFilterInvoker;
class MotionEstimation {
public:
MotionEstimation(const MotionEstimationOptions& options, int frame_width,
int frame_height);
virtual ~MotionEstimation();
MotionEstimation(const MotionEstimation&) = delete;
MotionEstimation& operator=(const MotionEstimation&) = delete;
// Can be used to re-initialize options between EstimateMotion /
// EstimateMotionsParallel calls.
void InitializeWithOptions(const MotionEstimationOptions& options);
// Estimates motion models from RegionFlowFeatureLists based on
// MotionEstimationOptions, in a multithreaded manner (frame parallel).
// The computed IRLS weights used on the last iteration of the highest
// degree of freedom model are *written* to the irls_weight member for each
// RegionFlowFeature in RegionFlowFeatureList which can be a useful
// feature for later processing.
// In addition the returned irls weights can be smoothed spatially
// and temporally before they are output.
// Note: The actual vector feature_lists is not modified.
virtual void EstimateMotionsParallel(
bool post_irls_weight_smoothing,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<CameraMotion>* camera_motions) const;
// DEPRECATED function, estimating Camera motion from a single
// RegionFlowFrame.
virtual void EstimateMotion(const RegionFlowFrame& region_flow_frame,
const int*, // deprecated, must be NULL.
const int*, // deprecated, must be NULL.
CameraMotion* camera_motion) const;
// Public facing API to directly estimate motion models (as opposed to
// a cascade of increasing degree of freedom motion models with appropriate
// stability analysis via above EstimateMotionsParallel).
// Use this if all you need is just a specific motion
// model describing/summarizing the motion of the RegionFlowFeatureList.
// Returns false if model estimation failed (in this case an identity model
// is set in camera_motion).
// NOTE: All direct estimation functions assume normalized feature input,
// i.e. transformed via NormalizeRegionFlowFeatureList.
//
// NOTE 2: For easy direct use see Fit* functions below class.
bool EstimateTranslationModel(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateLinearSimilarityModel(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateAffineModel(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateHomography(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
bool EstimateMixtureHomography(RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion);
// Static function which sets motion models (requested in options) to identity
// models.
static void ResetMotionModels(const MotionEstimationOptions& options,
CameraMotion* camera_motion);
// The following functions ResetTo* functions reset all models that are
// present in camera_motion (tested via has_*) to identity or
// the passed model (which is embedded in higher degree of freedom models
// if applicable). CameraMotion::Type is set in accordance to function name.
static void ResetToIdentity(CameraMotion* camera_motion,
bool consider_valid = false); // Set to true
// for type VALID
// Resets every specified model to embedded translation model.
// CameraMotion type is set to UNSTABLE.
static void ResetToTranslation(const TranslationModel& translation,
CameraMotion* camera_motion);
// Resets every specified model with more or equal DOF than a similarity
// to the passed model.
// Camera Motion type is set to UNSTABLE_SIM.
static void ResetToSimilarity(const LinearSimilarityModel& model,
CameraMotion* camera_motion);
// Resets every specified model with more or equal DOF than a homography
// to the passed model. If flag_as_unstable_model is set, camera motion type
// is set to UNSTABLE_HOMOG.
static void ResetToHomography(const Homography& model,
bool flag_as_unstable_model,
CameraMotion* camera_motion);
private:
// Simple enum indicating with motion model should be estimated, mapped from
// MotionEstimationOptions.
enum MotionType {
MODEL_AVERAGE_MAGNITUDE = 0,
MODEL_TRANSLATION = 1,
MODEL_LINEAR_SIMILARITY = 2,
MODEL_AFFINE = 3,
MODEL_HOMOGRAPHY = 4,
MODEL_MIXTURE_HOMOGRAPHY = 5,
// ... internal enum values used for mixture spectrum (up to 10 mixtures are
// supported). Do not use directly.
// Change value if new motions are added.
MODEL_NUM_VALUES = 16,
};
// Determines shot boundaries from estimated motion models and input
// feature_lists by setting the corresponding flag in CameraMotion.
// Make sure, this function is called after motion estimation.
void DetermineShotBoundaries(
const std::vector<RegionFlowFeatureList*>& feature_lists,
std::vector<CameraMotion>* camera_motions) const;
// Implementation function to estimate CameraMotion's from
// RegionFlowFeatureLists.
void EstimateMotionsParallelImpl(
bool irls_weights_preinitialized,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<CameraMotion>* camera_motions) const;
struct SingleTrackClipData;
struct EstimateModelOptions;
EstimateModelOptions DefaultModelOptions() const;
// Implementation function to estimate all motions for a specific type
// across multiple single tracks. Motions are only estimated for those
// CameraMotions with type less or equal to max_unstable_type. Flag
// irls_weights_preinitialized enables some optimizations in case it is
// set to false as features are not pre-initialized.
// Optionally pass thread_storage.
// Returns true if requested type was attempt to be estimated
// (based on options) , false otherwise.
bool EstimateMotionModels(
const MotionType& max_type, const CameraMotion::Type& max_unstable_type,
const EstimateModelOptions& options,
const MotionEstimationThreadStorage* thread_storage, // optional.
std::vector<SingleTrackClipData>* clip_datas) const;
// Multiplies input irls_weights by an upweight multiplier for each feature
// that is part of a sufficiently large track (contribution of each track
// length is by track_length_multiplier, mapping each track length
// to an importance weight in [0, 1]).
void LongFeatureInitialization(
const RegionFlowFeatureList& feature_list,
const LongFeatureInfo& feature_info,
const std::vector<float>& track_length_importance,
std::vector<float>* irls_weights) const;
// Multiplies input irls_weights by normalization factor that downweights
// features is areas of high density.
void FeatureDensityNormalization(const RegionFlowFeatureList& feature_list,
std::vector<float>* irls_weights) const;
// Initializes irls weights (if
// MotionEstimationOptions::irls_initialization::activated is true),
// based on a multitude of options (RANSAC based pre-fitting of motion
// models, homography initialization, etc.).
// Processes one frame or all (if frame = -1) within a clip_data,
void IrlsInitialization(const MotionType& type, int max_unstable_type,
int frame, // Specify -1 for all frames.
const EstimateModelOptions& options,
SingleTrackClipData* clip_data) const;
// Estimation functions for models, called via options by EstimateMotion and
// EstimateMotionsParallel.
// NOTE: All direct estimation functions assume normalized feature input,
// i.e. transformed via
// TransformRegionFlowFeatureList(normalization_transform, feature_list);
// where normalization_transform =
// LinearSimilarityAdapter::NormalizationTransform(frame_width,
// frame_height);
//
// Direct estimation functions perform estimation via iterated reweighted
// least squares (IRLS). In this case specify number of iterations (10 is a
// good default), and optionally the PriorFeatureWeights for each iteration.
// The alphas specify, how much weight should be given to the
// prior weight that the feature has before optimization. An alpha of zero
// indicates, no prior weighting, whereas as an alpha of one corresponds to
// full prior weighting. The actual prior is stored in priors.
// Each iteration is reweighted by numerator / error, where error is the L2
// fitting error after estimation and
// numerator = (1.0 - alpha) * 1.0 + alpha * prior
struct PriorFeatureWeights {
explicit PriorFeatureWeights(int num_iterations)
: alphas(num_iterations, 0.0f) {}
PriorFeatureWeights(int num_iterations, int num_features)
: alphas(num_iterations, 0.0f), priors(num_features, 1.0f) {}
// Tests for correct dimensions of PriorFeatureWeights.
bool HasCorrectDimension(int num_iterations, int num_features) const {
return alphas.size() == num_iterations && priors.size() == num_features;
}
// Returns true if at least one alpha is non-zero.
bool HasNonZeroAlpha() const {
return !alphas.empty() &&
*std::max_element(alphas.begin(), alphas.end()) > 0;
}
// Returns true, if a prior was specified.
bool HasPrior() const { return !priors.empty(); }
std::vector<float> alphas; // Alpha for each IRLS round.
std::vector<float> priors; // Prior weight for each feature.
// If set, above alpha are not adjusted with iterations, but always set to
// 1.0, given full weight to the prior.
bool use_full_prior = false;
};
// In addition, each estimation function can compute its corresponding
// stability features and store it in CameraMotion. These features are needed
// to test via the IsStable* functions further below.
// Estimates 2 DOF translation model.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// linear similarity are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
void EstimateTranslationModelIRLS(
int irls_rounds, bool compute_stability,
RegionFlowFeatureList* feature_list,
const PriorFeatureWeights* prior_weights, // optional.
CameraMotion* camera_motion) const;
// Estimates linear similarity from feature_list using irls_rounds iterative
// reweighted least squares iterations. For L2 estimation, use irls_round = 1.
// The irls_weight member of each RegionFlowFeature in feature_list will be
// set to the inverse residual w.r.t. estimated LinearSimilarityModel.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// linear similarity are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
bool EstimateLinearSimilarityModelIRLS(
int irls_rounds, bool compute_stability,
RegionFlowFeatureList* feature_list,
const PriorFeatureWeights* prior_weights, // optional.
CameraMotion* camera_motion) const;
// Same as above for affine motion.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// affine model are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
bool EstimateAffineModelIRLS(int irls_rounds,
RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion) const;
// Same as above for homography.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// homography are expressed in original frame, i.e. for estimated model
// M, M' = N^(-1) M N is returned.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
bool EstimateHomographyIRLS(
int irls_rounds, bool compute_stability,
const PriorFeatureWeights* prior_weights, // optional.
MotionEstimationThreadStorage* thread_storage, // optional.
RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const;
// Same as above for mixture homography.
// Note: feature_list is assumed to be normalized/transformed by
// LinearSimilarity::NormalizationTransform N. Returned irls weights and
// mixture homography are expressed in original frame, i.e. for estimated
// model M, M' = N^(-1) M N is returned.
// Mixture model estimation customized by MotionEstimationOptions.
// Returns true if estimation was successful, otherwise returns false and sets
// the CameraMotion::type to INVALID.
// Supports computation for mixture spectrum, i.e. mixtures with different
// regularizers. For default regularizer pass
// MotionEstimationOptions::mixture_regularizer. Estimated motion will be
// stored in CameraMotion::mixture_homography_spectrum(spectrum_idx).
bool EstimateMixtureHomographyIRLS(
int irls_rounds, bool compute_stability, float regularizer,
int spectrum_idx, // 0 by default.
const PriorFeatureWeights* prior_weights, // optional.
MotionEstimationThreadStorage* thread_storage, // optional.
RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const;
// Returns weighted variance for mean translation from feature_list (assumed
// to be in normalized coordinates). Returned variance is in unnormalized
// domain.
float TranslationVariance(const RegionFlowFeatureList& feature_list,
const Vector2_f& translation) const;
// Replace each features irls weight by robust min-filtered irls weight
// across each track.
void MinFilterIrlsWeightByTrack(SingleTrackClipData* clip_data) const;
// Performs filtering of irls weight across several tracking clip datas,
// to yield consistent irls weights.
void EnforceTrackConsistency(
std::vector<SingleTrackClipData>* clip_datas) const;
// Initializes or modifies prior_weights for passed feature_list by
// biasing toward previous (filtered) IRLS weight for that feature.
// This enables temporal coherence.
void BiasLongFeatures(RegionFlowFeatureList* feature_list, MotionType type,
const EstimateModelOptions& model_options,
PriorFeatureWeights* prior_weights) const;
// Called by above function to determine the bias each feature is multiplied
// with.
void BiasFromFeatures(const RegionFlowFeatureList& feature_list,
MotionType type,
const EstimateModelOptions& model_options,
std::vector<float>* bias) const;
// Maps track index to tuple of spatial bias and number of similar
// looking long tracks.
typedef std::unordered_map<int, std::pair<float, float>> SpatialBiasMap;
void ComputeSpatialBias(MotionType type,
const EstimateModelOptions& model_options,
RegionFlowFeatureList* feature_list,
SpatialBiasMap* spatial_bias) const;
// Updates features weights in feature_list by temporally consistent bias.
void UpdateLongFeatureBias(MotionType type,
const EstimateModelOptions& model_options,
bool remove_terminated_tracks,
bool update_irls_observation,
RegionFlowFeatureList* feature_list) const;
// Bilateral filtering of irls weights across the passed list.
void SmoothIRLSWeights(std::deque<float>* irls) const;
// Helper function. Returns number of irls iterations for passed MotionType
// derived from current MotionEstimationOptions. Returns zero, if no
// estimation should be attempted.
int IRLSRoundsFromSettings(const MotionType& type) const;
// Partitions irls_rounds into several rounds of irls_per_round iterations
// each, based on MotionEstimationOptions::EstimationPolicy.
// Post-condition: total_rounds * irls_per_round == irls_rounds.
void PolicyToIRLSRounds(int irls_rounds, int* total_rounds,
int* irls_per_round) const;
// Checks for the specified MotionType whether the estimated model is stable.
// If not, resets the features' irls weights to reset_irls_weights (optional)
// and resets the motion model in camera_motion to a lower degree of freedom
// model. In this case, CameraMotion::Type is flagged as UNSTABLE_* where *
// denotes the lower degree of freedom model.
// The model is only checked for those CameraMotions with type less than or
// equal to max_unstable_type.
void CheckModelStability(
const MotionType& type, const CameraMotion::Type& max_unstable_type,
const std::vector<std::vector<float>>* reset_irls_weights,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<CameraMotion>* camera_motions) const;
// Implementation function called by above function, to check for a single
// model.
void CheckSingleModelStability(const MotionType& type,
const CameraMotion::Type& max_unstable_type,
const std::vector<float>* reset_irls_weights,
RegionFlowFeatureList* feature_list,
CameraMotion* camera_motion) const;
// Projects motion model specified by type to lower degree of freedom models.
void ProjectMotionsDown(const MotionType& type,
std::vector<CameraMotion>* camera_motions) const;
// Filters passed feature_lists based on
// MotionEstimationOptions::irls_weight_filter.
void IRLSWeightFilter(
std::vector<RegionFlowFeatureList*>* feature_lists) const;
// Inlier scale based on average motion magnitude and the fraction
// of the magnitude that is still considered an inlier.
// In general a residual of 1 pixel is assigned an IRLS weight of 1,
// this function returns a residual scale, such that a residual
// of distance_fraction * translation_magnitude equals an IRLS weight of 1
// if multiplied by returned scale.
float GetIRLSResidualScale(const float avg_motion_magnitude,
float distance_fraction) const;
const LinearSimilarityModel& InverseNormalizationTransform() const {
return inv_normalization_transform_;
}
const LinearSimilarityModel& NormalizationTransform() const {
return normalization_transform_;
}
// Returns domain normalized features fall in.
Vector2_f NormalizedDomain() const { return normalized_domain_; }
// Returns index within the inlier mask for each feature point.
// Also returns for each bin normalizer to account for different number of
// features per bin during weighting.
void ComputeFeatureMask(const RegionFlowFeatureList& feature_list,
std::vector<int>* mask_indices,
std::vector<float>* bin_normalizer) const;
// Runs multiple rounds of RANSAC, resetting outlier IRLS weight to
// a low score.
// Optionally can perform temporally consistent selection if inlier_mask is
// specified.
// Returns best model across all iterations in best_model and true if
// estimated model was deemed stable.
bool GetTranslationIrlsInitialization(
RegionFlowFeatureList* feature_list,
const EstimateModelOptions& model_options, float avg_camera_motion,
InlierMask* inlier_mask, // optional.
TranslationModel* best_model) const;
// Same as above for linear similarities.
bool GetSimilarityIrlsInitialization(
RegionFlowFeatureList* feature_list,
const EstimateModelOptions& model_options, float avg_camera_motion,
InlierMask* inlier_mask, // optional.
LinearSimilarityModel* best_model) const;
// Computes number of inliers and strict inliers (satisfying much stricter
// threshold) for a given feature list after model fitting.
void ComputeSimilarityInliers(const RegionFlowFeatureList& feature_list,
int* num_inliers,
int* num_strict_inliers) const;
// Initializes irls weights based on setting
// MotionEstimationOptions::homography_irls_weight_initialization.
void GetHomographyIRLSCenterWeights(const RegionFlowFeatureList& feature_list,
std::vector<float>* center_weights) const;
// Checks for unreasonably large accelerations between frames as specified by
// MotionEstimationOptions::StableTranslationBounds.
void CheckTranslationAcceleration(
std::vector<CameraMotion>* camera_motions) const;
// Functions below, test passed model is deemed stable according to
// several heuristics set by Stable[MODEL]Bounds in MotionEstimationOptions.
bool IsStableTranslation(const TranslationModel& normalized_translation,
float translation_variance,
const RegionFlowFeatureList& features) const;
// Tests if passed similarity is stable. Pass number of inliers from
// ComputeSimilarityInliers.
bool IsStableSimilarity(const LinearSimilarityModel& model,
const RegionFlowFeatureList& features,
int num_inliers) const;
bool IsStableHomography(const Homography& homography,
float average_homography_error,
float inlier_coverage) const;
bool IsStableMixtureHomography(
const MixtureHomography& homography, float min_block_inlier_coverage,
const std::vector<float>& block_inlier_coverage) const;
// Computes fraction (in [0, 1]) of inliers w.r.t. frame area using a grid of
// occupancy cells. A feature is considered an inlier if its irls_weight is
// larger than or equal to min_inlier_score.
float GridCoverage(const RegionFlowFeatureList& feature_list,
float min_inlier_score,
MotionEstimationThreadStorage* thread_storage) const;
// Estimates per scanline-block coverage of mixture. If
// assume_rolling_shutter_camera is set, low textured features are allowed to
// have higher error as registration errors would not be as visible here.
void ComputeMixtureCoverage(const RegionFlowFeatureList& feature_list,
float min_inlier_score,
bool assume_rolling_shutter_camera,
MotionEstimationThreadStorage* thread_storage,
CameraMotion* camera_motion) const;
// Returns average motion magnitude as mean of the translation magnitude from
// the 10th to 90th percentile.
void EstimateAverageMotionMagnitude(const RegionFlowFeatureList& feature_list,
CameraMotion* camera_motion) const;
// Returns per iteration weight of the feature's irls weight initialization.
float IRLSPriorWeight(int iteration, int irls_rounds) const;
// Implementation function for above function. Estimates mixture homography
// from features and returns true if estimation was non-degenerate.
bool MixtureHomographyFromFeature(
const TranslationModel& translation, int irls_rounds, float regularizer,
const PriorFeatureWeights* prior_weights, // optional.
RegionFlowFeatureList* feature_list,
MixtureHomography* mix_homography) const;
// Determines overlay indices (spatial bin locations that are likely to be
// affected by overlays) and stores them in corresponding member in
// CameraMotion. Features that fall within these bins will be assigned a
// weight of zero.
void DetermineOverlayIndices(
bool irls_weights_preinitialized,
std::vector<CameraMotion>* camera_motions,
std::vector<RegionFlowFeatureList*>* feature_lists) const;
// Determine features likely to be part of a static overlay, by setting their
// irls weight to zero.
// Returns fraction of the image domain that is considered to be occupied by
// overlays and specific overlay cell indices in overlay_indices.
float OverlayAnalysis(const std::vector<TranslationModel>& translations,
std::vector<RegionFlowFeatureList*>* feature_lists,
std::vector<int>* overlay_indices) const;
// Smooths feature's irls_weights spatio-temporally.
void PostIRLSSmoothing(
const std::vector<CameraMotion>& camera_motions,
std::vector<RegionFlowFeatureList*>* feature_lists) const;
// Initializes LUT for gaussian weighting. By default discretizes the domain
// [0, max_range] into 4K bins, returning scale to map from a value in the
// domain to the corresponding bin. If scale is nullptr max_range bins are
// created instead (in this case scale would be 1.0, i.e. value equals bin
// index).
void InitGaussLUT(float sigma, float max_range, std::vector<float>* lut,
float* scale) const;
// Performs fast volumetric smoothing / filtering of irls weights. Weights are
// expected to be already binned using BuildFeatureGrid.
void RunTemporalIRLSSmoothing(
const std::vector<FeatureGrid<RegionFlowFeature>>& feature_grid,
const std::vector<std::vector<int>>& feature_taps_3,
const std::vector<std::vector<int>>& feature_taps_5,
const std::vector<float>& frame_confidence,
std::vector<RegionFlowFeatureView>* feature_views) const;
private:
MotionEstimationOptions options_;
int frame_width_;
int frame_height_;
LinearSimilarityModel normalization_transform_;
LinearSimilarityModel inv_normalization_transform_;
// Transform from normalized features to irls domain.
LinearSimilarityModel irls_transform_;
// Frame dimensions transformed by normalization transform.
Vector2_f normalized_domain_;
std::unique_ptr<MixtureRowWeights> row_weights_;
// For initialization biased towards previous frame.
std::unique_ptr<InlierMask> inlier_mask_;
// Stores current bias for each track and the last K irls observations.
// bias holds a pixel error, i.e. the inverse of an IRLS weight; it is
// maintained by UpdateLongFeatureBias.
struct LongFeatureBias {
  // Seeds bias to initial_weight and records its inverse as the first
  // IRLS observation.
  explicit LongFeatureBias(float initial_weight) : bias(initial_weight) {
    irls_values.push_back(1.0f / initial_weight);
  }
  // Default: neutral bias of 1.0 (one pixel of error).
  LongFeatureBias() : LongFeatureBias(1.0f) {}
  float bias = 1.0f;               // Current bias, stores pixel error,
                                   // i.e. 1 / IRLS.
  std::vector<float> irls_values;  // Recently observed IRLS values;
                                   // ring buffer (only the most recent
                                   // observations are kept).
  // Number of observations over the track's lifetime; starts at 1 for the
  // seed value pushed by the constructor.
  int total_observations = 1;
};
// Maps track id to LongFeatureBias.
typedef std::unordered_map<int, LongFeatureBias> LongFeatureBiasMap;
// Bias map indexed by MotionType.
mutable std::vector<LongFeatureBiasMap> long_feature_bias_maps_ =
std::vector<LongFeatureBiasMap>(static_cast<int>(MODEL_NUM_VALUES));
// Lookup tables and scales for FeatureBias computation. Each *_scale maps a
// value of the corresponding domain to a bin index of its LUT (see
// InitGaussLUT). Scales are zero-initialized so a not-yet-initialized LUT
// cannot be read through an indeterminate scale (previously these floats
// were left uninitialized).
struct FeatureBiasLUT {
  // For ComputeSpatialBias weighting.
  std::vector<float> spatial_lut;
  float spatial_scale = 0.0f;
  std::vector<float> color_lut;
  float color_scale = 0.0f;
  // For BiasFromFeature computation.
  std::vector<float> bias_weight_lut;
  float bias_weight_scale = 0.0f;
};
FeatureBiasLUT feature_bias_lut_;
// Counts the number of consecutive duplicate frames for each motion model.
mutable std::vector<int> num_duplicate_frames_ =
std::vector<int>(static_cast<int>(MODEL_NUM_VALUES));
friend class EstimateMotionIRLSInvoker;
friend class IrlsInitializationInvoker;
friend class TrackFilterInvoker;
friend class MotionEstimationThreadStorage;
};
// Meta-data set in the header of filter streams to communicate information
// used during camera motion estimation.
struct CameraMotionStreamHeader {
  CameraMotionStreamHeader() = default;
  // Dimensions of the analyzed frames in pixels; zero until set.
  int32 frame_width = 0;
  int32 frame_height = 0;
};
// Direct fitting functions. Each fits the named motion model to the feature
// correspondences contained in features.
TranslationModel FitTranslationModel(const RegionFlowFeatureList& features);
LinearSimilarityModel FitLinearSimilarityModel(
    const RegionFlowFeatureList& features);
AffineModel FitAffineModel(const RegionFlowFeatureList& features);
Homography FitHomography(const RegionFlowFeatureList& features);
MixtureHomography FitMixtureHomography(const RegionFlowFeatureList& features);

// Templated fitting functions. FitModel<Model> dispatches at compile time to
// the matching direct fitting function above; used by generic code such as
// ProjectViaFit below.
template <class Model>
Model FitModel(const RegionFlowFeatureList& features);

template <>
inline TranslationModel FitModel(const RegionFlowFeatureList& features) {
  return FitTranslationModel(features);
}

template <>
inline LinearSimilarityModel FitModel(const RegionFlowFeatureList& features) {
  return FitLinearSimilarityModel(features);
}

template <>
inline AffineModel FitModel(const RegionFlowFeatureList& features) {
  return FitAffineModel(features);
}

template <>
inline Homography FitModel(const RegionFlowFeatureList& features) {
  return FitHomography(features);
}

template <>
inline MixtureHomography FitModel(const RegionFlowFeatureList& features) {
  return FitMixtureHomography(features);
}
// Generic projection function that projects models in an arbitrary direction
// (that is from lower to higher or vice versa) via fast model fits, without
// any error bound checking.
// MixtureRowWeights are only necessary for ToModel == MixtureHomography.
template <class ToModel, class FromModel>
ToModel ProjectViaFit(const FromModel& model, int frame_width, int frame_height,
MixtureRowWeights* row_weights = nullptr,
int grid_dim = 10) {
// Build a grid of features.
const float dx = frame_width * 1.0f / grid_dim;
const float dy = frame_height * 1.0f / grid_dim;
// Create region flow from grid.
RegionFlowFeatureList grid_features;
grid_features.set_frame_width(frame_width);
grid_features.set_frame_height(frame_height);
grid_features.set_match_frame(-1);
for (int k = 0; k <= grid_dim; ++k) {
for (int l = 0; l <= grid_dim; ++l) {
auto* feat = grid_features.add_feature();
feat->set_x(l * dx);
feat->set_y(k * dy);
}
}
RegionFlowFeatureListViaTransform(model, &grid_features, 1.0f,
0.0f, // Replace flow.
false, // Don't change feature loc.
row_weights);
return FitModel<ToModel>(grid_features);
}
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_

View File

@ -0,0 +1,670 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Settings for MotionEstimation.
syntax = "proto2";
package mediapipe;
// Note: In general for Estimation modes, the prefix are used as follows:
// L2: minimize squared norm of error
// IRLS: iterative reweighted least square, L2 minimization using multiple
// iterations, downweighting outliers.
// Next tag: 69
message MotionEstimationOptions {
// Specifies which camera models should be estimated, translation is always
// estimated.
optional bool estimate_translation_irls = 1 [default = true];
enum LinearSimilarityEstimation {
  ESTIMATION_LS_NONE = 0;  // Linear similarity is not estimated.
  ESTIMATION_LS_L2 = 1;    // L2 estimation.
  ESTIMATION_LS_IRLS = 4;  // Good performance, robust to outliers.
  // DEPRECATED modes.
  ESTIMATION_LS_L2_RANSAC = 2;  // DEPRECATED, use IRLS instead.
  ESTIMATION_LS_L1 = 3;         // DEPRECATED, use IRLS instead, or static
                                // function MotionEstimation::
                                // EstimateLinearSimilarityL1
}
optional LinearSimilarityEstimation linear_similarity_estimation = 3
[default = ESTIMATION_LS_IRLS];
enum AffineEstimation {
  ESTIMATION_AFFINE_NONE = 0;  // Affine model is not estimated.
  ESTIMATION_AFFINE_L2 = 1;    // L2 estimation.
  ESTIMATION_AFFINE_IRLS = 2;  // Iteratively reweighted least squares.
}
optional AffineEstimation affine_estimation = 30
[default = ESTIMATION_AFFINE_NONE];
enum HomographyEstimation {
  ESTIMATION_HOMOG_NONE = 0;  // Homography is not estimated.
  ESTIMATION_HOMOG_L2 = 1;    // L2 estimation.
  ESTIMATION_HOMOG_IRLS = 2;  // Iteratively reweighted least squares.
}
optional HomographyEstimation homography_estimation = 5
[default = ESTIMATION_HOMOG_IRLS];
// By default, homography estimation minimizes an objective that is not
// strictly the L2 distance between matched points. If the flag is set, each
// row of the linear system is scaled with the exact denominator which results
// in an objective that minimizes the L2 distance.
optional bool homography_exact_denominator_scaling = 53 [default = false];
// Per default, we use exact solver for over-determined system using
// well-conditioned QR decomposition. For better speed, set value to false to
// use estimation via normal equations.
optional bool use_exact_homography_estimation = 54 [default = true];
// If set uses double instead of float when computing normal equations.
optional bool use_highest_accuracy_for_normal_equations = 55 [default = true];
// Regularizer for perspective part of the homography. If zero, no
// regularization is performed. Should be >= 0.
optional float homography_perspective_regularizer = 61 [default = 0];
// Note: Mixture models have high DOF and are much more affected by outliers
// than the models above. It is recommended that if IRLS estimation is NOT
// used, mixture_regularizer is increased by a factor >= 3.
enum MixtureHomographyEstimation {
  ESTIMATION_HOMOG_MIX_NONE = 0;  // Mixture homography is not estimated.
  ESTIMATION_HOMOG_MIX_L2 = 1;    // L2 estimation.
  ESTIMATION_HOMOG_MIX_IRLS = 2;  // Robust to outliers.
}
optional MixtureHomographyEstimation mix_homography_estimation = 12
[default = ESTIMATION_HOMOG_MIX_NONE];
// If row-wise mixture models are estimated, determines number of them.
// Note, changing number of mixtures, interpolation sigma and regularizer
// is very likely to impact the stability analysis for mixtures and rolling
// shutter scoring. At least MixtureHomographyBounds would need to be adjusted
// to the new values.
optional int32 num_mixtures = 13 [default = 10];
// If row-wise mixture models are estimated, determines how much each point is
// influenced by its neighboring mixtures. Specified as relative sigma
// (standard deviation) w.r.t. frame_height.
optional float mixture_row_sigma = 14 [default = 0.1];
// Mixture estimation uses L2 regularizer to assure that adjacent mixture
// models are similar.
optional float mixture_regularizer = 15 [default = 1e-4];
// Mixtures are estimated across a spectrum of exponentially increasingly
// regularizers. In particular the regularizer at level L is given as
// mixture_regularizer * mixture_regularizer_base^L.
// A maximum of 10 levels are supported (checked!).
// Note: When changing the number of levels you probably want to adapt the
// MotionStabilizationOptions::rolling_shutter_increment value as well,
// as the number of levels directly controls the highest threshold for
// the rolling shutter index analysis.
optional float mixture_regularizer_levels = 42 [default = 3];
optional float mixture_regularizer_base = 43 [default = 2.2];
optional int32 mixture_rs_analysis_level = 44 [default = 2];
// IRLS rounds to down-weight outliers (default across all models).
// Note: IRLS in combination with full mixture models (as opposed to the
// default reduced ones) is somewhat expensive.
optional int32 irls_rounds = 17 [default = 10];
// If set to > 0 (always needs to be less than 1.0), influence of supplied
// prior irls weights is linearly decreased from the specified prior scale
// (weight 1.0) to prior_scale. Effectively, biases the solution towards the
// supplied prior features.
// Note: Without irls_weights_preinitialized set to true, this option is
// effectively a no op.
// TODO: Retire this option.
optional float irls_prior_scale = 50 [default = 0.2];
// Determine how to normalize irls weights w.r.t. average motion magnitude.
// In general a residual of 1 pixel is assigned an IRLS weight of 1.
// However as larger motions in general are affected by a larger error, we
// normalize irls weights, such that a residual
// of distance of irls_motion_magnitude_fraction times
// <average translation magnitude> equals an IRLS weight of 1.
// Must be larger than zero.
optional float irls_motion_magnitude_fraction = 31 [default = 0.08];
// Scale that is applied for mixture (where error is expected to be bigger).
optional float irls_mixture_fraction_scale = 68 [default = 1.5];
// By default, irls weight of all features are set uniformly to one before
// estimating EACH model, refining them in subsequent irls iterations.
// If flag below is set, input irls weights are used instead for each motion
// model.
optional bool irls_weights_preinitialized = 39 [default = false];
// If weights are pre-initialized optionally min filter weights along track
// ids when long tracks are used. This can be used to consistently label
// outliers in time before estimation.
optional bool filter_initialized_irls_weights = 67 [default = false];
// If activated, irls weight of outlier features are reset. Outliers are
// defined as those features, for which the best model fit after #rounds
// iterations of RANSAC did NOT yield an error lower than cutoff.
// Only applies to translation and similarity estimation.
message IrlsOutlierInitialization {
  // Whether the RANSAC-based outlier reset is performed at all.
  optional bool activated = 1 [default = false];
  // Number of RANSAC rounds used to determine the best model fit.
  optional int32 rounds = 2 [default = 100];
  // Error cutoff below which a feature counts as an inlier of the best model
  // (presumably w.r.t. normalized coordinates, as for
  // lin_sim_inlier_threshold -- TODO confirm).
  optional float cutoff = 3 [default = 0.003];
}
optional IrlsOutlierInitialization irls_initialization = 56;
// Normalizes feature's irls weights prior to estimation such that
// feature in high density areas are downweighted. Multiplicative in case
// irls_weights_preinitialized is set to true.
optional bool feature_density_normalization = 62 [default = false];
// A regular grid of size feature_mask_size x feature_mask_size
// is used to normalize features w.r.t. their density.
optional int32 feature_mask_size = 63 [default = 10];
// In addition to above outlier and density initialization, long features
// that are present for a specified ratio of the analysis interval can be
// upweighted. This greatly improves temporal consistency.
message LongFeatureInitialization {
  // Whether long-feature upweighting is performed at all.
  optional bool activated = 1 [default = false];
  // Tracks with a length greater or equal to the specified percentile
  // are upweighted by the specified upweight_multiplier.
  optional float min_length_percentile = 2 [default = 0.95];
  // Features passing the above test have their irls weight increased by the
  // specified multiplier prior to estimation.
  optional float upweight_multiplier = 3 [default = 5];
}
optional LongFeatureInitialization long_feature_initialization = 66;
// Irls initialization can be performed in a temporally dependent manner
// (if estimation_policy() == TEMPORAL_IRLS_MASK), where
// the previous frame's motion estimation biases the IrlsInitialization of the
// currently processed frame. In particular the location and magnitude of
// inliers is used during the RANSAC selection stage, to favor those features
// that agree with the prior, represented as confidence mask of inliers
// (using same dimension as above feature_mask_size).
// After estimation, the prior is updated.
message IrlsMaskOptions {
  // Amount the prior is decayed after each iteration.
  optional float decay = 2 [default = 0.7];
  // Score that each inlier adds to the current prior. Specified w.r.t. total
  // number of features, i.e. each feature increases a bin's score by
  // inlier_score.
  optional float inlier_score = 3 [default = 0.4];
  // Each inlier scores at least this value regardless of the inlier mask
  // (additive).
  optional float base_score = 4 [default = 0.2];
  // Motions are scored relative to the previous motion. Threshold denotes
  // the absolute minimum of the denominator.
  optional float min_translation_norm = 5 [default = 2e-3];
  // Translation is updated in every step by blending it with the previously
  // estimated translation. (alpha is within 0 to 1, where 0 indicates to use
  // only the measured translation, i.e. no blending).
  optional float translation_blend_alpha = 6 [default = 0.7];
  // Every time translation is updated, the prior (in [0, 1]) is increased by
  // the specified amount.
  optional float translation_prior_increase = 7 [default = 0.2];
  // Deprecated fields.
  extensions 1;
}
optional IrlsMaskOptions irls_mask_options = 57;
// Describes how long feature tracks are leveraged for joint estimation across
// many frames.
message JointTrackEstimationOptions {
  // For each frame-pair motion model, describing the motion between frame
  // I and I - 1, estimate in addition several additional motion
  // models along long feature tracks describing the motion between frame
  // I and I - k * motion_stride (additional models are not output,
  // but help to filter irls weights).
  // Specifies total number of estimated motion models per frame-pair. Must be
  // greater than zero.
  optional int32 num_motion_models = 1 [default = 3];
  // Spacing in frames for additional motion models.
  optional int32 motion_stride = 2 [default = 15];
  // If set, performs temporal smoothing across frames of the obtained irls
  // weights.
  optional bool temporal_smoothing = 3 [default = false];
  // TODO: Specify which filter is used during temporal smoothing.
  // TODO: Limit joint estimation to static scenes
  //       (don't violate rigidity of alignment in case of wide
  //       baselines). Adapt stride and num_motion_models
  //       accordingly.
}
optional JointTrackEstimationOptions joint_track_estimation = 59;
// Options being used to bias IRLS features if estimation mode
// TEMPORAL_LONG_FEATURE_BIAS is being used.
// Next Tag: 15
message LongFeatureBiasOptions {
  // Estimation is performed multiple times, alternating between model
  // estimation and smooth temporal feature biasing for the specified number
  // of rounds.
  optional int32 total_rounds = 13 [default = 1];
  // Controls how fast the bias for a track gets updated, in case the feature
  // is an inlier. Use higher values for less decay of background motion over
  // time.
  optional float inlier_bias = 1 [default = 0.98];
  // Same as above for outliers (or features with low prior), i.e. those that
  // got recently seeded.
  optional float outlier_bias = 2 [default = 0.7];
  // Number of elements after which we deem estimation to be stable.
  // Used to control the weight of the bias if fewer than the specified number
  // have been observed. Also used as maximum ring buffer size (only the most
  // recent number of observations are kept). Must be > 0.
  optional int32 num_irls_observations = 3 [default = 10];
  // Change in irls weight magnitude (from outlier to inlier) above which we
  // reset the current bias.
  optional float max_irls_change_ratio = 4 [default = 10.0];
  // Irls weight above which we consider a feature to be an inlier for bias
  // update purposes (see above inlier and outlier bias). By default, outliers
  // are allowed to update their bias faster than inliers. Must be > 0.
  optional float inlier_irls_weight = 5 [default = 0.2];
  // Standard deviation used during feature initialization. Current bias of a
  // track is used to pre-weight features via gaussian weighting with the
  // specified standard deviation.
  optional float bias_stdev = 12 [default = 1.0];
  // When seeding new tracks (on the first frame), we bilaterally pool
  // neighboring feature biases as seed. Details are controlled by the options
  // below. If false, the feature's estimation error is used instead
  // (faster, but less spatially smooth).
  // If activated it is advised to use a patch descriptor radius of at least
  // 20 pixels.
  optional bool use_spatial_bias = 6 [default = true];
  // Newly observed tracks' biases are seeded by similar-looking features
  // in close spatial proximity. For efficiency a grid is used to determine
  // proximity.
  // Grid size in normalized coordinates w.r.t. frame domain.
  optional float grid_size = 7 [default = 0.04];
  // Sigmas for combining feature biases.
  optional float spatial_sigma = 8 [default = 0.02];
  optional float color_sigma = 9 [default = 20.0];
  // Defines what we consider to be a long track. Features spawned around
  // locations of similar-looking long tracks are considered to have a
  // high prior, e.g. their initialization is given more weight.
  optional int32 long_track_threshold = 10 [default = 30];
  // Determines what fraction of long tracks is considered to be sufficient
  // for a highly confident bias seed.
  optional float long_track_confidence_fraction = 11 [default = 0.25];
  // If activated, uses the irls weights from the estimation of the lower
  // degree of freedom model to seed the bias of the higher degree of freedom
  // model. This improves rigidity of the computed motion.
  optional bool seed_priors_from_bias = 14 [default = false];
}
optional LongFeatureBiasOptions long_feature_bias_options = 64;
// Controls how multiple models via EstimateMotionsParallel are estimated.
enum EstimationPolicy {
  INDEPENDENT_PARALLEL = 1;        // Models are estimated independently
                                   // across frames in parallel.
  TEMPORAL_IRLS_MASK = 2;          // Previous frame's estimation biases the
                                   // current one, controlled via above
                                   // IrlsMaskOptions.
  TEMPORAL_LONG_FEATURE_BIAS = 4;  // Frame's estimation is biased along
                                   // long features, controlled via above
                                   // LongFeatureBiasOptions.
  JOINTLY_FROM_TRACKS = 3;         // Estimation is performed jointly over
                                   // chunks of frames, exercising a
                                   // consistent irls weight PER track.
                                   // Expects as input
                                   // RegionFlowFeatureList's with
                                   // long_tracks. Controlled via above
                                   // JointTrackEstimationOptions.
}
optional EstimationPolicy estimation_policy = 58
[default = INDEPENDENT_PARALLEL];
optional int32 coverage_grid_size = 51 [default = 10];
// Degree of freedom of estimated homography mixtures. If desired, specific
// parts of the homography can be held constant across the mixture.
// For fast draft TRANSLATION_MIXTURE is recommended, for high quality
// SKEW_ROTATION_MIXTURE.
enum MixtureModelMode {
FULL_MIXTURE = 0; // 8 dof * num_mixtures
TRANSLATION_MIXTURE = 1; // 6 dof + 2 dof * num_mixtures
SKEW_ROTATION_MIXTURE = 2; // 4 dof + 4 dof * num_mixtures
}
optional MixtureModelMode mixture_model_mode = 23
[default = SKEW_ROTATION_MIXTURE];
// If specified, only features that agree with the estimated linear similarity
// will be used to estimate the homography.
// If set, linear_similarity_estimation can not be ESTIMATION_NONE! (checked)
optional bool use_only_lin_sim_inliers_for_homography = 6 [default = true];
// Max. deviation to be considered an inlier w.r.t. estimated similarity for
// above flag. This value is set w.r.t. normalized frame diameter.
// TODO: Should take GetIRLSResidualScale into account.
optional float lin_sim_inlier_threshold = 20 [default = 0.003];
// If any parameter of the input flow or estimated translation exceeds these
// thresholds we deem the motion INVALID.
message TranslationBounds {
// Absolute minimum of features present.
optional int32 min_features = 1 [default = 3];
// Max magnitude of the translation expressed w.r.t. frame diameter
optional float frac_max_motion_magnitude = 2 [default = 0.15];
// Motion magnitude is only tested for if standard deviation of estimated
// translation exceeds threshold.
optional float max_motion_stdev_threshold = 4 [default = 0.01];
// Max standard deviation of the estimated translation (normalized to frame
// diameter).
optional float max_motion_stdev = 3 [default = 0.065];
// Maximum acceleration between frames. Specified relative to minimum
// velocity across two adjacent frames (absolute minimum of 0.001 is
// enforced, ~1 pix for 480p).
// If exceeded for one frame, the whole batch passed to
// EstimateMotionsParallel is labeled unstable.
optional float max_acceleration = 5 [default = 20.0];
}
optional TranslationBounds stable_translation_bounds = 32;
// If any test/bound is violated, the motion is deemed UNSTABLE.
message SimilarityBounds {
// Input frame has to be labeled stable, i.e. enough features and coverage
// present.
optional bool only_stable_input = 1 [default = true];
// Minimum number of inlier features (absolute and as fraction of total
// number of features).
// TODO: Dataset run setting this to 0.15
optional float min_inlier_fraction = 2 [default = 0.2];
optional float min_inliers = 3 [default = 30];
// Bounds on valid similarities. We use larger values compared to
// homographies. Note: Bounds are necessary, to guarantee invertability
// of the resulting similarity.
optional float lower_scale = 4 [default = 0.8];
optional float upper_scale = 5 [default = 1.25]; // 1 / 0.8.
optional float limit_rotation = 6 [default = 0.25]; // 15 degrees.
// Thresholds for a feature to be considered inlier w.r.t similarity
// transform, expressed in terms of pixel residual error. Max of absolute
// and fractional thresholds is used.
    // Ratio of inliers that pass regular and strict thresholds is stored in
// CameraMotion.
//
// TODO: Just use lin_sim_inlier_threshold directly, however that
// recomputes the error, and requires regression testing. Using an extra
// fractional inlier threshold for now.
//
// Absolute in pixels.
optional float inlier_threshold = 7 [default = 4.0];
// Scaled by frame diameter.
optional float frac_inlier_threshold = 8 [default = 0];
// TODO: Revisit after frame selection change.
// Absolute in pixels.
optional float strict_inlier_threshold = 9 [default = 0.5];
}
optional SimilarityBounds stable_similarity_bounds = 33;
// If any parameter of the estimated homography exceeds these bounds,
// we deem it UNSTABLE_SIM and use estimated similarity instead.
message HomographyBounds {
optional float lower_scale = 1 [default = 0.8];
optional float upper_scale = 2 [default = 1.25]; // 1 / 0.8.
optional float limit_rotation = 3 [default = 0.25]; // 15 degrees.
optional float limit_perspective = 4 [default = 0.0004];
// Inlier coverage is only tested for if average homography error exceeds
// registration_thresholds. Max of the following two thresholds is used.
//
// Absolute in pixels.
optional float registration_threshold = 5 [default = 0.1];
// Scaled by frame diameter.
optional float frac_registration_threshold = 8 [default = 0];
// Minimum fraction of inlier features w.r.t. frame area.
optional float min_inlier_coverage = 6 [default = 0.3];
// Grid coverage inlier threshold. Pixel errors below this
// threshold are considered inliers. Defined w.r.t. frame diameter, approx.
// 1.5 for 16:9 SD video (480p), i.e. threshold is multiplied by frame
// diameter.
optional float frac_inlier_threshold = 7 [default = 2.0e-3];
}
optional HomographyBounds stable_homography_bounds = 11;
// If any parameter of the estimated homography mixture exceeds these bounds,
// we deem it UNSTABLE_HOMOG and use the estimated homography instead.
message MixtureHomographyBounds {
// Minimum fraction of inlier features w.r.t. block area.
optional float min_inlier_coverage = 1 [default = 0.4];
// Each block is tested to be stable, regarding the outliers.
// A frame is labeled unstable, if more or equal than the specified adjacent
// blocks are labeled outliers.
optional int32 max_adjacent_outlier_blocks = 2 [default = 5];
// Maximum number of adjacent empty blocks (no inliers).
optional int32 max_adjacent_empty_blocks = 3 [default = 3];
// Grid coverage threshold inlier threshold. See identical parameter in
// HomographyBounds.
optional float frac_inlier_threshold = 7 [default = 2.5e-3];
}
optional MixtureHomographyBounds stable_mixture_homography_bounds = 34;
// Scale for stricter coverage evaluation. Used for rolling shutter guess
// computation, by only using high quality inliers. Larger values reflect
// stricter coverage.
// Specifically, when computing coverage via GridCoverage call,
// frac_inlier_threshold is reduced (divided) by specified scale below.
optional float strict_coverage_scale = 41 [default = 1.333];
// By default frames with zero trackable features (e.g. at the beginning,
// empty frame or shot boundary) are set identity model but still labeled as
// valid. If set to false, these frames are flagged as invalid, which can be
// useful to locate shot boundaries, etc.
optional bool label_empty_frames_as_valid = 22 [default = true];
// Setting for temporal smoothing of irls weights in optional post-processing
// step.
// In normalized coordinates w.r.t. frame domain.
optional float feature_grid_size = 24 [default = 0.05];
optional float spatial_sigma = 25 [default = 0.01];
// Frame diameter across which smoothing is performed.
optional int32 temporal_irls_diameter = 26 [default = 20];
optional float temporal_sigma = 27 [default = 5]; // in frames.
// Bilateral weight (for un-normalized color domain [0, .. 255]).
optional float feature_sigma = 28 [default = 30.0];
// If set to false 3 taps are used.
optional bool filter_5_taps = 29 [default = false];
// If set, during temporal smoothing, each frame is weighted by its
// confidence, defined as the square coverage (or square mean mixture
  // coverage). Therefore, low confidence fits do not erroneously propagate
  // over time. In addition, if the confidence is below the specified
  // confidence_threshold (relative to the maximum coverage observed in the
  // test interval), irls weights are reset to 1, i.e. biased to
  // agree with the (unknown) background motion.
optional bool frame_confidence_weighting = 48 [default = true];
optional float reset_confidence_threshold = 49 [default = 0.4];
// Filters irls weights before smoothing them according to specified
// operation.
enum IRLSWeightFilter {
IRLS_FILTER_NONE = 0;
IRLS_FILTER_TEXTURE = 1;
IRLS_FILTER_CORNER_RESPONSE = 2;
}
// Calls TextureFilteredRegionFlowFeatureIRLSWeights on computed irls weights
// before smoothing them.
optional IRLSWeightFilter irls_weight_filter = 35
[default = IRLS_FILTER_NONE];
// Attempts to detect overlays, i.e. static elements burned-into the video
// that potentially corrupt motion estimation.
optional bool overlay_detection = 36 [default = false];
// Overlay detection is performed over specified number of frames.
optional int32 overlay_analysis_chunk_size = 37 [default = 8];
message OverlayDetectionOptions {
// Potential overlay features are aggregated over a mask with cells
// mask_size x mask_size as specified below.
optional int32 analysis_mask_size = 1 [default = 10];
// There are two types of candidates of overlay features, strict and loose
// ones. Strict candidates are used to determine if a grid bin should be
// considered an overlay cell. If the grid is an overlay cell, *all*
// candidates, i.e. strict and loose ones will be flagged as overlay
// features by setting their corresponding irls weight to zero.
// A feature is a strict overlay feature if its motion is less than
// near_zero_motion and AND less than max_translation_ratio times the
// estimated translation magnitude at that frame AND is texturedness is
// sufficiently high.
optional float strict_near_zero_motion = 2 [default = 0.2];
optional float strict_max_translation_ratio = 3 [default = 0.2];
// Minimum texturedness of a feature to be considered an overlay.
// Motivation: Overlays are mostly text or graphics, i.e. have visually
// distinguished features.
optional float strict_min_texturedness = 5 [default = 0.1];
// A feature is a loose overlay feature if its motion is less than
// loose_near_zero_motion.
optional float loose_near_zero_motion = 4 [default = 1.0];
// Minimum fraction of strict overlay features within a cell to be
// considered an overlay cell.
optional float overlay_min_ratio = 6 [default = 0.3];
// Absolute minimum number of strict overlay features within a cell to be
    // considered an overlay cell.
optional float overlay_min_features = 7 [default = 10];
}
optional OverlayDetectionOptions overlay_detection_options = 38;
// Shot boundaries are introduced in 3 different scenarios:
// a) Frame has zero tracked features w.r.t. previous frame
// b) Estimated motion is deemed invalid (CameraMotion::INVALID).
// c) Visual consistency is above threshold of two adjacent frames.
message ShotBoundaryOptions {
// After cases a & b are determined from features/camera motion, they
// are verified by ensuring visual consistency is above specified threshold,
// if visual consistency has been computed. Only if this is case will the
// frame be labeled as shot boundary. Motivation is, that there should
// always be some (even small) measurable increase in the frame difference
// at a shot boundary.
// Verification is only performed if visual_consistency has been evaluated
// (value >= 0).
optional float motion_consistency_threshold = 1 [default = 0.02];
// Threshold for case c). Sometimes, motion estimation will miss shot
// boundaries. We define shot boundaries for which the visual consistency is
// higher than the specified threshold for at least two adjacent frames.
optional float appearance_consistency_threshold = 2 [default = 0.075];
}
optional ShotBoundaryOptions shot_boundary_options = 60;
// By default, irls weights of each feature are overwritten with refined irls
// weights of the last iteration for the highest degree of freedom model that
// was estimated stable. If set to false, original irls weights are retained.
// Note: If overlay detection is activated, features to be deemed overlays
// have their irls weight set to zero, regardless of this setting.
  // Similarly, an IRLSWeightFilter is applied if requested, regardless
// of this setting.
optional bool output_refined_irls_weights = 40 [default = true];
// Weight initialization for homography estimation. This is to bias homography
// estimation either to foreground or background.
enum HomographyIrlsWeightInitialization {
IRLS_WEIGHT_CONSTANT_ONE = 1; // Constant, treat all features equally.
IRLS_WEIGHT_CENTER_GAUSSIAN = 2; // Weight features in the center higher.
// Tends to lock onto foreground.
IRLS_WEIGHT_PERIMETER_GAUSSIAN = 3; // Weight features around the
// perimeter higher, tends to lock onto
// background and improves rigidity of
// the perspective degree of freedom.
}
// IRLS weights for homography estimation are initialized based on the
// specified options. If, options irls_weights_preinitialized is set,
// weights are multiplied instead of reset.
optional HomographyIrlsWeightInitialization
homography_irls_weight_initialization = 45
[default = IRLS_WEIGHT_PERIMETER_GAUSSIAN];
// If set to false use L1 norm irls weights instead of L0 norm irls weights.
optional bool irls_use_l0_norm = 46 [default = true];
// IRLS weights are determined in a limited domain (in particular helpful
// for stabilization analysis on HD videos).
// TODO: Make this the default.
optional bool domain_limited_irls_scaling = 65 [default = false];
// For comparison and debugging purposes. Simply estimates requested models
// without checking their stability via the stable_*_bounds parameters.
// However, invertibility is still checked to avoid invalid data being passed
// to later stages of the stabilizer.
optional bool deactivate_stable_motion_estimation = 47 [default = false];
// Projects higher order motions if estimated correctly down to lower order
// motions, therefore replacing the previously estimated motions.
optional bool project_valid_motions_down = 52 [default = false];
// DEPRECATED functionality. Use static functions as indicated instead.
//
// Non-linear similarity, use MotionEstimation::EstimateSimilarityModelL2.
optional bool estimate_similarity = 2 [deprecated = true];
// Deprecated fields.
extensions 7, 8, 16;
}

View File

@ -0,0 +1,924 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/motion_models.h"
#include <stdlib.h>
#include <cmath>
#include <string>
#include <utility>
#include "Eigen/Core"
#include "Eigen/Dense"
#include "absl/strings/str_format.h"
// Set to true to use catmull rom mixture weights instead of Gaussian weights
// for homography mixture estimation.
// NOTE: compile-time switch only; not wired to a runtime command-line flag.
bool flags_catmull_rom_mixture_weights = false;
namespace mediapipe {
// Formats the translation as "dx dy" using fixed-width floats.
std::string ModelAdapter<TranslationModel>::ToString(
    const TranslationModel& model) {
  return absl::StrFormat("%7f %7f", model.dx(), model.dy());
}
// Embeds the translation into an affine model with identity linear part.
AffineModel ModelAdapter<TranslationModel>::ToAffine(
    const TranslationModel& model) {
  return AffineAdapter::FromArgs(model.dx(), model.dy(), 1, 0, 0, 1);
}
// Extracts the translation from an affine model. Debug-checks that the
// linear part is exactly identity, so no information is silently dropped.
TranslationModel ModelAdapter<TranslationModel>::FromAffine(
    const AffineModel& model) {
  DCHECK_EQ(model.a(), 1);
  DCHECK_EQ(model.b(), 0);
  DCHECK_EQ(model.c(), 0);
  DCHECK_EQ(model.d(), 1);
  return TranslationAdapter::FromArgs(model.dx(), model.dy());
}
// Embeds the translation into a homography via its affine embedding.
Homography ModelAdapter<TranslationModel>::ToHomography(
    const TranslationModel& model) {
  return HomographyAdapter::FromAffine(ToAffine(model));
}
// Reduces a homography to a translation via the affine adapters; the called
// adapters debug-check that the model is in fact purely translational.
TranslationModel ModelAdapter<TranslationModel>::FromHomography(
    const Homography& model) {
  return TranslationAdapter::FromAffine(HomographyAdapter::ToAffine(model));
}
// Writes the row-major 2x2 Jacobian of the transform w.r.t. the parameters
// (dx, dy) at pt. Since x' = x + dx and y' = y + dy, this is the identity
// matrix, independent of pt.
void ModelAdapter<TranslationModel>::GetJacobianAtPoint(const Vector2_f& pt,
                                                        float* jacobian) {
  DCHECK(jacobian);
  jacobian[0] = 1;
  jacobian[1] = 0;
  jacobian[2] = 0;
  jacobian[3] = 1;
}
// Returns the identity: a pure translation has no scale parameters to
// normalize w.r.t. the frame size.
TranslationModel ModelAdapter<TranslationModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  // Independent of frame size.
  return TranslationModel();
}
// Returns the parameter-wise maximum of the two translations.
TranslationModel ModelAdapter<TranslationModel>::Maximum(
    const TranslationModel& lhs, const TranslationModel& rhs) {
  return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()));
}
// Projects a linear similarity down to its translational component.
TranslationModel ModelAdapter<TranslationModel>::ProjectFrom(
    const LinearSimilarityModel& model, float frame_width, float frame_height) {
  return LinearSimilarityAdapter::ProjectToTranslation(model, frame_width,
                                                       frame_height);
}
// Projects an affine model to a translation by chaining the
// affine -> linear similarity -> translation projections.
TranslationModel ModelAdapter<TranslationModel>::ProjectFrom(
    const AffineModel& model, float frame_width, float frame_height) {
  return ProjectFrom(AffineAdapter::ProjectToLinearSimilarity(
                         model, frame_width, frame_height),
                     frame_width, frame_height);
}
// Projects a homography to a translation by chaining the
// homography -> affine -> translation projections.
TranslationModel ModelAdapter<TranslationModel>::ProjectFrom(
    const Homography& model, float frame_width, float frame_height) {
  return ProjectFrom(
      HomographyAdapter::ProjectToAffine(model, frame_width, frame_height),
      frame_width, frame_height);
}
// Similarity model.
// Similarity model.
// Builds a similarity from explicit (dx, dy, scale, rotation) parameters.
SimilarityModel ModelAdapter<SimilarityModel>::FromArgs(float dx, float dy,
                                                        float scale,
                                                        float rotation) {
  SimilarityModel model;
  model.set_dx(dx);
  model.set_dy(dy);
  model.set_scale(scale);
  model.set_rotation(rotation);
  return model;
}
// Builds a similarity from 4 packed floats (dx, dy, scale, rotation).
// If identity_parametrization is set, args[2] is an offset from unit scale.
SimilarityModel ModelAdapter<SimilarityModel>::FromFloatPointer(
    const float* args, bool identity_parametrization) {
  DCHECK(args);
  SimilarityModel model;
  model.set_dx(args[0]);
  model.set_dy(args[1]);
  model.set_scale((identity_parametrization ? 1.f : 0.f) + args[2]);
  model.set_rotation(args[3]);
  return model;
}
// Double-precision counterpart of FromFloatPointer; same parameter layout.
SimilarityModel ModelAdapter<SimilarityModel>::FromDoublePointer(
    const double* args, bool identity_parametrization) {
  DCHECK(args);
  SimilarityModel model;
  model.set_dx(args[0]);
  model.set_dy(args[1]);
  model.set_scale((identity_parametrization ? 1.f : 0.f) + args[2]);
  model.set_rotation(args[3]);
  return model;
}
// Applies pt' = scale * R(rotation) * pt + (dx, dy).
Vector2_f ModelAdapter<SimilarityModel>::TransformPoint(
    const SimilarityModel& model, const Vector2_f& pt) {
  // Setup rotation part.
  const float c_r = std::cos(model.rotation());
  const float c_s = std::sin(model.rotation());
  const Vector2_f pt_rot(c_r * pt.x() - c_s * pt.y(),
                         c_s * pt.x() + c_r * pt.y());
  return pt_rot * model.scale() + Vector2_f(model.dx(), model.dy());
}
// Inverts the similarity. On failure (degenerate scale) logs an error and
// falls back to the identity model.
SimilarityModel ModelAdapter<SimilarityModel>::Invert(
    const SimilarityModel& model) {
  bool invertible = true;
  const SimilarityModel inverse = InvertChecked(model, &invertible);
  if (invertible) {
    return inverse;
  }
  LOG(ERROR) << "Model not invertible. Returning identity.";
  return SimilarityModel();
}
// Inverts the similarity: (s, R, t)^-1 = (1/s, R^-1, -(1/s) R^-1 t).
// Sets *success to false and returns identity if |scale| is near zero.
SimilarityModel ModelAdapter<SimilarityModel>::InvertChecked(
    const SimilarityModel& model, bool* success) {
  SimilarityModel inv_model;
  inv_model.set_rotation(-model.rotation());
  if (fabs(model.scale()) > kDetInvertibleEps) {
    inv_model.set_scale(1.0 / model.scale());
    *success = true;
  } else {
    *success = false;
    VLOG(1) << "Model is not invertible.";
    return SimilarityModel();
  }
  // Rotate and scale [dx, dy] by inv_model.
  const float c_r = std::cos(inv_model.rotation());
  const float c_s = std::sin(inv_model.rotation());
  const Vector2_f pt_rot(c_r * model.dx() - c_s * model.dy(),
                         c_s * model.dx() + c_r * model.dy());
  const Vector2_f inv_trans = -pt_rot * inv_model.scale();
  inv_model.set_dx(inv_trans.x());
  inv_model.set_dy(inv_trans.y());
  return inv_model;
}
// Composition lhs o rhs: scales multiply, rotations add, and rhs's
// translation is rotated/scaled by lhs before adding lhs's translation.
SimilarityModel ModelAdapter<SimilarityModel>::Compose(
    const SimilarityModel& lhs, const SimilarityModel& rhs) {
  SimilarityModel result;
  result.set_scale(lhs.scale() * rhs.scale());
  result.set_rotation(lhs.rotation() + rhs.rotation());
  // Apply lhs scale and rot to rhs translation.
  const float c_r = std::cos(lhs.rotation());
  const float c_s = std::sin(lhs.rotation());
  const Vector2_f trans_rot(c_r * rhs.dx() - c_s * rhs.dy(),
                            c_s * rhs.dx() + c_r * rhs.dy());
  const Vector2_f trans_concat =
      trans_rot * lhs.scale() + Vector2_f(lhs.dx(), lhs.dy());
  result.set_dx(trans_concat.x());
  result.set_dy(trans_concat.y());
  return result;
}
// Returns the id-th parameter in the order (dx, dy, scale, rotation).
// LOG(FATAL)s on out-of-range ids.
float ModelAdapter<SimilarityModel>::GetParameter(const SimilarityModel& model,
                                                  int id) {
  switch (id) {
    case 0:
      return model.dx();
    case 1:
      return model.dy();
    case 2:
      return model.scale();
    case 3:
      return model.rotation();
    default:
      LOG(FATAL) << "Parameter id is out of bounds";
  }
  return 0;  // Unreachable; keeps the compiler happy.
}
// Formats the similarity as "dx dy scale rotation" fixed-width floats.
std::string ModelAdapter<SimilarityModel>::ToString(
    const SimilarityModel& model) {
  return absl::StrFormat("%7f %7f %7f %7f", model.dx(), model.dy(),
                         model.scale(), model.rotation());
}
// Returns a similarity that scales by the inverse frame diameter (no
// translation or rotation), used to map models to normalized coordinates.
SimilarityModel ModelAdapter<SimilarityModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  const float scale = std::hypot(frame_width, frame_height);
  DCHECK_NE(scale, 0);
  return SimilarityAdapter::FromArgs(0, 0, 1.0 / scale, 0);
}
// Projects to a translation by converting to a linear similarity first.
TranslationModel ModelAdapter<SimilarityModel>::ProjectToTranslation(
    const SimilarityModel& model, float frame_width, float frame_height) {
  return LinearSimilarityAdapter::ProjectToTranslation(
      LinearSimilarityAdapter::FromSimilarity(model), frame_width,
      frame_height);
}
// Formats the linear similarity as "dx dy a b" fixed-width floats.
std::string ModelAdapter<LinearSimilarityModel>::ToString(
    const LinearSimilarityModel& model) {
  return absl::StrFormat("%7f %7f %7f %7f", model.dx(), model.dy(), model.a(),
                         model.b());
}
// Embeds the linear similarity into an affine model; the linear part is the
// conformal matrix [a -b; b a].
AffineModel ModelAdapter<LinearSimilarityModel>::ToAffine(
    const LinearSimilarityModel& model) {
  return AffineAdapter::FromArgs(model.dx(), model.dy(), model.a(), -model.b(),
                                 model.b(), model.a());
}
// Inverse of ToAffine; debug-checks the affine is of conformal form
// (a == d, b == -c) so the reduction is lossless.
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::FromAffine(
    const AffineModel& model) {
  DCHECK_EQ(model.a(), model.d());
  DCHECK_EQ(model.b(), -model.c());
  return LinearSimilarityAdapter::FromArgs(model.dx(), model.dy(), model.a(),
                                           -model.b());
}
// Embeds the linear similarity into a homography via its affine embedding.
Homography ModelAdapter<LinearSimilarityModel>::ToHomography(
    const LinearSimilarityModel& model) {
  return HomographyAdapter::FromAffine(ToAffine(model));
}
// Reduces a homography to a linear similarity via the affine adapters;
// debug-checks in the called adapters enforce the model has that form.
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::FromHomography(
    const Homography& model) {
  return LinearSimilarityAdapter::FromAffine(
      HomographyAdapter::ToAffine(model));
}
// Converts (a, b) to polar form: scale = |(a, b)|, rotation = atan2(b, a).
SimilarityModel ModelAdapter<LinearSimilarityModel>::ToSimilarity(
    const LinearSimilarityModel& model) {
  const float scale = std::hypot(model.a(), model.b());
  return SimilarityAdapter::FromArgs(model.dx(), model.dy(), scale,
                                     std::atan2(model.b(), model.a()));
}
// Inverse of ToSimilarity: a = s * cos(theta), b = s * sin(theta).
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::FromSimilarity(
    const SimilarityModel& model) {
  return LinearSimilarityAdapter::FromArgs(
      model.dx(), model.dy(), model.scale() * std::cos(model.rotation()),
      model.scale() * std::sin(model.rotation()));
}
// Multiplies every parameter of the model by the given scale factor.
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::ScaleParameters(
    const LinearSimilarityModel& model_in, float scale) {
  LinearSimilarityModel scaled;
  scaled.set_a(model_in.a() * scale);
  scaled.set_b(model_in.b() * scale);
  scaled.set_dx(model_in.dx() * scale);
  scaled.set_dy(model_in.dy() * scale);
  return scaled;
}
// Adds the identity transform to a difference model, i.e. a += 1
// (identity linear similarity has a = 1, all other parameters 0).
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::AddIdentity(
    const LinearSimilarityModel& model_in) {
  LinearSimilarityModel model = model_in;
  model.set_a(model.a() + 1);
  return model;
}
// Writes the row-major 2x4 Jacobian of the transformed point w.r.t. the
// parameters (dx, dy, a, b) at pt, for x' = a*x - b*y + dx,
// y' = b*x + a*y + dy.
void ModelAdapter<LinearSimilarityModel>::GetJacobianAtPoint(
    const Vector2_f& pt, float* jacobian) {
  DCHECK(jacobian);
  // First row.
  jacobian[0] = 1;
  jacobian[1] = 0;
  jacobian[2] = pt.x();
  jacobian[3] = -pt.y();
  // Second row.
  jacobian[4] = 0;
  jacobian[5] = 1;
  jacobian[6] = pt.y();
  jacobian[7] = pt.x();
}
// Returns a linear similarity scaling by the inverse frame diameter, used
// to map models to normalized coordinates.
LinearSimilarityModel
ModelAdapter<LinearSimilarityModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  const float scale = std::hypot(frame_width, frame_height);
  DCHECK_NE(scale, 0);
  return LinearSimilarityAdapter::FromArgs(0, 0, 1.0 / scale, 0);
}
// Returns the parameter-wise maximum of the two models.
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::Maximum(
    const LinearSimilarityModel& lhs, const LinearSimilarityModel& rhs) {
  return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()),
                  std::max(lhs.a(), rhs.a()), std::max(lhs.b(), rhs.b()));
}
// Projects to a translation by conjugating with a shift to the frame center,
// so the extracted translation is measured at the frame center rather than
// the top-left origin.
TranslationModel ModelAdapter<LinearSimilarityModel>::ProjectToTranslation(
    const LinearSimilarityModel& model, float frame_width, float frame_height) {
  LinearSimilarityModel center_trans = LinearSimilarityAdapter::FromArgs(
      frame_width * 0.5f, frame_height * 0.5f, 1, 0);
  LinearSimilarityModel inv_center_trans = LinearSimilarityAdapter::FromArgs(
      -frame_width * 0.5f, -frame_height * 0.5f, 1, 0);
  // Express model w.r.t. center.
  LinearSimilarityModel center_model =
      ModelCompose3(inv_center_trans, model, center_trans);
  // No need to shift back to top-left after decomposition, as translations
  // are independent from coordinate origin.
  return TranslationAdapter::FromArgs(center_model.dx(), center_model.dy());
}
// Formats the affine model as "dx dy a b c d" fixed-width floats.
std::string ModelAdapter<AffineModel>::ToString(const AffineModel& model) {
  return absl::StrFormat("%7f %7f %7f %7f %7f %7f", model.dx(), model.dy(),
                         model.a(), model.b(), model.c(), model.d());
}
// Returns an affine model scaling isotropically by the inverse frame
// diameter, used to map models to normalized coordinates.
AffineModel ModelAdapter<AffineModel>::NormalizationTransform(
    float frame_width, float frame_height) {
  const float scale = std::hypot(frame_width, frame_height);
  DCHECK_NE(scale, 0);
  return AffineAdapter::FromArgs(0, 0, 1.0f / scale, 0, 0, 1.0f / scale);
}
// Embeds the affine model into a homography with zero perspective part
// (h_20 = h_21 = 0); params are packed in row-major homography order.
Homography ModelAdapter<AffineModel>::ToHomography(const AffineModel& model) {
  float params[8] = {model.a(), model.b(), model.dx(), model.c(),
                     model.d(), model.dy(), 0, 0};
  return HomographyAdapter::FromFloatPointer(params, false);
}
// Inverse of ToHomography; debug-checks the perspective part is zero so the
// reduction is lossless.
AffineModel ModelAdapter<AffineModel>::FromHomography(const Homography& model) {
  DCHECK_EQ(model.h_20(), 0);
  DCHECK_EQ(model.h_21(), 0);
  float params[6] = {model.h_02(), model.h_12(),  // dx, dy
                     model.h_00(), model.h_01(),  // a, b
                     model.h_10(), model.h_11()};  // c, d
  return FromFloatPointer(params, false);
}
// Multiplies every parameter of the affine model by the given scale factor.
AffineModel ModelAdapter<AffineModel>::ScaleParameters(
    const AffineModel& model_in, float scale) {
  AffineModel scaled;
  scaled.set_a(model_in.a() * scale);
  scaled.set_b(model_in.b() * scale);
  scaled.set_c(model_in.c() * scale);
  scaled.set_d(model_in.d() * scale);
  scaled.set_dx(model_in.dx() * scale);
  scaled.set_dy(model_in.dy() * scale);
  return scaled;
}
// Adds the identity transform to a difference model, i.e. a += 1, d += 1
// (identity affine has a = d = 1, all other parameters 0).
AffineModel ModelAdapter<AffineModel>::AddIdentity(
    const AffineModel& model_in) {
  AffineModel model = model_in;
  model.set_a(model.a() + 1);
  model.set_d(model.d() + 1);
  return model;
}
// Writes the row-major 2x6 Jacobian of the transformed point w.r.t. the
// parameters (dx, dy, a, b, c, d) at pt, for x' = a*x + b*y + dx,
// y' = c*x + d*y + dy.
void ModelAdapter<AffineModel>::GetJacobianAtPoint(const Vector2_f& pt,
                                                   float* jacobian) {
  DCHECK(jacobian);
  // First row.
  jacobian[0] = 1;
  jacobian[1] = 0;
  jacobian[2] = pt.x();
  jacobian[3] = pt.y();
  jacobian[4] = 0;
  jacobian[5] = 0;
  // Second row.
  jacobian[6] = 0;
  jacobian[7] = 1;
  jacobian[8] = 0;
  jacobian[9] = 0;
  jacobian[10] = pt.x();
  jacobian[11] = pt.y();
}
// Returns the parameter-wise maximum of the two affine models.
AffineModel ModelAdapter<AffineModel>::Maximum(const AffineModel& lhs,
                                               const AffineModel& rhs) {
  return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()),
                  std::max(lhs.a(), rhs.a()), std::max(lhs.b(), rhs.b()),
                  std::max(lhs.c(), rhs.c()), std::max(lhs.d(), rhs.d()));
}
// Projects an affine model to its best linear-similarity approximation.
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::ProjectFrom(
    const AffineModel& model, float frame_width, float frame_height) {
  return AffineAdapter::ProjectToLinearSimilarity(model, frame_width,
                                                  frame_height);
}
// Projects a homography to a linear similarity by chaining the
// homography -> affine -> linear similarity projections.
LinearSimilarityModel ModelAdapter<LinearSimilarityModel>::ProjectFrom(
    const Homography& model, float frame_width, float frame_height) {
  return ProjectFrom(
      AffineAdapter::ProjectFrom(model, frame_width, frame_height), frame_width,
      frame_height);
}
// Approximates an affine model by the closest conformal (linear similarity)
// model: the model is re-expressed w.r.t. the frame center, its linear part
// is orthonormalized at the determinant's average scale, and the result is
// mapped back to top-left origin coordinates.
LinearSimilarityModel ModelAdapter<AffineModel>::ProjectToLinearSimilarity(
    const AffineModel& model, float frame_width, float frame_height) {
  AffineModel center_trans = AffineAdapter::FromArgs(
      frame_width * 0.5f, frame_height * 0.5f, 1, 0, 0, 1);
  AffineModel inv_center_trans = AffineAdapter::FromArgs(
      -frame_width * 0.5f, -frame_height * 0.5f, 1, 0, 0, 1);
  // Express model w.r.t. center.
  AffineModel center_model =
      ModelCompose3(inv_center_trans, model, center_trans);
  // Determine average scale.
  const float scale = std::sqrt(AffineAdapter::Determinant(center_model));
  // Goal is approximate matrix:
  // (a b            (a' -b'
  //  c d)   with     b'  a')
  //
  //   :=               :=
  //  v_1 v_2
  // After normalization by the scale, a' = cos(u) and b' = sin(u)
  // therefore, the columns on the right hand side have norm 1 and are
  // orthogonal.
  //
  // ==> Orthonormalize v_1 and v_2
  Vector2_f x_map(center_model.a(), center_model.c());  // == v_1
  Vector2_f y_map(center_model.b(), center_model.d());  // == v_2
  x_map.Normalize();
  y_map.Normalize();
  // Two approaches here
  // A) Perform Gram-Schmidt, i.e. QR decomp / polar decomposition, i.e.:
  //    y_map' = y_map - <y_map, x_map> * x_map;
  //    ==> no error in x_map, error increasing with larger y
  // B) Compute the middle vector between x_map and y_map and create
  //    orthogonal system from that
  //    (rotate by -45': [ 1 1
  //                      -1 1] * 1/sqrt(2)
  //    Error equally distributed between x and y.
  //
  // Comparing approach A and B:
  //
  //                video 1 (gleicher4)    video 2 (pool dance)
  // Method B
  // Max diff :  dx:   1.6359973            4.600112
  //             dy:   2.1076841            11.51579
  //             a:    1.004498             1.01036
  //             b:    0.0047194548         0.027450036
  // Method A
  // Max diff :  dx:   4.3549204            14.145205
  //             dy:   2.4496114            7.7804289
  //             a:    1.0136091            1.043335
  //             b:    0.0024313219         0.0065769218
  // Using method B:
  Vector2_f middle = (x_map + y_map).Normalize();
  Vector2_f a_b = Vector2_f(middle.x() + middle.y(),  // see above matrix.
                            middle.y() - middle.x())
                      .Normalize();
  AffineModel lin_approx = AffineAdapter::FromArgs(
      center_model.dx(), center_model.dy(), scale * a_b.x(), -scale * a_b.y(),
      scale * a_b.y(), scale * a_b.x());
  // Map back to top-left origin.
  return LinearSimilarityAdapter::FromAffine(
      ModelCompose3(center_trans, lin_approx, inv_center_trans));
}
// Projects a homography to its best affine approximation.
AffineModel ModelAdapter<AffineModel>::ProjectFrom(const Homography& model,
                                                   float frame_width,
                                                   float frame_height) {
  return HomographyAdapter::ProjectToAffine(model, frame_width, frame_height);
}
// Inverts the homography (h_22 is implicitly 1). On success returns the
// inverse re-normalized so that its h_22 is again 1; on failure (singular
// matrix or degenerate inverse) sets *success to false and returns identity.
Homography ModelAdapter<Homography>::InvertChecked(const Homography& model,
                                                   bool* success) {
  // Could do adjoint method and do it by hand. Use Eigen for now, not that
  // crucial at this point.
  Eigen::Matrix3d model_mat;
  model_mat(0, 0) = model.h_00();
  model_mat(0, 1) = model.h_01();
  model_mat(0, 2) = model.h_02();
  model_mat(1, 0) = model.h_10();
  model_mat(1, 1) = model.h_11();
  model_mat(1, 2) = model.h_12();
  model_mat(2, 0) = model.h_20();
  model_mat(2, 1) = model.h_21();
  model_mat(2, 2) = 1.0f;
  // Test the magnitude of the determinant: a homography with a large
  // negative determinant is still invertible (the unsigned comparison used
  // previously rejected all negative determinants).
  if (std::abs(model_mat.determinant()) < kDetInvertibleEps) {
    VLOG(1) << "Homography not invertible, det is zero.";
    *success = false;
    return Homography();
  }
  Eigen::Matrix3d inv_model_mat = model_mat.inverse();
  if (inv_model_mat(2, 2) == 0) {
    LOG(ERROR) << "Degenerate homography. See proto.";
    *success = false;
    return Homography();
  }
  *success = true;
  Homography inv_model;
  // Normalize so the implicit h_22 of the result equals 1.
  const float scale = 1.0f / inv_model_mat(2, 2);
  inv_model.set_h_00(inv_model_mat(0, 0) * scale);
  inv_model.set_h_01(inv_model_mat(0, 1) * scale);
  inv_model.set_h_02(inv_model_mat(0, 2) * scale);
  inv_model.set_h_10(inv_model_mat(1, 0) * scale);
  inv_model.set_h_11(inv_model_mat(1, 1) * scale);
  inv_model.set_h_12(inv_model_mat(1, 2) * scale);
  inv_model.set_h_20(inv_model_mat(2, 0) * scale);
  inv_model.set_h_21(inv_model_mat(2, 1) * scale);
  return inv_model;
}
std::string ModelAdapter<Homography>::ToString(const Homography& model) {
  // Space-separated parameters in row-major order (h_22 == 1 is implicit).
  return absl::StrFormat("%7f %7f %7f %7f %7f %7f %7f %7f", model.h_00(),
                         model.h_01(), model.h_02(), model.h_10(),
                         model.h_11(), model.h_12(), model.h_20(),
                         model.h_21());
}
AffineModel ModelAdapter<Homography>::ToAffine(const Homography& model) {
  // Only valid for homographies without perspective terms.
  DCHECK_EQ(model.h_20(), 0);
  DCHECK_EQ(model.h_21(), 0);
  // The upper 2x3 part of the homography maps directly onto the affine
  // parameters: [a b dx; c d dy].
  AffineModel affine_model;
  affine_model.set_dx(model.h_02());
  affine_model.set_dy(model.h_12());
  affine_model.set_a(model.h_00());
  affine_model.set_b(model.h_01());
  affine_model.set_c(model.h_10());
  affine_model.set_d(model.h_11());
  return affine_model;
}
Homography ModelAdapter<Homography>::FromAffine(const AffineModel& model) {
  // Embedding yields a homography with a zero perspective row.
  const Homography embedded = Embed(model);
  return embedded;
}
bool ModelAdapter<Homography>::IsAffine(const Homography& model) {
  // Affine iff the perspective row (h_20, h_21) vanishes.
  const bool no_perspective = (model.h_20() == 0) && (model.h_21() == 0);
  return no_perspective;
}
void ModelAdapter<Homography>::GetJacobianAtPoint(const Vector2_f& pt,
                                                  float* jacobian) {
  DCHECK(jacobian);
  const float x = pt.x();
  const float y = pt.y();
  // Row of partial derivatives for the transformed x coordinate.
  float* row_x = jacobian;
  row_x[0] = x;
  row_x[1] = y;
  row_x[2] = 1;
  row_x[3] = 0;
  row_x[4] = 0;
  row_x[5] = 0;
  row_x[6] = -x * x;
  row_x[7] = -x * y;
  // Row of partial derivatives for the transformed y coordinate.
  float* row_y = jacobian + 8;
  row_y[0] = 0;
  row_y[1] = 0;
  row_y[2] = 0;
  row_y[3] = x;
  row_y[4] = y;
  row_y[5] = 1;
  row_y[6] = -x * y;
  row_y[7] = -y * y;
}
Homography ModelAdapter<Homography>::NormalizationTransform(
    float frame_width, float frame_height) {
  // Isotropic scale by the inverse of the frame diagonal.
  const float diag = std::hypot(frame_width, frame_height);
  DCHECK_NE(diag, 0);
  const float inv_diag = 1.0f / diag;
  return HomographyAdapter::FromArgs(inv_diag, 0, 0, 0, inv_diag, 0, 0, 0);
}
AffineModel ModelAdapter<Homography>::ProjectToAffine(const Homography& model,
                                                      float frame_width,
                                                      float frame_height) {
  // Translations mapping the frame center to the origin and back.
  const float half_width = frame_width * 0.5f;
  const float half_height = frame_height * 0.5f;
  Homography center_trans;
  center_trans.set_h_02(half_width);
  center_trans.set_h_12(half_height);
  Homography inv_center_trans;
  inv_center_trans.set_h_02(-half_width);
  inv_center_trans.set_h_12(-half_height);
  // Express the model w.r.t. the frame center, zero out the perspective row
  // there, then map back to the top-left origin and convert.
  Homography center_model =
      ModelCompose3(inv_center_trans, model, center_trans);
  center_model.set_h_20(0);
  center_model.set_h_21(0);
  return ToAffine(ModelCompose3(center_trans, center_model, inv_center_trans));
}
namespace {
// Returns true if non-empty intersection is present. In this case, start and
// end are clipped to rect, specifically after clipping the line
// [start, end] is inside the rectangle or incident to the boundary
// of the rectangle. If strict is set to true, [start, end] is strictly inside
// the rectangle, i.e. not incident to the boundary (minus start and end
// point which still can be incident).
// Implemented using Liang-Barsky algorithm.
// http://en.wikipedia.org/wiki/Liang%E2%80%93Barsky
inline bool ClipLine(const Vector2_f& rect, bool strict, Vector2_f* start,
                     Vector2_f* end) {
  Vector2_f diff = *end - *start;
  // For each of the 4 clip edges (left, right, top, bottom): p is the signed
  // component of the line direction towards that edge, q the signed distance
  // of the start point from it.
  float p[4] = {-diff.x(), diff.x(), -diff.y(), diff.y()};
  // Bounds are (x_min, y_min) = (0, 0)
  //            (x_max, y_max) = rect
  float q[4] = {start->x() - 0, rect.x() - start->x(), start->y() - 0,
                rect.y() - start->y()};
  // Compute parametric intersection points; the clipped segment is
  // [near, far] in the line's parameter t, with t in [0, 1].
  float near = -1e10;
  float far = 1e10;
  for (int k = 0; k < 4; ++k) {
    if (fabs(p[k]) < 1e-6f) {
      // Line is parallel to one axis of rectangle.
      // In strict mode reject also lines incident to the boundary (q == 0).
      if ((strict && q[k] <= 0.0f) || q[k] < 0.0f) {
        // Line is outside rectangle.
        return false;
      } else {
        // Possible intersection along other dimensions.
        continue;
      }
    } else {
      // Line is not parallel -> compute intersection.
      float intersect = q[k] / p[k];
      // Sign of p determines if near or far parameter.
      if (p[k] < 0.0f) {
        near = std::max(near, intersect);
      } else {
        far = std::min(far, intersect);
      }
    }
  }
  if (near > far) {
    // Line is outside of rectangle.
    return false;
  }
  // Clip near and far to valid line segment interval [0, 1].
  far = std::min(far, 1.0f);
  near = std::max(near, 0.0f);
  if (near <= far) {
    // Non-empty intersection. Single points are considered valid intersection.
    // Note: *end is updated first since both updates read the original *start.
    *end = *start + diff * far;
    *start = *start + diff * near;
    return true;
  } else {
    // Empty intersection.
    return false;
  }
}
}  // namespace.
// Computes the area of the intersection of rect transformed by model_1 with
// rect transformed by model_2, normalized by the average of the two
// transformed rectangle areas. Returns 0 on empty rect, degenerate models,
// or singular model difference.
template <class Model>
float ModelMethods<Model>::NormalizedIntersectionArea(const Model& model_1,
                                                      const Model& model_2,
                                                      const Vector2_f& rect) {
  const float rect_area = rect.x() * rect.y();
  if (rect_area <= 0) {
    LOG(WARNING) << "Empty rectangle passed -> empty intersection.";
    return 0.0f;
  }
  // The four edges of rect, traversed as a closed polygon
  // (0,0) -> (0,h) -> (w,h) -> (w,0) -> (0,0).
  std::vector<std::pair<Vector2_f, Vector2_f>> lines(4);
  lines[0] = std::make_pair(Vector2_f(0, 0), Vector2_f(0, rect.y()));
  lines[1] =
      std::make_pair(Vector2_f(0, rect.y()), Vector2_f(rect.x(), rect.y()));
  lines[2] =
      std::make_pair(Vector2_f(rect.x(), rect.y()), Vector2_f(rect.x(), 0));
  lines[3] = std::make_pair(Vector2_f(rect.x(), 0), Vector2_f(0, 0));
  // Signed areas of the two transformed rectangles via the trapezoidal rule.
  float model_1_area = 0.0f;
  float model_2_area = 0.0f;
  for (int k = 0; k < 4; ++k) {
    Vector2_f start_1 = Adapter::TransformPoint(model_1, lines[k].first);
    Vector2_f end_1 = Adapter::TransformPoint(model_1, lines[k].second);
    // Use trapezoidal rule for polygon area.
    model_1_area += 0.5 * (end_1.y() + start_1.y()) * (end_1.x() - start_1.x());
    Vector2_f start_2 = Adapter::TransformPoint(model_2, lines[k].first);
    Vector2_f end_2 = Adapter::TransformPoint(model_2, lines[k].second);
    model_2_area += 0.5 * (end_2.y() + start_2.y()) * (end_2.x() - start_2.x());
  }
  const float average_area = 0.5f * (model_1_area + model_2_area);
  if (average_area <= 0) {
    LOG(WARNING) << "Degenerative models passed -> empty intersection.";
    return 0.0f;
  }
  // First, clip transformed rectangle against origin defined by model_1.
  // diff maps model_1's coordinate frame onto model_2's.
  bool success = true;
  Model diff = ModelDiffChecked(model_2, model_1, &success);
  if (!success) {
    LOG(WARNING) << "Model difference is singular -> empty intersection.";
    return 0.0f;
  }
  // Accumulate the intersection polygon's area from boundary pieces of both
  // transformed rectangles; each clipped edge contributes a trapezoid term.
  float area = 0.0f;
  for (int k = 0; k < 4; ++k) {
    Vector2_f start_1 = Adapter::TransformPoint(diff, lines[k].first);
    Vector2_f end_1 = Adapter::TransformPoint(diff, lines[k].second);
    if (ClipLine(rect, false, &start_1, &end_1)) {
      // Non-empty intersection.
      // Transform intersection back to world coordinate system.
      const Vector2_f start = Adapter::TransformPoint(model_1, start_1);
      const Vector2_f end = Adapter::TransformPoint(model_1, end_1);
      // Use trapezoidal rule for polygon area without explicit ordering of
      // vertices.
      area += 0.5 * (end.y() + start.y()) * (end.x() - start.x());
    }
  }
  // Second, clip transformed rectangle against origin defined by model_2.
  Model inv_diff = Adapter::InvertChecked(diff, &success);
  if (!success) {
    LOG(WARNING) << "Model difference is singular -> empty intersection.";
    return 0.0f;
  }
  for (int k = 0; k < 4; ++k) {
    Vector2_f start_2 = Adapter::TransformPoint(inv_diff, lines[k].first);
    Vector2_f end_2 = Adapter::TransformPoint(inv_diff, lines[k].second);
    // Use strict comparison to address degenerate case of incident rectangles
    // in which case intersection would be counted twice if non-strict
    // comparison is employed.
    if (ClipLine(rect, true, &start_2, &end_2)) {
      // Transform start and end back to origin.
      const Vector2_f start = Adapter::TransformPoint(model_2, start_2);
      const Vector2_f end = Adapter::TransformPoint(model_2, end_2);
      area += 0.5 * (end.y() + start.y()) * (end.x() - start.x());
    }
  }
  // Normalize w.r.t. average rectangle area.
  return area / average_area;
}
// Precomputes, for every row of the frame (plus optional margin rows), the
// normalized weight of each of the num_models mixture models.
// Two weighting schemes are supported, selected by the flag
// flags_catmull_rom_mixture_weights (defined earlier in this file):
// Catmull-Rom spline weights over evenly spaced control rows, or Gaussian
// weights around per-model mid-point rows.
MixtureRowWeights::MixtureRowWeights(int frame_height, int margin, float sigma,
                                     float y_scale, int num_models)
    : frame_height_(frame_height),
      y_scale_(y_scale),
      margin_(margin),
      sigma_(sigma),
      num_models_(num_models) {
  mid_points_.resize(num_models_);
  // Compute mid_point (row idx) for each model.
  if (flags_catmull_rom_mixture_weights) {
    // Control points span the full frame height, so spacing uses
    // num_models - 1 intervals.
    const float model_height =
        static_cast<float>(frame_height_) / (num_models - 1);
    // Use Catmull-rom spline.
    // Compute weighting matrix.
    weights_.resize(frame_height * num_models);
    float spline_weights[4];
    // No margin support for splines.
    if (margin_ > 0) {
      LOG(WARNING) << "No margin support when flag catmull_rom_mixture_weights "
                   << "is set. Margin is reset to zero, it is recommended "
                   << "that RowWeightsBoundChecked is used to prevent "
                   << "segfaults.";
      margin_ = 0;
    }
    for (int i = 0; i < frame_height; ++i) {
      float* weight_ptr = &weights_[i * num_models];
      // Row position in control-point units: int_pos is the spline segment,
      // dy in [0, 1) the position within it.
      float float_pos = static_cast<float>(i) / model_height;
      int int_pos = float_pos;
      memset(weight_ptr, 0, sizeof(weight_ptr[0]) * num_models);
      float dy = float_pos - int_pos;
      // Weights sum to one, for all choices of dy.
      // For definition see
      // en.wikipedia.org/wiki/Cubic_Hermite_spline#Catmull.E2.80.93Rom_spline
      spline_weights[0] = 0.5f * (dy * ((2.0f - dy) * dy - 1.0f));
      spline_weights[1] = 0.5f * (dy * dy * (3.0f * dy - 5.0f) + 2.0f);
      spline_weights[2] = 0.5f * (dy * ((4.0f - 3.0f * dy) * dy + 1.0f));
      spline_weights[3] = 0.5f * (dy * dy * (dy - 1.0f));
      // Scatter the 4 spline weights onto the control points; at both frame
      // borders the missing neighbor is folded into the nearest control
      // point (double knot).
      weight_ptr[int_pos] += spline_weights[1];
      if (int_pos > 0) {
        weight_ptr[int_pos - 1] += spline_weights[0];
      } else {
        weight_ptr[int_pos] += spline_weights[0];  // Double knot.
      }
      CHECK_LT(int_pos, num_models - 1);
      weight_ptr[int_pos + 1] += spline_weights[2];
      if (int_pos + 1 < num_models - 1) {
        weight_ptr[int_pos + 2] += spline_weights[3];
      } else {
        weight_ptr[int_pos + 1] += spline_weights[3];  // Double knot.
      }
    }
  } else {
    // Gaussian weights.
    const float model_height = static_cast<float>(frame_height_) / num_models;
    for (int i = 0; i < num_models; ++i) {
      mid_points_[i] = (i + 0.5f) * model_height;
    }
    // Compute gaussian weights.
    // Lookup table of exp(-0.5 * d^2 / sigma^2) over row distances d.
    const int num_values = frame_height_ + 2 * margin_;
    std::vector<float> row_dist_weights(num_values);
    const float common = -0.5f / (sigma * sigma);
    for (int i = 0; i < num_values; ++i) {
      row_dist_weights[i] = std::exp(common * i * i);
    }
    // Compute weighting matrix.
    weights_.resize(num_values * num_models);
    for (int i = 0; i < num_values; ++i) {
      float* weight_ptr = &weights_[i * num_models];
      float weight_sum = 0;
      // Gaussian weights via lookup.
      // NOTE(review): mid_points_ is float, so the lookup index is a float
      // distance truncated to an integer -- presumably intended; confirm.
      for (int j = 0; j < num_models; ++j) {
        weight_ptr[j] = row_dist_weights[abs(i - margin_ - mid_points_[j])];
        weight_sum += weight_ptr[j];
      }
      // Normalize.
      DCHECK_GT(weight_sum, 0);
      const float inv_weight_sum = 1.0f / weight_sum;
      for (int j = 0; j < num_models; ++j) {
        weight_ptr[j] *= inv_weight_sum;
      }
    }
  }
}
float MixtureRowWeights::WeightThreshold(float frac_blocks) {
  // Evaluate the first model's weight at a point frac_blocks model-heights
  // away from its mid-point row; that weight serves as the threshold.
  const float model_height = static_cast<float>(frame_height_) / num_models_;
  const float y = mid_points_[0] + model_height * frac_blocks;
  const float* row_weights = RowWeightsClamped(y / y_scale_);
  return row_weights[0];
}
// Explicit instantiations of ModelMethods.
// Keeping these here lets the template member definitions above live in this
// translation unit while callers link against the concrete model types.
template class ModelMethods<TranslationModel>;
template class ModelMethods<SimilarityModel>;
template class ModelMethods<LinearSimilarityModel>;
template class ModelMethods<AffineModel>;
template class ModelMethods<Homography>;
} // namespace mediapipe

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Motion models, that are supported and estimated by MotionStabilization.
// Note, that transformations represent the motion of the
// feature points from the previous frame to the current one.
// Note that the actual camera movement, is the inverse of this
// transformation.
// Check carefully, which movement (feature or camera) is required.
// Note that for each model the default values always specify an identity
// transform. Follow this rule when adding new models.
syntax = "proto2";
package mediapipe;
// Here x represents a 2D point in the image plane, in the following
// coordinate system:
// *----> x
// |
// |
// v y
// Simple translational model:
// I * x + [dx; dy] with I being 2x2 identity transform.
message TranslationModel {
  // Translation along x, in the image coordinate system above.
  optional float dx = 1 [default = 0];
  // Translation along y.
  optional float dy = 2 [default = 0];
}
// Non-linear similarity model (w.r.t. to its parametrization).
// c_r := cos(rotation);
// s_r := sin(rotation);
// Transformation applied to x:
// [scale 0; * [c_r -s_r; * x + [dx;
// 0 scale] s_r c_r] dy]
message SimilarityModel {
  // Translation along x and y.
  optional float dx = 1 [default = 0];
  optional float dy = 2 [default = 0];
  // Isotropic scale; 1 is the identity.
  optional float scale = 3 [default = 1];
  optional float rotation = 4 [default = 0];  // angle in [-pi, pi].
}
// Linear similarity model:
// [a -b; * x + [dx;
// b a] dy]
message LinearSimilarityModel {
  // Translation along x and y.
  optional float dx = 1 [default = 0];
  optional float dy = 2 [default = 0];
  // Linear part [a -b; b a]; (a, b) = (1, 0) is the identity.
  optional float a = 3 [default = 1];
  optional float b = 4 [default = 0];
}
// Affine according to
// ( [a b * x + [dx;
// ( c d] dy]
message AffineModel {
  // Translation along x and y.
  optional float dx = 1 [default = 0];
  optional float dy = 2 [default = 0];
  // Linear part [a b; c d]; (a, b, c, d) = (1, 0, 0, 1) is the identity.
  optional float a = 3 [default = 1];
  optional float b = 4 [default = 0];
  optional float c = 5 [default = 0];
  optional float d = 6 [default = 1];
}
// Homography according to
// [h_00 h_01 h_02;
// h_10 h_11 h_12;
// h_20 h_21 1];
// Note: The parametrization with h_22 = 1 does not always hold, e.g.
// if the origin (0, 0, 1) gets mapped to the line at infinity
// (0, 0, 1). However for video we expect small perspective
// changes between frames and this parametrization improves
// robustness greatly as it removes an additional DOF.
// Therefore, all methods in motion_stabilization should not be
// used for general wide-baseline matching of frames.
message Homography {
  // Row-major entries of the 3x3 matrix above; h_22 is fixed to 1 and
  // therefore not stored (see comment above on this parametrization).
  optional float h_00 = 1 [default = 1];
  optional float h_01 = 2 [default = 0];
  optional float h_02 = 3 [default = 0];
  optional float h_10 = 4 [default = 0];
  optional float h_11 = 5 [default = 1];
  optional float h_12 = 6 [default = 0];
  // Perspective part; zero for affine homographies.
  optional float h_20 = 7 [default = 0];
  optional float h_21 = 8 [default = 0];
}
// Mixture models with higher degrees of freedom, according to
// \sum_i model(i) * weight(i), where weights are passed during transform and
// are expected to sum to one.
// Mixture of linear similarity models.
message MixtureLinearSimilarity {
  repeated LinearSimilarityModel model = 1;
}
// Mixture of affine models.
message MixtureAffine {
  repeated AffineModel model = 1;
}
// Mixture of homographies.
message MixtureHomography {
  repeated Homography model = 1;
  // Specifies which degree of freedom vary across mixture.
  // Can be used to implement several transformation functions quicker.
  enum VariableDOF {
    ALL_DOF = 0;          // All dof are variable.
    TRANSLATION_DOF = 1;  // Only translation (h_02, h_12) varies.
    SKEW_ROTATION_DOF = 2;  // Translation (h_02, h_12), and skew-rotation
                            // (h_01, h_10) vary.
    CONST_DOF = 3;        // Mixture is constant.
  }
  optional VariableDOF dof = 2 [default = ALL_DOF];
}

View File

@ -0,0 +1,57 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/motion_models_cv.h"
namespace mediapipe {
void ModelCvConvert<TranslationModel>::ToCvMat(const TranslationModel& model,
                                               cv::Mat* matrix) {
  // Embed the translation into a full affine model and reuse its conversion.
  const AffineModel as_affine = ModelAdapter<TranslationModel>::ToAffine(model);
  ModelCvConvert<AffineModel>::ToCvMat(as_affine, matrix);
}
void ModelCvConvert<LinearSimilarityModel>::ToCvMat(
    const LinearSimilarityModel& model, cv::Mat* matrix) {
  // Embed the linear similarity into a full affine model and reuse its
  // conversion.
  const AffineModel as_affine =
      ModelAdapter<LinearSimilarityModel>::ToAffine(model);
  ModelCvConvert<AffineModel>::ToCvMat(as_affine, matrix);
}
// Writes the affine model as the 2x3 row-major matrix [a b dx; c d dy].
void ModelCvConvert<AffineModel>::ToCvMat(const AffineModel& model,
                                          cv::Mat* matrix) {
  // Fail fast on null output; consistent with the Homography overload below,
  // which performs the same check. Previously a null matrix was dereferenced.
  CHECK(matrix != nullptr);
  matrix->create(2, 3, CV_32FC1);
  matrix->at<float>(0, 0) = model.a();
  matrix->at<float>(0, 1) = model.b();
  matrix->at<float>(0, 2) = model.dx();
  matrix->at<float>(1, 0) = model.c();
  matrix->at<float>(1, 1) = model.d();
  matrix->at<float>(1, 2) = model.dy();
}
void ModelCvConvert<Homography>::ToCvMat(const Homography& model,
                                         cv::Mat* matrix) {
  CHECK(matrix != nullptr);
  // 3x3 row-major homography; the bottom-right entry is fixed to 1 per the
  // proto's parametrization.
  matrix->create(3, 3, CV_32FC1);
  const float entries[9] = {model.h_00(), model.h_01(), model.h_02(),
                            model.h_10(), model.h_11(), model.h_12(),
                            model.h_20(), model.h_21(), 1.0f};
  for (int i = 0; i < 9; ++i) {
    matrix->at<float>(i / 3, i % 3) = entries[i];
  }
}
} // namespace mediapipe

View File

@ -0,0 +1,63 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_
#define MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/util/tracking/motion_models.pb.h" // NOLINT
namespace mediapipe {
// Converter from mediapipe motion models to OpenCV matrices. Only the
// specializations below provide ToCvMat; the primary template is empty.
template <class Model>
class ModelCvConvert {};
// Specialized implementations, with additional functionality if needed.
template <>
class ModelCvConvert<TranslationModel> {
 public:
  // Returns 2x3 floating point cv::Mat with model parameters.
  static void ToCvMat(const TranslationModel& model, cv::Mat* matrix);
};
template <>
class ModelCvConvert<LinearSimilarityModel> {
 public:
  // Returns 2x3 floating point cv::Mat with model parameters.
  static void ToCvMat(const LinearSimilarityModel& model, cv::Mat* matrix);
};
template <>
class ModelCvConvert<AffineModel> {
 public:
  // Returns 2x3 floating point cv::Mat with model parameters.
  static void ToCvMat(const AffineModel& model, cv::Mat* matrix);
};
template <>
class ModelCvConvert<Homography> {
 public:
  // Returns 3x3 floating point cv::Mat with model parameters.
  static void ToCvMat(const Homography& model, cv::Mat* matrix);
};
// Convenience wrapper deducing the model type.
template <class Model>
void ModelToCvMat(const Model& model, cv::Mat* matrix) {
  ModelCvConvert<Model>::ToCvMat(model, matrix);
}
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_

View File

@ -0,0 +1,335 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/motion_models.h"
#include "mediapipe/framework/deps/message_matchers.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/proto_ns.h"
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/motion_estimation.h"
namespace mediapipe {
namespace {
// Shared parameter arrays; each adapter reads only its first NumParameters()
// entries.
static const float kArrayFloat[] = {1, 2, 3, 4, 5, 6, 7, 8};
static const double kArrayDouble[] = {1, 2, 3, 4, 5, 6, 7, 8};
class MotionModelsTest : public ::testing::Test {};
// Test from/to array and parameter indexing functions.
// Parses expected protos from text format and verifies that
// FromFloatPointer / FromDoublePointer reproduce them, both with the
// identity flag unset (model_zero_string) and set (model_identity_string),
// and that GetParameter returns the raw array values.
template <class Model>
void CheckFromArrayAndGetParameter(const char* model_zero_string,
                                   const char* model_identity_string) {
  Model model_zero;
  Model model_identity;
  ASSERT_TRUE(
      proto_ns::TextFormat::ParseFromString(model_zero_string, &model_zero));
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_identity_string,
                                                    &model_identity));
  typedef ModelAdapter<Model> Adapter;
  EXPECT_THAT(Adapter::FromFloatPointer(kArrayFloat, false),
              mediapipe::EqualsProto(model_zero));
  EXPECT_THAT(Adapter::FromFloatPointer(kArrayFloat, true),
              mediapipe::EqualsProto(model_identity));
  EXPECT_THAT(Adapter::FromDoublePointer(kArrayDouble, false),
              mediapipe::EqualsProto(model_zero));
  EXPECT_THAT(Adapter::FromDoublePointer(kArrayDouble, true),
              mediapipe::EqualsProto(model_identity));
  ASSERT_LE(Adapter::NumParameters(), 8);
  for (int i = 0; i < Adapter::NumParameters(); ++i) {
    EXPECT_EQ(kArrayFloat[i], Adapter::GetParameter(model_zero, i));
  }
}
TEST_F(MotionModelsTest, FromArrayAndGetParameter) {
  // The expected "identity" protos show that with the identity flag set, the
  // parameters whose identity value is non-zero (scale, a, d, h_00, h_11)
  // are offset by one relative to the raw array values.
  CheckFromArrayAndGetParameter<TranslationModel>("dx: 1 dy: 2", "dx: 1 dy: 2");
  CheckFromArrayAndGetParameter<SimilarityModel>(
      "dx: 1 dy: 2 scale: 3 rotation: 4", "dx: 1 dy: 2 scale: 4 rotation: 4");
  CheckFromArrayAndGetParameter<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: 4",
                                                       "dx: 1 dy: 2 a: 4 b: 4");
  CheckFromArrayAndGetParameter<AffineModel>("dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6",
                                             "dx: 1 dy: 2 a: 4 b: 4 c: 5 d: 7");
  CheckFromArrayAndGetParameter<Homography>(
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: 4 h_11: 5 h_12: 6 "
      "h_20: 7 h_21: 8 ",
      "h_00: 2 h_01: 2 h_02: 3 "
      "h_10: 4 h_11: 6 h_12: 6 "
      "h_20: 7 h_21: 8 ");
}
// Test point transformations.
// Parses a model from text format, applies it to (x_in, y_in) via
// TransformPoint, and checks the result against (x_out, y_out).
template <class Model>
void CheckTransformPoint(const char* model_string, float x_in, float y_in,
                         float x_out, float y_out) {
  Model model;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
  const Vector2_f output =
      ModelAdapter<Model>::TransformPoint(model, Vector2_f(x_in, y_in));
  EXPECT_NEAR(x_out, output.x(), 1e-5);
  EXPECT_NEAR(y_out, output.y(), 1e-5);
}
TEST_F(MotionModelsTest, TransformPoint) {
  CheckTransformPoint<TranslationModel>("dx: 0 dy: 0", 1, 1, 1, 1);
  CheckTransformPoint<TranslationModel>("dx: 1 dy: -1", 1, 1, 2, 0);
  // rotation 1.57079633 is pi / 2, i.e. a 90 degree rotation.
  CheckTransformPoint<SimilarityModel>(
      "dx: 0 dy: 0 scale: 1 rotation: 1.57079633", 1, 2, -2, 1);
  CheckTransformPoint<SimilarityModel>(
      "dx: 1 dy: -1 scale: 1 rotation: 1.57079633", 1, 2, -1, 0);
  CheckTransformPoint<SimilarityModel>(
      "dx: 1 dy: -1 scale: 2 rotation: 1.57079633", 1, 2, -3, 1);
  CheckTransformPoint<LinearSimilarityModel>("dx: 0 dy: 0 a: 1 b: -0.5", 1, 2,
                                             2, 1.5);
  CheckTransformPoint<LinearSimilarityModel>("dx: 0.5 dy: -0.5 a: 1 b: 0.5", 1,
                                             2, 0.5, 2);
  CheckTransformPoint<LinearSimilarityModel>("dx: 0.5 dy: -0.5 a: 0.5 b: 0.5",
                                             1, 2, 0, 1);
  CheckTransformPoint<AffineModel>("dx: 0 dy: 0 a: 1 b: 0.5 c: -0.5 d: 1", 1, 2,
                                   2, 1.5);
  CheckTransformPoint<AffineModel>("dx: 0.5 dy: -0.5 a: 2 b: -0.5 c: 0.5 d: 1",
                                   1, 2, 1.5, 2);
  CheckTransformPoint<AffineModel>("dx: 1 dy: -1 a: 2 b: -2 c: 1 d: -1", 1, 2,
                                   -1, -2);
  // Transformations by Homography are followed by division by the 3rd element.
  // Test division by value != 1.
  CheckTransformPoint<Homography>(
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: 4 h_11: 3 h_12: 6 "
      "h_20: 7 h_21: 8 ",
      1, 2, 8.0 / 24.0, 16.0 / 24.0);
  // Test division by 1.
  CheckTransformPoint<Homography>(
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: 4 h_11: 3 h_12: 6 "
      "h_20: 2 h_21: -1 ",
      1, 2, 8.0, 16.0);
}
// Test model inversions.
// Parses a model and its expected inverse from text format, inverts via
// the adapter, and compares all parameters with a small absolute tolerance.
template <class Model>
void CheckInvert(const char* model_string, const char* inv_model_string) {
  Model model;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
  Model inv_model_expected;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(inv_model_string,
                                                    &inv_model_expected));
  typedef ModelAdapter<Model> Adapter;
  const Model inv_model_actual = Adapter::Invert(model);
  for (int i = 0; i < Adapter::NumParameters(); ++i) {
    EXPECT_NEAR(Adapter::GetParameter(inv_model_expected, i),
                Adapter::GetParameter(inv_model_actual, i), 1e-5)
        << "Parameter index: " << i << " of total " << Adapter::NumParameters();
  }
}
TEST_F(MotionModelsTest, Invert) {
  CheckInvert<TranslationModel>("dx: 1 dy: -2", "dx: -1 dy: 2");
  CheckInvert<SimilarityModel>("dx: 0 dy: 0 scale: 1 rotation: 1.57079633",
                               "dx: 0 dy: 0 scale: 1 rotation: -1.57079633");
  CheckInvert<SimilarityModel>("dx: 1 dy: -2 scale: 1 rotation: 1.57079633",
                               "dx: 2 dy: 1 scale: 1 rotation: -1.57079633");
  CheckInvert<SimilarityModel>("dx: 1 dy: -2 scale: 0.5 rotation: 1.57079633",
                               "dx: 4 dy: 2 scale: 2 rotation: -1.57079633");
  CheckInvert<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: 4 ",
                                     "dx: -0.44 dy: -0.08 a: 0.12 b: -0.16 ");
  // Test division by value != 1.
  CheckInvert<Homography>(
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: -3 h_11: -2 h_12: -1 "
      "h_20: 8 h_21: -1 ",
      "h_00: -0.75 h_01: -1.25 h_02: 1 "
      "h_10: -1.25 h_11: -5.75 h_12: -2 "
      "h_20: 4.75 h_21: 4.25 ");
  // Test division by 1.
  CheckInvert<Homography>(
      "h_00: -0.75 h_01: -1.25 h_02: 1 "
      "h_10: -1.25 h_11: -5.75 h_12: -2 "
      "h_20: 4.75 h_21: 4.25 ",
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: -3 h_11: -2 h_12: -1 "
      "h_20: 8 h_21: -1 ");
}
// Test model compositions.
// Parses two models and the expected composition Compose(model1, model2)
// from text format and compares all parameters with a small tolerance.
template <class Model>
void CheckCompose(const char* model_string1, const char* model_string2,
                  const char* composed_string) {
  Model model1;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string1, &model1));
  Model model2;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string2, &model2));
  Model composed_expected;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(composed_string,
                                                    &composed_expected));
  typedef ModelAdapter<Model> Adapter;
  const Model composed_actual = Adapter::Compose(model1, model2);
  for (int i = 0; i < Adapter::NumParameters(); ++i) {
    EXPECT_NEAR(Adapter::GetParameter(composed_expected, i),
                Adapter::GetParameter(composed_actual, i), 1e-5)
        << "Parameter index: " << i << " of total " << Adapter::NumParameters();
  }
}
TEST_F(MotionModelsTest, Compose) {
  CheckCompose<TranslationModel>("dx: 1 dy: -2", "dx: -3 dy: 4",
                                 "dx: -2 dy: 2");
  CheckCompose<SimilarityModel>(
      "dx: 1 dy: 2 scale: 0.5 rotation: 1.57079633 ",
      "dx: -2 dy: -1 scale: 2 rotation: -1.57079633 ",
      "dx: 1.5 dy: 1 scale: 1 rotation: 0 ");
  CheckCompose<LinearSimilarityModel>("dx: 1 dy: 2 a: 0.5 b: 0.5 ",
                                      "dx: -2 dy: -1 a: 2 b: -0.5 ",
                                      "dx: 0.5 dy: 0.5 a: 1.25 b: 0.75 ");
  // Test division by value != 1.
  CheckCompose<Homography>(
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: 4 h_11: 5 h_12: 6 "
      "h_20: 1 h_21: -1 ",
      "h_00: -3 h_01: -2 h_02: -1 "
      "h_10: -4 h_11: -5 h_12: -2 "
      "h_20: 7 h_21: 8 ",
      "h_00: 5 h_01: 6 h_02: -1 "
      "h_10: 5 h_11: 7.5 h_12: -4 "
      "h_20: 4 h_21: 5.5 ");
  // Test division by 1.
  CheckCompose<Homography>(
      "h_00: 1 h_01: 2 h_02: 3 "
      "h_10: 4 h_11: 5 h_12: 6 "
      "h_20: 2 h_21: -1 ",
      "h_00: -3 h_01: -2 h_02: -1 "
      "h_10: -4 h_11: -5 h_12: -2 "
      "h_20: 7 h_21: 8 ",
      "h_00: 10 h_01: 12 h_02: -2 "
      "h_10: 10 h_11: 15 h_12: -8 "
      "h_20: 5 h_21: 9 ");
}
// Test conversions between models and their affine representations, and
// vice-versa.
// Checks that ToAffine / FromAffine form an exact round trip between the
// model and the given affine proto.
template <class Model>
void CheckToFromAffine(const char* model_string, const char* affine_string) {
  Model model;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
  AffineModel affine;
  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(affine_string, &affine));
  typedef ModelAdapter<Model> Adapter;
  EXPECT_THAT(Adapter::ToAffine(model), mediapipe::EqualsProto(affine));
  EXPECT_THAT(Adapter::FromAffine(affine), mediapipe::EqualsProto(model));
}
TEST_F(MotionModelsTest, ToFromAffine) {
  CheckToFromAffine<TranslationModel>("dx: 1 dy: 2",
                                      "dx: 1 dy: 2 a: 1 b: 0 c: 0 d: 1");
  CheckToFromAffine<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: -4",
                                           "dx: 1 dy: 2 a: 3 b: 4 c: -4 d: 3");
  CheckToFromAffine<AffineModel>("dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6",
                                 "dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6");
  CheckToFromAffine<Homography>(
      "h_00: 3 h_01: 4 h_02: 1 "
      "h_10: 5 h_11: 6 h_12: 2 "
      "h_20: 0 h_21: 0 ",
      "dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6");
  // IsAffine depends only on the perspective part (h_20, h_21).
  Homography homography;
  ASSERT_TRUE(
      proto_ns::TextFormat::ParseFromString("h_00: 3 h_01: 4 h_02: 1 "
                                            "h_10: 5 h_11: 6 h_12: 2 "
                                            "h_20: 0 h_21: 0 ",
                                            &homography));
  EXPECT_TRUE(HomographyAdapter::IsAffine(homography));
  homography.set_h_20(7);
  homography.set_h_21(8);
  EXPECT_FALSE(HomographyAdapter::IsAffine(homography));
}
TEST_F(MotionModelsTest, ProjectModels) {
  // Express models w.r.t. center for easy testing.
  LinearSimilarityModel center_trans =
      LinearSimilarityAdapter::FromArgs(50, 50, 1, 0);
  LinearSimilarityModel inv_center_trans =
      LinearSimilarityAdapter::FromArgs(-50, -50, 1, 0);
  // 20 x 10 translation with scaling of factor 2 and rotation.
  LinearSimilarityModel lin_sim =
      LinearSimilarityAdapter::FromArgs(20, 10, 2 * cos(0.2), 2 * sin(0.2));
  LinearSimilarityModel lin_sim_center =
      ModelCompose3(center_trans, lin_sim, inv_center_trans);
  // Projecting the centered model to a 100x100 frame is expected to recover
  // its translational part (20, 10).
  TranslationModel translation =
      TranslationAdapter::ProjectFrom(lin_sim_center, 100, 100);
  EXPECT_NEAR(translation.dx(), 20, 1e-3);
  EXPECT_NEAR(translation.dy(), 10, 1e-3);
  translation = ProjectViaFit<TranslationModel>(lin_sim_center, 100, 100);
  EXPECT_NEAR(translation.dx(), 20, 1e-3);
  EXPECT_NEAR(translation.dy(), 10, 1e-3);
  Homography homog = HomographyAdapter::FromArgs(
      1, 0, 10, 0, 1, 20, 5e-3, 1e-3);  // Perspective transform: yaw + pitch.
  Homography homog_center =
      ModelCompose3(HomographyAdapter::Embed(center_trans), homog,
                    HomographyAdapter::Embed(inv_center_trans));
  // Rendering:
  // https://www.wolframalpha.com/input/?i=ListPlot%5B%7B+%7B7,-7%7D,+%7B108,16%7D,+%7B104,96%7D,+%7B12.5,+125%7D,+%7B7,-7%7D%5D
  translation = TranslationAdapter::ProjectFrom(homog_center, 100, 100);
  EXPECT_NEAR(translation.dx(), 10, 1e-3);
  EXPECT_NEAR(translation.dy(), 20, 1e-3);
  // TODO: Investigate how ProjectViaFit can yield similar result.
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,700 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/motion_saliency.h"
#include <math.h>
#include <algorithm>
#include <cmath>
#include <deque>
#include <iterator>
#include <list>
#include <memory>
#include <vector>
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/util/tracking/camera_motion.h"
#include "mediapipe/util/tracking/measure_time.h"
#include "mediapipe/util/tracking/region_flow.h"
#include "mediapipe/util/tracking/region_flow.pb.h"
namespace mediapipe {
// Stores options and frame dimensions; no other initialization is performed.
MotionSaliency::MotionSaliency(const MotionSaliencyOptions& options,
                               int frame_width, int frame_height)
    : options_(options),
      frame_width_(frame_width),
      frame_height_(frame_height) {}
MotionSaliency::~MotionSaliency() {}
// Converts region flow features (with an optional per-feature IRLS weight
// override in irls_weights) into SalientLocation candidates and clusters
// them into salient points for this frame via DetermineSalientFrame.
// Features with weights below 1% of the maximum weight are discarded;
// if the maximum weight itself is below 1e-2 no salient points are emitted.
void MotionSaliency::SaliencyFromFeatures(
    const RegionFlowFeatureList& feature_list,
    std::vector<float>* irls_weights,  // optional.
    SalientPointFrame* salient_frame) {
  CHECK(salient_frame);
  CHECK_EQ(frame_width_, feature_list.frame_width());
  CHECK_EQ(frame_height_, feature_list.frame_height());
  if (irls_weights) {
    CHECK_EQ(feature_list.feature_size(), irls_weights->size());
  }
  if (feature_list.feature_size() < 1) {
    return;
  }
  // Maximum weight across all features, taken from the override vector when
  // present, otherwise from the features' own irls_weight fields.
  float max_irls_weight = 0;
  if (irls_weights) {
    max_irls_weight =
        *std::max_element(irls_weights->begin(), irls_weights->end());
  } else {
    struct FeatureIRLSComparator {
      bool operator()(const RegionFlowFeature& lhs,
                      const RegionFlowFeature& rhs) const {
        return lhs.irls_weight() < rhs.irls_weight();
      }
    };
    max_irls_weight =
        std::max_element(feature_list.feature().begin(),
                         feature_list.feature().end(), FeatureIRLSComparator())
            ->irls_weight();
  }
  // Max weight is too small for meaningful mode finding, terminate.
  if (max_irls_weight < 1e-2f) {
    return;
  }
  // Discard small weights that just slow clustering down.
  // (cutoff is two orders of magnitude below the maximum weight).
  const float irls_cutoff = max_irls_weight * 1e-2f;
  int feat_idx = 0;
  // Create SalientLocation's from input feature_list.
  std::vector<SalientLocation> features;
  for (const auto& src_feature : feature_list.feature()) {
    const float weight =
        irls_weights ? (*irls_weights)[feat_idx] : src_feature.irls_weight();
    ++feat_idx;
    // Discard all features with small measure or zero weight from mode finding.
    if (weight < irls_cutoff) {
      continue;
    }
    features.push_back(SalientLocation(FeatureLocation(src_feature), weight));
  }
  DetermineSalientFrame(features, salient_frame);
}
void MotionSaliency::SaliencyFromPoints(const std::vector<Vector2_f>* points,
const std::vector<float>* weights,
SalientPointFrame* salient_frame) {
// TODO: Handle vectors of size zero.
CHECK(salient_frame);
CHECK_EQ(points->size(), weights->size());
float max_weight = *std::max_element(weights->begin(), weights->end());
// Max weight is too small for meaningful mode finding, terminate.
if (max_weight < 1e-2f) {
return;
}
// Discard small weights that just slow clustering down.
const float weight_cutoff = max_weight * 1e-2f;
// Create SalientLocation's from input points.
std::vector<SalientLocation> features;
for (int point_idx = 0; point_idx < points->size(); ++point_idx) {
const float weight = (*weights)[point_idx];
// Discard all features with small measure or zero weight from mode finding.
if (weight < weight_cutoff) {
continue;
}
features.push_back(SalientLocation((*points)[point_idx], weight));
}
DetermineSalientFrame(features, salient_frame);
}
// We only keep those salient points that have neighbors along the temporal
// dimension: a point in frame i is an inlier if at least
// selection_minimum_support points in the frames
// [i - selection_frame_radius, i + selection_frame_radius] (excluding i)
// lie within selection_support_distance of it. If
// rescale_to_median_saliency_weight is set, all surviving weights are scaled
// such that the median weight equals options_.saliency_weight().
void MotionSaliency::SelectSaliencyInliers(
    std::vector<SalientPointFrame*>* motion_saliency,
    bool rescale_to_median_saliency_weight) {
  float scale = 1.0;
  if (rescale_to_median_saliency_weight) {
    // Compute median saliency weight across all frames, to rescale saliency.
    std::vector<float> saliency_weights;
    for (int i = 0; i < motion_saliency->size(); ++i) {
      for (const auto& salient_point : (*motion_saliency)[i]->point()) {
        saliency_weights.push_back(salient_point.weight());
      }
    }
    // Nothing to filter in the frame chunk.
    if (saliency_weights.empty()) {
      return;
    }
    // nth_element partially sorts such that the median lands at median_iter.
    auto median_iter = saliency_weights.begin() + saliency_weights.size() / 2;
    std::nth_element(saliency_weights.begin(), median_iter,
                     saliency_weights.end());
    const float median_weight = *median_iter;
    if (median_weight > 0) {
      scale = options_.saliency_weight() / median_weight;
    }
  }
  // Inliers are collected per frame into a scratch list and swapped in at
  // the end, so the support test below always reads the unmodified input.
  SaliencyPointList inlier_saliency(motion_saliency->size());
  const float sq_support_distance = options_.selection_support_distance() *
                                    options_.selection_support_distance();
  // Test each point salient point for inlierness.
  for (int i = 0; i < motion_saliency->size(); ++i) {
    for (const auto& salient_point : (*motion_saliency)[i]->point()) {
      int support = 0;
      Vector2_f salient_location(salient_point.norm_point_x(),
                                 salient_point.norm_point_y());
      // Find supporting points (saliency points close enough to current one)
      // in adjacent frames. Linear Complexity.
      for (int j = std::max<int>(0, i - options_.selection_frame_radius()),
               end_j = std::min<int>(i + options_.selection_frame_radius(),
                                     motion_saliency->size() - 1);
           j <= end_j; ++j) {
        if (i == j) {
          continue;
        }
        for (const auto& compare_point : (*motion_saliency)[j]->point()) {
          Vector2_f compare_location(compare_point.norm_point_x(),
                                     compare_point.norm_point_y());
          // Compare squared distances to avoid a sqrt per pair.
          if ((salient_location - compare_location).Norm2() <=
              sq_support_distance) {
            ++support;
          }
        }
      }  // end neighbor frames iteration.
      if (support >= options_.selection_minimum_support()) {
        SalientPoint* scaled_point = inlier_saliency[i].add_point();
        scaled_point->CopyFrom(salient_point);
        scaled_point->set_weight(scaled_point->weight() * scale);
      }
    }  // end point traversal.
  }    // end frame traversal.
  for (int k = 0; k < motion_saliency->size(); ++k) {
    (*motion_saliency)[k]->Swap(&inlier_saliency[k]);
  }
}
// Smooths salient points in space and time with a bilateral-style Gaussian:
// each point in each frame is replaced by a weighted average over points in
// frames within time_radius, where the weight combines a temporal Gaussian
// (filtering_sigma_time), a spatial Gaussian (filtering_sigma_space) and the
// test point's own weight. Location, bounds, weight and ellipse parameters
// are all filtered; the final angle is wrapped back into [0, pi].
void MotionSaliency::FilterMotionSaliency(
    std::vector<SalientPointFrame*>* saliency_point_list) {
  CHECK(saliency_point_list != nullptr);
  const float sigma_time = options_.filtering_sigma_time();
  const float sigma_space = options_.filtering_sigma_space();
  // Temporal window of +/- 1.5 sigmas.
  const int time_radius = ceil(sigma_time * 1.5);
  const int time_diameter = 2 * time_radius + 1;
  // Create lookup table for weights.
  std::vector<float> time_weights(time_diameter);
  const float time_coeff = -0.5f / (sigma_time * sigma_time);
  for (int i = -time_radius, time_idx = 0; i <= time_radius; ++i, ++time_idx) {
    time_weights[time_idx] = std::exp(time_coeff * i * i);
  }
  // Ignore points further than 1.65 sigmas away (includes 90% of
  // distribution).
  const float space_cutoff = 1.65 * sigma_space;
  const float space_exp_scale = -0.5f / (sigma_space * sigma_space);
  // Copy saliency points into a padded buffer of
  // num_frames + 2 * time_radius frames, so the filter below can read a full
  // temporal window at both ends.
  const int num_frames = saliency_point_list->size();
  std::vector<SalientPointFrame> points(num_frames + 2 * time_radius);
  for (int k = 0; k < saliency_point_list->size(); ++k) {
    points[time_radius + k].CopyFrom(*(*saliency_point_list)[k]);
  }
  // Copy border: mirror the first / last time_radius frames into the padding
  // at the opposite ends (reverse iterators implement the mirroring).
  std::copy(points.rbegin() + time_radius, points.rbegin() + 2 * time_radius,
            points.end() - time_radius);
  std::copy(points.begin() + time_radius, points.begin() + 2 * time_radius,
            points.rend() - time_radius);
  // Apply filter. Results are written into saliency_point_list while the
  // padded copy 'points' provides the unmodified filter input.
  for (int i = time_radius; i < num_frames + time_radius; ++i) {
    const int frame_idx = i - time_radius;
    for (auto& sample_point :
         *(*saliency_point_list)[frame_idx]->mutable_point()) {
      Vector2_f point_sum(0, 0);
      // Sum for left, bottom, right, top tuple.
      // NOTE(review): relies on Vector4_f default-initializing to zero, as
      // Vector2_f/Vector3_f are explicitly zeroed here — confirm.
      Vector4_f bound_sum;
      Vector3_f ellipse_sum(0, 0, 0);  // Captures major, minor and angle.
      float weight_sum = 0;
      float filter_sum = 0;
      const float sample_angle = sample_point.angle();
      for (int k = i - time_radius, time_idx = 0; k <= i + time_radius;
           ++k, ++time_idx) {
        for (const auto& test_point : points[k].point()) {
          // Spatial distance in the normalized domain.
          const float diff = std::hypot(
              test_point.norm_point_y() - sample_point.norm_point_y(),
              test_point.norm_point_x() - sample_point.norm_point_x());
          if (diff > space_cutoff) {
            continue;
          }
          // Combined temporal x saliency x spatial weight.
          const float weight = time_weights[time_idx] * test_point.weight() *
                               std::exp(diff * diff * space_exp_scale);
          filter_sum += weight;
          point_sum +=
              Vector2_f(test_point.norm_point_x(), test_point.norm_point_y()) *
              weight;
          bound_sum += Vector4_f(test_point.left(), test_point.bottom(),
                                 test_point.right(), test_point.top()) *
                       weight;
          weight_sum += test_point.weight() * weight;
          // Ensure test_point and sample are less than pi / 2 apart.
          float test_angle = test_point.angle();
          if (fabs(test_angle - sample_angle) > M_PI / 2) {
            if (sample_angle > M_PI / 2) {
              test_angle += M_PI;
            } else {
              test_angle -= M_PI;
            }
          }
          ellipse_sum += Vector3_f(test_point.norm_major(),
                                   test_point.norm_minor(), test_angle) *
                         weight;
        }
      }
      // Normalize the accumulated sums by the total filter weight.
      if (filter_sum > 0) {
        const float inv_filter_sum = 1.0f / filter_sum;
        point_sum *= inv_filter_sum;
        bound_sum *= inv_filter_sum;
        weight_sum *= inv_filter_sum;
        ellipse_sum *= inv_filter_sum;
      }
      sample_point.set_norm_point_x(point_sum.x());
      sample_point.set_norm_point_y(point_sum.y());
      sample_point.set_left(bound_sum.x());
      sample_point.set_bottom(bound_sum.y());
      sample_point.set_right(bound_sum.z());
      sample_point.set_top(bound_sum.w());
      sample_point.set_weight(weight_sum);
      sample_point.set_norm_major(ellipse_sum.x());
      sample_point.set_norm_minor(ellipse_sum.y());
      sample_point.set_angle(ellipse_sum.z());
      // Wrap the filtered angle back into [0, pi] (the averaging above may
      // have pushed it outside due to the +/- pi adjustment).
      if (sample_point.angle() > M_PI) {
        sample_point.set_angle(sample_point.angle() - M_PI);
      }
      if (sample_point.angle() < 0) {
        sample_point.set_angle(sample_point.angle() + M_PI);
      }
    }
  }
}
// Collapses all salient points of each frame into a single mean point.
// The collapsed point carries the unweighted average location, the supplied
// normalized bounds (left, bottom, right, top) and weight 1. Frames without
// any salient points stay empty in the output.
void MotionSaliency::CollapseMotionSaliency(
    const SaliencyPointList& input_saliency, const Vector4_f& bounds,
    SaliencyPointList* output_saliency) {
  CHECK(output_saliency);
  output_saliency->clear();
  output_saliency->resize(input_saliency.size());

  for (int frame = 0; frame < input_saliency.size(); ++frame) {
    // Accumulate an unweighted sum of all point locations in this frame.
    float sum_x = 0.0f;
    float sum_y = 0.0f;
    float num_points = 0.0f;
    for (const auto& salient_point : input_saliency[frame].point()) {
      sum_x += salient_point.norm_point_x();
      sum_y += salient_point.norm_point_y();
      num_points += 1;
    }

    if (num_points <= 0) {
      continue;  // No salient points in this frame; leave output frame empty.
    }

    SalientPoint* collapsed = (*output_saliency)[frame].add_point();
    collapsed->set_norm_point_x(sum_x / num_points);
    collapsed->set_norm_point_y(sum_y / num_points);
    collapsed->set_left(bounds.x());
    collapsed->set_bottom(bounds.y());
    collapsed->set_right(bounds.z());
    collapsed->set_top(bounds.w());
    collapsed->set_weight(1.0f);
  }
}
namespace {

// Describes feature mode for a feature in a RegionFlowFeatureList stored
// at index feature_idx.
struct FeatureMode {
  Vector2_f location;     // Converged mode location in the image domain.
  float irls_weight;      // Weight of the originating feature.
  int feature_idx;        // Index of the originating feature; -1 flags the
                          // mode as already processed during averaging.
  int mode_bin;           // Grid bin the converged mode falls into.
};

// Determines mode for each feature in feature_view.
// Returns modes as list of pointers in mode_ptrs. Actual modes are stored
// binned into a grid of equal size as the passed FeatureGrid
// (bins of size grid_resolution x grid_resolution).
// Implements weighted mean shift: each feature's position is iteratively
// replaced by the weighted average of all features within the spatial
// Gaussian support (space_lut, indexed via space_scale), until the update
// moves less than 0.1% of the bandwidth or kMaxIter iterations are reached.
void DetermineFeatureModes(
    const FeatureFrame<MotionSaliency::SalientLocation>& features,
    float grid_resolution, const Vector2_i& grid_dims, float band_width,
    const FeatureGrid<MotionSaliency::SalientLocation>& feature_grid,
    const std::vector<std::vector<int>>& feature_taps,
    const std::vector<float>& space_lut, float space_scale,
    std::vector<std::list<FeatureMode>>* mode_grid,
    std::vector<FeatureMode*>* mode_ptrs) {
  CHECK(mode_grid);
  CHECK(mode_ptrs);
  const int num_features = features.size();
  mode_ptrs->reserve(num_features);
  const float grid_scale = 1.0f / grid_resolution;
  int feature_idx = 0;
  const int kMaxIter = 100;
  // Set convergence radius to 0.1% of bandwidth.
  const float sq_conv_radius = band_width * band_width * 1e-6f;
  for (const auto& feature_ptr : features) {
    Vector2_f center = feature_ptr->pt;
    int iter = 0;
    for (; iter < kMaxIter; ++iter) {
      // Locate the grid bin of the current center; feature_taps lists the
      // neighboring bins to visit for this location.
      const int bin_x = center.x() * grid_scale;
      const int bin_y = center.y() * grid_scale;
      const int grid_loc = bin_y * grid_dims.x() + bin_x;
      float sum_weight = 0;
      Vector2_f new_center;
      for (const auto& bin : feature_taps[grid_loc]) {
        for (const auto& test_feat_ptr : feature_grid[bin]) {
          const float dist = (test_feat_ptr->pt - center).Norm();
          // Spatial Gaussian (via LUT) times the feature's saliency weight.
          const float weight = space_lut[static_cast<int>(dist * space_scale)] *
                               test_feat_ptr->weight;
          sum_weight += weight;
          new_center += weight * test_feat_ptr->pt;
        }
      }
      if (sum_weight > 0) {
        new_center *= (1.0f / sum_weight);
        if ((center - new_center).Norm2() < sq_conv_radius) {
          // Converged: displacement below 0.1% of bandwidth.
          center = new_center;
          break;
        } else {
          center = new_center;
        }
      } else {
        LOG(WARNING) << "No features found in band_width radius, "
                     << "should not happen. ";
        break;
      }
    }
    // Bin the converged mode and record a pointer to it for later sorting.
    const int mode_bin_x = center.x() * grid_scale;
    const int mode_bin_y = center.y() * grid_scale;
    const int mode_grid_loc = mode_bin_y * grid_dims.x() + mode_bin_x;
    FeatureMode mode{center, feature_ptr->weight, feature_idx, mode_grid_loc};
    (*mode_grid)[mode_grid_loc].push_back(mode);
    // std::list guarantees the address of the appended element stays stable.
    FeatureMode* added_mode = &(*mode_grid)[mode_grid_loc].back();
    (*mode_ptrs).push_back(added_mode);
    ++feature_idx;
  }
}

}  // namespace.
// Locates modes in the set of SalientLocation's via weighted mean shift
// (see DetermineFeatureModes), then merges per-feature modes that converged
// within band_width of each other, computing for each merged mode its
// weighted mean location, total assignment weight and the covariance-derived
// ellipse (major/minor axis and angle). Only modes whose total weight reaches
// options_.min_irls_mode_weight() are emitted; results are sorted by
// decreasing assignment weight. locations is not mutated logically (only
// pointed to internally).
void MotionSaliency::SalientModeFinding(std::vector<SalientLocation>* locations,
                                        std::vector<SalientMode>* modes) {
  CHECK(modes);
  CHECK(locations);
  if (locations->empty()) {
    return;
  }
  // Scale band_width to image domain.
  const float band_width =
      hypot(frame_width_, frame_height_) * options_.mode_band_width();
  // Select all salient locations with non-zero weight.
  FeatureFrame<SalientLocation> salient_features;
  salient_features.reserve(locations->size());
  for (auto& loc : *locations) {
    if (loc.weight > 1e-6) {
      salient_features.push_back(&loc);
    }
  }
  const int num_features = salient_features.size();
  if (num_features == 0) {
    return;
  }
  // Build feature grid according to bandwidth.
  std::vector<FeatureGrid<SalientLocation>> feature_grids;
  std::vector<std::vector<int>> feature_taps;
  // Guarantee at least 1.5 sigmas in each direction are captured with
  // tap 3 filtering (86 % of the data).
  const float grid_resolution = 1.5f * band_width;
  Vector2_i grid_dims;
  BuildFeatureGrid(
      frame_width_, frame_height_, grid_resolution, {salient_features},
      [](const SalientLocation& l) -> Vector2_f { return l.pt; }, &feature_taps,
      nullptr, &grid_dims, &feature_grids);
  // Just one frame input, expect one grid as output.
  CHECK_EQ(1, feature_grids.size());
  const auto& feature_grid = feature_grids[0];
  // Setup Gaussian LUT for smoothing in space, using 2^10 discretization bins.
  const int lut_bins = 1 << 10;
  std::vector<float> space_lut(lut_bins);
  // Using 3 tap smoothing, max distance is 2 bin diagonals.
  // We use maximum of 2 * sqrt(2) * bin_radius plus 1% room in case maximum
  // value is attained.
  const float max_space_diff = sqrt(2.0) * 2.f * grid_resolution * 1.01f;
  const float space_bin_size = max_space_diff / lut_bins;
  const float space_scale = 1.0f / space_bin_size;
  const float space_coeff = -0.5f / (band_width * band_width);
  for (int i = 0; i < lut_bins; ++i) {
    const float value = i * space_bin_size;
    space_lut[i] = std::exp(value * value * space_coeff);
  }
  // Store modes for each grid bin (to be averaged later).
  std::vector<std::list<FeatureMode>> mode_grid(grid_dims.x() * grid_dims.y());
  std::vector<FeatureMode*> mode_ptrs;
  DetermineFeatureModes(salient_features, grid_resolution, grid_dims,
                        band_width, feature_grid, feature_taps, space_lut,
                        space_scale, &mode_grid, &mode_ptrs);
  // Read out modes, ordered by decreasing weight.
  struct FeatureModeComparator {
    bool operator()(const FeatureMode* mode_lhs,
                    const FeatureMode* mode_rhs) const {
      return mode_lhs->irls_weight > mode_rhs->irls_weight;
    }
  };
  // Sort pointers, to keep order immutable during flagging operations.
  std::sort(mode_ptrs.begin(), mode_ptrs.end(), FeatureModeComparator());
  for (int m = 0; m < mode_ptrs.size(); ++m) {
    // We mark a mode as processed by assigning -1 to its index.
    if (mode_ptrs[m]->feature_idx < 0) {
      continue;
    }
    FeatureMode* mode = mode_ptrs[m];
    // Average modes within band_width based on irls_weight * spatial weight.
    // Accumulate first and second order moments of the contributing feature
    // locations (feat_*) for the covariance / ellipse computation below.
    double sum_weight = mode->irls_weight;
    double mode_x = sum_weight * mode->location.x();
    double mode_y = sum_weight * mode->location.y();
    const Vector2_f& feat_loc = salient_features[mode->feature_idx]->pt;
    double feat_x = sum_weight * feat_loc.x();
    double feat_y = sum_weight * feat_loc.y();
    double feat_xx = sum_weight * feat_loc.x() * feat_loc.x();
    double feat_xy = sum_weight * feat_loc.x() * feat_loc.y();
    double feat_yy = sum_weight * feat_loc.y() * feat_loc.y();
    mode->feature_idx = -1;  // Flag as processed, does not change order
                             // of traversal.
    for (const auto& bin : feature_taps[mode->mode_bin]) {
      for (auto& test_mode : mode_grid[bin]) {
        if (test_mode.feature_idx >= 0) {
          const float dist = (test_mode.location - mode->location).Norm();
          if (dist <= band_width) {
            const Vector2_f test_loc =
                salient_features[test_mode.feature_idx]->pt;
            const float weight =
                space_lut[static_cast<int>(dist * space_scale)] *
                test_mode.irls_weight;
            sum_weight += weight;
            mode_x += weight * test_mode.location.x();
            mode_y += weight * test_mode.location.y();
            const float test_loc_x_w = weight * test_loc.x();
            const float test_loc_y_w = weight * test_loc.y();
            feat_x += test_loc_x_w;
            feat_y += test_loc_y_w;
            feat_xx += test_loc_x_w * test_loc.x();
            feat_xy += test_loc_x_w * test_loc.y();
            feat_yy += test_loc_y_w * test_loc.y();
            // Flag as processed, does not change order of traversal.
            test_mode.feature_idx = -1;
          }
        }
      }
    }
    if (sum_weight >= options_.min_irls_mode_weight()) {
      double inv_sum_weight = 1.0f / sum_weight;
      mode_x *= inv_sum_weight;
      mode_y *= inv_sum_weight;
      feat_x *= inv_sum_weight;
      feat_y *= inv_sum_weight;
      feat_xx *= inv_sum_weight;
      feat_xy *= inv_sum_weight;
      feat_yy *= inv_sum_weight;
      // Covariance matrix of feature locations around the mode center:
      // a, d are the diagonal entries, bc the symmetric off-diagonal one.
      const float a = feat_xx - 2.0 * feat_x * mode_x + mode_x * mode_x;
      const float bc =
          feat_xy - feat_x * mode_y - feat_y * mode_x + mode_x * mode_y;
      const float d = feat_yy - 2.0 * feat_y * mode_y + mode_y * mode_y;
      Vector2_f axis_magnitude;
      float angle;
      if (!EllipseFromCovariance(a, bc, d, &axis_magnitude, &angle)) {
        // Degenerate covariance: fall back to a unit circle.
        angle = 0;
        axis_magnitude = Vector2_f(1, 1);
      } else {
        if (angle < 0) {
          angle += M_PI;
        }
        CHECK_GE(angle, 0);
        CHECK_LE(angle, M_PI + 1e-3);
      }
      SalientMode irls_mode;
      irls_mode.location = Vector2_f(mode_x, mode_y);
      irls_mode.assignment_weight = sum_weight;
      irls_mode.axis_magnitude = axis_magnitude;
      irls_mode.angle = angle;
      modes->push_back(irls_mode);
    }
  }
  // Sort modes by decreasing weight.
  struct ModeWeightCompare {
    bool operator()(const SalientMode& lhs, const SalientMode& rhs) const {
      return lhs.assignment_weight > rhs.assignment_weight;
    }
  };
  std::sort(modes->begin(), modes->end(), ModeWeightCompare());
}
// Determines the salient frame for a list of SalientLocations by performing
// mode finding and scales each point based on frame size.
void MotionSaliency::DetermineSalientFrame(
std::vector<SalientLocation> locations, SalientPointFrame* salient_frame) {
CHECK(salient_frame);
std::vector<SalientMode> modes;
{
MEASURE_TIME << "Mode finding";
SalientModeFinding(&locations, &modes);
}
const float denom_x = 1.0f / frame_width_;
const float denom_y = 1.0f / frame_height_;
// Convert to salient points.
for (int mode_idx = 0,
mode_sz = std::min<int>(modes.size(), options_.num_top_irls_modes());
mode_idx < mode_sz; ++mode_idx) {
SalientPoint* pt = salient_frame->add_point();
pt->set_norm_point_x(modes[mode_idx].location.x());
pt->set_norm_point_y(modes[mode_idx].location.y());
pt->set_left(options_.bound_left());
pt->set_bottom(options_.bound_bottom());
pt->set_right(options_.bound_right());
pt->set_top(options_.bound_top());
pt->set_norm_major(modes[mode_idx].axis_magnitude.x());
pt->set_norm_minor(modes[mode_idx].axis_magnitude.y());
pt->set_angle(modes[mode_idx].angle);
pt->set_weight(modes[mode_idx].assignment_weight *
options_.saliency_weight());
ScaleSalientPoint(denom_x, denom_y, pt);
}
}
// Maps each feature's irls weight to a foreground score in [0, 1] (see the
// header for the full contract). Features flagged as outliers
// (irls_weight == 0) map to 0; all other scores are clamped to at least
// kEpsilon so they are never confused with outliers. If camera_motion is
// given, the threshold is scaled by its inlier coverage, tightening the
// foreground test on unstable frames.
void ForegroundWeightsFromFeatures(const RegionFlowFeatureList& feature_list,
                                   float foreground_threshold,
                                   float foreground_gamma,
                                   const CameraMotion* camera_motion,
                                   std::vector<float>* weights) {
  CHECK(weights != nullptr);
  weights->clear();
  constexpr float kEpsilon = 1e-4f;
  CHECK_GT(foreground_threshold, 0.0f);
  if (camera_motion) {
    foreground_threshold *=
        std::max(kEpsilon, InlierCoverage(*camera_motion, false));
  }
  const float weight_denom = 1.0f / foreground_threshold;
  // Map weights to foreground measure and determine minimum irls weight.
  for (const auto& feature : feature_list.feature()) {
    // Skip marked outliers.
    if (feature.irls_weight() == 0) {
      weights->push_back(0.0f);
      continue;
    }
    // Maps an irls_weight of magnitude weight_denom (from above) to zero,
    // with values below weight_denom assigned linearly mapped (zero is mapped
    // to 1). Avoid mapping to zero as it used to mark outliers.
    const float foreground_measure =
        std::max(0.0f, 1.0f - feature.irls_weight() * weight_denom);
    // Skip the pow() call when gamma is effectively 1.
    if (std::abs(foreground_gamma - 1.0f) < 1e-3f) {
      weights->push_back(std::max(kEpsilon, foreground_measure));
    } else {
      weights->push_back(
          std::max(kEpsilon, std::pow(foreground_measure, foreground_gamma)));
    }
  }
  CHECK_EQ(feature_list.feature_size(), weights->size());
}
} // namespace mediapipe

View File

@ -0,0 +1,150 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Computes MotionSaliency points that can be used for stabilization and
// retargeting.
#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
#define MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_
#include <utility>
#include <vector>
#include "mediapipe/framework/port/vector.h"
#include "mediapipe/util/tracking/motion_saliency.pb.h"
#include "mediapipe/util/tracking/region_flow.h"
namespace mediapipe {
class RegionFlowFeatureList;
class RegionFlowFrame;
class SalientPointFrame;
} // namespace mediapipe
namespace mediapipe {
// Computes salient points (modes of feature saliency weight) for frames of
// size frame_width x frame_height. See motion_saliency.proto for the tuning
// options.
class MotionSaliency {
 public:
  MotionSaliency(const MotionSaliencyOptions& options, int frame_width,
                 int frame_height);
  ~MotionSaliency();

  // Finds modes in the RegionFlowFeatureList (clusters for high IRLS weight,
  // per default features agreeing with the background motion).
  // Optionally, per feature irls weights can be supplied instead of using the
  // features weight to adapt modes that will be found, e.g. see
  // ForegroundWeightsFromFeatures below.
  void SaliencyFromFeatures(const RegionFlowFeatureList& feature_list,
                            std::vector<float>* irls_weights,  // optional.
                            SalientPointFrame* salient_frame);

  // Finds saliency points (modes) from a list of points and their respective
  // weights, outputs a SalientPointFrame.
  void SaliencyFromPoints(const std::vector<Vector2_f>* points,
                          const std::vector<float>* weights,
                          SalientPointFrame* salient_frame);

  // Selects saliency inliers, by searching for close-by salient points
  // (within fractional MotionSaliencyOptions::selection_support_distance)
  // across adjacent frames (considered are
  // #MotionSaliencyOptions::selection_frame_radius before and after the
  // current frame).
  // If at least #MotionSaliencyOptions::selection_minimum_support
  // supporting points are found the tested salient point is kept, otherwise
  // discarded.
  // If desired performs rescaling, such that the median salient point weight
  // equals MotionSaliencyOptions::saliency_weight().
  void SelectSaliencyInliers(std::vector<SalientPointFrame*>* motion_saliency,
                             bool rescale_to_median_saliency_weight);

  // Averages all salient points (unweighted average) per frame. The resulting
  // mean salient point is assigned weight one, and the specified normalized
  // bounds (as tuple left, bottom, right, top).
  void CollapseMotionSaliency(const SaliencyPointList& input_saliency,
                              const Vector4_f& bounds,
                              SaliencyPointList* output_saliency);

  // Smooths saliency in space and time (Gaussian weighting controlled by
  // MotionSaliencyOptions::filtering_sigma_space / filtering_sigma_time).
  void FilterMotionSaliency(
      std::vector<SalientPointFrame*>* saliency_point_list);

  // Aggregates location in image domain and salient weight.
  struct SalientLocation {
    SalientLocation() {}
    SalientLocation(const Vector2_f& _pt, float _weight)
        : pt(_pt), weight(_weight) {}
    Vector2_f pt;       // Location in the image domain (pixels).
    float weight = 0;   // Saliency weight; zero-weight locations are ignored.
  };

 private:
  // Result of mode finding in a set of SalientLocation's
  // (using mean shift with bilateral weights, i.e. weight * spatial
  // gaussian weighting).
  // Only modes for which the sum of total saliency weight is
  // above min_irls_mode_weight are returned.
  // Modes are in the image domain as 2D points, with the sum of their
  // assignment weights and spatial extent along major and minor axis.
  // Modes are sorted w.r.t. their assignment irls weights (from highest to
  // lowest).
  struct SalientMode {
    Vector2_f location;
    // Total sum of irls weights assigned to this mode.
    float assignment_weight = 0;
    // Magnitude of major and minor axis stored in x and y, respectively.
    Vector2_f axis_magnitude;
    // Angle in radians w.r.t. x-axis.
    float angle = 0;
  };

  // Locates modes in *locations via mean shift.
  // Note: input vector locations is not mutated by function.
  void SalientModeFinding(std::vector<SalientLocation>* locations,
                          std::vector<SalientMode>* modes);

  // Determines the salient frame for a list of SalientLocations by performing
  // mode finding and scaling each point based on frame size.
  void DetermineSalientFrame(std::vector<SalientLocation> locations,
                             SalientPointFrame* salient_frame);

  MotionSaliencyOptions options_;
  int frame_width_;
  int frame_height_;
};
// Returns foregroundness weights in [0, 1] for each feature, by mapping irls
// weight to foreground score in [0, 1].
// In particular, the foreground threshold indicates the *inverse* registration
// error (i.e. the irls weight) that is deemed a complete inlier.
// Weights in the interval [0, foreground_threshold] (corresponding to
// pixel errors in the interval [1 / foreground_threshold, inf])
// are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero
// with weights below the threshold being assigned values > 0.
// Therefore, larger values will increase amount of detected foreground
// as well as noise.
// In addition, foreground_gamma's < 1 can be used to increase the resolution
// of small foreground motions (irls weight close to the foreground_threshold)
// at the expense of larger foreground motions (irls weight close to zero).
// If optional parameter camera_motion is specified, the passed foreground
// threshold is scaled by the InlierCoverage of the camera_motion
// (which is in [0, 1]). That is for unstable frames with small coverage,
// the threshold is tighter and fewer features are considered foreground.
void ForegroundWeightsFromFeatures(
    const RegionFlowFeatureList& feature_list,
    float foreground_threshold,  // 0.5 is a good default value.
    float foreground_gamma,      // use 1.0 for default.
    const CameraMotion* camera_motion,  // optional, can be nullptr.
    std::vector<float>* weights);
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_

View File

@ -0,0 +1,71 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
// Next tag: 17
message MotionSaliencyOptions {
  // Standard normalized bounds and weights used to initialize salient points.
  // See region_flow.proto for details.
  optional float bound_left = 1 [default = 0.3];
  optional float bound_bottom = 2 [default = 0.3];
  optional float bound_right = 15 [default = 0.3];
  optional float bound_top = 16 [default = 0.3];
  optional float saliency_weight = 3 [default = 20];

  // If set, scales saliency_weight by flow magnitude.
  optional bool scale_weight_by_flow_magnitude = 8 [default = false];

  // Minimum number of features within a region to be considered salient.
  // Only applicable for functions accepting RegionFlowFrames.
  optional int32 min_features = 4 [default = 5];

  // If set, only considers regions flagged as foreground.
  optional bool use_only_foreground_regions = 9 [default = false];

  // Specifies roughly number of foreground features mapped to one mode,
  // for mode to be considered salient.
  optional float min_irls_mode_weight = 10 [default = 10];

  // Only returns the top N irls modes.
  optional int32 num_top_irls_modes = 11 [default = 3];

  // Mode finding is performed with a fraction radius of 10% of frame
  // diameter by default.
  optional float mode_band_width = 12 [default = 0.10];

  // We select salient points along the temporal dimension only, keeping those
  // that have sufficient support (in form of neighboring salient points). For
  // every salient point in frame n, all points in frames
  // [n - selection_frame_radius, n + selection_frame_radius] are tested,
  // whether they support the current test point.
  optional int32 selection_frame_radius = 5 [default = 5];

  // Fractional distance to be considered a supporting salient point for a test
  // point.
  optional float selection_support_distance = 6 [default = 0.2];

  // Minimum number of supporting salient points that need to be present in
  // order for a point to be considered an inlier.
  optional int32 selection_minimum_support = 7 [default = 4];

  // Sigma in space (normalized domain). Used by FilterMotionSaliency.
  optional float filtering_sigma_space = 13 [default = 0.05];

  // Sigma in time (in frames). Used by FilterMotionSaliency.
  optional float filtering_sigma_time = 14 [default = 5];
}

View File

@ -0,0 +1,36 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/parallel_invoker.h"
// Choose between ThreadPool, OpenMP and serial execution.
// Note only one parallel_using_* directive can be active.
// Parallelization backend to use at runtime (a PARALLEL_INVOKER_MODE value).
// NOTE(review): initialized to PARALLEL_INVOKER_MAX_VALUE, which is not a
// concrete backend — presumably callers set this before invoking ParallelFor;
// confirm intended default.
int flags_parallel_invoker_mode = PARALLEL_INVOKER_MAX_VALUE;

// Maximum number of worker threads used by the shared invoker thread pool.
int flags_parallel_invoker_max_threads = 4;
namespace mediapipe {
#if defined(PARALLEL_INVOKER_ACTIVE)
// Returns the process-wide thread pool shared by all parallel invocations.
// The pool is created (and its workers started) on first call with
// flags_parallel_invoker_max_threads threads, and is never destroyed.
ThreadPool* ParallelInvokerThreadPool() {
  static ThreadPool* const pool = [] {
    auto* created =
        new ThreadPool("ParallelInvoker", flags_parallel_invoker_max_threads);
    created->StartWorkers();
    return created;
  }();
  return pool;
}
#endif
} // namespace mediapipe

View File

@ -0,0 +1,507 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Parallel for loop execution.
// For details adapt parallel_using_* flags defined in parallel_invoker.cc.
// Usage example (for 1D):
// Define Functor or lambda function that implements:
// void operator()(const BlockedRange & range) const;
// (in addition functor needs to be copyable).
// Execute a for loop in parallel from 0 to N via:
// ParallelFor(0, // start_index
// num_frames, // end_index, exclusive
// 1 // number of elements processed per iteration
// [](const BlockedRange& range) {
// // Process per-thread sub-range
// for (int i = range.begin(); i < range.end(); ++i) {
// // Process i'th item.
// }
// }
// Specific implementation to copy a vector of images in parallel.
// class CopyInvoker {
// public:
// CopyInvoker(const vector<cv::Mat>& inputs,
// vector<cv::Mat*>* outputs)
// : inputs_(inputs), outputs_(outputs) {
// }
// CopyInvoker(const CopyInvoker& rhs)
// : inputs_(rhs.inputs_), outputs_(rhs.outputs) {
// }
// void operator()(const BlockedRange& range) {
// for (int frame = range.begin(); frame < range.end(); ++frame) {
// inputs_[frame].copyTo(*(*outputs_)[frame]);
// }
// }
// private:
// const vector<cv::Mat>& inputs_;
// vector<cv::Mat*>* outputs_;
// }
// vector<cv::Mat> inputs;
// vector<cv::Mat*> outputs;
// ParallelFor(0, num_frames, 1, CopyInvoker(inputs, &outputs));
//
// OR (with lambdas):
// ParallelFor(0, num_frames, 1,
// [&inputs, &outputs](const BlockedRange& range) {
// for (int frame = range.begin(); frame < range.end(); ++frame) {
// inputs[frame].copyTo(*(outputs)[frame]);
// }
// }
#ifndef MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_
#define MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_
#include <stddef.h>
#include <memory>
#include "absl/synchronization/mutex.h"
#include "mediapipe/framework/port/logging.h"
#ifdef PARALLEL_INVOKER_ACTIVE
#include "mediapipe/framework/port/threadpool.h"
#ifdef __APPLE__
#include <dispatch/dispatch.h>
#include <stdatomic.h>
#endif
#endif // PARALLEL_INVOKER_ACTIVE
// Specifies parallelization implementation to use.
// The selected value is validated and, if unsupported on the current
// platform/build, coerced to a supported mode by CheckAndSetInvokerOptions().
enum PARALLEL_INVOKER_MODE {
  PARALLEL_INVOKER_NONE = 0,         // Uses single threaded execution
  PARALLEL_INVOKER_THREAD_POOL = 1,  // Uses //thread/threadpool
  PARALLEL_INVOKER_OPENMP = 2,       // Uses OpenMP (requires compiler support)
  PARALLEL_INVOKER_GCD = 3,          // Uses GCD (Apple)
  PARALLEL_INVOKER_MAX_VALUE = 4,    // Increase when adding more modes
};
// Requested invoker mode (one of PARALLEL_INVOKER_MODE) and the maximum
// number of worker threads; defined out-of-line in the accompanying .cc.
extern int flags_parallel_invoker_mode;
extern int flags_parallel_invoker_max_threads;
// Note flag: Parallel processing only activated if
// PARALLEL_INVOKER_ACTIVE is defined.
namespace mediapipe {
// Describes the half-open range [begin, end) together with the grain size
// used to partition it into equal blocks (the final block may be smaller
// than grain_size).
class BlockedRange {
 public:
  BlockedRange(int begin, int end, int grain_size) {
    begin_ = begin;
    end_ = end;
    grain_size_ = grain_size;
  }

  // Accessors for the range bounds and partition granularity.
  int begin() const { return begin_; }
  int end() const { return end_; }
  int grain_size() const { return grain_size_; }

 private:
  int begin_ = 0;
  int end_ = 0;
  int grain_size_ = 0;
};
// Partitions the range row_range x col_range into equal
// blocks of size row_range.grain_size() x col_range.grain_size() each
// (except last column and row might be of size less than grain_size in one
// or both of their dimensions).
class BlockedRange2D {
 public:
  // Stores copies of both ranges; safe to keep past the ctor arguments'
  // lifetimes.
  BlockedRange2D(const BlockedRange& rows, const BlockedRange& cols)
      : rows_(rows), cols_(cols) {}

  // Per-dimension sub-ranges.
  const BlockedRange& rows() const { return rows_; }
  const BlockedRange& cols() const { return cols_; }

 private:
  BlockedRange rows_;
  BlockedRange cols_;
};
#ifdef PARALLEL_INVOKER_ACTIVE
// Singleton ThreadPool shared by all ParallelFor / ParallelFor2D calls.
ThreadPool* ParallelInvokerThreadPool();

#ifdef __APPLE__
// Enable to allow GCD as an option beside ThreadPool.
#define USE_PARALLEL_INVOKER_GCD 1
// Non-zero (i.e. in DEBUG builds): GCD contexts count dispatched tasks so
// callers can CHECK that every scheduled block actually ran.
#define CHECK_GCD_PARALLEL_WORK_COUNT DEBUG
// Shared state handed (as an untyped context pointer) to ParallelForGCDTask.
// Holds a private copy of the user's invoker plus the full iteration range;
// each dispatched task derives its own sub-range from it.
template <class Invoker>
class ParallelInvokerGCDContext {
 public:
  ParallelInvokerGCDContext(const Invoker& invoker, const BlockedRange& rows)
      : local_invoker_(invoker), rows_(rows) {
#if CHECK_GCD_PARALLEL_WORK_COUNT
    count_ = 0;
#endif
  }

  // Returns the invoker. In debug builds each retrieval is counted, which
  // implicitly tracks the number of launched tasks (one retrieval per task).
  const Invoker& invoker() {
#if CHECK_GCD_PARALLEL_WORK_COUNT
    // Implicitly tracking the # of launched tasks at invoker retrieval.
    atomic_fetch_add(&count_, 1);
#endif
    return local_invoker_;
  }

  const BlockedRange& rows() const { return rows_; }

#if CHECK_GCD_PARALLEL_WORK_COUNT
  // Number of times invoker() was called, i.e. tasks executed so far.
  const int count() { return atomic_load(&count_); }
#endif

 private:
  Invoker local_invoker_;
  // NOTE(review): stores a reference; the referenced BlockedRange must
  // outlive this context (ParallelFor keeps it on the stack across the
  // blocking dispatch_apply_f call).
  const BlockedRange& rows_;
#if CHECK_GCD_PARALLEL_WORK_COUNT
  _Atomic(int32_t) count_;
#endif
};
// 2D variant of ParallelInvokerGCDContext: additionally carries the column
// range, which is forwarded unpartitioned to every row task.
template <class Invoker>
class ParallelInvokerGCDContext2D : public ParallelInvokerGCDContext<Invoker> {
 public:
  ParallelInvokerGCDContext2D(const Invoker& invoker, const BlockedRange& rows,
                              const BlockedRange& cols)
      : ParallelInvokerGCDContext<Invoker>(invoker, rows), cols_(cols) {}

  const BlockedRange& cols() const { return cols_; }

 private:
  BlockedRange cols_;
};
template <class Invoker>
static void ParallelForGCDTask(void* context, size_t index) {
ParallelInvokerGCDContext<Invoker>* invoker_context =
static_cast<ParallelInvokerGCDContext<Invoker>*>(context);
const BlockedRange& all_tasks = invoker_context->rows();
int start = all_tasks.begin() + index * all_tasks.grain_size();
int end = std::min(all_tasks.end(), start + all_tasks.grain_size());
BlockedRange this_task(start, end, all_tasks.grain_size());
const Invoker& invoker = invoker_context->invoker();
invoker(this_task);
}
template <class Invoker>
static void ParallelForGCDTask2D(void* context, size_t index) {
ParallelInvokerGCDContext2D<Invoker>* invoker_context =
static_cast<ParallelInvokerGCDContext2D<Invoker>*>(context);
// Partitioning across rows.
const BlockedRange& all_tasks = invoker_context->rows();
int start = all_tasks.begin() + index * all_tasks.grain_size();
int end = std::min(all_tasks.end(), start + all_tasks.grain_size());
BlockedRange this_task(start, end, all_tasks.grain_size());
const Invoker& invoker = invoker_context->invoker();
invoker(BlockedRange2D(this_task, invoker_context->cols()));
}
#endif // __APPLE__
#endif // PARALLEL_INVOKER_ACTIVE
// Simple wrapper for compatibility with below ParallelFor function.
template <class Invoker>
void SerialFor(size_t start, size_t end, size_t grain_size,
const Invoker& invoker) {
invoker(BlockedRange(start, end, 1));
}
// Validates flags_parallel_invoker_mode against what the current
// platform/build actually supports and coerces it to a supported mode if
// necessary. Called at the top of ParallelFor / ParallelFor2D before
// dispatching.
inline void CheckAndSetInvokerOptions() {
#if defined(PARALLEL_INVOKER_ACTIVE)
#if defined(__ANDROID__)
  // If unsupported option is selected, force usage of OpenMP if detected, and
  // ThreadPool otherwise.
  if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE &&
      flags_parallel_invoker_mode != PARALLEL_INVOKER_THREAD_POOL &&
      flags_parallel_invoker_mode != PARALLEL_INVOKER_OPENMP) {
#if defined(_OPENMP)
    LOG(WARNING) << "Unsupported invoker mode selected on Android. "
                 << "OpenMP linkage detected, so falling back to OpenMP";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_OPENMP;
#else   // _OPENMP
    // Fallback mode for active parallel invoker without OpenMP is ThreadPool.
    LOG(WARNING) << "Unsupported invoker mode selected on Android. "
                 << "Falling back to ThreadPool";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
#endif  // _OPENMP
  }
#endif  // __ANDROID__

#if defined(__APPLE__) || defined(__EMSCRIPTEN__)
  // Force usage of ThreadPool if unsupported option is selected.
  // (OpenMP is not supported on iOS, due to missing clang support).
  if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE &&
#if defined(USE_PARALLEL_INVOKER_GCD)
      flags_parallel_invoker_mode != PARALLEL_INVOKER_GCD &&
#endif  // USE_PARALLEL_INVOKER_GCD
      flags_parallel_invoker_mode != PARALLEL_INVOKER_THREAD_POOL) {
    LOG(WARNING) << "Unsupported invoker mode selected on iOS. "
                 << "Falling back to ThreadPool mode";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
  }
#endif  // __APPLE__ || __EMSCRIPTEN__

  // Desktop (non-Apple, non-Emscripten, non-Android): ThreadPool is forced
  // unconditionally, regardless of the requested mode.
#if !defined(__APPLE__) && !defined(__EMSCRIPTEN__) && !defined(__ANDROID__)
  flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
#endif  // !__APPLE__ && !__EMSCRIPTEN__ && !__ANDROID__

  // If OpenMP is requested, make sure we can actually use it, and fall back
  // to ThreadPool if not.
  if (flags_parallel_invoker_mode == PARALLEL_INVOKER_OPENMP) {
#if !defined(_OPENMP)
    LOG(ERROR) << "OpenMP invoker mode selected but not compiling with OpenMP "
               << "enabled. Falling back to ThreadPool";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
#endif  // _OPENMP
  }
#else   // PARALLEL_INVOKER_ACTIVE
  // Parallelism compiled out entirely: everything runs single threaded.
  if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE) {
    LOG(ERROR) << "Parallel execution requested but PARALLEL_INVOKER_ACTIVE "
               << "compile flag is not set. Falling back to single threaded "
               << "execution.";
    flags_parallel_invoker_mode = PARALLEL_INVOKER_NONE;
  }
#endif  // PARALLEL_INVOKER_ACTIVE

  CHECK_LT(flags_parallel_invoker_mode, PARALLEL_INVOKER_MAX_VALUE)
      << "Invalid invoker mode specified.";
  CHECK_GE(flags_parallel_invoker_mode, 0) << "Invalid invoker mode specified.";
}
// Performs parallel iteration from [start to end), scheduling grain_size
// iterations per thread. For each iteration
// invoker(BlockedRange(thread_local_start, thread_local_end))
// is called. Each thread is given its local copy of invoker, i.e.
// invoker needs to have copy constructor defined.
// Blocks until all iterations have completed. The effective execution mode
// is flags_parallel_invoker_mode after CheckAndSetInvokerOptions() has
// sanitized it for this platform.
template <class Invoker>
void ParallelFor(size_t start, size_t end, size_t grain_size,
                 const Invoker& invoker) {
#ifdef PARALLEL_INVOKER_ACTIVE
  CheckAndSetInvokerOptions();
  switch (flags_parallel_invoker_mode) {
#if defined(__APPLE__)
    case PARALLEL_INVOKER_GCD: {
      // Number of grain_size-chunks needed to cover [start, end).
      int iterations_remain = (end - start + grain_size - 1) / grain_size;
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange(start, std::min(end, start + grain_size), 1));
      } else {
        BlockedRange all_tasks(start, end, grain_size);
        ParallelInvokerGCDContext<Invoker> context(invoker, all_tasks);
        dispatch_queue_t concurrent_queue =
            dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
        // dispatch_apply_f blocks until all chunks have been processed.
        dispatch_apply_f(iterations_remain, concurrent_queue, &context,
                         ParallelForGCDTask<Invoker>);
#if CHECK_GCD_PARALLEL_WORK_COUNT
        // Debug builds: verify exactly one task ran per scheduled chunk.
        CHECK_EQ(iterations_remain, context.count());
#endif
      }
      break;
    }
#endif  // __APPLE__

    case PARALLEL_INVOKER_THREAD_POOL: {
      int iterations_remain = (end - start + grain_size - 1) / grain_size;
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange(start, std::min(end, start + grain_size), 1));
        break;
      }

      // Completion is tracked via a mutex-guarded countdown plus condvar.
      struct {
        absl::Mutex mutex;
        absl::CondVar completed;
        int iterations_remain GUARDED_BY(mutex);
      } loop;

      {
        absl::MutexLock lock(&loop.mutex);
        loop.iterations_remain = iterations_remain;
      }

      for (int x = start; x < end; x += grain_size) {
        // Each task receives its own copy of invoker (captured by value).
        auto loop_func = [x, end, grain_size, &loop, invoker]() {
          // Execute invoker.
          invoker(BlockedRange(x, std::min(end, x + grain_size), 1));
          // Decrement counter.
          absl::MutexLock lock(&loop.mutex);
          --loop.iterations_remain;
          if (loop.iterations_remain == 0) {
            loop.completed.SignalAll();
          }
        };

        // Attempt to run in parallel, if busy run serial to avoid deadlocking.
        // This can happen during nested invocation of ParallelFor, as if the
        // loop iteration itself is calling ParallelFor we might deadlock if
        // we can not guarantee for the iteration to be scheduled.
        // NOTE(review): the call below schedules unconditionally; the serial
        // fallback described above is not visible here -- presumably
        // Schedule() handles the busy case internally. Confirm.
        ParallelInvokerThreadPool()->Schedule(loop_func);
      }

      // Wait on termination of all iterations.
      loop.mutex.Lock();
      while (loop.iterations_remain > 0) {
        loop.completed.Wait(&loop.mutex);
      }
      loop.mutex.Unlock();
      break;
    }

    case PARALLEL_INVOKER_OPENMP: {
      // Use thread-local copy of invoker. Note: this path iterates one
      // element per BlockedRange; grain_size is not used for chunking here.
      Invoker local_invoker(invoker);
#pragma omp parallel for firstprivate(local_invoker) \
    num_threads(flags_parallel_invoker_max_threads)
      for (int x = start; x < end; ++x) {
        local_invoker(BlockedRange(x, x + 1, 1));
      }
      break;
    }

    case PARALLEL_INVOKER_NONE: {
      SerialFor(start, end, grain_size, invoker);
      break;
    }

    case PARALLEL_INVOKER_MAX_VALUE: {
      LOG(FATAL) << "Impossible.";
      break;
    }
  }
#else
  SerialFor(start, end, grain_size, invoker);
#endif  // PARALLEL_INVOKER_ACTIVE
}
// Simple wrapper for compatibility with below ParallelFor2D function.
template <class Invoker>
void SerialFor2D(size_t start_row, size_t end_row, size_t start_col,
size_t end_col, size_t grain_size, const Invoker& invoker) {
invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1),
BlockedRange(start_col, end_col, 1)));
}
// Same as above ParallelFor for 2D iteration. Parallelization is across rows
// only; every task receives its row block crossed with the full
// [start_col, end_col) column range.
template <class Invoker>
void ParallelFor2D(size_t start_row, size_t end_row, size_t start_col,
                   size_t end_col, size_t grain_size, const Invoker& invoker) {
#ifdef PARALLEL_INVOKER_ACTIVE
  CheckAndSetInvokerOptions();
  switch (flags_parallel_invoker_mode) {
#if defined(__APPLE__)
    case PARALLEL_INVOKER_GCD: {
      // Number of grain_size row-chunks needed to cover the row range.
      const int iterations_remain =
          (end_row - start_row + grain_size - 1) / grain_size;
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1),
                               BlockedRange(start_col, end_col, 1)));
      } else {
        BlockedRange all_tasks(start_row, end_row, grain_size);
        ParallelInvokerGCDContext2D<Invoker> context(
            invoker, all_tasks, BlockedRange(start_col, end_col, grain_size));
        dispatch_queue_t concurrent_queue =
            dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
        // dispatch_apply_f blocks until all row chunks have been processed.
        dispatch_apply_f(iterations_remain, concurrent_queue, &context,
                         ParallelForGCDTask2D<Invoker>);
#if CHECK_GCD_PARALLEL_WORK_COUNT
        // Debug builds: verify exactly one task ran per scheduled chunk.
        CHECK_EQ(iterations_remain, context.count());
#endif
      }
      break;
    }
#endif  // __APPLE__

    case PARALLEL_INVOKER_THREAD_POOL: {
      // One task per row here; grain_size is not used for chunking.
      int iterations_remain = end_row - start_row;  // Guarded by loop_mutex
      CHECK_GT(iterations_remain, 0);
      if (iterations_remain == 1) {
        // Execute invoker serially.
        invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1),
                               BlockedRange(start_col, end_col, 1)));
        break;
      }

      absl::Mutex loop_mutex;
      absl::CondVar loop_completed;
      for (int y = start_row; y < end_row; ++y) {
        // Each task processes one row across all columns and receives its
        // own copy of invoker (captured by value).
        auto loop_func = [y, start_col, end_col, &loop_mutex, &loop_completed,
                          &iterations_remain, invoker]() {
          // Execute invoker.
          invoker(BlockedRange2D(BlockedRange(y, y + 1, 1),
                                 BlockedRange(start_col, end_col, 1)));
          // Decrement counter.
          absl::MutexLock lock(&loop_mutex);
          --iterations_remain;
          if (iterations_remain == 0) {
            loop_completed.Signal();
          }
        };

        // Attempt to run in parallel, if busy run serial to avoid deadlocking.
        // NOTE(review): the call below schedules unconditionally; presumably
        // Schedule() handles the busy case internally. Confirm.
        ParallelInvokerThreadPool()->Schedule(loop_func);
      }

      // Wait on termination of all iterations.
      loop_mutex.Lock();
      while (iterations_remain > 0) {
        loop_completed.Wait(&loop_mutex);
      }
      loop_mutex.Unlock();
      break;
    }

    case PARALLEL_INVOKER_OPENMP: {
      // Use thread-local copy of invoker. One row per iteration.
      Invoker local_invoker(invoker);
#pragma omp parallel for firstprivate(local_invoker) \
    num_threads(flags_parallel_invoker_max_threads)
      for (int y = start_row; y < end_row; ++y) {
        local_invoker(BlockedRange2D(BlockedRange(y, y + 1, 1),
                                     BlockedRange(start_col, end_col, 1)));
      }
      break;
    }

    case PARALLEL_INVOKER_NONE: {
      SerialFor2D(start_row, end_row, start_col, end_col, grain_size, invoker);
      break;
    }

    case PARALLEL_INVOKER_MAX_VALUE: {
      LOG(FATAL) << "Impossible.";
      break;
    }
  }
#else
  SerialFor2D(start_row, end_row, start_col, end_col, grain_size, invoker);
#endif  // PARALLEL_INVOKER_ACTIVE
}
} // namespace mediapipe
#endif // MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_

View File

@ -0,0 +1,28 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Guard to ensure clients do not link against both,
// single and parallel version.
#ifdef PARALLEL_INVOKER_ACTIVE
// Deliberately defined with the same name in both the parallel and the
// single-threaded build of this library: linking against both produces a
// duplicate-symbol error at link time instead of silent misbehavior.
int LinkageAgainstBothSingleAndParallelStabilizationVersionsDetected() {
  return 0;
}
#endif  // PARALLEL_INVOKER_ACTIVE

#ifdef PARALLEL_INVOKER_INACTIVE
// Single-threaded flavor of the guard symbol above.
int LinkageAgainstBothSingleAndParallelStabilizationVersionsDetected() {
  return 1;
}
#endif  // PARALLEL_INVOKER_INACTIVE

View File

@ -0,0 +1,60 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/parallel_invoker.h"
#include <algorithm>
#include <numeric>
#include "absl/synchronization/mutex.h"
#include "mediapipe/framework/port/gtest.h"
namespace mediapipe {
namespace {
void RunParallelTest() {
absl::Mutex numbers_mutex;
std::vector<int> numbers;
const int kArraySize = 5000;
// Fill number array in parallel.
ParallelFor(0, kArraySize, 1,
[&numbers_mutex, &numbers](const BlockedRange& b) {
for (int k = b.begin(); k != b.end(); ++k) {
absl::MutexLock lock(&numbers_mutex);
numbers.push_back(k);
}
});
std::vector<int> expected(kArraySize);
std::iota(expected.begin(), expected.end(), 0);
EXPECT_TRUE(
std::is_permutation(expected.begin(), expected.end(), numbers.begin()));
}
TEST(ParallelInvokerTest, PhotosTest) {
  // Requests OpenMP; CheckAndSetInvokerOptions() inside ParallelFor falls
  // back to ThreadPool when OpenMP is not compiled in.
  flags_parallel_invoker_mode = PARALLEL_INVOKER_OPENMP;
  RunParallelTest();
}
TEST(ParallelInvokerTest, ThreadPoolTest) {
  flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL;
  // Needs to be run in opt mode to pass.
  // NOTE(review): reason not stated in this file -- presumably timing
  // related; confirm.
  RunParallelTest();
}
} // namespace
} // namespace mediapipe

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,39 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
// Options controlling the push-pull filtering stages (bilateral weighting
// and confidence propagation across pyramid levels).
message PushPullOptions {
  // Sigma for color difference.
  optional float bilateral_sigma = 1 [default = 20.0];

  // Determines how fast confident values can propagate. Filters are normalized,
  // such that confidence dissipates quickly instead of propagating.
  // To ensure confidence propagates, the importance weight is scaled by the
  // scalars specified below. Larger values yield quicker propagation.
  optional float pull_propagation_scale = 3 [default = 8.0];
  optional float push_propagation_scale = 4 [default = 8.0];

  // Above bilateral sigma is scaled at each level by the specified scale
  // (for push and pull phase). This is because iterative downsampling of the
  // guidance image introduces errors, making bilateral weighting increasingly
  // erroneous.
  optional float pull_bilateral_scale = 5 [default = 0.7];
  optional float push_bilateral_scale = 6 [default = 0.9];

  // Deprecated fields.
  extensions 2;
}

View File

@ -0,0 +1,799 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/util/tracking/region_flow.h"
#include <stddef.h>
#include <cmath>
#include <memory>
#include <numeric>
#include "absl/strings/str_cat.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/util/tracking/measure_time.h"
#include "mediapipe/util/tracking/parallel_invoker.h"
namespace mediapipe {
namespace {

// Returns true if pt lies at least `bounds` pixels inside the border of a
// frame_width x frame_height frame. A 0.5 pixel margin is added on each side
// so the result is stable under float -> integer rounding of coordinates.
bool IsPointWithinBounds(const Vector2_f& pt, float bounds, int frame_width,
                         int frame_height) {
  // Ensure stability under float -> rounding operations.
  return pt.x() - 0.5f >= bounds && pt.x() + 0.5f <= frame_width - 1 - bounds &&
         pt.y() - 0.5f >= bounds && pt.y() + 0.5f <= frame_height - 1 - bounds;
}

}  // namespace.
// Flattens all features of all regions in region_flow_frame into
// *flow_feature_list, copying frame dimensions, stability flag and blur
// score over. If distance_from_border > 0, features whose location OR match
// location lies closer than that many pixels to the frame border are
// dropped.
void GetRegionFlowFeatureList(const RegionFlowFrame& region_flow_frame,
                              int distance_from_border,
                              RegionFlowFeatureList* flow_feature_list) {
  CHECK(flow_feature_list);
  flow_feature_list->clear_feature();
  const int frame_width = region_flow_frame.frame_width();
  const int frame_height = region_flow_frame.frame_height();
  flow_feature_list->set_frame_width(frame_width);
  flow_feature_list->set_frame_height(frame_height);
  flow_feature_list->set_unstable(region_flow_frame.unstable_frame());
  flow_feature_list->set_distance_from_border(distance_from_border);
  flow_feature_list->set_blur_score(region_flow_frame.blur_score());

  for (const auto& region_flow : region_flow_frame.region_flow()) {
    for (const auto& feature : region_flow.feature()) {
      if (distance_from_border > 0) {
        // Both endpoints of the flow vector must respect the border margin.
        if (!IsPointWithinBounds(FeatureLocation(feature), distance_from_border,
                                 frame_width, frame_height) ||
            !IsPointWithinBounds(FeatureMatchLocation(feature),
                                 distance_from_border, frame_width,
                                 frame_height)) {
          continue;
        }
      }
      flow_feature_list->add_feature()->CopyFrom(feature);
    }
  }
}
// Returns the Euclidean distance between the first three entries (the
// per-channel color means) of the two patch descriptors.
float RegionFlowFeatureDistance(const PatchDescriptor& patch_desc_1,
                                const PatchDescriptor& patch_desc_2) {
  DCHECK_EQ(patch_desc_1.data_size(), patch_desc_2.data_size());
  DCHECK_GE(patch_desc_1.data_size(), 3);
  constexpr int kNumMeans = 3;
  float sum_of_squares = 0;
  int idx = 0;
  while (idx < kNumMeans) {
    const float delta = patch_desc_1.data(idx) - patch_desc_2.data(idx);
    sum_of_squares += delta * delta;
    ++idx;
  }
  return std::sqrt(sum_of_squares);
}
// Overwrites the IRLS weight of every feature in the list with `value`.
void ResetRegionFlowFeatureIRLSWeights(
    float value, RegionFlowFeatureList* flow_feature_list) {
  auto* features = flow_feature_list->mutable_feature();
  for (auto feature = features->begin(); feature != features->end();
       ++feature) {
    feature->set_irls_weight(value);
  }
}
double RegionFlowFeatureIRLSSum(const RegionFlowFeatureList& feature_list) {
double sum = 0.0;
for (const auto& feature : feature_list.feature()) {
sum += feature.irls_weight();
}
return sum;
}
// Clamps every feature's IRLS weight in the view to [lower, upper].
// Features already inside the interval are left untouched (no proto setter
// call, so presence bits stay unchanged).
void ClampRegionFlowFeatureIRLSWeights(float lower, float upper,
                                       RegionFlowFeatureView* feature_view) {
  for (auto& feature_ptr : *feature_view) {
    const float weight = feature_ptr->irls_weight();
    if (weight < lower) {
      feature_ptr->set_irls_weight(lower);
    } else if (weight > upper) {
      feature_ptr->set_irls_weight(upper);
    }
  }
}
// Computes a per-feature "texturedness" score in [0, 1] (0 = low texture,
// 1 = high texture) from the color standard deviation stored in each
// feature's descriptor. Features whose descriptor carries no variance
// information keep the default score of 1.0 (a warning is logged for the
// first two such features). Expects ComputeRegionFlowFeatureDescriptors to
// have been run beforehand.
void ComputeRegionFlowFeatureTexturedness(
    const RegionFlowFeatureList& flow_feature_list, bool use_15percent_as_max,
    std::vector<float>* texturedness) {
  CHECK(texturedness != nullptr);
  // Default every feature to fully textured.
  *texturedness = std::vector<float>(flow_feature_list.feature_size(), 1.0f);

  int texture_idx = 0;
  for (auto feature = flow_feature_list.feature().begin();
       feature != flow_feature_list.feature().end();
       ++feature, ++texture_idx) {
    const float feature_stdev_l1 =
        PatchDescriptorColorStdevL1(feature->feature_descriptor());
    if (feature_stdev_l1 < 0.0f) {
      // Immediately-invoked lambda restricts the warning to the first 2 hits.
      LOG_IF(WARNING,
             []() {
               static int k = 0;
               return k++ < 2;
             }())
          << "Feature descriptor does not contain variance information. Was "
          << "ComputeRegionFlowFeatureDescriptors called?";
      continue;
    }

    // feature_stdev_l1 is within [0, 3 * 128 = 384]
    float alpha = feature_stdev_l1 / 384.f;

    // In [0, 1], 0 = low texture, 1 = high texture. Scale such that around
    // 15% of per channel maximum stdev is considered totally textured
    // (1 / 0.15 * 3 ~ 18);
    if (use_15percent_as_max) {
      alpha = std::min(1.0f, alpha * 18.f);
    }
    (*texturedness)[texture_idx] = alpha;
  }
}
// Reweights IRLS weights using feature texturedness: low-texture features
// whose weight is below low_texture_outlier_clamp are raised to exactly the
// clamp value; all other features get their weight divided by their
// texturedness (plus epsilon), boosting weakly textured features.
// Zero-weight features (hard outliers) are left untouched.
void TextureFilteredRegionFlowFeatureIRLSWeights(
    float low_texture_threshold, float low_texture_outlier_clamp,
    RegionFlowFeatureList* flow_feature_list) {
  std::vector<float> texturedness;
  ComputeRegionFlowFeatureTexturedness(*flow_feature_list, true, &texturedness);
  int texture_idx = 0;
  for (auto feature = flow_feature_list->mutable_feature()->begin();
       feature != flow_feature_list->mutable_feature()->end();
       ++feature, ++texture_idx) {
    if (feature->irls_weight() == 0.0f) {
      continue;
    }
    if (texturedness[texture_idx] < low_texture_threshold &&
        feature->irls_weight() < low_texture_outlier_clamp) {
      feature->set_irls_weight(low_texture_outlier_clamp);
    } else {
      // Epsilon guards against division by zero texturedness.
      feature->set_irls_weight(feature->irls_weight() /
                               (texturedness[texture_idx] + 1.e-6f));
    }
  }
}
// Reweights IRLS weights using corner response, analogous to
// TextureFilteredRegionFlowFeatureIRLSWeights: weak-corner features whose
// weight is below low_corner_outlier_clamp are raised to exactly the clamp
// value; all other features get their weight divided by the corner response
// (plus epsilon). Zero-weight features (hard outliers) are left untouched.
void CornerFilteredRegionFlowFeatureIRLSWeights(
    float low_corner_threshold, float low_corner_outlier_clamp,
    RegionFlowFeatureList* flow_feature_list) {
  for (auto feature = flow_feature_list->mutable_feature()->begin();
       feature != flow_feature_list->mutable_feature()->end(); ++feature) {
    if (feature->irls_weight() == 0.0f) {
      continue;
    }
    const float corner_response = feature->corner_response();
    // BUGFIX: compare the current weight against the clamp value (as done in
    // TextureFilteredRegionFlowFeatureIRLSWeights), not against the
    // corner-response threshold, which lives on a different scale.
    if (corner_response < low_corner_threshold &&
        feature->irls_weight() < low_corner_outlier_clamp) {
      feature->set_irls_weight(low_corner_outlier_clamp);
    } else {
      // Epsilon guards against division by zero response.
      feature->set_irls_weight(feature->irls_weight() /
                               (corner_response + 1.e-6f));
    }
  }
}
// Copies each feature's IRLS weight, in feature order, into *irls_weights.
void GetRegionFlowFeatureIRLSWeights(
    const RegionFlowFeatureList& flow_feature_list,
    std::vector<float>* irls_weights) {
  CHECK(irls_weights != nullptr);
  irls_weights->clear();
  irls_weights->reserve(flow_feature_list.feature_size());
  for (const auto& feature : flow_feature_list.feature()) {
    irls_weights->push_back(feature.irls_weight());
  }
}
// Inverse of GetRegionFlowFeatureIRLSWeights: assigns irls_weights[i] to the
// i'th feature. Sizes must match.
void SetRegionFlowFeatureIRLSWeights(const std::vector<float>& irls_weights,
                                     RegionFlowFeatureList* flow_feature_list) {
  CHECK(flow_feature_list != nullptr);
  CHECK_EQ(irls_weights.size(), flow_feature_list->feature_size());
  int idx = 0;
  for (auto& feature : *flow_feature_list->mutable_feature()) {
    feature.set_irls_weight(irls_weights[idx]);
    ++idx;
  }
}
int CountIgnoredRegionFlowFeatures(
const RegionFlowFeatureList& flow_feature_list, float threshold) {
int count = 0;
for (auto feature = flow_feature_list.feature().begin();
feature != flow_feature_list.feature().end(); ++feature) {
if (feature->irls_weight() <= threshold) {
++count;
}
}
return count;
}
namespace {

// Strict weak ordering of RegionFlow protos by region_id; used for sorting
// and binary search (std::lower_bound) in the lookup functions below.
struct RegionFlowLocator {
  bool operator()(const RegionFlow& lhs, const RegionFlow& rhs) const {
    return lhs.region_id() < rhs.region_id();
  }
};

}  // namespace.
// Returns the RegionFlow with the given region_id via binary search, or
// nullptr if absent. Requires flow_frame.region_flow() to be sorted by
// region_id (see SortRegionFlowById).
const RegionFlow* GetRegionFlowById(int region_id,
                                    const RegionFlowFrame& flow_frame) {
  RegionFlow region_flow;
  region_flow.set_region_id(region_id);
  const auto& region_pos = std::lower_bound(flow_frame.region_flow().begin(),
                                            flow_frame.region_flow().end(),
                                            region_flow, RegionFlowLocator());
  if (region_pos == flow_frame.region_flow().end() ||
      region_pos->region_id() != region_id) {
    return nullptr;  // nullptr for consistency with the rest of this file.
  }
  return &(*region_pos);
}
// Mutable variant of GetRegionFlowById: returns the RegionFlow with the
// given region_id via binary search, or nullptr if absent. Requires the
// region_flow field to be sorted by region_id (see SortRegionFlowById).
RegionFlow* GetMutableRegionFlowById(int region_id,
                                     RegionFlowFrame* flow_frame) {
  RegionFlow region_flow;
  region_flow.set_region_id(region_id);
  auto region_pos = std::lower_bound(flow_frame->mutable_region_flow()->begin(),
                                     flow_frame->mutable_region_flow()->end(),
                                     region_flow, RegionFlowLocator());
  if (region_pos == flow_frame->mutable_region_flow()->end() ||
      region_pos->region_id() != region_id) {
    return nullptr;  // nullptr for consistency with the rest of this file.
  }
  return &(*region_pos);
}
// Sorts regions in place, ascending by region_id, enabling binary search in
// GetRegionFlowById / GetMutableRegionFlowById.
void SortRegionFlowById(RegionFlowFrame* flow_frame) {
  auto* regions = flow_frame->mutable_region_flow();
  std::sort(regions->begin(), regions->end(), RegionFlowLocator());
}
// Writes to *inverted_flow_frame a copy of region_flow_frame with all motion
// reversed: region centroids and feature locations are advanced by their
// flow, and the flow vectors are negated.
void InvertRegionFlow(const RegionFlowFrame& region_flow_frame,
                      RegionFlowFrame* inverted_flow_frame) {
  CHECK(inverted_flow_frame);
  inverted_flow_frame->CopyFrom(region_flow_frame);
  for (auto& region_flow : *inverted_flow_frame->mutable_region_flow()) {
    const float flow_x = region_flow.flow_x();
    const float flow_y = region_flow.flow_y();
    region_flow.set_centroid_x(region_flow.centroid_x() + flow_x);
    region_flow.set_centroid_y(region_flow.centroid_y() + flow_y);
    region_flow.set_flow_x(-flow_x);
    region_flow.set_flow_y(-flow_y);
    for (auto& feature : *region_flow.mutable_feature()) {
      const float dx = feature.dx();
      const float dy = feature.dy();
      feature.set_x(feature.x() + dx);
      feature.set_y(feature.y() + dy);
      feature.set_dx(-dx);
      feature.set_dy(-dy);
    }
  }
}
// Copies feature_list into *inverted_feature_list and inverts every feature
// in place (see InvertRegionFlowFeature).
void InvertRegionFlowFeatureList(const RegionFlowFeatureList& feature_list,
                                 RegionFlowFeatureList* inverted_feature_list) {
  CHECK(inverted_feature_list);
  *inverted_feature_list = feature_list;
  auto* features = inverted_feature_list->mutable_feature();
  for (auto feature = features->begin(); feature != features->end();
       ++feature) {
    InvertRegionFlowFeature(&*feature);
  }
}
// Inverts a single feature in place: its location becomes the match location
// and the flow vector is negated.
void InvertRegionFlowFeature(RegionFlowFeature* feature) {
  const Vector2_f match_location = FeatureMatchLocation(*feature);
  feature->set_x(match_location.x());
  feature->set_y(match_location.y());
  const Vector2_f flow_vector = FeatureFlow(*feature);
  feature->set_dx(-flow_vector.x());
  feature->set_dy(-flow_vector.y());
}
void LimitFeaturesToBounds(int frame_width, int frame_height, float bounds,
RegionFlowFeatureList* feature_list) {
RegionFlowFeatureList limited;
for (const auto& feature : feature_list->feature()) {
if (!IsPointWithinBounds(FeatureLocation(feature), bounds, frame_width,
frame_height)) {
continue;
} else {
limited.add_feature()->CopyFrom(feature);
}
}
feature_list->Swap(&limited);
}
// Applies the frame-size based normalization transform to every feature in
// the list. Inverse of DeNormalizeRegionFlowFeatureList.
void NormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list) {
  TransformRegionFlowFeatureList(
      LinearSimilarityAdapter::NormalizationTransform(
          feature_list->frame_width(), feature_list->frame_height()),
      feature_list);
}
// Applies the inverse of the frame-size based normalization transform to
// every feature, undoing NormalizeRegionFlowFeatureList.
void DeNormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list) {
  TransformRegionFlowFeatureList(
      ModelInvert(LinearSimilarityAdapter::NormalizationTransform(
          feature_list->frame_width(), feature_list->frame_height())),
      feature_list);
}
// Anisotropically scales a salient point's position and its ellipse axes by
// (scale_x, scale_y). The axes are scaled component-wise in the rotated
// frame, and magnitudes plus angle are re-derived from the scaled vectors;
// the angle is normalized to [0, pi).
void ScaleSalientPoint(float scale_x, float scale_y, SalientPoint* sp) {
  sp->set_norm_point_x(sp->norm_point_x() * scale_x);
  sp->set_norm_point_y(sp->norm_point_y() * scale_y);

  // Build major/minor axis vectors from the current orientation.
  const float cos_angle = std::cos(sp->angle());
  const float sin_angle = std::sin(sp->angle());
  Vector2_f major_axis = Vector2_f(cos_angle, sin_angle) * sp->norm_major();
  Vector2_f minor_axis = Vector2_f(-sin_angle, cos_angle) * sp->norm_minor();

  // Scale each axis component-wise.
  major_axis[0] *= scale_x;
  major_axis[1] *= scale_y;
  minor_axis[0] *= scale_x;
  minor_axis[1] *= scale_y;

  sp->set_norm_major(major_axis.Norm());
  sp->set_norm_minor(minor_axis.Norm());
  sp->set_angle(std::atan2(major_axis.y(), major_axis.x()));

  // Map atan2's (-pi, pi] result into [0, pi) -- ellipse orientation is
  // symmetric under 180 degree rotation.
  if (sp->angle() < 0) {
    sp->set_angle(sp->angle() + M_PI);
  }
}
void ScaleSaliencyList(float scale, bool normalize_to_scale,
SaliencyPointList* saliency_list) {
CHECK(saliency_list != nullptr);
for (auto& point_frame : *saliency_list) {
ScaleSalientPointFrame(scale, normalize_to_scale, &point_frame);
}
}
// Multiplies each salient point's weight by `scale`. If normalize_to_scale
// is set and the weights sum to more than a small epsilon, the weights are
// additionally divided by that sum, i.e. they end up summing to `scale`.
void ScaleSalientPointFrame(float scale, bool normalize_to_scale,
                            SalientPointFrame* saliency) {
  CHECK(saliency != nullptr);
  float effective_scale = scale;
  if (normalize_to_scale) {
    float total_weight = 0.0f;
    for (const auto& salient_point : saliency->point()) {
      total_weight += salient_point.weight();
    }
    // Only normalize for a non-negligible sum to avoid amplifying noise.
    if (total_weight > 1e-6f) {
      effective_scale /= total_weight;
    }
  }
  for (auto& salient_point : *saliency->mutable_point()) {
    salient_point.set_weight(salient_point.weight() * effective_scale);
  }
}
// Overwrites the rectangular bounds of every salient point in every frame
// with the supplied values.
void ResetSaliencyBounds(float left, float bottom, float right, float top,
                         SaliencyPointList* saliency_list) {
  CHECK(saliency_list != nullptr);
  for (auto& point_frame : *saliency_list) {
    auto* points = point_frame.mutable_point();
    for (auto point = points->begin(); point != points->end(); ++point) {
      point->set_left(left);
      point->set_bottom(bottom);
      point->set_right(right);
      point->set_top(top);
    }
  }
}
// Derives ellipse axis magnitudes and orientation from the symmetric 2x2
// covariance matrix [a bc; bc d] via its eigen decomposition: the magnitudes
// are the square roots of the (absolute) eigenvalues, major axis first, and
// the angle is that of the major-axis eigenvector. Returns false (with
// outputs set to unit axes / zero angle, or unreliable values) when the
// ellipse is degenerate: near-zero area (determinant < 4 sq. pixels) or a
// minor eigenvalue below 1.5 (minor axis below ~1.2 pixels).
bool EllipseFromCovariance(float a, float bc, float d,
                           Vector2_f* axis_magnitude, float* angle) {
  CHECK(axis_magnitude != nullptr);
  CHECK(angle != nullptr);
  // Get trace and determinant
  const float trace = a + d;
  const float det = a * d - bc * bc;

  // If area is very small (small axis in at least one direction)
  // axis are unreliable -> return false.
  if (det < 4) {  // Measured in sq. pixels.
    *axis_magnitude = Vector2_f(1, 1);
    *angle = 0;
    return false;
  }

  // Eigenvalues of a 2x2 matrix: trace/2 +/- sqrt(trace^2/4 - det); the
  // max() guards against tiny negative discriminants from rounding.
  const float discriminant = std::max(0.f, trace * trace * 0.25f - det);
  const float sqrt_discrm = std::sqrt(discriminant);

  // Get eigenvalues.
  float eig_1 = trace * 0.5f + sqrt_discrm;
  float eig_2 = trace * 0.5f - sqrt_discrm;

  // Compute eigenvectors.
  Vector2_f vec_1, vec_2;
  if (fabs(bc) < 1e-6) {
    // Right-most case, we already have diagonal matrix.
    vec_1.Set(1, 0);
    vec_2.Set(0, 1);
  } else {
    vec_1.Set(eig_1 - d, bc);
    vec_2.Set(eig_2 - d, bc);
    // Normalize. Norm is always > 0, as bc is > 0 via above if.
    vec_1 /= vec_1.Norm();
    vec_2 /= vec_2.Norm();
  }

  // Select positive eigenvector.
  if (eig_1 < 0) {
    eig_1 *= -1.f;
  }
  if (eig_2 < 0) {
    eig_2 *= -1.f;
  }

  // Major first.
  if (eig_1 < eig_2) {
    using std::swap;
    swap(vec_1, vec_2);
    swap(eig_1, eig_2);
  }

  *axis_magnitude = Vector2_f(std::sqrt(eig_1), std::sqrt(eig_2));
  *angle = std::atan2(vec_1.y(), vec_1.x());
  return eig_2 >= 1.5f;  // Measurement in pixels.
}
// Computes a coarse axis-aligned bounding box (4 corners) for an ellipse at
// `center`. The orientation is only classified as "more horizontal" vs
// "more vertical" (thresholds at 45 / 135 degrees), so the box is not tight
// for intermediate angles. NOTE(review): appears to assume angle in [0, pi]
// as produced by EllipseFromCovariance / ScaleSalientPoint -- confirm.
void BoundingBoxFromEllipse(const Vector2_f& center, float norm_major_axis,
                            float norm_minor_axis, float angle,
                            std::vector<Vector2_f>* bounding_box) {
  CHECK(bounding_box != nullptr);
  float dim_x;
  float dim_y;
  if (angle < M_PI * 0.25 || angle > M_PI * 0.75) {
    dim_x = norm_major_axis;
    dim_y = norm_minor_axis;
  } else {
    dim_y = norm_major_axis;
    dim_x = norm_minor_axis;
  }

  // Construct bounding box for axes aligned ellipse.
  *bounding_box = {
      Vector2_f(-dim_x, -dim_y),
      Vector2_f(-dim_x, dim_y),
      Vector2_f(dim_x, dim_y),
      Vector2_f(dim_x, -dim_y),
  };

  // Translate corners to the requested center.
  for (Vector2_f& corner : *bounding_box) {
    corner += center;
  }
}
// Copies all fields of *src EXCEPT its features into *dst, i.e. *dst ends up
// as a feature-less copy of *src. src is mutable only so its repeated
// feature field can be temporarily swapped out (avoiding a deep copy of the
// features); it is restored to its original state before returning.
void CopyToEmptyFeatureList(RegionFlowFeatureList* src,
                            RegionFlowFeatureList* dst) {
  CHECK(src != nullptr);
  CHECK(dst != nullptr);
  // Swap out features for empty list.
  RegionFlowFeatureList empty_list;
  empty_list.mutable_feature()->Swap(src->mutable_feature());
  // Copy (src's feature list is empty at this point).
  dst->CopyFrom(*src);
  // Swap back.
  src->mutable_feature()->Swap(empty_list.mutable_feature());
  // src_features should be empty as in the beginning.
  CHECK_EQ(0, empty_list.feature_size());
}
// Intersects `from` with `to` by long-track id: *result receives a copy of
// every feature of *from whose track id also occurs in `to`, with its flow
// (dx, dy) overwritten by the displacement from the feature's location in
// *from to the location reported by `to_location_eval` for the matching
// feature in `to`. Non-feature fields of *from are copied to *result as
// well. If source_indices is non-null, the index into from->feature() of
// every kept feature is appended to it. Both lists must have been computed
// with long feature tracks (long_tracks() set).
void IntersectRegionFlowFeatureList(
    const RegionFlowFeatureList& to,
    std::function<Vector2_f(const RegionFlowFeature&)> to_location_eval,
    RegionFlowFeatureList* from, RegionFlowFeatureList* result,
    std::vector<int>* source_indices) {
  CHECK(from != nullptr);
  CHECK(result != nullptr);
  CHECK(from->long_tracks()) << "Intersection only works for long features";
  CHECK(to.long_tracks()) << "Intersection only works for long features";
  // Hash features in to, based on track_id.
  std::unordered_map<int, const RegionFlowFeature*> track_map;
  for (const auto& feature : to.feature()) {
    track_map[feature.track_id()] = &feature;
  }
  // Initialize result as a feature-less copy of *from.
  CopyToEmptyFeatureList(from, result);
  const int num_from_features = from->feature_size();
  result->mutable_feature()->Reserve(num_from_features);
  int feature_idx = 0;
  for (const auto& feature : from->feature()) {
    auto find_location = track_map.find(feature.track_id());
    if (find_location != track_map.end()) {
      // Flow becomes the displacement from this feature to its counterpart.
      const Vector2_f diff =
          to_location_eval(*find_location->second) - FeatureLocation(feature);
      RegionFlowFeature* new_feature = result->add_feature();
      new_feature->CopyFrom(feature);
      new_feature->set_dx(diff.x());
      new_feature->set_dy(diff.y());
      if (source_indices != nullptr) {
        source_indices->push_back(feature_idx);
      }
    }
    ++feature_idx;
  }
}
// Ingests all features of `feature_list` into the per-track-id storage.
// The list must stem from long-track computation with match_frame == +/-1;
// the first frame (match_frame == 0) is skipped, as are features without a
// valid track id. Features are inverted when needed so the stored flow
// agrees with the stream's forward_ direction. With
// purge_non_present_features, tracks with no feature in this frame are
// erased; with check_connectivity, id reuse after removal and positional
// continuity of each track are CHECK-enforced.
void LongFeatureStream::AddFeatures(const RegionFlowFeatureList& feature_list,
                                    bool check_connectivity,
                                    bool purge_non_present_features) {
  if (!feature_list.long_tracks()) {
    LOG(ERROR) << "Feature stream should be used only used with long feature "
               << "tracks. Ensure POLICY_LONG_FEATURE was used for "
               << "RegionFlowComputation.";
    return;
  }
  if (feature_list.match_frame() == 0) {
    // Skip first frame.
    return;
  }
  if (std::abs(feature_list.match_frame()) != 1) {
    LOG(ERROR) << "Only matching frames one frame from current one are "
               << "supported";
    return;
  }
  // Record id of each track that is present in the current feature_list.
  std::unordered_set<int> present_tracks;
  for (auto feature : feature_list.feature()) {  // Copy feature.
    if (feature.track_id() < 0) {
      // Rate-limited warning: immediately-invoked lambda returns true only
      // for the first two occurrences.
      LOG_IF(WARNING, []() {
        static int k = 0;
        return k++ < 2;
      }()) << "Feature does not have a valid track id assigned. Ignoring.";
      continue;
    }
    present_tracks.insert(feature.track_id());
    if (check_connectivity) {
      // A new feature should never have been erased before.
      CHECK(old_ids_.find(feature.track_id()) == old_ids_.end())
          << "Feature : " << feature.track_id() << "was already removed.";
    }
    // Invert the features to be forward or backward according to forward_
    // flag.
    if ((!forward_ && feature_list.match_frame() > 0) ||
        (forward_ && feature_list.match_frame() < 0)) {
      InvertRegionFlowFeature(&feature);
    }
    auto find_pos = tracks_.find(feature.track_id());
    if (find_pos != tracks_.end()) {
      // Track is present, add to it.
      if (check_connectivity) {
        // This feature's match location must coincide with the last stored
        // location of the track.
        CHECK_LT((FeatureLocation(find_pos->second.back()) -
                  FeatureMatchLocation(feature))
                     .Norm2(),
                 1e-4);
      }
      find_pos->second.push_back(feature);
    } else {
      // First observation: start a new track with this feature.
      tracks_[feature.track_id()] = std::vector<RegionFlowFeature>(1, feature);
    }
  }
  if (purge_non_present_features) {
    // Erase every stored track that had no feature in this frame.
    std::vector<int> to_be_removed;
    for (const auto& track : tracks_) {
      if (present_tracks.find(track.first) == present_tracks.end()) {
        to_be_removed.push_back(track.first);
        if (check_connectivity) {
          old_ids_.insert(track.first);
        }
      }
    }
    for (int id : to_be_removed) {
      tracks_.erase(id);
    }
  }
}
void LongFeatureStream::FlattenTrack(
const std::vector<RegionFlowFeature>& features,
std::vector<Vector2_f>* result, std::vector<float>* irls_weight,
std::vector<Vector2_f>* flow) const {
CHECK(result != nullptr);
if (features.empty()) {
return;
}
if (irls_weight) {
irls_weight->clear();
}
if (flow) {
flow->clear();
}
if (!forward_) {
// Backward tracking, add first match.
result->push_back(FeatureMatchLocation(features[0]));
}
for (const auto& feature : features) {
result->push_back(FeatureLocation(feature));
if (flow) {
flow->push_back(FeatureFlow(feature));
}
if (irls_weight) {
irls_weight->push_back(feature.irls_weight());
}
}
if (forward_) {
// Forward tracking, add last match.
result->push_back(FeatureMatchLocation(features.back()));
}
// Replicate last irls weight.
if (irls_weight) {
irls_weight->push_back(irls_weight->back());
}
}
// Returns the stored feature track for `id`, or nullptr if no such track
// exists.
const std::vector<RegionFlowFeature>* LongFeatureStream::TrackById(
    int id) const {
  const auto iter = tracks_.find(id);
  return iter != tracks_.end() ? &iter->second : nullptr;
}
// Returns the flattened point locations for track `id` (see FlattenTrack);
// empty vector if the id is unknown.
std::vector<Vector2_f> LongFeatureStream::FlattenedTrackById(int id) const {
  std::vector<Vector2_f> points;
  if (const auto* track = TrackById(id)) {
    FlattenTrack(*track, &points, nullptr, nullptr);
  }
  return points;
}
void LongFeatureInfo::AddFeatures(const RegionFlowFeatureList& feature_list) {
if (!feature_list.long_tracks()) {
LOG(ERROR) << "Passed feature list was not computed with long tracks. ";
return;
}
for (const auto& feature : feature_list.feature()) {
AddFeature(feature);
}
IncrementFrame();
}
// Records one feature observation: starts a new track entry (start frame =
// current frame count, length 1) on first sight of its track id, otherwise
// increments the track's length. Outlier features (zero irls weight) and
// features without a valid track id are ignored.
void LongFeatureInfo::AddFeature(const RegionFlowFeature& feature) {
  if (feature.irls_weight() == 0) {  // Skip outliers.
    return;
  }
  const int track_id = feature.track_id();
  if (track_id < 0) {  // Skip unassigned ids.
    return;
  }
  auto insert_pos = track_info_.find(track_id);
  if (insert_pos == track_info_.end()) {
    // Cache the reference from a single operator[] call instead of indexing
    // the map once per member assignment.
    auto& info = track_info_[track_id];
    info.start = num_frames_;
    info.length = 1;
  } else {
    ++insert_pos->second.length;
  }
}
// Outputs the recorded track length for every feature of `feature_list`, in
// feature order (0 for unknown track ids).
void LongFeatureInfo::TrackLengths(const RegionFlowFeatureList& feature_list,
                                   std::vector<int>* track_lengths) const {
  CHECK(track_lengths);
  const int num_features = feature_list.feature_size();
  track_lengths->clear();
  track_lengths->reserve(num_features);
  for (int idx = 0; idx < num_features; ++idx) {
    track_lengths->push_back(TrackLength(feature_list.feature(idx)));
  }
}
// Recorded length of the feature's track; 0 when the track id is unknown.
int LongFeatureInfo::TrackLength(const RegionFlowFeature& feature) const {
  const auto iter = track_info_.find(feature.track_id());
  if (iter == track_info_.end()) {
    return 0;
  }
  return iter->second.length;
}
// Frame index (per the internal frame counter) at which the feature's track
// was first recorded; -1 when the track id is unknown.
int LongFeatureInfo::TrackStart(const RegionFlowFeature& feature) const {
  const auto iter = track_info_.find(feature.track_id());
  if (iter == track_info_.end()) {
    return -1;
  }
  return iter->second.start;
}
// Discards all accumulated per-track statistics and restarts the frame
// counter.
void LongFeatureInfo::Reset() {
  track_info_.clear();
  num_frames_ = 0;
}
// Returns the track length at the given percentile (expected in [0, 1]) of
// all recorded track lengths, or 0 when no tracks have been recorded.
int LongFeatureInfo::GlobalTrackLength(float percentile) const {
  std::vector<int> track_lengths;
  track_lengths.reserve(track_info_.size());
  for (const auto& pair : track_info_) {
    track_lengths.push_back(pair.second.length);
  }
  if (track_lengths.empty()) {
    return 0;
  }
  // Clamp the element index to [0, size - 1]: without clamping,
  // percentile >= 1.0 yields end(), and dereferencing it below is undefined
  // behavior; a negative percentile would underflow similarly.
  const int max_index = static_cast<int>(track_lengths.size()) - 1;
  const int index = std::max(
      0, std::min(static_cast<int>(percentile * track_lengths.size()),
                  max_index));
  auto percentile_item = track_lengths.begin() + index;
  // Partial sort: places the index-th smallest element at percentile_item.
  std::nth_element(track_lengths.begin(), percentile_item, track_lengths.end());
  return *percentile_item;
}
// For each cell of a dim_x by dim_y grid, computes the flat indices of all
// cells within tap_radius (Chebyshev distance, clamped to the grid border).
// (*taps)[y * dim_x + x] lists the neighbor indices, in row-major order.
void GridTaps(int dim_x, int dim_y, int tap_radius,
              std::vector<std::vector<int>>* taps) {
  CHECK(taps);
  const int diameter = 2 * tap_radius + 1;
  taps->assign(dim_x * dim_y, std::vector<int>());
  for (int y = 0; y < dim_y; ++y) {
    // Row window clamped to the grid, shared by all cells in this row.
    const int row_begin = std::max(0, y - tap_radius);
    const int row_end = std::min(dim_y - 1, y + tap_radius);
    for (int x = 0; x < dim_x; ++x) {
      const int col_begin = std::max(0, x - tap_radius);
      const int col_end = std::min(dim_x - 1, x + tap_radius);
      std::vector<int>& bin = (*taps)[y * dim_x + x];
      bin.reserve(diameter * diameter);
      for (int row = row_begin; row <= row_end; ++row) {
        for (int col = col_begin; col <= col_end; ++col) {
          bin.push_back(row * dim_x + col);
        }
      }
    }
  }
}
} // namespace mediapipe

Some files were not shown because too many files have changed in this diff Show More