diff --git a/.bazelrc b/.bazelrc index 4ffb73bd4..37cdc3d88 100644 --- a/.bazelrc +++ b/.bazelrc @@ -19,6 +19,9 @@ build --incompatible_depset_is_not_iterable=false # Sets the default Apple platform to macOS. build --apple_platform_type=macos +# Allow debugging with XCODE +build --apple_generate_dsym + # Android configs. build:android --crosstool_top=//external:android/crosstool build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain diff --git a/Dockerfile b/Dockerfile index 8adcd6c24..ef7b4d43a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -35,6 +35,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libopencv-highgui-dev \ libopencv-imgproc-dev \ libopencv-video-dev \ + libopencv-calib3d-dev \ + libopencv-features2d-dev \ software-properties-common && \ add-apt-repository -y ppa:openjdk-r/ppa && \ apt-get update && apt-get install -y openjdk-8-jdk && \ diff --git a/README.md b/README.md index f5a0bceef..1b6385a89 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,13 @@ * [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) * [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) * [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md) +* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md) ![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif) ![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif) ![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif) ![hair_segmentation](mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif) -![object_detection](mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif) +![object_tracking](mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif) ## Installation Follow these [instructions](mediapipe/docs/install.md). 
diff --git a/WORKSPACE b/WORKSPACE index c09ca03a5..35d1a7e36 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -116,6 +116,13 @@ http_archive( "https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, "https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT, ], + # Patch https://github.com/tensorflow/tensorflow/commit/e3a7bdbebb99352351a19e2e403136166aa52934 + patches = [ + "@//third_party:org_tensorflow_e3a7bdbebb99352351a19e2e403136166aa52934.diff" + ], + patch_args = [ + "-p1", + ], strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, sha256 = _TENSORFLOW_SHA256, ) @@ -123,8 +130,22 @@ http_archive( load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") tf_workspace(tf_repo_name = "org_tensorflow") +http_archive( + name = "ceres_solver", + url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip", + patches = [ + "@//third_party:ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff" + ], + patch_args = [ + "-p1", + ], + strip_prefix = "ceres-solver-1.14.0", + sha256 = "5ba6d0db4e784621fda44a50c58bb23b0892684692f0c623e2063f9c19f192f1" +) + # Please run # $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \ +# libopencv-calib3d-dev libopencv-features2d-dev \ # libopencv-imgproc-dev libopencv-video-dev new_local_repository( name = "linux_opencv", @@ -292,3 +313,4 @@ http_archive( strip_prefix = "google-toolbox-for-mac-2.2.1", build_file = "@//third_party:google_toolbox_for_mac.BUILD", ) + diff --git a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen index 51a68cc8e..a283b56fc 100644 --- a/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen +++ b/mediapipe/MediaPipe.tulsiproj/Configs/MediaPipe.tulsigen @@ -9,6 +9,7 @@ "mediapipe/examples/ios/facedetectiongpu/BUILD", "mediapipe/examples/ios/handdetectiongpu/BUILD", "mediapipe/examples/ios/handtrackinggpu/BUILD", + 
"mediapipe/examples/ios/multihandtrackinggpu/BUILD", "mediapipe/examples/ios/objectdetectioncpu/BUILD", "mediapipe/examples/ios/objectdetectiongpu/BUILD" ], @@ -18,6 +19,7 @@ "//mediapipe/examples/ios/facedetectiongpu:FaceDetectionGpuApp", "//mediapipe/examples/ios/handdetectiongpu:HandDetectionGpuApp", "//mediapipe/examples/ios/handtrackinggpu:HandTrackingGpuApp", + "//mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp", "//mediapipe/examples/ios/objectdetectioncpu:ObjectDetectionCpuApp", "//mediapipe/examples/ios/objectdetectiongpu:ObjectDetectionGpuApp", "//mediapipe/objc:mediapipe_framework_ios" @@ -84,6 +86,8 @@ "mediapipe/examples/ios/handdetectiongpu/Base.lproj", "mediapipe/examples/ios/handtrackinggpu", "mediapipe/examples/ios/handtrackinggpu/Base.lproj", + "mediapipe/examples/ios/multihandtrackinggpu", + "mediapipe/examples/ios/multihandtrackinggpu/Base.lproj", "mediapipe/examples/ios/objectdetectioncpu", "mediapipe/examples/ios/objectdetectioncpu/Base.lproj", "mediapipe/examples/ios/objectdetectiongpu", diff --git a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf index bb40e553a..24d8764f0 100644 --- a/mediapipe/MediaPipe.tulsiproj/project.tulsiconf +++ b/mediapipe/MediaPipe.tulsiproj/project.tulsiconf @@ -16,6 +16,7 @@ "mediapipe/examples/ios/facedetectiongpu", "mediapipe/examples/ios/handdetectiongpu", "mediapipe/examples/ios/handtrackinggpu", + "mediapipe/examples/ios/multihandtrackinggpu", "mediapipe/examples/ios/objectdetectioncpu", "mediapipe/examples/ios/objectdetectiongpu" ], diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 6de3c3828..aa8a94033 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -165,6 +165,7 @@ cc_library( deps = [ "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:status", ], alwayslink = 1, ) diff --git 
a/mediapipe/calculators/core/add_header_calculator.cc b/mediapipe/calculators/core/add_header_calculator.cc index 341813817..393c12225 100644 --- a/mediapipe/calculators/core/add_header_calculator.cc +++ b/mediapipe/calculators/core/add_header_calculator.cc @@ -13,11 +13,12 @@ // limitations under the License. #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" #include "mediapipe/framework/port/logging.h" namespace mediapipe { -// Attach the header from one stream to another stream. +// Attach the header from a stream or side input to another stream. // // The header stream (tag HEADER) must not have any packets in it. // @@ -25,17 +26,53 @@ namespace mediapipe { // calculator to not need a header or to accept a separate stream with // a header, that would be more future proof. // +// Example usage 1: +// node { +// calculator: "AddHeaderCalculator" +// input_stream: "DATA:audio" +// input_stream: "HEADER:audio_header" +// output_stream: "audio_with_header" +// } +// +// Example usage 2: +// node { +// calculator: "AddHeaderCalculator" +// input_stream: "DATA:audio" +// input_side_packet: "HEADER:audio_header" +// output_stream: "audio_with_header" +// } +// class AddHeaderCalculator : public CalculatorBase { public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { - cc->Inputs().Tag("HEADER").SetNone(); + bool has_side_input = false; + bool has_header_stream = false; + if (cc->InputSidePackets().HasTag("HEADER")) { + cc->InputSidePackets().Tag("HEADER").SetAny(); + has_side_input = true; + } + if (cc->Inputs().HasTag("HEADER")) { + cc->Inputs().Tag("HEADER").SetNone(); + has_header_stream = true; + } + if (has_side_input == has_header_stream) { + return mediapipe::InvalidArgumentError( + "Header must be provided via exactly one of side input and input " + "stream"); + } cc->Inputs().Tag("DATA").SetAny(); cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Tag("DATA")); return 
::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) override { - const Packet& header = cc->Inputs().Tag("HEADER").Header(); + Packet header; + if (cc->InputSidePackets().HasTag("HEADER")) { + header = cc->InputSidePackets().Tag("HEADER"); + } + if (cc->Inputs().HasTag("HEADER")) { + header = cc->Inputs().Tag("HEADER").Header(); + } if (!header.IsEmpty()) { cc->Outputs().Index(0).SetHeader(header); } diff --git a/mediapipe/calculators/core/add_header_calculator_test.cc b/mediapipe/calculators/core/add_header_calculator_test.cc index b67cf04dc..01ea986f1 100644 --- a/mediapipe/calculators/core/add_header_calculator_test.cc +++ b/mediapipe/calculators/core/add_header_calculator_test.cc @@ -14,8 +14,10 @@ #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/canonical_errors.h" #include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/timestamp.h" #include "mediapipe/framework/tool/validate_type.h" @@ -24,7 +26,7 @@ namespace mediapipe { class AddHeaderCalculatorTest : public ::testing::Test {}; -TEST_F(AddHeaderCalculatorTest, Works) { +TEST_F(AddHeaderCalculatorTest, HeaderStream) { CalculatorGraphConfig::Node node; node.set_calculator("AddHeaderCalculator"); node.add_input_stream("HEADER:header_stream"); @@ -96,4 +98,62 @@ TEST_F(AddHeaderCalculatorTest, NoPacketsOnHeaderStream) { ASSERT_FALSE(runner.Run().ok()); } +TEST_F(AddHeaderCalculatorTest, InputSidePacket) { + CalculatorGraphConfig::Node node; + node.set_calculator("AddHeaderCalculator"); + node.add_input_stream("DATA:data_stream"); + node.add_output_stream("merged_stream"); + node.add_input_side_packet("HEADER:header"); + + CalculatorRunner runner(node); + + // Set header and add 5 packets. 
+ runner.MutableSidePackets()->Tag("HEADER") = + Adopt(new std::string("my_header")); + for (int i = 0; i < 5; ++i) { + Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000)); + runner.MutableInputs()->Tag("DATA").packets.push_back(packet); + } + + // Run calculator. + MP_ASSERT_OK(runner.Run()); + + ASSERT_EQ(1, runner.Outputs().NumEntries()); + + // Test output. + EXPECT_EQ(std::string("my_header"), + runner.Outputs().Index(0).header.Get()); + const std::vector& output_packets = runner.Outputs().Index(0).packets; + ASSERT_EQ(5, output_packets.size()); + for (int i = 0; i < 5; ++i) { + const int val = output_packets[i].Get(); + EXPECT_EQ(i, val); + EXPECT_EQ(Timestamp(i * 1000), output_packets[i].Timestamp()); + } +} + +TEST_F(AddHeaderCalculatorTest, UsingBothSideInputAndStream) { + CalculatorGraphConfig::Node node; + node.set_calculator("AddHeaderCalculator"); + node.add_input_stream("HEADER:header_stream"); + node.add_input_stream("DATA:data_stream"); + node.add_output_stream("merged_stream"); + node.add_input_side_packet("HEADER:header"); + + CalculatorRunner runner(node); + + // Set both the side packet and the stream header, then add 5 packets. + runner.MutableSidePackets()->Tag("HEADER") = + Adopt(new std::string("my_header")); + runner.MutableInputs()->Tag("HEADER").header = + Adopt(new std::string("my_header")); + for (int i = 0; i < 5; ++i) { + Packet packet = Adopt(new int(i)).At(Timestamp(i * 1000)); + runner.MutableInputs()->Tag("DATA").packets.push_back(packet); + } + + // Run should fail because header can only be provided one way. 
+ EXPECT_EQ(runner.Run().code(), ::mediapipe::InvalidArgumentError("").code()); +} + } // namespace mediapipe diff --git a/mediapipe/calculators/core/packet_resampler_calculator.cc b/mediapipe/calculators/core/packet_resampler_calculator.cc index 2966e4589..e9c12880f 100644 --- a/mediapipe/calculators/core/packet_resampler_calculator.cc +++ b/mediapipe/calculators/core/packet_resampler_calculator.cc @@ -330,22 +330,27 @@ void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() { return ::mediapipe::OkStatus(); } - LOG_IF(WARNING, frame_time_usec_ < - (cc->InputTimestamp() - last_packet_.Timestamp()).Value()) - << "Adding jitter is meaningless when upsampling."; - - const int64 curr_diff = - (next_output_timestamp_ - cc->InputTimestamp()).Value(); - const int64 last_diff = - (next_output_timestamp_ - last_packet_.Timestamp()).Value(); - if (curr_diff * last_diff > 0) { - return ::mediapipe::OkStatus(); + if (frame_time_usec_ < + (cc->InputTimestamp() - last_packet_.Timestamp()).Value()) { + LOG_FIRST_N(WARNING, 2) + << "Adding jitter is not very useful when upsampling."; + } + + while (true) { + const int64 last_diff = + (next_output_timestamp_ - last_packet_.Timestamp()).Value(); + RET_CHECK_GT(last_diff, 0.0); + const int64 curr_diff = + (next_output_timestamp_ - cc->InputTimestamp()).Value(); + if (curr_diff > 0.0) { + break; + } + OutputWithinLimits(cc, (std::abs(curr_diff) > last_diff + ? last_packet_ + : cc->Inputs().Get(input_data_id_).Value()) + .At(next_output_timestamp_)); + UpdateNextOutputTimestampWithJitter(); } - OutputWithinLimits(cc, (std::abs(curr_diff) > std::abs(last_diff) - ? 
last_packet_ - : cc->Inputs().Get(input_data_id_).Value()) - .At(next_output_timestamp_)); - UpdateNextOutputTimestampWithJitter(); return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/image/image_cropping_calculator.cc b/mediapipe/calculators/image/image_cropping_calculator.cc index a9277e871..792c24466 100644 --- a/mediapipe/calculators/image/image_cropping_calculator.cc +++ b/mediapipe/calculators/image/image_cropping_calculator.cc @@ -501,8 +501,11 @@ void ImageCroppingCalculator::GetOutputDimensions(CalculatorContext* cc, row_max = std::max(row_max, transformed_points_[i * 2 + 1]); } - *dst_width = std::round((col_max - col_min) * src_width); - *dst_height = std::round((row_max - row_min) * src_height); + int width = static_cast(std::round((col_max - col_min) * src_width)); + int height = static_cast(std::round((row_max - row_min) * src_height)); + // Minimum output dimension 1x1 prevents creation of textures with 0x0. + *dst_width = std::max(1, width); + *dst_height = std::max(1, height); } } // namespace mediapipe diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index d3a2e341d..b603efd1c 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -983,3 +983,31 @@ cc_test( "//mediapipe/framework/port:parse_text_proto", ], ) + +cc_library( + name = "detections_to_timed_box_list_calculator", + srcs = ["detections_to_timed_box_list_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/util/tracking:box_tracker", + ], + alwayslink = 1, +) + +cc_library( + name = "detection_unique_id_calculator", + srcs = ["detection_unique_id_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + 
"//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) diff --git a/mediapipe/calculators/util/detection_unique_id_calculator.cc b/mediapipe/calculators/util/detection_unique_id_calculator.cc new file mode 100644 index 000000000..2069f1677 --- /dev/null +++ b/mediapipe/calculators/util/detection_unique_id_calculator.cc @@ -0,0 +1,110 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +namespace { + +constexpr char kDetectionsTag[] = "DETECTIONS"; +constexpr char kDetectionListTag[] = "DETECTION_LIST"; + +// Each detection processed by DetectionUniqueIDCalculator will be assigned an +// unique id that starts from 1. If a detection already has an ID other than 0, +// the ID will be overwritten. +static int64 detection_id = 0; + +inline int GetNextDetectionId() { return ++detection_id; } + +} // namespace + +// Assign a unique id to detections. +// Note that the calculator will consume the input vector of Detection or +// DetectionList. So the input stream can not be connected to other calculators. 
+// +// Example config: +// node { +// calculator: "DetectionUniqueIdCalculator" +// input_stream: "DETECTIONS:detections" +// output_stream: "DETECTIONS:output_detections" +// } +class DetectionUniqueIdCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().HasTag(kDetectionListTag) || + cc->Inputs().HasTag(kDetectionsTag)) + << "None of the input streams are provided."; + + if (cc->Inputs().HasTag(kDetectionListTag)) { + RET_CHECK(cc->Outputs().HasTag(kDetectionListTag)); + cc->Inputs().Tag(kDetectionListTag).Set(); + cc->Outputs().Tag(kDetectionListTag).Set(); + } + if (cc->Inputs().HasTag(kDetectionsTag)) { + RET_CHECK(cc->Outputs().HasTag(kDetectionsTag)); + cc->Inputs().Tag(kDetectionsTag).Set>(); + cc->Outputs().Tag(kDetectionsTag).Set>(); + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(::mediapipe::TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Process(CalculatorContext* cc) override; +}; +REGISTER_CALCULATOR(DetectionUniqueIdCalculator); + +::mediapipe::Status DetectionUniqueIdCalculator::Process( + CalculatorContext* cc) { + if (cc->Inputs().HasTag(kDetectionListTag) && + !cc->Inputs().Tag(kDetectionListTag).IsEmpty()) { + auto result = + cc->Inputs().Tag(kDetectionListTag).Value().Consume(); + if (result.ok()) { + auto detection_list = std::move(result).ValueOrDie(); + for (Detection& detection : *detection_list->mutable_detection()) { + detection.set_detection_id(GetNextDetectionId()); + } + cc->Outputs() + .Tag(kDetectionListTag) + .Add(detection_list.release(), cc->InputTimestamp()); + } + } + + if (cc->Inputs().HasTag(kDetectionsTag) && + !cc->Inputs().Tag(kDetectionsTag).IsEmpty()) { + auto result = cc->Inputs() + .Tag(kDetectionsTag) + .Value() + .Consume>(); + if (result.ok()) { + auto detections = std::move(result).ValueOrDie(); + for (Detection& detection : 
*detections) { + detection.set_detection_id(GetNextDetectionId()); + } + cc->Outputs() + .Tag(kDetectionsTag) + .Add(detections.release(), cc->InputTimestamp()); + } + } + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/detections_to_render_data_calculator.cc b/mediapipe/calculators/util/detections_to_render_data_calculator.cc index 098334ec7..1077f7687 100644 --- a/mediapipe/calculators/util/detections_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/detections_to_render_data_calculator.cc @@ -39,6 +39,8 @@ constexpr char kKeypointLabel[] = "KEYPOINT"; // The ratio of detection label font height to the height of detection bounding // box. constexpr double kLabelToBoundingBoxRatio = 0.1; +// Preserve 2 decimal digits. +constexpr float kNumScoreDecimalDigitsMultipler = 100; } // namespace @@ -235,18 +237,26 @@ void DetectionsToRenderDataCalculator::AddLabels( std::string label_str = detection.label().empty() ? absl::StrCat(detection.label_id(i)) : detection.label(i); + const float rounded_score = + std::round(detection.score(i) * kNumScoreDecimalDigitsMultipler) / + kNumScoreDecimalDigitsMultipler; std::string label_and_score = - absl::StrCat(label_str, options.text_delimiter(), detection.score(i), + absl::StrCat(label_str, options.text_delimiter(), rounded_score, options.text_delimiter()); label_and_scores.push_back(label_and_score); } std::vector labels; + if (options.render_detection_id()) { + const std::string detection_id_str = + absl::StrCat("Id: ", detection.detection_id()); + labels.push_back(detection_id_str); + } if (options.one_label_per_line()) { - labels.swap(label_and_scores); + labels.insert(labels.end(), label_and_scores.begin(), + label_and_scores.end()); } else { labels.push_back(absl::StrJoin(label_and_scores, "")); } - // Add the render annotations for "label(_id),score". 
for (int i = 0; i < labels.size(); ++i) { auto label = labels.at(i); diff --git a/mediapipe/calculators/util/detections_to_render_data_calculator.proto b/mediapipe/calculators/util/detections_to_render_data_calculator.proto index 245f48fea..acc847dcd 100644 --- a/mediapipe/calculators/util/detections_to_render_data_calculator.proto +++ b/mediapipe/calculators/util/detections_to_render_data_calculator.proto @@ -53,4 +53,7 @@ message DetectionsToRenderDataCalculatorOptions { // instances of this calculator are present in the graph, this value // should be unique among them. optional string scene_class = 7 [default = "DETECTION"]; + + // If true, renders the detection id in the first line before the labels. + optional bool render_detection_id = 8 [default = false]; } diff --git a/mediapipe/calculators/util/detections_to_timed_box_list_calculator.cc b/mediapipe/calculators/util/detections_to_timed_box_list_calculator.cc new file mode 100644 index 000000000..b0a177e58 --- /dev/null +++ b/mediapipe/calculators/util/detections_to_timed_box_list_calculator.cc @@ -0,0 +1,110 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/util/tracking/box_tracker.h" + +namespace mediapipe { + +namespace { + +constexpr char kDetectionsTag[] = "DETECTIONS"; +constexpr char kDetectionListTag[] = "DETECTION_LIST"; +constexpr char kBoxesTag[] = "BOXES"; + +} // namespace + +// A calculator that converts Detection proto to TimedBoxList proto for +// tracking. +// +// Please note that only Location Data formats of RELATIVE_BOUNDING_BOX are +// supported. +// +// Example config: +// node { +// calculator: "DetectionsToTimedBoxListCalculator" +// input_stream: "DETECTIONS:detections" +// output_stream: "BOXES:boxes" +// } +class DetectionsToTimedBoxListCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().HasTag(kDetectionListTag) || + cc->Inputs().HasTag(kDetectionsTag)) + << "None of the input streams are provided."; + if (cc->Inputs().HasTag(kDetectionListTag)) { + cc->Inputs().Tag(kDetectionListTag).Set(); + } + if (cc->Inputs().HasTag(kDetectionsTag)) { + cc->Inputs().Tag(kDetectionsTag).Set>(); + } + cc->Outputs().Tag(kBoxesTag).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + void ConvertDetectionToTimedBox(const Detection& detection, + TimedBoxProto* box, CalculatorContext* cc); +}; +REGISTER_CALCULATOR(DetectionsToTimedBoxListCalculator); + +::mediapipe::Status DetectionsToTimedBoxListCalculator::Process( + CalculatorContext* cc) { + auto output_timed_box_list = absl::make_unique(); + + if (cc->Inputs().HasTag(kDetectionListTag)) { + const auto& detection_list = + 
cc->Inputs().Tag(kDetectionListTag).Get(); + for (const auto& detection : detection_list.detection()) { + TimedBoxProto* box = output_timed_box_list->add_box(); + ConvertDetectionToTimedBox(detection, box, cc); + } + } + if (cc->Inputs().HasTag(kDetectionsTag)) { + const auto& detections = + cc->Inputs().Tag(kDetectionsTag).Get>(); + for (const auto& detection : detections) { + TimedBoxProto* box = output_timed_box_list->add_box(); + ConvertDetectionToTimedBox(detection, box, cc); + } + } + + cc->Outputs().Tag(kBoxesTag).Add(output_timed_box_list.release(), + cc->InputTimestamp()); + return ::mediapipe::OkStatus(); +} + +void DetectionsToTimedBoxListCalculator::ConvertDetectionToTimedBox( + const Detection& detection, TimedBoxProto* box, CalculatorContext* cc) { + const auto& relative_bounding_box = + detection.location_data().relative_bounding_box(); + box->set_left(relative_bounding_box.xmin()); + box->set_right(relative_bounding_box.xmin() + relative_bounding_box.width()); + box->set_top(relative_bounding_box.ymin()); + box->set_bottom(relative_bounding_box.ymin() + + relative_bounding_box.height()); + box->set_id(detection.detection_id()); + box->set_time_msec(cc->InputTimestamp().Microseconds() / 1000); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index d0f93a104..b82743d79 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -37,6 +37,67 @@ proto_library( deps = ["//mediapipe/framework:calculator_proto"], ) +proto_library( + name = "motion_analysis_calculator_proto", + srcs = ["motion_analysis_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_proto", + "//mediapipe/util/tracking:motion_analysis_proto", + ], +) + +proto_library( + name = "flow_packager_calculator_proto", + srcs = ["flow_packager_calculator.proto"], + deps = [ + "//mediapipe/framework:calculator_proto", + "//mediapipe/util/tracking:flow_packager_proto", + ], +) + 
+proto_library( + name = "box_tracker_calculator_proto", + srcs = ["box_tracker_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + "//mediapipe/util/tracking:box_tracker_proto", + ], +) + +mediapipe_cc_proto_library( + name = "motion_analysis_calculator_cc_proto", + srcs = ["motion_analysis_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/util/tracking:motion_analysis_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":motion_analysis_calculator_proto"], +) + +mediapipe_cc_proto_library( + name = "flow_packager_calculator_cc_proto", + srcs = ["flow_packager_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/util/tracking:flow_packager_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":flow_packager_calculator_proto"], +) + +mediapipe_cc_proto_library( + name = "box_tracker_calculator_cc_proto", + srcs = ["box_tracker_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/util/tracking:box_tracker_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":box_tracker_calculator_proto"], +) + mediapipe_cc_proto_library( name = "flow_to_image_calculator_cc_proto", srcs = ["flow_to_image_calculator.proto"], @@ -131,6 +192,94 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "motion_analysis_calculator", + srcs = ["motion_analysis_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":motion_analysis_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:video_stream_header", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + 
"//mediapipe/util/tracking:camera_motion", + "//mediapipe/util/tracking:camera_motion_cc_proto", + "//mediapipe/util/tracking:frame_selection_cc_proto", + "//mediapipe/util/tracking:motion_analysis", + "//mediapipe/util/tracking:motion_estimation", + "//mediapipe/util/tracking:motion_models", + "//mediapipe/util/tracking:region_flow_cc_proto", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "flow_packager_calculator", + srcs = ["flow_packager_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":flow_packager_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/util/tracking:camera_motion_cc_proto", + "//mediapipe/util/tracking:flow_packager", + "//mediapipe/util/tracking:region_flow_cc_proto", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + ], + alwayslink = 1, +) + +cc_library( + name = "box_tracker_calculator", + srcs = ["box_tracker_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":box_tracker_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:video_stream_header", # fixdeps: keep -- required for exobazel build. 
+ "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/tool:options_util", + "//mediapipe/util/tracking", + "//mediapipe/util/tracking:box_tracker", + "//mediapipe/util/tracking:tracking_visualization_utilities", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "tracked_detection_manager_calculator", + srcs = ["tracked_detection_manager_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:status", + "//mediapipe/util/tracking", + "//mediapipe/util/tracking:box_tracker", + "//mediapipe/util/tracking:tracked_detection", + "//mediapipe/util/tracking:tracked_detection_manager", + "//mediapipe/util/tracking:tracking_visualization_utilities", + "@com_google_absl//absl/container:node_hash_map", + ], + alwayslink = 1, +) + filegroup( name = "test_videos", srcs = [ @@ -201,3 +350,64 @@ cc_test( "//mediapipe/framework/port:parse_text_proto", ], ) + +MEDIAPIPE_DEPS = [ + "//mediapipe/calculators/video:box_tracker_calculator", + "//mediapipe/calculators/video:flow_packager_calculator", + "//mediapipe/calculators/video:motion_analysis_calculator", + "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", +] + +mediapipe_binary_graph( + name = "parallel_tracker_binarypb", + graph = "testdata/parallel_tracker_graph.pbtxt", + output_name = "testdata/parallel_tracker.binarypb", + visibility = ["//visibility:public"], + deps = MEDIAPIPE_DEPS, +) + +mediapipe_binary_graph( + name = "tracker_binarypb", + graph = 
"testdata/tracker_graph.pbtxt", + output_name = "testdata/tracker.binarypb", + visibility = ["//visibility:public"], + deps = MEDIAPIPE_DEPS, +) + +cc_test( + name = "tracking_graph_test", + size = "small", + srcs = ["tracking_graph_test.cc"], + copts = ["-DPARALLEL_INVOKER_ACTIVE"] + select({ + "//mediapipe:apple": [], + "//mediapipe:android": [], + "//conditions:default": [], + }), + data = [ + ":testdata/lenna.png", + ":testdata/parallel_tracker.binarypb", + ":testdata/tracker.binarypb", + ], + deps = [ + ":box_tracker_calculator", + ":box_tracker_calculator_cc_proto", + ":flow_packager_calculator", + ":motion_analysis_calculator", + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/port:advanced_proto", + "//mediapipe/framework/port:core_proto", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:status", + "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + "//mediapipe/util/tracking:box_tracker_cc_proto", + "//mediapipe/util/tracking:tracking_cc_proto", + ], +) diff --git a/mediapipe/calculators/video/box_tracker_calculator.cc b/mediapipe/calculators/video/box_tracker_calculator.cc new file mode 100644 index 000000000..9963d864a --- /dev/null +++ b/mediapipe/calculators/video/box_tracker_calculator.cc @@ -0,0 +1,1271 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include +#include + +#include "absl/strings/numbers.h" +#include "mediapipe/calculators/video/box_tracker_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/video_stream_header.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/tool/options_util.h" +#include "mediapipe/util/tracking/box_tracker.h" +#include "mediapipe/util/tracking/tracking.h" +#include "mediapipe/util/tracking/tracking_visualization_utilities.h" + +namespace mediapipe { + +const char kOptionsTag[] = "OPTIONS"; + +// A calculator to track box positions over time. +// This calculator works in two modes: +// a) Streaming mode, forward tracking only uses per frame TRACKING TrackingData +// supplied by tracking. For faster processing use TRACK_TIME to explicitly +// request tracking results at higher FPS than supplied by TRACKING. +// b) Batch mode: Tracks from tracking chunk files as specified by CACHE_DIR +// side packet (forward and backward with multiple key framing support). +// NOTE: When using batch mode you might need some external logic +// to clear out the caching directory between runs / files; or otherwise +// stale chunk files might be used. 
+// +// Initial positions can be either supplied via calculator options or +// INITIAL_POS (not supported on mobile) side packet, but not both. + +// Input stream: +// TRACKING: Input tracking data (proto TrackingData, required if CACHE_DIR +// is not defined) +// TRACK_TIME: Timestamps that tracking results should be generated at. +// Optional. Results generated at a TRACK_TIME w/o corresponding +// TRACKING packet will be queued up and returned when the next +// TRACKING input is observed. For those packets also no +// visualization output will be generated. +// Can be Packet of any type. +// VIDEO: Optional input video stream tracked boxes are rendered over. +// START: Optional input stream with PreStream packet to begin processing. +// Typical use case: When used in batch mode have +// FlowPackagerCalculator emit a COMPLETE packet to indicate caching +// is completed. +// START_POS: Optional initial positions to be tracked as TimedBoxProtoList. +// Timestamp of the box is used, so box timestamp does not have to +// be monotonic. Assign monotonic increasing timestamps for +// START_POS, e.g. 1,2,3 per request. +// Supplied starting positions are 'fast forwarded', i.e. quickly +// tracked towards current track head, i.e. last received +// TrackingData and added to current set of tracked boxes. +// This is recommended to be used with SyncSetInputStreamHandler. +// START_POS_PROTO_STRING: Same as START_POS, but is in the form of serialized +// protobuffer std::string. When both START_POS and +// START_POS_PROTO_STRING are present, START_POS is used. Suggest +// to specify only one of them. +// RESTART_POS: Same as START_POS, but exclusively for receiving detection +// results from reacquisition. +// CANCEL_OBJECT_ID: Optional id of box to be removed. This is recommended +// to be used with SyncSetInputStreamHandler. 
+// RA_TRACK: Performs random access tracking within the specified +// tracking cache, which is specified in the options of this +// calculator BoxTrackerCalculatorOptions. Input is of type +// TimedBoxProtoList. +// Assumed to be supplied as pair +// [start0, stop0, start1, stop1, ...] of boxes, +// (that is list size() % 2 == 0), where position, id and time +// is used for start, and only time for stop; that is position +// is ignored. +// Assign monotonically increasing packet timestamps for RA_TRACK, +// e.g. 1,2,3; however the timestamp in TimedBoxProtoList +// can be in arbitrary order. +// Use with SyncSetInputStreamHandler in streaming mode only. +// RA_TRACK_PROTO_STRING: Same as RA_TRACK, but is in the form of serialized +// protobuffer std::string. When both RA_TRACK and +// RA_TRACK_PROTO_STRING are present, RA_TRACK is used. Suggest +// to specify only one of them. +// +// Output streams: +// VIZ: Optional output video stream with rendered box positions +// (requires VIDEO to be present) +// BOXES: Optional output stream of type TimedBoxProtoList for each +// initialized result. +// RA_BOXES: Optional output stream of type TimedBoxProtoList for each +// request in RA_TRACK. Same timestamp as request is used. +// +// Input side packets: +// INITIAL_POS: Optional initial positions to be tracked as text format proto +// of type TimedBoxProtoList. Can not be combined with initial +// position option. NOT SUPPORTED ON MOBILE. +// CACHE_DIR: Optional caching directory tracking chunk files are read +// from. 
+// +// +class BoxTrackerCalculator : public CalculatorBase { + public: + ~BoxTrackerCalculator() override = default; + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + protected: + void RenderStates(const std::vector& states, cv::Mat* mat); + void RenderInternalStates(const std::vector& states, + cv::Mat* mat); + + // MotionBox and corresponding PathSegment of results; used in streaming mode. + struct MotionBoxPath { + MotionBoxPath(MotionBox&& box_, PathSegment&& path_, bool reacq_ = false) + : box(std::move(box_)), path(std::move(path_)), reacquisition(reacq_) {} + + MotionBoxPath() = default; + + // Trims the state for forward/backward tracking. + void Trim(const int cache_size, const bool forward) { + if (forward) { + // Trims the box's states queue. + box.TrimFront(cache_size); + // Trims the path queue. + int trim_count = path.size() - cache_size; + while (trim_count-- > 0) { + path.pop_front(); + } + } else { // backward + // Trims the box's states queue. + box.TrimBack(cache_size); + // Trims the path queue. + int trim_count = path.size() - cache_size; + while (trim_count-- > 0) { + path.pop_back(); + } + } + } + + MotionBox box; + PathSegment path; + bool reacquisition; + }; + + // MotionBoxPath per unique id that we are tracking. + typedef std::unordered_map MotionBoxMap; + + // Performs tracking of all MotionBoxes in box_map by one frame forward or + // backward to or from data_frame_num using passed TrackingData. + // Specify destination timestamp and frame duration TrackingData was + // computed for. Used in streaming mode. + // Returns list of ids that failed. 
+ void StreamTrack(const TrackingData& data, int data_frame_num, + int64 dst_timestamp_ms, int64 duration_ms, bool forward, + MotionBoxMap* box_map, std::vector* failed_ids); + + // Fast forwards specified boxes from starting position to current play head + // and outputs successful boxes to box_map. + // Specify the timestamp boxes are tracked from via timestamp in each + // TimedBox. + void FastForwardStartPos(const TimedBoxProtoList& start_pos_list, + MotionBoxMap* box_map); + + // Performs random access tracking from box_list (start,stop) tuples and + // outputs results. + void OutputRandomAccessTrack(const TimedBoxProtoList& box_list, + CalculatorContext* cc); + + private: + BoxTrackerCalculatorOptions options_; + + TimedBoxProtoList initial_pos_; + + // Keeps tracks boxes that have already been initialized. + std::unordered_set initialized_ids_; + + // Non empty for batch mode tracking. + std::string cache_dir_; + // Ids to be tracked in batch_mode. + std::unordered_set batch_track_ids_; + + int frame_num_ = 0; + + // Boxes that are tracked in streaming mode. + MotionBoxMap streaming_motion_boxes_; + + std::unordered_map> last_tracked_boxes_; + int frame_num_since_reset_ = 0; + + // Cache used during streaming mode for fast forward tracking. + std::deque> tracking_data_cache_; + + // Indicator to track if box_tracker_ has started tracking. + bool tracking_issued_ = false; + std::unique_ptr box_tracker_; + + // If set, renders tracking data into VIZ stream. + bool visualize_tracking_data_ = false; + + // If set, renders the box state and internal box state into VIZ stream. + bool visualize_state_ = false; + bool visualize_internal_state_ = false; + + // Timestamps for every tracking data input frame. + std::deque track_timestamps_; + + // For pruning track_timestamps_ queue. + static const int kTrackTimestampsMinQueueSize; + + // The long-running index of the head of track_timestamps_. 
+ int track_timestamps_base_index_ = 0; + + // For pruning MotionBoxPath's state/path queues. + static const int kMotionBoxPathMinQueueSize; + + // Queued track time requests. + std::vector queued_track_requests_; + + // Add smooth transition between re-acquisition and previous tracked boxes. + // `result_box` is the tracking result of one specific timestamp. The smoothed + // result will be updated in place. + // `subframe_alpha` is from 0 to 1 (0, 1 repressents previous and current + // frame with TRACKING_DATA). Any frames with TRACK_TIME should interpolate in + // between. + void AddSmoothTransitionToOutputBox(int box_id, TimedBox* result_box, + float subframe_alpha = 1.0f); + + std::deque::iterator GetRandomAccessTimestampPos( + const TimedBoxProto& start, bool forward_track); + + std::deque>::iterator + GetRandomAccessStartData( + const std::deque::iterator& timestamp_pos); + + MotionBoxMap PrepareRandomAccessTrack( + const TimedBoxProto& start, int init_frame, bool forward_track, + const std::deque>::iterator& + start_data); + + bool RunForwardTrack( + const std::deque>::iterator& + start_data, + int init_frame, MotionBoxMap* single_map, int64 end_time_msec); + + bool RunBackwardTrack( + const std::deque>::iterator& + start_data, + int init_frame, MotionBoxMap* single_map, int64 end_time_msec); + + void ObtainResultOfRandomAccessTrack( + const MotionBoxMap& single_map, const TimedBoxProto& start, + int64 end_time_msec, + const std::unique_ptr& result_list); +}; + +REGISTER_CALCULATOR(BoxTrackerCalculator); + +// At least 2 timestamps need to be present in track_timestamps_ or streaming +// logic's duration calculation will break. +const int BoxTrackerCalculator::kTrackTimestampsMinQueueSize = 2; + +// At least 2: the newly added state, and one from the history. +const int BoxTrackerCalculator::kMotionBoxPathMinQueueSize = 2; + +namespace { + +// Convert box position according to rotation angle in degrees. 
+void ConvertCoordinateForRotation(float in_top, float in_left, float in_bottom, + float in_right, int rotation, float* out_top, + float* out_left, float* out_bottom, + float* out_right) { + CHECK(out_top != nullptr); + CHECK(out_left != nullptr); + CHECK(out_bottom != nullptr); + CHECK(out_right != nullptr); + const float in_center_x = (in_left + in_right) * 0.5f; + const float in_center_y = (in_top + in_bottom) * 0.5f; + const float in_width = in_right - in_left; + const float in_height = in_bottom - in_top; + CHECK_GT(in_width, 0); + CHECK_GT(in_height, 0); + float out_center_x; + float out_center_y; + float out_width; + float out_height; + switch (rotation) { + case 0: + out_center_x = in_center_x; + out_center_y = in_center_y; + out_width = in_width; + out_height = in_height; + break; + case -270: // FALL_THROUGH_INTENDED + case 90: + out_center_x = 1 - in_center_y; + out_center_y = in_center_x; + out_width = in_height; + out_height = in_width; + break; + case -180: // FALL_THROUGH_INTENDED + case 180: + out_center_x = 1 - in_center_x; + out_center_y = 1 - in_center_y; + out_width = in_width; + out_height = in_height; + break; + case -90: // FALL_THROUGH_INTENDED + case 270: + out_center_x = in_center_y; + out_center_y = 1 - in_center_x; + out_width = in_height; + out_height = in_width; + break; + default: + LOG(ERROR) << "invalid rotation " << rotation; + out_center_x = in_center_x; + out_center_y = in_center_y; + out_width = in_width; + out_height = in_height; + break; + } + *out_top = out_center_y - out_height * 0.5f; + *out_left = out_center_x - out_width * 0.5f; + *out_bottom = out_center_y + out_height * 0.5f; + *out_right = out_center_x + out_width * 0.5f; +} + +void AddStateToPath(const MotionBoxState& state, int64 time_msec, + PathSegment* path) { + CHECK(path); + TimedBox result; + TimedBoxFromMotionBoxState(state, &result); + result.time_msec = time_msec; + + const auto insert_pos = std::lower_bound(path->begin(), path->end(), result); + // Do not 
duplicate box positions. + if (insert_pos == path->end() || insert_pos->time_msec != time_msec) { + path->insert(insert_pos, + InternalTimedBox(result, new MotionBoxState(state))); + } else { + LOG(ERROR) << "Box at time " << time_msec << " already present; ignoring"; + } +} + +} // namespace. + +::mediapipe::Status BoxTrackerCalculator::GetContract(CalculatorContract* cc) { + if (cc->Inputs().HasTag("TRACKING")) { + cc->Inputs().Tag("TRACKING").Set(); + } + + if (cc->Inputs().HasTag("TRACK_TIME")) { + RET_CHECK(cc->Inputs().HasTag("TRACKING")) + << "TRACK_TIME needs TRACKING input"; + cc->Inputs().Tag("TRACK_TIME").SetAny(); + } + + if (cc->Inputs().HasTag("VIDEO")) { + cc->Inputs().Tag("VIDEO").Set(); + } + + if (cc->Inputs().HasTag("START")) { + // Actual packet content does not matter. + cc->Inputs().Tag("START").SetAny(); + } + + if (cc->Inputs().HasTag("START_POS")) { + cc->Inputs().Tag("START_POS").Set(); + } + + if (cc->Inputs().HasTag("START_POS_PROTO_STRING")) { + cc->Inputs().Tag("START_POS_PROTO_STRING").Set(); + } + + if (cc->Inputs().HasTag("RESTART_POS")) { + cc->Inputs().Tag("RESTART_POS").Set(); + } + + if (cc->Inputs().HasTag("CANCEL_OBJECT_ID")) { + cc->Inputs().Tag("CANCEL_OBJECT_ID").Set(); + } + + if (cc->Inputs().HasTag("RA_TRACK")) { + cc->Inputs().Tag("RA_TRACK").Set(); + } + + if (cc->Inputs().HasTag("RA_TRACK_PROTO_STRING")) { + cc->Inputs().Tag("RA_TRACK_PROTO_STRING").Set(); + } + + if (cc->Outputs().HasTag("VIZ")) { + RET_CHECK(cc->Inputs().HasTag("VIDEO")) + << "Output stream VIZ requires VIDEO to be present."; + cc->Outputs().Tag("VIZ").Set(); + } + + if (cc->Outputs().HasTag("BOXES")) { + cc->Outputs().Tag("BOXES").Set(); + } + + if (cc->Outputs().HasTag("RA_BOXES")) { + cc->Outputs().Tag("RA_BOXES").Set(); + } + +#if defined(__ANDROID__) || defined(__APPLE__) || defined(__EMSCRIPTEN__) + RET_CHECK(!cc->InputSidePackets().HasTag("INITIAL_POS")) + << "Unsupported on mobile"; +#else + if (cc->InputSidePackets().HasTag("INITIAL_POS")) 
{ + cc->InputSidePackets().Tag("INITIAL_POS").Set(); + } +#endif // defined(__ANDROID__) || defined(__APPLE__) || defined(__EMSCRIPTEN__) + + if (cc->InputSidePackets().HasTag("CACHE_DIR")) { + cc->InputSidePackets().Tag("CACHE_DIR").Set(); + } + + RET_CHECK(cc->Inputs().HasTag("TRACKING") != + cc->InputSidePackets().HasTag("CACHE_DIR")) + << "Either TRACKING or CACHE_DIR needs to be specified."; + + if (cc->InputSidePackets().HasTag(kOptionsTag)) { + cc->InputSidePackets().Tag(kOptionsTag).Set(); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status BoxTrackerCalculator::Open(CalculatorContext* cc) { + options_ = tool::RetrieveOptions(cc->Options(), + cc->InputSidePackets(), kOptionsTag); + + RET_CHECK(!cc->InputSidePackets().HasTag("INITIAL_POS") || + !options_.has_initial_position()) + << "Can not specify initial position as side packet and via options"; + + if (options_.has_initial_position()) { + initial_pos_ = options_.initial_position(); + } + +#if !defined(__ANDROID__) && !defined(__APPLE__) && !defined(__EMSCRIPTEN__) + if (cc->InputSidePackets().HasTag("INITIAL_POS")) { + LOG(INFO) << "Parsing: " + << cc->InputSidePackets().Tag("INITIAL_POS").Get(); + initial_pos_ = ParseTextProtoOrDie( + cc->InputSidePackets().Tag("INITIAL_POS").Get()); + } +#endif // !defined(__ANDROID__) && !defined(__APPLE__) && + // !defined(__EMSCRIPTEN__) + + // Compile list of ids to be tracked. + for (const auto& pos : initial_pos_.box()) { + RET_CHECK(pos.id() >= 0) << "Requires id to be set"; + batch_track_ids_.insert(pos.id()); + } + + visualize_tracking_data_ = + options_.visualize_tracking_data() && cc->Outputs().HasTag("VIZ"); + visualize_state_ = options_.visualize_state() && cc->Outputs().HasTag("VIZ"); + visualize_internal_state_ = + options_.visualize_internal_state() && cc->Outputs().HasTag("VIZ"); + + // Force recording of internal state for rendering. 
+ if (visualize_internal_state_) { + options_.mutable_tracker_options() + ->mutable_track_step_options() + ->set_return_internal_state(true); + } + + if (visualize_state_ || visualize_internal_state_) { + options_.mutable_tracker_options()->set_record_path_states(true); + } + + if (cc->InputSidePackets().HasTag("CACHE_DIR")) { + cache_dir_ = cc->InputSidePackets().Tag("CACHE_DIR").Get(); + RET_CHECK(!cache_dir_.empty()); + box_tracker_.reset(new BoxTracker(cache_dir_, options_.tracker_options())); + } else { + // Check that all boxes have a unique id. + RET_CHECK(initial_pos_.box_size() == batch_track_ids_.size()) + << "In streaming mode every box must be given its unique id"; + } + + if (options_.streaming_track_data_cache_size() > 0) { + RET_CHECK(!cc->InputSidePackets().HasTag("CACHE_DIR")) + << "Streaming mode not compatible with cache dir."; + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status BoxTrackerCalculator::Process(CalculatorContext* cc) { + // Batch mode, issue tracking requests. + if (box_tracker_ && !tracking_issued_) { + for (const auto& pos : initial_pos_.box()) { + box_tracker_->NewBoxTrack(TimedBox::FromProto(pos), pos.id()); + } + tracking_issued_ = true; + } + + const Timestamp& timestamp = cc->InputTimestamp(); + if (timestamp == Timestamp::PreStream()) { + // Indicator packet. + return ::mediapipe::OkStatus(); + } + + InputStream* track_stream = cc->Inputs().HasTag("TRACKING") + ? &(cc->Inputs().Tag("TRACKING")) + : nullptr; + InputStream* track_time_stream = cc->Inputs().HasTag("TRACK_TIME") + ? &(cc->Inputs().Tag("TRACK_TIME")) + : nullptr; + + // Cache tracking data if possible. 
+ if (track_stream && !track_stream->IsEmpty()) { + const TrackingData& track_data = track_stream->Get(); + const int track_cache_size = options_.streaming_track_data_cache_size(); + if (track_cache_size > 0) { + tracking_data_cache_.push_back(std::make_pair(timestamp, track_data)); + while (tracking_data_cache_.size() > track_cache_size) { + tracking_data_cache_.pop_front(); + } + } + track_timestamps_.push_back(timestamp); + int trim_count = track_timestamps_.size() - + std::max(track_cache_size, kTrackTimestampsMinQueueSize); + if (trim_count > 0) { + track_timestamps_base_index_ += trim_count; + + while (trim_count-- > 0) { + track_timestamps_.pop_front(); + } + } + } + + InputStream* start_pos_stream = cc->Inputs().HasTag("START_POS") + ? &(cc->Inputs().Tag("START_POS")) + : nullptr; + + MotionBoxMap fast_forward_boxes; + if (start_pos_stream && !start_pos_stream->IsEmpty()) { + // Try to fast forward boxes to current tracking head. + const TimedBoxProtoList& start_pos_list = + start_pos_stream->Get(); + FastForwardStartPos(start_pos_list, &fast_forward_boxes); + } + + InputStream* start_pos_proto_string_stream = + cc->Inputs().HasTag("START_POS_PROTO_STRING") + ? &(cc->Inputs().Tag("START_POS_PROTO_STRING")) + : nullptr; + if (start_pos_stream == nullptr || start_pos_stream->IsEmpty()) { + if (start_pos_proto_string_stream && + !start_pos_proto_string_stream->IsEmpty()) { + auto start_pos_list_str = + start_pos_proto_string_stream->Get(); + TimedBoxProtoList start_pos_list; + start_pos_list.ParseFromString(start_pos_list_str); + FastForwardStartPos(start_pos_list, &fast_forward_boxes); + } + } + + InputStream* restart_pos_stream = cc->Inputs().HasTag("RESTART_POS") + ? 
&(cc->Inputs().Tag("RESTART_POS")) + : nullptr; + + if (restart_pos_stream && !restart_pos_stream->IsEmpty()) { + const TimedBoxProtoList& restart_pos_list = + restart_pos_stream->Get(); + FastForwardStartPos(restart_pos_list, &fast_forward_boxes); + } + + InputStream* cancel_object_id_stream = + cc->Inputs().HasTag("CANCEL_OBJECT_ID") + ? &(cc->Inputs().Tag("CANCEL_OBJECT_ID")) + : nullptr; + if (cancel_object_id_stream && !cancel_object_id_stream->IsEmpty()) { + const int cancel_object_id = cancel_object_id_stream->Get(); + if (streaming_motion_boxes_.erase(cancel_object_id) == 0) { + LOG(WARNING) << "box id " << cancel_object_id << " does not exist."; + } + } + + cv::Mat input_view; + cv::Mat viz_view; + std::unique_ptr viz_frame; + + TrackingData track_data_to_render; + + if (cc->Outputs().HasTag("VIZ")) { + InputStream* video_stream = &(cc->Inputs().Tag("VIDEO")); + if (!video_stream->IsEmpty()) { + input_view = formats::MatView(&video_stream->Get()); + + viz_frame.reset(new ImageFrame()); + viz_frame->CopyFrom(video_stream->Get(), 16); + viz_view = formats::MatView(viz_frame.get()); + } + } + + // Results to be output or rendered, list of TimedBox for every id that are + // present at this frame. + TimedBoxProtoList box_track_list; + + CHECK(box_tracker_ || track_stream) + << "Expected either batch or streaming mode"; + + // Corresponding list of box states for rendering. For each id present at + // this frame stores closest 1-2 states. + std::vector> box_state_list; + int64 timestamp_msec = timestamp.Value() / 1000; + + if (box_tracker_) { // Batch mode. + // Ensure tracking has terminated. + box_tracker_->WaitForAllOngoingTracks(); + + // Cycle through ids. + for (int id : batch_track_ids_) { + TimedBox result; + std::vector states; + std::vector* states_ptr = + (visualize_state_ || visualize_internal_state_) ? 
&states : nullptr; + + if (box_tracker_->GetTimedPosition(id, timestamp_msec, &result, + states_ptr)) { + TimedBoxProto proto = result.ToProto(); + proto.set_id(id); + *box_track_list.add_box() = std::move(proto); + + if (states_ptr) { + box_state_list.push_back(*states_ptr); + } + } + } + + if (visualize_tracking_data_) { + constexpr int kVizId = -1; + box_tracker_->GetTrackingData(kVizId, timestamp_msec, + &track_data_to_render); + } + } else { + // Streaming mode. + // If track data is available advance all boxes by new data. + if (!track_stream->IsEmpty()) { + const TrackingData& track_data = track_stream->Get(); + + if (visualize_tracking_data_) { + track_data_to_render = track_data; + } + + const int64 time_ms = track_timestamps_.back().Value() / 1000; + const int64 duration_ms = + track_timestamps_.size() > 1 + ? time_ms - track_timestamps_.rbegin()[1].Value() / 1000 + : 0; + + std::vector failed_boxes; + StreamTrack(track_data, frame_num_, time_ms, duration_ms, + true, // forward. + &streaming_motion_boxes_, &failed_boxes); + + // Add fast forward boxes. + if (!fast_forward_boxes.empty()) { + for (const auto& box : fast_forward_boxes) { + streaming_motion_boxes_.emplace(box.first, box.second); + } + fast_forward_boxes.clear(); + } + + // Remove failed boxes. + for (int id : failed_boxes) { + streaming_motion_boxes_.erase(id); + } + + // Init new boxes once data from previous time to current is available. 
+ for (const auto& pos : initial_pos_.box()) { + if (timestamp_msec - pos.time_msec() >= 0 && + initialized_ids_.find(pos.id()) == initialized_ids_.end()) { + MotionBoxState init_state; + MotionBoxStateFromTimedBox(TimedBox::FromProto(pos), &init_state); + + InitializeInliersOutliersInMotionBoxState(track_data, &init_state); + InitializePnpHomographyInMotionBoxState( + track_data, options_.tracker_options().track_step_options(), + &init_state); + + TrackStepOptions track_step_options = + options_.tracker_options().track_step_options(); + ChangeTrackingDegreesBasedOnStartPos(pos, &track_step_options); + MotionBox init_box(track_step_options); + + // Init at previous frame. + init_box.ResetAtFrame(frame_num_, init_state); + + PathSegment init_path; + AddStateToPath(init_state, timestamp_msec, &init_path); + + streaming_motion_boxes_.emplace( + pos.id(), MotionBoxPath(std::move(init_box), std::move(init_path), + pos.reacquisition())); + initialized_ids_.insert(pos.id()); + } + } + + ++frame_num_; + } else { + // Track stream is empty, if anything is requested on track_time_stream + // queue up requests. + if (track_time_stream && !track_time_stream->IsEmpty()) { + queued_track_requests_.push_back(timestamp); + } + } + + // Can output be generated? + if (!track_stream->IsEmpty()) { + ++frame_num_since_reset_; + + // Generate results for queued up request. 
+ if (cc->Outputs().HasTag("BOXES") && !queued_track_requests_.empty()) { + for (int j = 0; j < queued_track_requests_.size(); ++j) { + const Timestamp& past_time = queued_track_requests_[j]; + RET_CHECK(past_time.Value() < timestamp.Value()) + << "Inconsistency, queued up requests should occur in past"; + std::unique_ptr past_box_list( + new TimedBoxProtoList()); + + for (auto& motion_box_path : streaming_motion_boxes_) { + TimedBox result_box; + TimedBoxAtTime(motion_box_path.second.path, + past_time.Value() / 1000, &result_box, nullptr); + + const float subframe_alpha = + static_cast(j + 1) / (queued_track_requests_.size() + 1); + AddSmoothTransitionToOutputBox(motion_box_path.first, &result_box, + subframe_alpha); + + TimedBoxProto proto = result_box.ToProto(); + proto.set_id(motion_box_path.first); + proto.set_reacquisition(motion_box_path.second.reacquisition); + *past_box_list->add_box() = std::move(proto); + } + + // Output for every time. + cc->Outputs().Tag("BOXES").Add(past_box_list.release(), past_time); + } + + queued_track_requests_.clear(); + } + + // Generate result at current frame. + for (auto& motion_box_path : streaming_motion_boxes_) { + TimedBox result_box; + MotionBoxState result_state; + TimedBoxAtTime(motion_box_path.second.path, timestamp_msec, &result_box, + &result_state); + + AddSmoothTransitionToOutputBox(motion_box_path.first, &result_box); + + TimedBoxProto proto = result_box.ToProto(); + proto.set_id(motion_box_path.first); + proto.set_reacquisition(motion_box_path.second.reacquisition); + *box_track_list.add_box() = std::move(proto); + + if (visualize_state_ || visualize_internal_state_) { + box_state_list.push_back({result_state}); + } + } + } + // end streaming mode case. + } + + // Save a snapshot of latest tracking results before override with fast + // forwarded start pos. + if (!fast_forward_boxes.empty()) { + frame_num_since_reset_ = 0; + last_tracked_boxes_.clear(); + // Add any remaining fast forward boxes. 
For example occurs if START_POS is + // specified with non-matching TRACKING mode + for (const auto& reset_box : fast_forward_boxes) { + const auto tracked_box_iter = + streaming_motion_boxes_.find(reset_box.first); + if (tracked_box_iter != streaming_motion_boxes_.end()) { + if (!reset_box.second.path.empty() && + !tracked_box_iter->second.path.empty()) { + last_tracked_boxes_[reset_box.first] = + std::make_pair(tracked_box_iter->second.path.back(), + reset_box.second.path.back()); + } + } + + // Override previous tracking with reset start pos. + streaming_motion_boxes_[reset_box.first] = reset_box.second; + } + } + + if (viz_frame) { + if (visualize_tracking_data_) { + RenderTrackingData(track_data_to_render, &viz_view); + } + + if (visualize_state_) { + for (const auto& state_vec : box_state_list) { + RenderStates(state_vec, &viz_view); + } + } + + if (visualize_internal_state_) { + for (const auto& state_vec : box_state_list) { + RenderInternalStates(state_vec, &viz_view); + } + } + + for (const auto& box : box_track_list.box()) { + RenderBox(box, &viz_view); + } + } + + // Handle random access track requests. + InputStream* ra_track_stream = cc->Inputs().HasTag("RA_TRACK") + ? &(cc->Inputs().Tag("RA_TRACK")) + : nullptr; + + if (ra_track_stream && !ra_track_stream->IsEmpty()) { + RET_CHECK(!box_tracker_) << "Random access only for streaming mode " + << "implemented."; + const TimedBoxProtoList& box_list = + ra_track_stream->Get(); + RET_CHECK(box_list.box_size() % 2 == 0) + << "Expect even number of (start,end) tuples but get " + << box_list.box_size(); + OutputRandomAccessTrack(box_list, cc); + } + + InputStream* ra_track_proto_string_stream = + cc->Inputs().HasTag("RA_TRACK_PROTO_STRING") + ? 
&(cc->Inputs().Tag("RA_TRACK_PROTO_STRING")) + : nullptr; + if (ra_track_stream == nullptr || ra_track_stream->IsEmpty()) { + if (ra_track_proto_string_stream && + !ra_track_proto_string_stream->IsEmpty()) { + RET_CHECK(!box_tracker_) << "Random access only for streaming mode " + << "implemented."; + auto box_list_str = ra_track_proto_string_stream->Get(); + TimedBoxProtoList box_list; + box_list.ParseFromString(box_list_str); + RET_CHECK(box_list.box_size() % 2 == 0) + << "Expect even number of (start,end) tuples but get " + << box_list.box_size(); + OutputRandomAccessTrack(box_list, cc); + } + } + + // Always output in batch, only output in streaming if tracking data + // is present (might be in fast forward mode instead). + if (cc->Outputs().HasTag("BOXES") && + (box_tracker_ || !track_stream->IsEmpty())) { + std::unique_ptr boxes(new TimedBoxProtoList()); + *boxes = std::move(box_track_list); + cc->Outputs().Tag("BOXES").Add(boxes.release(), timestamp); + } + + if (viz_frame) { + cc->Outputs().Tag("VIZ").Add(viz_frame.release(), timestamp); + } + + return ::mediapipe::OkStatus(); +} + +void BoxTrackerCalculator::AddSmoothTransitionToOutputBox( + int box_id, TimedBox* result_box, float subframe_alpha) { + if (options_.start_pos_transition_frames() > 0 && + frame_num_since_reset_ <= options_.start_pos_transition_frames()) { + const auto& box_iter = last_tracked_boxes_.find(box_id); + if (box_iter != last_tracked_boxes_.end()) { + // We first compute the blend of last tracked box with reset box at the + // same timestamp as blend_start = alpha * reset_box + (1 - alpha) * + // last_tracked_box. 
Then apply the motion from current tracking to reset + // pos to the blended start pos as: result_box = blend_start + + // (current_box - reset_box) With some derivation, we can get result_box = + // (1 - alpha) * (last_track - reset_box) + current_box + TimedBox tmp_box = TimedBox::Blend(box_iter->second.first, + box_iter->second.second, 1.0, -1.0); + const float alpha = (frame_num_since_reset_ - 1.0f + subframe_alpha) / + options_.start_pos_transition_frames(); + *result_box = TimedBox::Blend(tmp_box, *result_box, 1.0 - alpha, 1.0); + } + } +} + +void BoxTrackerCalculator::OutputRandomAccessTrack( + const TimedBoxProtoList& box_list, CalculatorContext* cc) { + std::unique_ptr result_list(new TimedBoxProtoList()); + + for (int i = 0; i < box_list.box_size(); i += 2) { + const TimedBoxProto start = box_list.box(i); + int64 end_time_msec = box_list.box(i + 1).time_msec(); + const bool forward_track = start.time_msec() < end_time_msec; + + if (track_timestamps_.empty()) { + LOG(WARNING) << "No tracking data cached yet."; + continue; + } + + // Performing the range check in msec (b/138399787) + const int64 tracking_start_timestamp_msec = + track_timestamps_.front().Microseconds() / 1000; + const int64 tracking_end_timestamp_msec = + track_timestamps_.back().Microseconds() / 1000; + if (start.time_msec() < tracking_start_timestamp_msec) { + LOG(WARNING) << "Request start timestamp " << start.time_msec() + << " too old. First frame in the window: " + << tracking_start_timestamp_msec; + continue; + } + if (start.time_msec() > tracking_end_timestamp_msec) { + LOG(WARNING) << "Request start timestamp " << start.time_msec() + << " too new. Last frame in the window: " + << tracking_end_timestamp_msec; + continue; + } + if (end_time_msec < tracking_start_timestamp_msec) { + LOG(WARNING) << "Request end timestamp " << end_time_msec + << " too old. 
First frame in the window: " + << tracking_start_timestamp_msec; + continue; + } + if (end_time_msec > tracking_end_timestamp_msec) { + LOG(WARNING) << "Request end timestamp " << end_time_msec + << " too new. Last frame in the window: " + << tracking_end_timestamp_msec; + continue; + } + + std::deque::iterator timestamp_pos = + GetRandomAccessTimestampPos(start, forward_track); + + if (timestamp_pos == track_timestamps_.end()) { + LOG(ERROR) << "Random access outside cached range"; + continue; + } + + // Locate start of tracking data. + std::deque>::iterator start_data = + GetRandomAccessStartData(timestamp_pos); + + // TODO: Interpolate random access tracking start_data instead + // of dropping the request in the case of missing processed frame. + if (start_data == tracking_data_cache_.end()) { + LOG(ERROR) << "Random access starts at unprocessed frame."; + continue; + } + + const int init_frame = timestamp_pos - track_timestamps_.begin() + + track_timestamps_base_index_; + CHECK_GE(init_frame, 0); + + MotionBoxMap single_map = + PrepareRandomAccessTrack(start, init_frame, forward_track, start_data); + bool track_error = forward_track + ? 
RunForwardTrack(start_data, init_frame, + &single_map, end_time_msec) + : RunBackwardTrack(start_data, init_frame, + &single_map, end_time_msec); + + if (track_error) { + LOG(ERROR) << "Could not track box."; + continue; + } + + ObtainResultOfRandomAccessTrack(single_map, start, end_time_msec, + result_list); + } + + cc->Outputs() + .Tag("RA_BOXES") + .Add(result_list.release(), cc->InputTimestamp()); +} + +std::deque::iterator +BoxTrackerCalculator::GetRandomAccessTimestampPos(const TimedBoxProto& start, + bool forward_track) { + std::deque::iterator timestamp_pos; + Timestamp timestamp(start.time_msec() * 1000); + if (forward_track) { + timestamp_pos = std::upper_bound(track_timestamps_.begin(), + track_timestamps_.end(), timestamp); + } else { + timestamp_pos = std::lower_bound(track_timestamps_.begin(), + track_timestamps_.end(), timestamp); + } + return timestamp_pos; +} + +std::deque>::iterator +BoxTrackerCalculator::GetRandomAccessStartData( + const std::deque::iterator& timestamp_pos) { + std::deque>::iterator start_data = + std::find_if(tracking_data_cache_.begin(), tracking_data_cache_.end(), + [timestamp_pos]( + const std::pair& item) -> bool { + return item.first == *timestamp_pos; + }); + return start_data; +} + +BoxTrackerCalculator::MotionBoxMap +BoxTrackerCalculator::PrepareRandomAccessTrack( + const TimedBoxProto& start, int init_frame, bool forward_track, + const std::deque>::iterator& + start_data) { + MotionBoxMap single_map; + // Init state at request time. 
+ MotionBoxState init_state; + MotionBoxStateFromTimedBox(TimedBox::FromProto(start), &init_state); + + InitializeInliersOutliersInMotionBoxState(start_data->second, &init_state); + InitializePnpHomographyInMotionBoxState( + start_data->second, options_.tracker_options().track_step_options(), + &init_state); + + TrackStepOptions track_step_options = + options_.tracker_options().track_step_options(); + ChangeTrackingDegreesBasedOnStartPos(start, &track_step_options); + MotionBox init_box(track_step_options); + init_box.ResetAtFrame(init_frame - (forward_track ? 1 : 0), init_state); + + PathSegment init_path; + + // Avoid duplicating start time in case TrackingData has same value. + // Note: For backward tracking we always arrive at an earlier frame, so + // no duplication can happen, see StreamTrack for details. + if (start.time_msec() != start_data->first.Value() / 1000 || !forward_track) { + AddStateToPath(init_state, start.time_msec(), &init_path); + } + + single_map.emplace(start.id(), + MotionBoxPath(std::move(init_box), std::move(init_path))); + return single_map; +} + +bool BoxTrackerCalculator::RunForwardTrack( + const std::deque>::iterator& start_data, + int init_frame, MotionBoxMap* single_map, int64 end_time_msec) { + int curr_frame = init_frame; + for (auto cache_pos = start_data; cache_pos != tracking_data_cache_.end(); + ++cache_pos, ++curr_frame) { + std::vector failed_box; + const int64 dst_time_msec = cache_pos->first.Value() / 1000; + const int64 curr_duration = + (cache_pos == tracking_data_cache_.begin()) + ? 
0 + : (cache_pos[0].first.Value() - cache_pos[-1].first.Value()) / 1000; + StreamTrack(cache_pos->second, curr_frame, dst_time_msec, curr_duration, + true, // forward + single_map, &failed_box); + if (!failed_box.empty()) { + return true; + } + if (dst_time_msec > end_time_msec) { + return false; + } + } + return false; +} + +bool BoxTrackerCalculator::RunBackwardTrack( + const std::deque>::iterator& start_data, + int init_frame, MotionBoxMap* single_map, int64 end_time_msec) { + int curr_frame = init_frame; + for (auto cache_pos = start_data; cache_pos != tracking_data_cache_.begin(); + --cache_pos, --curr_frame) { + std::vector failed_box; + const int64 dst_time_msec = cache_pos[-1].first.Value() / 1000; + const int64 curr_duration = + (cache_pos[0].first.Value() - cache_pos[-1].first.Value()) / 1000; + StreamTrack(cache_pos->second, curr_frame, dst_time_msec, curr_duration, + false, // backward + single_map, &failed_box); + if (!failed_box.empty()) { + return true; + } + if (dst_time_msec < end_time_msec) { + return false; + } + } + return false; +} + +void BoxTrackerCalculator::ObtainResultOfRandomAccessTrack( + const MotionBoxMap& single_map, const TimedBoxProto& start, + int64 end_time_msec, + const std::unique_ptr& result_list) { + const MotionBoxPath& result_path = single_map.find(start.id())->second; + TimedBox result_box; + TimedBoxAtTime(result_path.path, end_time_msec, &result_box, nullptr); + TimedBoxProto proto = result_box.ToProto(); + proto.set_id(start.id()); + *result_list->add_box() = std::move(proto); +} + +void BoxTrackerCalculator::RenderStates( + const std::vector& states, cv::Mat* mat) { + for (int k = 0; k < states.size(); ++k) { + const bool print_stats = k == 0; + RenderState(states[k], print_stats, mat); + } +} + +void BoxTrackerCalculator::RenderInternalStates( + const std::vector& states, cv::Mat* mat) { + for (const MotionBoxState& state : states) { + RenderInternalState(state.internal(), mat); + } +} + +void 
BoxTrackerCalculator::StreamTrack(const TrackingData& data, + int data_frame_num, + int64 dst_timestamp_ms, + int64 duration_ms, bool forward, + MotionBoxMap* box_map, + std::vector* failed_ids) { + CHECK(box_map); + CHECK(failed_ids); + + // Track all existing boxes by one frame. + MotionVectorFrame mvf; // Holds motion from current to previous frame. + MotionVectorFrameFromTrackingData(data, &mvf); + + if (forward) { + MotionVectorFrame mvf_inverted; + InvertMotionVectorFrame(mvf, &mvf_inverted); + std::swap(mvf, mvf_inverted); + } + + if (duration_ms > 0) { + mvf.duration_ms = duration_ms; + } + + const int from_frame = data_frame_num - (forward ? 1 : 0); + const int to_frame = forward ? from_frame + 1 : from_frame - 1; + + for (auto& motion_box : *box_map) { + if (!motion_box.second.box.TrackStep(from_frame, // from frame. + mvf, forward)) { + failed_ids->push_back(motion_box.first); + LOG(INFO) << "lost track. pushed failed id: " << motion_box.first; + } else { + // Store result. + PathSegment& path = motion_box.second.path; + const MotionBoxState& result_state = + motion_box.second.box.StateAtFrame(to_frame); + AddStateToPath(result_state, dst_timestamp_ms, &path); + // motion_box has got new tracking state/path. Now trimming it. + const int cache_size = + std::max(options_.streaming_track_data_cache_size(), + kMotionBoxPathMinQueueSize); + motion_box.second.Trim(cache_size, forward); + } + } +} + +void BoxTrackerCalculator::FastForwardStartPos( + const TimedBoxProtoList& start_pos_list, MotionBoxMap* box_map) { + for (const TimedBoxProto& start_pos : start_pos_list.box()) { + Timestamp timestamp(start_pos.time_msec() * 1000); + // Locate corresponding frame number for starting position. As TrackingData + // stores motion from current to last frame; we are using the data after + // this frame for tracking. 
+ auto timestamp_pos = std::lower_bound(track_timestamps_.begin(), + track_timestamps_.end(), timestamp); + + if (timestamp_pos == track_timestamps_.end()) { + LOG(WARNING) << "Received start pos beyond current timestamp, " + << "Starting to track once frame arrives."; + *initial_pos_.add_box() = start_pos; + continue; + } + + // Start at previous frame. + const int init_frame = timestamp_pos - track_timestamps_.begin() + + track_timestamps_base_index_; + CHECK_GE(init_frame, 0); + + // Locate corresponding tracking data. + auto start_data = std::find_if( + tracking_data_cache_.begin(), tracking_data_cache_.end(), + [timestamp_pos](const std::pair& item) + -> bool { return item.first == timestamp_pos[0]; }); + + if (start_data == tracking_data_cache_.end()) { + LOG(ERROR) << "Box to fast forward outside tracking data cache. Ignoring." + << " To avoid this error consider increasing the cache size."; + continue; + } + + // Init state at request time. + MotionBoxState init_state; + MotionBoxStateFromTimedBox(TimedBox::FromProto(start_pos), &init_state); + + InitializeInliersOutliersInMotionBoxState(start_data->second, &init_state); + InitializePnpHomographyInMotionBoxState( + start_data->second, options_.tracker_options().track_step_options(), + &init_state); + + TrackStepOptions track_step_options = + options_.tracker_options().track_step_options(); + ChangeTrackingDegreesBasedOnStartPos(start_pos, &track_step_options); + MotionBox init_box(track_step_options); + init_box.ResetAtFrame(init_frame, init_state); + + int curr_frame = init_frame + 1; + MotionBoxMap single_map; + PathSegment init_path; + AddStateToPath(init_state, timestamp_pos[0].Value() / 1000, &init_path); + single_map.emplace(start_pos.id(), + MotionBoxPath(std::move(init_box), std::move(init_path), + start_pos.reacquisition())); + bool track_error = false; + + for (auto cache_pos = start_data + 1; + cache_pos != tracking_data_cache_.end(); ++cache_pos, ++curr_frame) { + std::vector failed_box; + const 
int64 curr_time_msec = cache_pos->first.Value() / 1000; + const int64 curr_duration = + (cache_pos[0].first.Value() - cache_pos[-1].first.Value()) / 1000; + StreamTrack(cache_pos->second, curr_frame, curr_time_msec, curr_duration, + true, // forward + &single_map, &failed_box); + if (!failed_box.empty()) { + LOG(WARNING) << "Unable to fast forward box at frame " << curr_frame; + track_error = true; + break; + } + } + + if (!track_error) { + // Fast forward successful. + if (box_map->find(start_pos.id()) != box_map->end()) { + DLOG(ERROR) << "Fast forward successful, but box with same id " + << "exists already."; + } else { + // Add to set of currently tracked boxes. + const MotionBoxPath& result = single_map.find(start_pos.id())->second; + box_map->emplace(start_pos.id(), result); + } + } + } +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/video/box_tracker_calculator.proto b/mediapipe/calculators/video/box_tracker_calculator.proto new file mode 100644 index 000000000..200108201 --- /dev/null +++ b/mediapipe/calculators/video/box_tracker_calculator.proto @@ -0,0 +1,55 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/util/tracking/box_tracker.proto"; + +message BoxTrackerCalculatorOptions { + extend CalculatorOptions { + optional BoxTrackerCalculatorOptions ext = 268767860; + } + + optional BoxTrackerOptions tracker_options = 1; + + // Initial position to be tracked. Can also be supplied as side packet or + // as input stream. + optional TimedBoxProtoList initial_position = 2; + + // If set and VIZ stream is present, renders tracking data into the + // visualization. + optional bool visualize_tracking_data = 3 [default = false]; + + // If set and VIZ stream is present, renders the box state + // into the visualization. + optional bool visualize_state = 4 [default = false]; + + // If set and VIZ stream is present, renders the internal box state + // into the visualization. + optional bool visualize_internal_state = 5 [default = false]; + + // Size of the track data cache during streaming mode. This allows to buffer + // track_data's for fast forward tracking, i.e. any TimedBox received + // via input stream START_POS can be tracked towards the current track head + // (i.e. last received TrackingData). Measured in number of frames. + optional int32 streaming_track_data_cache_size = 6 [default = 0]; + + // Add a transition period of N frames to smooth the jump from original + // tracking to reset start pos with motion compensation. The transition will + // be a linear decay of original tracking result. 0 means no transition. + optional int32 start_pos_transition_frames = 7 [default = 0]; +} diff --git a/mediapipe/calculators/video/flow_packager_calculator.cc b/mediapipe/calculators/video/flow_packager_calculator.cc new file mode 100644 index 000000000..f871433eb --- /dev/null +++ b/mediapipe/calculators/video/flow_packager_calculator.cc @@ -0,0 +1,281 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include + +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "mediapipe/calculators/video/flow_packager_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/flow_packager.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +using mediapipe::CameraMotion; +using mediapipe::FlowPackager; +using mediapipe::RegionFlowFeatureList; +using mediapipe::TrackingData; +using mediapipe::TrackingDataChunk; + +// A calculator that packages input CameraMotion and RegionFlowFeatureList +// into a TrackingData and optionally writes TrackingDataChunks to file. +// +// Input stream: +// FLOW: Input region flow (proto RegionFlowFeatureList). +// CAMERA: Input camera stream (proto CameraMotion, optional). +// +// Input side packets: +// CACHE_DIR: Optional caching directory tracking files are written to. +// +// Output streams. +// TRACKING: Output tracking data (proto TrackingData, per frame +// optional). +// TRACKING_CHUNK: Output tracking chunks (proto TrackingDataChunk, +// per chunk, optional), output at the first timestamp +// of each chunk. 
+// COMPLETE: Optional output packet sent on PreStream to +// to signal downstream calculators that all data has been +// processed and calculator is closed. Can be used to indicate +// that all data as been written to CACHE_DIR. +class FlowPackagerCalculator : public CalculatorBase { + public: + ~FlowPackagerCalculator() override = default; + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + ::mediapipe::Status Close(CalculatorContext* cc) override; + + // Writes passed chunk to disk. + void WriteChunk(const TrackingDataChunk& chunk) const; + + // Initializes next chunk for tracking beginning from last frame of + // current chunk (Chunking is design with one frame overlap). + void PrepareCurrentForNextChunk(TrackingDataChunk* chunk); + + private: + FlowPackagerCalculatorOptions options_; + + // Caching options. + bool use_caching_ = false; + bool build_chunk_ = false; + std::string cache_dir_; + int chunk_idx_ = -1; + TrackingDataChunk tracking_chunk_; + + int frame_idx_ = 0; + + Timestamp prev_timestamp_; + std::unique_ptr flow_packager_; +}; + +REGISTER_CALCULATOR(FlowPackagerCalculator); + +::mediapipe::Status FlowPackagerCalculator::GetContract( + CalculatorContract* cc) { + if (!cc->Inputs().HasTag("FLOW")) { + return tool::StatusFail("No input flow was specified."); + } + + cc->Inputs().Tag("FLOW").Set(); + + if (cc->Inputs().HasTag("CAMERA")) { + cc->Inputs().Tag("CAMERA").Set(); + } + if (cc->Outputs().HasTag("TRACKING")) { + cc->Outputs().Tag("TRACKING").Set(); + } + if (cc->Outputs().HasTag("TRACKING_CHUNK")) { + cc->Outputs().Tag("TRACKING_CHUNK").Set(); + } + if (cc->Outputs().HasTag("COMPLETE")) { + cc->Outputs().Tag("COMPLETE").Set(); + } + + if (cc->InputSidePackets().HasTag("CACHE_DIR")) { + cc->InputSidePackets().Tag("CACHE_DIR").Set(); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status 
FlowPackagerCalculator::Open(CalculatorContext* cc) { + options_ = cc->Options(); + + flow_packager_.reset(new FlowPackager(options_.flow_packager_options())); + + use_caching_ = cc->InputSidePackets().HasTag("CACHE_DIR"); + build_chunk_ = use_caching_ || cc->Outputs().HasTag("TRACKING_CHUNK"); + if (use_caching_) { + cache_dir_ = cc->InputSidePackets().Tag("CACHE_DIR").Get(); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status FlowPackagerCalculator::Process(CalculatorContext* cc) { + InputStream* flow_stream = &(cc->Inputs().Tag("FLOW")); + const RegionFlowFeatureList& flow = flow_stream->Get(); + + const Timestamp timestamp = flow_stream->Value().Timestamp(); + + const CameraMotion* camera_motion = nullptr; + if (cc->Inputs().HasTag("CAMERA")) { + InputStream* camera_stream = &(cc->Inputs().Tag("CAMERA")); + camera_motion = &camera_stream->Get(); + } + + std::unique_ptr tracking_data(new TrackingData()); + + flow_packager_->PackFlow(flow, camera_motion, tracking_data.get()); + + if (build_chunk_) { + if (chunk_idx_ < 0) { // Lazy init, determine first start. + chunk_idx_ = + timestamp.Value() / 1000 / options_.caching_chunk_size_msec(); + tracking_chunk_.set_first_chunk(true); + } + CHECK_GE(chunk_idx_, 0); + + TrackingDataChunk::Item* item = tracking_chunk_.add_item(); + item->set_frame_idx(frame_idx_); + item->set_timestamp_usec(timestamp.Value()); + if (frame_idx_ > 0) { + item->set_prev_timestamp_usec(prev_timestamp_.Value()); + } + if (cc->Outputs().HasTag("TRACKING")) { + // Need to copy as output is requested. 
+ *item->mutable_tracking_data() = *tracking_data; + } else { + item->mutable_tracking_data()->Swap(tracking_data.get()); + } + + const int next_chunk_msec = + options_.caching_chunk_size_msec() * (chunk_idx_ + 1); + + if (timestamp.Value() / 1000 >= next_chunk_msec) { + if (cc->Outputs().HasTag("TRACKING_CHUNK")) { + cc->Outputs() + .Tag("TRACKING_CHUNK") + .Add(new TrackingDataChunk(tracking_chunk_), + Timestamp(tracking_chunk_.item(0).timestamp_usec())); + } + if (use_caching_) { + WriteChunk(tracking_chunk_); + } + PrepareCurrentForNextChunk(&tracking_chunk_); + } + } + + if (cc->Outputs().HasTag("TRACKING")) { + cc->Outputs() + .Tag("TRACKING") + .Add(tracking_data.release(), flow_stream->Value().Timestamp()); + } + + prev_timestamp_ = timestamp; + ++frame_idx_; + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status FlowPackagerCalculator::Close(CalculatorContext* cc) { + if (frame_idx_ > 0) { + tracking_chunk_.set_last_chunk(true); + if (cc->Outputs().HasTag("TRACKING_CHUNK")) { + cc->Outputs() + .Tag("TRACKING_CHUNK") + .Add(new TrackingDataChunk(tracking_chunk_), + Timestamp(tracking_chunk_.item(0).timestamp_usec())); + } + + if (use_caching_) { + WriteChunk(tracking_chunk_); + } + } + + if (cc->Outputs().HasTag("COMPLETE")) { + cc->Outputs().Tag("COMPLETE").Add(new bool(true), Timestamp::PreStream()); + } + + return ::mediapipe::OkStatus(); +} + +void FlowPackagerCalculator::WriteChunk(const TrackingDataChunk& chunk) const { + if (chunk.item_size() == 0) { + LOG(ERROR) << "Write chunk called with empty tracking data." + << "This can only occur if the spacing between frames " + << "is larger than the requested chunk size. Try increasing " + << "the chunk size"; + return; + } + + auto format_runtime = + absl::ParsedFormat<'d'>::New(options_.cache_file_format()); + + std::string chunk_file; + if (format_runtime) { + chunk_file = + cache_dir_ + "/" + absl::StrFormat(*format_runtime, chunk_idx_); + } else { + LOG(ERROR) << "chache_file_format wrong. 
fall back to chunk_%04d."; + chunk_file = cache_dir_ + "/" + absl::StrFormat("chunk_%04d", chunk_idx_); + } + + std::string data; + chunk.SerializeToString(&data); + + const char* temp_filename = tempnam(cache_dir_.c_str(), nullptr); + std::ofstream out_file(temp_filename); + if (!out_file) { + LOG(ERROR) << "Could not open " << temp_filename; + } else { + out_file.write(data.data(), data.size()); + } + + if (rename(temp_filename, chunk_file.c_str()) != 0) { + LOG(ERROR) << "Failed to rename to " << chunk_file; + } + + LOG(INFO) << "Wrote chunk : " << chunk_file; +} + +void FlowPackagerCalculator::PrepareCurrentForNextChunk( + TrackingDataChunk* chunk) { + CHECK(chunk); + if (chunk->item_size() == 0) { + LOG(ERROR) << "Called with empty chunk. Unexpected."; + return; + } + + chunk->set_first_chunk(false); + + // Buffer last item for next chunk. + TrackingDataChunk::Item last_item; + last_item.Swap(chunk->mutable_item(chunk->item_size() - 1)); + + chunk->Clear(); + chunk->add_item()->Swap(&last_item); + + ++chunk_idx_; +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/video/flow_packager_calculator.proto b/mediapipe/calculators/video/flow_packager_calculator.proto new file mode 100644 index 000000000..c1e91d8c5 --- /dev/null +++ b/mediapipe/calculators/video/flow_packager_calculator.proto @@ -0,0 +1,36 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/util/tracking/flow_packager.proto"; + +message FlowPackagerCalculatorOptions { + extend CalculatorOptions { + optional FlowPackagerCalculatorOptions ext = 271236147; + } + + optional mediapipe.FlowPackagerOptions flow_packager_options = 1; + + // Chunk size for caching files that are written to the externally specified + // caching directory. Specified in msec. + // Note that each chunk always contains at its end the first frame of the + // next chunk (to enable forward tracking across chunk boundaries). + optional int32 caching_chunk_size_msec = 2 [default = 2500]; + + optional string cache_file_format = 3 [default = "chunk_%04d"]; +} diff --git a/mediapipe/calculators/video/motion_analysis_calculator.cc b/mediapipe/calculators/video/motion_analysis_calculator.cc new file mode 100644 index 000000000..6746bc3a9 --- /dev/null +++ b/mediapipe/calculators/video/motion_analysis_calculator.cc @@ -0,0 +1,988 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "mediapipe/calculators/video/motion_analysis_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/video_stream_header.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/util/tracking/camera_motion.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/frame_selection.pb.h" +#include "mediapipe/util/tracking/motion_analysis.h" +#include "mediapipe/util/tracking/motion_estimation.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +using mediapipe::AffineAdapter; +using mediapipe::CameraMotion; +using mediapipe::FrameSelectionResult; +using mediapipe::Homography; +using mediapipe::HomographyAdapter; +using mediapipe::LinearSimilarityModel; +using mediapipe::MixtureHomography; +using mediapipe::MixtureRowWeights; +using mediapipe::MotionAnalysis; +using mediapipe::ProjectViaFit; +using mediapipe::RegionFlowComputationOptions; +using mediapipe::RegionFlowFeatureList; +using mediapipe::SalientPointFrame; +using mediapipe::TranslationModel; + +const char kOptionsTag[] = "OPTIONS"; + +// A calculator that performs motion analysis on an incoming video stream. +// +// Input streams: (at least one of them is required). +// VIDEO: The input video stream (ImageFrame, sRGB, sRGBA or GRAY8). +// SELECTION: Optional input stream to perform analysis only on selected +// frames. If present needs to contain camera motion +// and features. 
+// +// Input side packets: +// CSV_FILE: Read motion models as homographies from CSV file. Expected +// to be defined in the frame domain (un-normalized). +// Should store 9 floats per row. +// Specify number of homographies per frames via option +// meta_models_per_frame. For values > 1, MixtureHomographies +// are created, for value == 1, a single Homography is used. +// DOWNSAMPLE: Optionally specify downsampling factor via input side packet +// overriding value in the graph settings. +// Output streams (all are optional). +// FLOW: Sparse feature tracks in form of proto RegionFlowFeatureList. +// CAMERA: Camera motion as proto CameraMotion describing the per frame- +// pair motion. Has VideoHeader from input video. +// SALIENCY: Foreground saliency (objects moving different from the +// background) as proto SalientPointFrame. +// VIZ: Visualization stream as ImageFrame, sRGB, visualizing +// features and saliency (set via +// analysis_options().visualization_options()) +// DENSE_FG: Dense foreground stream, describing per-pixel foreground- +// ness as confidence between 0 (background) and 255 +// (foreground). Output is ImageFrame (GRAY8). +// VIDEO_OUT: Optional output stream when SELECTION is used. Output is input +// VIDEO at the selected frames. Required VIDEO to be present. +// GRAY_VIDEO_OUT: Optional output stream for downsampled, grayscale video. +// Requires VIDEO to be present and SELECTION to not be used. +class MotionAnalysisCalculator : public CalculatorBase { + // TODO: Activate once leakr approval is ready. 
+ // typedef com::google::android::libraries::micro::proto::Data HomographyData; + + public: + ~MotionAnalysisCalculator() override = default; + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + ::mediapipe::Status Close(CalculatorContext* cc) override; + + private: + // Outputs results to Outputs() if MotionAnalysis buffered sufficient results. + // Otherwise no-op. Set flush to true to force output of all buffered data. + void OutputMotionAnalyzedFrames(bool flush, CalculatorContext* cc); + + // Lazy init function to be called on Process. + ::mediapipe::Status InitOnProcess(InputStream* video_stream, + InputStream* selection_stream); + + // Parses CSV file contents to homographies. + bool ParseModelCSV(const std::string& contents, + std::deque* homographies); + + // Turns list of 9-tuple floating values into set of homographies. + bool HomographiesFromValues(const std::vector& homog_values, + std::deque* homographies); + + // Appends CameraMotions and features from homographies. + // Set append_identity to true to add an identity transform to the beginning + // of the each list *in addition* to the motions derived from homographies. + void AppendCameraMotionsFromHomographies( + const std::deque& homographies, bool append_identity, + std::deque* camera_motions, + std::deque* features); + + // Helper function to subtract current metadata motion from features. Used + // for hybrid estimation case. + void SubtractMetaMotion(const CameraMotion& meta_motion, + RegionFlowFeatureList* features); + + // Inverse of above function to add back meta motion and replace + // feature location with originals after estimation. 
+ void AddMetaMotion(const CameraMotion& meta_motion, + const RegionFlowFeatureList& meta_features, + RegionFlowFeatureList* features, CameraMotion* motion); + + MotionAnalysisCalculatorOptions options_; + int frame_width_ = -1; + int frame_height_ = -1; + int frame_idx_ = 0; + + // Buffers incoming video frame packets (if visualization output is requested) + std::vector packet_buffer_; + + // Buffers incoming timestamps until MotionAnalysis is ready to output via + // above OutputMotionAnalyzedFrames. + std::vector timestamp_buffer_; + + // Input indicators for each stream. + bool selection_input_ = false; + bool video_input_ = false; + + // Output indicators for each stream. + bool region_flow_feature_output_ = false; + bool camera_motion_output_ = false; + bool saliency_output_ = false; + bool visualize_output_ = false; + bool dense_foreground_output_ = false; + bool video_output_ = false; + bool grayscale_output_ = false; + bool csv_file_input_ = false; + + // Inidicates if saliency should be computed. + bool with_saliency_ = false; + + // Set if hybrid meta analysis - see proto for details. + bool hybrid_meta_analysis_ = false; + + // Concatenated motions for each selected frame. Used in case + // hybrid estimation is requested to fallback to valid models. + std::deque selected_motions_; + + // Normalized homographies from CSV file or metadata. + std::deque meta_homographies_; + std::deque meta_motions_; + std::deque meta_features_; + + // Offset into above meta_motions_ and features_ when using + // hybrid meta analysis. + int hybrid_meta_offset_ = 0; + + std::unique_ptr motion_analysis_; + + std::unique_ptr row_weights_; +}; + +REGISTER_CALCULATOR(MotionAnalysisCalculator); + +::mediapipe::Status MotionAnalysisCalculator::GetContract( + CalculatorContract* cc) { + if (cc->Inputs().HasTag("VIDEO")) { + cc->Inputs().Tag("VIDEO").Set(); + } + + // Optional input stream from frame selection calculator. 
+ if (cc->Inputs().HasTag("SELECTION")) { + cc->Inputs().Tag("SELECTION").Set(); + } + + RET_CHECK(cc->Inputs().HasTag("VIDEO") || cc->Inputs().HasTag("SELECTION")) + << "Either VIDEO, SELECTION must be specified."; + + if (cc->Outputs().HasTag("FLOW")) { + cc->Outputs().Tag("FLOW").Set(); + } + + if (cc->Outputs().HasTag("CAMERA")) { + cc->Outputs().Tag("CAMERA").Set(); + } + + if (cc->Outputs().HasTag("SALIENCY")) { + cc->Outputs().Tag("SALIENCY").Set(); + } + + if (cc->Outputs().HasTag("VIZ")) { + cc->Outputs().Tag("VIZ").Set(); + } + + if (cc->Outputs().HasTag("DENSE_FG")) { + cc->Outputs().Tag("DENSE_FG").Set(); + } + + if (cc->Outputs().HasTag("VIDEO_OUT")) { + cc->Outputs().Tag("VIDEO_OUT").Set(); + } + + if (cc->Outputs().HasTag("GRAY_VIDEO_OUT")) { + // We only output grayscale video if we're actually performing full region- + // flow analysis on the video. + RET_CHECK(cc->Inputs().HasTag("VIDEO") && + !cc->Inputs().HasTag("SELECTION")); + cc->Outputs().Tag("GRAY_VIDEO_OUT").Set(); + } + + if (cc->InputSidePackets().HasTag("CSV_FILE")) { + cc->InputSidePackets().Tag("CSV_FILE").Set(); + } + if (cc->InputSidePackets().HasTag("DOWNSAMPLE")) { + cc->InputSidePackets().Tag("DOWNSAMPLE").Set(); + } + + if (cc->InputSidePackets().HasTag(kOptionsTag)) { + cc->InputSidePackets().Tag(kOptionsTag).Set(); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status MotionAnalysisCalculator::Open(CalculatorContext* cc) { + options_ = + tool::RetrieveOptions(cc->Options(), + cc->InputSidePackets(), kOptionsTag); + + video_input_ = cc->Inputs().HasTag("VIDEO"); + selection_input_ = cc->Inputs().HasTag("SELECTION"); + region_flow_feature_output_ = cc->Outputs().HasTag("FLOW"); + camera_motion_output_ = cc->Outputs().HasTag("CAMERA"); + saliency_output_ = cc->Outputs().HasTag("SALIENCY"); + visualize_output_ = cc->Outputs().HasTag("VIZ"); + dense_foreground_output_ = cc->Outputs().HasTag("DENSE_FG"); + video_output_ = cc->Outputs().HasTag("VIDEO_OUT"); + 
grayscale_output_ = cc->Outputs().HasTag("GRAY_VIDEO_OUT"); + csv_file_input_ = cc->InputSidePackets().HasTag("CSV_FILE"); + hybrid_meta_analysis_ = options_.meta_analysis() == + MotionAnalysisCalculatorOptions::META_ANALYSIS_HYBRID; + + if (video_output_) { + RET_CHECK(selection_input_) << "VIDEO_OUT requires SELECTION input"; + } + + if (selection_input_) { + switch (options_.selection_analysis()) { + case MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION: + RET_CHECK(!visualize_output_) + << "Visualization not supported for NO_ANALYSIS_USE_SELECTION"; + RET_CHECK(!dense_foreground_output_) + << "Dense foreground not supported for NO_ANALYSIS_USE_SELECTION"; + RET_CHECK(!saliency_output_) + << "Saliency output not supported for NO_ANALYSIS_USE_SELECTION"; + break; + + case MotionAnalysisCalculatorOptions::ANALYSIS_RECOMPUTE: + case MotionAnalysisCalculatorOptions::ANALYSIS_WITH_SEED: + RET_CHECK(video_input_) << "Need video input for feature tracking."; + break; + + case MotionAnalysisCalculatorOptions::ANALYSIS_FROM_FEATURES: + // Nothing to add here. + break; + } + } + + if (visualize_output_ || dense_foreground_output_ || video_output_) { + RET_CHECK(video_input_) << "Video input required."; + } + + if (csv_file_input_) { + RET_CHECK(!selection_input_) + << "Can not use selection input with csv input."; + if (!hybrid_meta_analysis_) { + RET_CHECK(!saliency_output_ && !visualize_output_ && + !dense_foreground_output_ && !grayscale_output_) + << "CSV file and meta input only supports flow and camera motion " + << "output when using metadata only."; + } + } + + if (csv_file_input_) { + // Read from file and parse. 
+ const std::string filename = + cc->InputSidePackets().Tag("CSV_FILE").Get(); + + std::string file_contents; + std::ifstream input_file(filename, std::ios::in); + input_file.seekg(0, std::ios::end); + const int file_length = input_file.tellg(); + file_contents.resize(file_length); + input_file.seekg(0, std::ios::beg); + input_file.read(&file_contents[0], file_length); + input_file.close(); + + RET_CHECK(ParseModelCSV(file_contents, &meta_homographies_)) + << "Could not parse CSV file"; + } + + // Get video header from video or selection input if present. + const VideoHeader* video_header = nullptr; + if (video_input_ && !cc->Inputs().Tag("VIDEO").Header().IsEmpty()) { + video_header = &(cc->Inputs().Tag("VIDEO").Header().Get()); + } else if (selection_input_ && + !cc->Inputs().Tag("SELECTION").Header().IsEmpty()) { + video_header = &(cc->Inputs().Tag("SELECTION").Header().Get()); + } else { + LOG(WARNING) << "No input video header found. Downstream calculators " + "expecting video headers are likely to fail."; + } + + with_saliency_ = options_.analysis_options().compute_motion_saliency(); + // Force computation of saliency if requested as output. + if (cc->Outputs().HasTag("SALIENCY")) { + with_saliency_ = true; + if (!options_.analysis_options().compute_motion_saliency()) { + LOG(WARNING) << "Enable saliency computation. Set " + << "compute_motion_saliency to true to silence this " + << "warning."; + options_.mutable_analysis_options()->set_compute_motion_saliency(true); + } + } + + if (options_.bypass_mode()) { + cc->SetOffset(TimestampDiff(0)); + } + + if (cc->InputSidePackets().HasTag("DOWNSAMPLE")) { + options_.mutable_analysis_options() + ->mutable_flow_options() + ->set_downsample_factor( + cc->InputSidePackets().Tag("DOWNSAMPLE").Get()); + } + + // If no video header is provided, just return and initialize on the first + // Process() call. 
+ if (video_header == nullptr) { + return ::mediapipe::OkStatus(); + } + + ////////////// EARLY RETURN; ONLY HEADER OUTPUT SHOULD GO HERE /////////////// + + if (visualize_output_) { + cc->Outputs().Tag("VIZ").SetHeader(Adopt(new VideoHeader(*video_header))); + } + + if (video_output_) { + cc->Outputs() + .Tag("VIDEO_OUT") + .SetHeader(Adopt(new VideoHeader(*video_header))); + } + + if (cc->Outputs().HasTag("DENSE_FG")) { + std::unique_ptr foreground_header( + new VideoHeader(*video_header)); + foreground_header->format = ImageFormat::GRAY8; + cc->Outputs().Tag("DENSE_FG").SetHeader(Adopt(foreground_header.release())); + } + + if (cc->Outputs().HasTag("CAMERA")) { + cc->Outputs().Tag("CAMERA").SetHeader( + Adopt(new VideoHeader(*video_header))); + } + + if (cc->Outputs().HasTag("SALIENCY")) { + cc->Outputs() + .Tag("SALIENCY") + .SetHeader(Adopt(new VideoHeader(*video_header))); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status MotionAnalysisCalculator::Process(CalculatorContext* cc) { + if (options_.bypass_mode()) { + return ::mediapipe::OkStatus(); + } + + InputStream* video_stream = + video_input_ ? &(cc->Inputs().Tag("VIDEO")) : nullptr; + InputStream* selection_stream = + selection_input_ ? &(cc->Inputs().Tag("SELECTION")) : nullptr; + + // Checked on Open. + CHECK(video_stream || selection_stream); + + // Lazy init. 
+ if (frame_width_ < 0 || frame_height_ < 0) { + MP_RETURN_IF_ERROR(InitOnProcess(video_stream, selection_stream)); + } + + const Timestamp timestamp = cc->InputTimestamp(); + if ((csv_file_input_) && !hybrid_meta_analysis_) { + if (camera_motion_output_) { + RET_CHECK(!meta_motions_.empty()) << "Insufficient metadata."; + + CameraMotion output_motion = meta_motions_.front(); + meta_motions_.pop_front(); + output_motion.set_timestamp_usec(timestamp.Value()); + cc->Outputs().Tag("CAMERA").Add(new CameraMotion(output_motion), + timestamp); + } + + if (region_flow_feature_output_) { + RET_CHECK(!meta_features_.empty()) << "Insufficient frames in CSV file"; + RegionFlowFeatureList output_features = meta_features_.front(); + meta_features_.pop_front(); + + output_features.set_timestamp_usec(timestamp.Value()); + cc->Outputs().Tag("FLOW").Add(new RegionFlowFeatureList(output_features), + timestamp); + } + + ++frame_idx_; + return ::mediapipe::OkStatus(); + } + + if (motion_analysis_ == nullptr) { + // We do not need MotionAnalysis when using just metadata. + motion_analysis_.reset(new MotionAnalysis(options_.analysis_options(), + frame_width_, frame_height_)); + } + + std::unique_ptr frame_selection_result; + // Always use frame if selection is not activated. + bool use_frame = !selection_input_; + if (selection_input_) { + CHECK(selection_stream); + + // Fill in timestamps we process. + if (!selection_stream->Value().IsEmpty()) { + ASSIGN_OR_RETURN( + frame_selection_result, + selection_stream->Value().ConsumeOrCopy()); + use_frame = true; + + // Make sure both features and camera motion are present. + RET_CHECK(frame_selection_result->has_camera_motion() && + frame_selection_result->has_features()) + << "Frame selection input error at: " << timestamp + << " both camera motion and features need to be " + "present in FrameSelectionResult. 
" + << frame_selection_result->has_camera_motion() << " , " + << frame_selection_result->has_features(); + } + } + + if (selection_input_ && use_frame && + options_.selection_analysis() == + MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION) { + // Output concatenated results, nothing to compute here. + if (camera_motion_output_) { + cc->Outputs().Tag("CAMERA").Add( + frame_selection_result->release_camera_motion(), timestamp); + } + if (region_flow_feature_output_) { + cc->Outputs().Tag("FLOW").Add(frame_selection_result->release_features(), + timestamp); + } + + if (video_output_) { + cc->Outputs().Tag("VIDEO_OUT").AddPacket(video_stream->Value()); + } + + return ::mediapipe::OkStatus(); + } + + if (use_frame) { + if (!selection_input_) { + const cv::Mat input_view = + formats::MatView(&video_stream->Get()); + if (hybrid_meta_analysis_) { + // Seed with meta homography. + RET_CHECK(hybrid_meta_offset_ < meta_motions_.size()) + << "Not enough metadata received for hybrid meta analysis"; + Homography initial_transform = + meta_motions_[hybrid_meta_offset_].homography(); + std::function subtract_helper = std::bind( + &MotionAnalysisCalculator::SubtractMetaMotion, this, + meta_motions_[hybrid_meta_offset_], std::placeholders::_1); + + // Keep original features before modification around. 
+ motion_analysis_->AddFrameGeneric( + input_view, timestamp.Value(), initial_transform, nullptr, nullptr, + &subtract_helper, &meta_features_[hybrid_meta_offset_]); + ++hybrid_meta_offset_; + } else { + motion_analysis_->AddFrame(input_view, timestamp.Value()); + } + } else { + selected_motions_.push_back(frame_selection_result->camera_motion()); + switch (options_.selection_analysis()) { + case MotionAnalysisCalculatorOptions::NO_ANALYSIS_USE_SELECTION: + return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) + << "Should not reach this point!"; + + case MotionAnalysisCalculatorOptions::ANALYSIS_FROM_FEATURES: + motion_analysis_->AddFeatures(frame_selection_result->features()); + break; + + case MotionAnalysisCalculatorOptions::ANALYSIS_RECOMPUTE: { + const cv::Mat input_view = + formats::MatView(&video_stream->Get()); + motion_analysis_->AddFrame(input_view, timestamp.Value()); + break; + } + + case MotionAnalysisCalculatorOptions::ANALYSIS_WITH_SEED: { + Homography homography; + CameraMotionToHomography(frame_selection_result->camera_motion(), + &homography); + const cv::Mat input_view = + formats::MatView(&video_stream->Get()); + motion_analysis_->AddFrameGeneric(input_view, timestamp.Value(), + homography, &homography); + break; + } + } + } + + timestamp_buffer_.push_back(timestamp); + ++frame_idx_; + + VLOG_EVERY_N(0, 100) << "Analyzed frame " << frame_idx_; + + // Buffer input frames only if visualization is requested. 
+ if (visualize_output_ || video_output_) { + packet_buffer_.push_back(video_stream->Value()); + } + + // If requested, output grayscale thumbnails + if (grayscale_output_) { + cv::Mat grayscale_mat = motion_analysis_->GetGrayscaleFrameFromResults(); + std::unique_ptr grayscale_image(new ImageFrame( + ImageFormat::GRAY8, grayscale_mat.cols, grayscale_mat.rows)); + cv::Mat image_frame_mat = formats::MatView(grayscale_image.get()); + grayscale_mat.copyTo(image_frame_mat); + + cc->Outputs() + .Tag("GRAY_VIDEO_OUT") + .Add(grayscale_image.release(), timestamp); + } + + // Output other results, if we have any yet. + OutputMotionAnalyzedFrames(false, cc); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status MotionAnalysisCalculator::Close(CalculatorContext* cc) { + // Guard against empty videos. + if (motion_analysis_) { + OutputMotionAnalyzedFrames(true, cc); + } + if (csv_file_input_) { + if (!meta_motions_.empty()) { + LOG(ERROR) << "More motions than frames. Unexpected! Remainder: " + << meta_motions_.size(); + } + } + return ::mediapipe::OkStatus(); +} + +void MotionAnalysisCalculator::OutputMotionAnalyzedFrames( + bool flush, CalculatorContext* cc) { + std::vector> features; + std::vector> camera_motions; + std::vector> saliency; + + const int buffer_size = timestamp_buffer_.size(); + const int num_results = motion_analysis_->GetResults( + flush, &features, &camera_motions, with_saliency_ ? &saliency : nullptr); + + CHECK_LE(num_results, buffer_size); + + if (num_results == 0) { + return; + } + + for (int k = 0; k < num_results; ++k) { + // Region flow features and camera motion for this frame. + auto& feature_list = features[k]; + auto& camera_motion = camera_motions[k]; + const Timestamp timestamp = timestamp_buffer_[k]; + + if (selection_input_ && options_.hybrid_selection_camera()) { + if (camera_motion->type() > selected_motions_.front().type()) { + // Composited type is more stable. 
+ camera_motion->Swap(&selected_motions_.front()); + } + selected_motions_.pop_front(); + } + + if (hybrid_meta_analysis_) { + AddMetaMotion(meta_motions_.front(), meta_features_.front(), + feature_list.get(), camera_motion.get()); + meta_motions_.pop_front(); + meta_features_.pop_front(); + } + + // Video frame for visualization. + std::unique_ptr visualization_frame; + cv::Mat visualization; + if (visualize_output_) { + // Initialize visualization frame with original frame. + visualization_frame.reset(new ImageFrame()); + visualization_frame->CopyFrom(packet_buffer_[k].Get(), 16); + visualization = formats::MatView(visualization_frame.get()); + + motion_analysis_->RenderResults( + *feature_list, *camera_motion, + with_saliency_ ? saliency[k].get() : nullptr, &visualization); + + cc->Outputs().Tag("VIZ").Add(visualization_frame.release(), timestamp); + } + + // Output dense foreground mask. + if (dense_foreground_output_) { + std::unique_ptr foreground_frame( + new ImageFrame(ImageFormat::GRAY8, frame_width_, frame_height_)); + cv::Mat foreground = formats::MatView(foreground_frame.get()); + motion_analysis_->ComputeDenseForeground(*feature_list, *camera_motion, + &foreground); + cc->Outputs().Tag("DENSE_FG").Add(foreground_frame.release(), timestamp); + } + + // Output flow features if requested. + if (region_flow_feature_output_) { + cc->Outputs().Tag("FLOW").Add(feature_list.release(), timestamp); + } + + // Output camera motion. + if (camera_motion_output_) { + cc->Outputs().Tag("CAMERA").Add(camera_motion.release(), timestamp); + } + + if (video_output_) { + cc->Outputs().Tag("VIDEO_OUT").AddPacket(packet_buffer_[k]); + } + + // Output saliency. 
+ if (saliency_output_) { + cc->Outputs().Tag("SALIENCY").Add(saliency[k].release(), timestamp); + } + } + + if (hybrid_meta_analysis_) { + hybrid_meta_offset_ -= num_results; + CHECK_GE(hybrid_meta_offset_, 0); + } + + timestamp_buffer_.erase(timestamp_buffer_.begin(), + timestamp_buffer_.begin() + num_results); + + if (visualize_output_ || video_output_) { + packet_buffer_.erase(packet_buffer_.begin(), + packet_buffer_.begin() + num_results); + } +} + +::mediapipe::Status MotionAnalysisCalculator::InitOnProcess( + InputStream* video_stream, InputStream* selection_stream) { + if (video_stream) { + frame_width_ = video_stream->Get().Width(); + frame_height_ = video_stream->Get().Height(); + + // Ensure image options are set correctly. + auto* region_options = + options_.mutable_analysis_options()->mutable_flow_options(); + + // Use two possible formats to account for different channel orders. + RegionFlowComputationOptions::ImageFormat image_format; + RegionFlowComputationOptions::ImageFormat image_format2; + switch (video_stream->Get().Format()) { + case ImageFormat::GRAY8: + image_format = image_format2 = + RegionFlowComputationOptions::FORMAT_GRAYSCALE; + break; + + case ImageFormat::SRGB: + image_format = RegionFlowComputationOptions::FORMAT_RGB; + image_format2 = RegionFlowComputationOptions::FORMAT_BGR; + break; + + case ImageFormat::SRGBA: + image_format = RegionFlowComputationOptions::FORMAT_RGBA; + image_format2 = RegionFlowComputationOptions::FORMAT_BGRA; + break; + + default: + RET_CHECK(false) << "Unsupported image format."; + } + if (region_options->image_format() != image_format && + region_options->image_format() != image_format2) { + LOG(WARNING) << "Requested image format in RegionFlowComputation " + << "does not match video stream format. Overriding."; + region_options->set_image_format(image_format); + } + + // Account for downsampling mode INPUT_SIZE. 
In this case we are handed
+    // already downsampled frames but the resulting CameraMotion should
+    // be computed on higher resolution as specified by the downsample scale.
+    if (region_options->downsample_mode() ==
+        RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE) {
+      const float scale = region_options->downsample_factor();
+      frame_width_ = static_cast<int>(std::round(frame_width_ * scale));
+      frame_height_ = static_cast<int>(std::round(frame_height_ * scale));
+    }
+  } else if (selection_stream) {
+    const auto& camera_motion =
+        selection_stream->Get<FrameSelectionResult>().camera_motion();
+    frame_width_ = camera_motion.frame_width();
+    frame_height_ = camera_motion.frame_height();
+  } else {
+    LOG(FATAL) << "Either VIDEO or SELECTION stream need to be specified.";
+  }
+
+  // Filled by CSV file parsing.
+  if (!meta_homographies_.empty()) {
+    CHECK(csv_file_input_);
+    AppendCameraMotionsFromHomographies(meta_homographies_,
+                                        true,  // append identity.
+                                        &meta_motions_, &meta_features_);
+    meta_homographies_.clear();
+  }
+
+  // Filter weights before using for hybrid mode.
+  if (hybrid_meta_analysis_) {
+    auto* motion_options =
+        options_.mutable_analysis_options()->mutable_motion_options();
+    motion_options->set_filter_initialized_irls_weights(true);
+  }
+
+  return ::mediapipe::OkStatus();
+}
+
+bool MotionAnalysisCalculator::ParseModelCSV(
+    const std::string& contents, std::deque<Homography>* homographies) {
+  std::vector<std::string> values =
+      absl::StrSplit(contents, absl::ByAnyChar(",\n"));
+
+  // Trim off any empty lines.
+  while (values.back().empty()) {
+    values.pop_back();
+  }
+
+  // Convert to float.
+ std::vector homog_values; + homog_values.reserve(values.size()); + + for (const auto& value : values) { + double value_64f; + if (!absl::SimpleAtod(value, &value_64f)) { + LOG(ERROR) << "Not a double, expected!"; + return false; + } + + homog_values.push_back(value_64f); + } + + return HomographiesFromValues(homog_values, homographies); +} + +bool MotionAnalysisCalculator::HomographiesFromValues( + const std::vector& homog_values, + std::deque* homographies) { + CHECK(homographies); + + // Obvious constants are obvious :D + constexpr int kHomographyValues = 9; + if (homog_values.size() % kHomographyValues != 0) { + LOG(ERROR) << "Contents not a multiple of " << kHomographyValues; + return false; + } + + for (int k = 0; k < homog_values.size(); k += kHomographyValues) { + std::vector h_vals(kHomographyValues); + for (int l = 0; l < kHomographyValues; ++l) { + h_vals[l] = homog_values[k + l]; + } + + // Normalize last entry to 1. + if (h_vals[kHomographyValues - 1] == 0) { + LOG(ERROR) << "Degenerate homography, last entry is zero"; + return false; + } + + const double scale = 1.0f / h_vals[kHomographyValues - 1]; + for (int l = 0; l < kHomographyValues; ++l) { + h_vals[l] *= scale; + } + + Homography h = HomographyAdapter::FromDoublePointer(h_vals.data(), false); + homographies->push_back(h); + } + + if (homographies->size() % options_.meta_models_per_frame() != 0) { + LOG(ERROR) << "Total homographies not a multiple of specified models " + << "per frame."; + return false; + } + + return true; +} + +void MotionAnalysisCalculator::SubtractMetaMotion( + const CameraMotion& meta_motion, RegionFlowFeatureList* features) { + if (meta_motion.mixture_homography().model_size() > 0) { + CHECK(row_weights_ != nullptr); + RegionFlowFeatureListViaTransform(meta_motion.mixture_homography(), + features, -1.0f, + 1.0f, // subtract transformed. + true, // replace feature loc. 
+ row_weights_.get()); + } else { + RegionFlowFeatureListViaTransform(meta_motion.homography(), features, -1.0f, + 1.0f, // subtract transformed. + true); // replace feature loc. + } + + // Clamp transformed features to domain and handle outliers. + const float domain_diam = + hypot(features->frame_width(), features->frame_height()); + const float motion_mag = meta_motion.average_magnitude(); + // Same irls fraction as used by MODEL_MIXTURE_HOMOGRAPHY scaling in + // MotionEstimation. + const float irls_fraction = options_.analysis_options() + .motion_options() + .irls_mixture_fraction_scale() * + options_.analysis_options() + .motion_options() + .irls_motion_magnitude_fraction(); + float err_scale = std::max(1.0f, motion_mag * irls_fraction); + + const float max_err = + options_.meta_outlier_domain_ratio() * domain_diam * err_scale; + const float max_err_sq = max_err * max_err; + + for (auto& feature : *features->mutable_feature()) { + feature.set_x( + std::max(0.0f, std::min(features->frame_width() - 1.0f, feature.x()))); + feature.set_y( + std::max(0.0f, std::min(features->frame_height() - 1.0f, feature.y()))); + // Label anything with large residual motion an outlier. + if (FeatureFlow(feature).Norm2() > max_err_sq) { + feature.set_irls_weight(0.0f); + } + } +} + +void MotionAnalysisCalculator::AddMetaMotion( + const CameraMotion& meta_motion, const RegionFlowFeatureList& meta_features, + RegionFlowFeatureList* features, CameraMotion* motion) { + // Restore old feature location. + CHECK_EQ(meta_features.feature_size(), features->feature_size()); + for (int k = 0; k < meta_features.feature_size(); ++k) { + auto feature = features->mutable_feature(k); + const auto& meta_feature = meta_features.feature(k); + feature->set_x(meta_feature.x()); + feature->set_y(meta_feature.y()); + feature->set_dx(meta_feature.dx()); + feature->set_dy(meta_feature.dy()); + } + + // Composite camera motion. 
+ *motion = ComposeCameraMotion(*motion, meta_motion); + // Restore type from metadata, i.e. do not declare motions as invalid. + motion->set_type(meta_motion.type()); + motion->set_match_frame(-1); +} + +void MotionAnalysisCalculator::AppendCameraMotionsFromHomographies( + const std::deque& homographies, bool append_identity, + std::deque* camera_motions, + std::deque* features) { + CHECK(camera_motions); + CHECK(features); + + CameraMotion identity; + identity.set_frame_width(frame_width_); + identity.set_frame_height(frame_height_); + + *identity.mutable_translation() = TranslationModel(); + *identity.mutable_linear_similarity() = LinearSimilarityModel(); + *identity.mutable_homography() = Homography(); + identity.set_type(CameraMotion::VALID); + identity.set_match_frame(0); + + RegionFlowFeatureList empty_list; + empty_list.set_long_tracks(true); + empty_list.set_match_frame(-1); + empty_list.set_frame_width(frame_width_); + empty_list.set_frame_height(frame_height_); + + if (append_identity) { + camera_motions->push_back(identity); + features->push_back(empty_list); + } + + const int models_per_frame = options_.meta_models_per_frame(); + CHECK_GT(models_per_frame, 0) << "At least one model per frame is needed"; + CHECK_EQ(0, homographies.size() % models_per_frame); + const int num_frames = homographies.size() / models_per_frame; + + // Heuristic sigma, similar to what we use for rolling shutter removal. 
+  const float mixture_sigma = 1.0f / models_per_frame;
+
+  if (row_weights_ == nullptr) {
+    row_weights_.reset(new MixtureRowWeights(frame_height_,
+                                             frame_height_ / 10,  // 10% margin
+                                             mixture_sigma * frame_height_,
+                                             1.0f, models_per_frame));
+  }
+
+  for (int f = 0; f < num_frames; ++f) {
+    MixtureHomography mix_homog;
+    const int model_start = f * models_per_frame;
+
+    for (int k = 0; k < models_per_frame; ++k) {
+      const Homography& homog = homographies[model_start + k];
+      *mix_homog.add_model() = ModelInvert(homog);
+    }
+
+    CameraMotion c = identity;
+    c.set_match_frame(-1);
+
+    if (mix_homog.model_size() > 1) {
+      *c.mutable_mixture_homography() = mix_homog;
+      c.set_mixture_row_sigma(mixture_sigma);
+
+      for (int k = 0; k < models_per_frame; ++k) {
+        c.add_mixture_inlier_coverage(1.0f);
+      }
+      *c.add_mixture_homography_spectrum() = mix_homog;
+      c.set_rolling_shutter_motion_index(0);
+
+      *c.mutable_homography() = ProjectViaFit<Homography>(
+          mix_homog, frame_width_, frame_height_, row_weights_.get());
+    } else {
+      // Guaranteed to exist because we check that models_per_frame > 0 above.
+      *c.mutable_homography() = mix_homog.model(0);
+    }
+
+    // Project remaining motions down.
+    *c.mutable_linear_similarity() = ProjectViaFit<LinearSimilarityModel>(
+        c.homography(), frame_width_, frame_height_);
+    *c.mutable_translation() = ProjectViaFit<TranslationModel>(
+        c.homography(), frame_width_, frame_height_);
+
+    c.set_average_magnitude(
+        std::hypot(c.translation().dx(), c.translation().dy()));
+
+    camera_motions->push_back(c);
+    features->push_back(empty_list);
+  }
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/video/motion_analysis_calculator.proto b/mediapipe/calculators/video/motion_analysis_calculator.proto
new file mode 100644
index 000000000..a2af77d12
--- /dev/null
+++ b/mediapipe/calculators/video/motion_analysis_calculator.proto
@@ -0,0 +1,111 @@
+// Copyright 2019 The MediaPipe Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/util/tracking/motion_analysis.proto"; + +// Next tag: 10 +message MotionAnalysisCalculatorOptions { + extend CalculatorOptions { + optional MotionAnalysisCalculatorOptions ext = 270698255; + } + + optional mediapipe.MotionAnalysisOptions analysis_options = 1; + + // Determines how optional input SELECTION (if present) is used to compute + // the final camera motion. + enum SelectionAnalysis { + // Recompute camera motion for selected frame neighbors. + ANALYSIS_RECOMPUTE = 1; + + // Use composited camera motion and region flow from SELECTION input. No + // tracking or re-computation is performed. + // Note that in this case only CAMERA, FLOW and VIDEO_OUT tags are + // supported as output. + NO_ANALYSIS_USE_SELECTION = 2; + + // Recompute camera motion for selected frame neighbors using + // features supplied by SELECTION input. No feature tracking is performed. + ANALYSIS_FROM_FEATURES = 3; + + // Recomputes camera motion for selected frame neighbors but seeds + // initial transform with camera motion from SELECTION input. 
+ ANALYSIS_WITH_SEED = 4; + } + + optional SelectionAnalysis selection_analysis = 4 + [default = ANALYSIS_WITH_SEED]; + + // If activated when SELECTION input is activated, will replace the computed + // camera motion (for any of the ANALYSIS_* case above) with the one supplied + // by the frame selection, in case the frame selection one is more stable. + // For example, if recomputed camera motion is unstable but the one from + // the selection result is stable, will use the stable result instead. + optional bool hybrid_selection_camera = 5 [default = false]; + + // Determines how optional input META is used to compute the final camera + // motion. + enum MetaAnalysis { + // Uses metadata supplied motions as is. + META_ANALYSIS_USE_META = 1; + + // Seeds visual tracking from metadata motions - estimates visual residual + // motion and combines with metadata. + META_ANALYSIS_HYBRID = 2; + } + + optional MetaAnalysis meta_analysis = 8 [default = META_ANALYSIS_USE_META]; + + // Determines number of homography models per frame stored in the CSV file + // or the homography metadata in META. + // For values > 1, MixtureHomographies are created. + optional int32 meta_models_per_frame = 6 [default = 1]; + + // Used for META_ANALYSIS_HYBRID. Rejects features which flow deviates + // domain_ratio * image diagonal size from the ground truth metadata motion. + optional float meta_outlier_domain_ratio = 9 [default = 0.0015]; + + // If true, the MotionAnalysisCalculator will skip all processing and emit no + // packets on any output. This is useful for quickly creating different + // versions of a MediaPipe graph without changing its structure, assuming that + // downstream calculators can handle missing input packets. + // TODO: Remove this hack. See b/36485206 for more details. 
+ optional bool bypass_mode = 7 [default = false]; +} + +// Taken from +// java/com/google/android/libraries/microvideo/proto/microvideo.proto to +// satisfy leakr requirements +// TODO: Remove and use above proto. +message HomographyData { + // For each frame, there are 12 homography matrices stored. Each matrix is + // 3x3 (9 elements). This field will contain 12 x 3 x 3 float values. The + // first row of the first homography matrix will be followed by the second row + // of the first homography matrix, followed by third row of first homography + // matrix, followed by the first row of the second homography matrix, etc. + repeated float motion_homography_data = 1 [packed = true]; + + // Vector containing histogram counts for individual patches in the frame. + repeated uint32 histogram_count_data = 2 [packed = true]; + + // The width of the frame at the time metadata was sampled. + optional int32 frame_width = 3; + + // The height of the frame at the time metadata was sampled. + optional int32 frame_height = 4; +} diff --git a/mediapipe/calculators/video/testdata/lenna.png b/mediapipe/calculators/video/testdata/lenna.png new file mode 100644 index 000000000..bd247e8a4 Binary files /dev/null and b/mediapipe/calculators/video/testdata/lenna.png differ diff --git a/mediapipe/calculators/video/testdata/parallel_tracker_graph.pbtxt b/mediapipe/calculators/video/testdata/parallel_tracker_graph.pbtxt new file mode 100644 index 000000000..f3fe5b424 --- /dev/null +++ b/mediapipe/calculators/video/testdata/parallel_tracker_graph.pbtxt @@ -0,0 +1,134 @@ +input_stream: "image_cpu_frames" +input_stream: "start_pos" +input_stream: "ra_track" + +num_threads: 4 + +node: { + calculator: "MotionAnalysisCalculator" + input_stream: "VIDEO:image_cpu_frames" + output_stream: "CAMERA:camera_motion" + output_stream: "FLOW:region_flow" + + options: { + [mediapipe.MotionAnalysisCalculatorOptions.ext]: { + analysis_options: { + analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE + + 
flow_options: { + # Maybe move down to 50 + fast_estimation_min_block_size: 100 + top_inlier_sets: 1 + frac_inlier_error_threshold: 3e-3 + downsample_mode: DOWNSAMPLE_NONE + verification_distance: 5.0 + verify_long_feature_acceleration: true + verify_long_feature_trigger_ratio: 0.1 + tracking_options: { + max_features: 500 + adaptive_extraction_levels: 2 + min_eig_val_settings: { + adaptive_lowest_quality_level: 2e-4 + } + klt_tracker_implementation: KLT_OPENCV + } + } + + motion_options: { + label_empty_frames_as_valid: false + } + } + } + } +} + +node: { + calculator: "FlowPackagerCalculator" + input_stream: "FLOW:region_flow" + input_stream: "CAMERA:camera_motion" + output_stream: "TRACKING:tracking_data" + + options: { + [mediapipe.FlowPackagerCalculatorOptions.ext]: { + flow_packager_options: { + binary_tracking_data_support: false + } + } + } +} + +node: { + calculator: "BoxTrackerCalculator" + input_stream: "TRACKING:tracking_data" + input_stream: "START_POS:start_pos" + output_stream: "BOXES:boxes" + input_side_packet: "OPTIONS:calculator_options" + + input_stream_handler: { + input_stream_handler: "SyncSetInputStreamHandler" + options: { + [mediapipe.SyncSetInputStreamHandlerOptions.ext]: { + sync_set: { + tag_index: "TRACKING" + } + sync_set: { + tag_index: "START_POS" + } + } + } + } + + options: { + [mediapipe.BoxTrackerCalculatorOptions.ext]: { + tracker_options: { + track_step_options: { + track_object_and_camera: true + tracking_degrees: TRACKING_DEGREE_OBJECT_PERSPECTIVE + object_similarity_min_contd_inliers: 6 + inlier_spring_force: 0.0 + static_motion_temporal_ratio: 3e-2 + } + } + visualize_tracking_data: false + streaming_track_data_cache_size: 100 + } + } +} + +node: { + calculator: "BoxTrackerCalculator" + input_stream: "TRACKING:tracking_data" + input_stream: "RA_TRACK:ra_track" + output_stream: "RA_BOXES:ra_boxes" + input_side_packet: "OPTIONS:calculator_options" + + input_stream_handler: { + input_stream_handler: "SyncSetInputStreamHandler" 
+ options: { + [mediapipe.SyncSetInputStreamHandlerOptions.ext]: { + sync_set: { + tag_index: "TRACKING" + } + sync_set: { + tag_index: "RA_TRACK" + } + } + } + } + + options: { + [mediapipe.BoxTrackerCalculatorOptions.ext]: { + tracker_options: { + track_step_options: { + track_object_and_camera: true + tracking_degrees: TRACKING_DEGREE_OBJECT_PERSPECTIVE + object_similarity_min_contd_inliers: 6 + inlier_spring_force: 0.0 + static_motion_temporal_ratio: 3e-2 + } + } + visualize_tracking_data: false + streaming_track_data_cache_size: 100 + } + } +} diff --git a/mediapipe/calculators/video/testdata/tracker_graph.pbtxt b/mediapipe/calculators/video/testdata/tracker_graph.pbtxt new file mode 100644 index 000000000..c86d50136 --- /dev/null +++ b/mediapipe/calculators/video/testdata/tracker_graph.pbtxt @@ -0,0 +1,122 @@ +input_stream: "image_cpu_frames" +input_stream: "start_pos" +input_stream: "cancel_object_id" +input_stream: "ra_track" +input_stream: "restart_pos" +input_stream: "track_time" + +num_threads: 4 + +node: { + calculator: "MotionAnalysisCalculator" + options: { + [mediapipe.MotionAnalysisCalculatorOptions.ext]: { + analysis_options: { + analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE + + flow_options: { + # Maybe move down to 50 + fast_estimation_min_block_size: 100 + top_inlier_sets: 1 + frac_inlier_error_threshold: 3e-3 + # For mobile application, downsample before input into graph + # and use DOWNSAMPLE_TO_INPUT_SIZE and specify + # downsampling_factor option or DOWNSAMPLE input_side_packet + downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE + verification_distance: 5.0 + verify_long_feature_acceleration: true + verify_long_feature_trigger_ratio: 0.1 + tracking_options: { + max_features: 500 + corner_extraction_method: EXTRACTION_FAST + adaptive_extraction_levels: 2 + min_eig_val_settings: { + adaptive_lowest_quality_level: 2e-4 + } + klt_tracker_implementation: KLT_OPENCV + } + } + } + } + } + + # Drops packets if calculator cannot keep up with the input rate. 
+ input_stream_handler: { + input_stream_handler: "FixedSizeInputStreamHandler" + } + + input_stream: "VIDEO:image_cpu_frames" + input_side_packet: "DOWNSAMPLE:analysis_downsample_factor" + input_side_packet: "OPTIONS:calculator_options" + output_stream: "CAMERA:camera_motion" + output_stream: "FLOW:region_flow" +} + +node: { + calculator: "FlowPackagerCalculator" + + input_stream: "FLOW:region_flow" + input_stream: "CAMERA:camera_motion" + output_stream: "TRACKING:tracking_data" + options: { + [mediapipe.FlowPackagerCalculatorOptions.ext]: { + flow_packager_options: { + binary_tracking_data_support: false + } + } + } +} + +node: { + calculator: "BoxTrackerCalculator" + + input_side_packet: "OPTIONS:calculator_options" + input_stream: "TRACKING:tracking_data" + input_stream: "TRACK_TIME:track_time" + input_stream: "START_POS:start_pos" + input_stream: "RESTART_POS:restart_pos" + input_stream: "CANCEL_OBJECT_ID:cancel_object_id" + input_stream: "RA_TRACK:ra_track" + output_stream: "BOXES:boxes" + output_stream: "RA_BOXES:ra_boxes" + + input_stream_handler: { + input_stream_handler: "SyncSetInputStreamHandler" + options: { + [mediapipe.SyncSetInputStreamHandlerOptions.ext]: { + sync_set: { + tag_index: "TRACKING" + tag_index: "TRACK_TIME" + } + sync_set: { + tag_index: "START_POS" + } + sync_set: { + tag_index: "RESTART_POS" + } + sync_set: { + tag_index: "CANCEL_OBJECT_ID" + } + sync_set: { + tag_index: "RA_TRACK" + } + } + } + } + + options: { + [mediapipe.BoxTrackerCalculatorOptions.ext]: { + tracker_options: { + track_step_options: { + track_object_and_camera: true + tracking_degrees: TRACKING_DEGREE_OBJECT_ROTATION_SCALE + inlier_spring_force: 0.0 + static_motion_temporal_ratio: 3e-2 + object_similarity_min_contd_inliers: 10 + } + } + visualize_tracking_data: false + streaming_track_data_cache_size: 100 + } + } +} diff --git a/mediapipe/calculators/video/tracked_detection_manager_calculator.cc b/mediapipe/calculators/video/tracked_detection_manager_calculator.cc 
new file mode 100644 index 000000000..749647d17 --- /dev/null +++ b/mediapipe/calculators/video/tracked_detection_manager_calculator.cc @@ -0,0 +1,319 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "absl/container/node_hash_map.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/util/tracking/box_tracker.h" +#include "mediapipe/util/tracking/tracked_detection.h" +#include "mediapipe/util/tracking/tracked_detection_manager.h" +#include "mediapipe/util/tracking/tracking.h" + +namespace mediapipe { +namespace { + +constexpr int kDetectionUpdateTimeOutMS = 5000; +constexpr char kDetectionsTag[] = "DETECTIONS"; +constexpr char kDetectionBoxesTag[] = "DETECTION_BOXES"; +constexpr char kDetectionListTag[] = "DETECTION_LIST"; +constexpr char kTrackingBoxesTag[] = "TRACKING_BOXES"; +constexpr char kCancelObjectIdTag[] = "CANCEL_OBJECT_ID"; + +// Move |src| to the back of |dst|. 
+void MoveIds(std::vector* dst, std::vector src) { + dst->insert(dst->end(), std::make_move_iterator(src.begin()), + std::make_move_iterator(src.end())); +} + +int64 GetInputTimestampMs(::mediapipe::CalculatorContext* cc) { + return cc->InputTimestamp().Microseconds() / 1000; // 1 ms = 1000 us. +} + +// Converts a Mediapipe Detection Proto to a TrackedDetection class. +std::unique_ptr GetTrackedDetectionFromDetection( + const Detection& detection, int64 timestamp) { + std::unique_ptr tracked_detection = + absl::make_unique(detection.detection_id(), timestamp); + const float top = detection.location_data().relative_bounding_box().ymin(); + const float bottom = + detection.location_data().relative_bounding_box().ymin() + + detection.location_data().relative_bounding_box().height(); + const float left = detection.location_data().relative_bounding_box().xmin(); + const float right = detection.location_data().relative_bounding_box().xmin() + + detection.location_data().relative_bounding_box().width(); + NormalizedRect bounding_box; + bounding_box.set_x_center((left + right) / 2.f); + bounding_box.set_y_center((top + bottom) / 2.f); + bounding_box.set_height(bottom - top); + bounding_box.set_width(right - left); + tracked_detection->set_bounding_box(bounding_box); + + for (int i = 0; i < detection.label_size(); ++i) { + tracked_detection->AddLabel(detection.label(i), detection.score(i)); + } + return tracked_detection; +} + +// Converts a TrackedDetection class to a Mediapipe Detection Proto. 
+Detection GetAxisAlignedDetectionFromTrackedDetection( + const TrackedDetection& tracked_detection) { + Detection detection; + LocationData* location_data = detection.mutable_location_data(); + + auto corners = tracked_detection.GetCorners(); + + float x_min = std::numeric_limits::max(); + float x_max = std::numeric_limits::min(); + float y_min = std::numeric_limits::max(); + float y_max = std::numeric_limits::min(); + for (int i = 0; i < 4; ++i) { + x_min = std::min(x_min, corners[i].x()); + x_max = std::max(x_max, corners[i].x()); + y_min = std::min(y_min, corners[i].y()); + y_max = std::max(y_max, corners[i].y()); + } + location_data->set_format(LocationData::RELATIVE_BOUNDING_BOX); + LocationData::RelativeBoundingBox* relative_bbox = + location_data->mutable_relative_bounding_box(); + relative_bbox->set_xmin(x_min); + relative_bbox->set_ymin(y_min); + relative_bbox->set_width(x_max - x_min); + relative_bbox->set_height(y_max - y_min); + + // Use previous id which is the id the object when it's first detected. + if (tracked_detection.previous_id() > 0) { + detection.set_detection_id(tracked_detection.previous_id()); + } else { + detection.set_detection_id(tracked_detection.unique_id()); + } + for (const auto& label_and_score : tracked_detection.label_to_score_map()) { + detection.add_label(label_and_score.first); + detection.add_score(label_and_score.second); + } + return detection; +} + +} // namespace + +// TrackedDetectionManagerCalculator accepts detections and tracking results at +// different frame rate for real time tracking of targets. +// Input: +// DETECTIONS: A vector of newly detected targets. +// TRACKING_BOXES: A TimedBoxProtoList which contains a list of tracked boxes +// from previous detections. +// +// Output: +// CANCEL_OBJECT_ID: Ids of targets that are missing/lost such that it should +// be removed from tracking. +// DETECTIONS: List of detections that are being tracked. 
+// DETECTION_BOXES: List of bounding boxes of detections that are being +// tracked. +// +// Usage example: +// node { +// calculator: "TrackedDetectionManagerCalculator" +// input_stream: "DETECTIONS:detections" +// input_stream: "TRACKING_BOXES:boxes" +// output_stream: "CANCEL_OBJECT_ID:cancel_object_id" +// output_stream: "DETECTIONS:output_detections" +// } +class TrackedDetectionManagerCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + // Adds new list of detections to |waiting_for_update_detections_|. + void AddDetectionList(const DetectionList& detection_list, + CalculatorContext* cc); + void AddDetections(const std::vector& detections, + CalculatorContext* cc); + + // Manages existing and new detections. + TrackedDetectionManager tracked_detection_manager_; + + // Set of detections that are not up to date yet. These detections will be + // added to the detection manager until they got updated from the box tracker. 
+ absl::node_hash_map> + waiting_for_update_detections_; +}; +REGISTER_CALCULATOR(TrackedDetectionManagerCalculator); + +::mediapipe::Status TrackedDetectionManagerCalculator::GetContract( + CalculatorContract* cc) { + if (cc->Inputs().HasTag(kDetectionsTag)) { + cc->Inputs().Tag(kDetectionsTag).Set>(); + } + if (cc->Inputs().HasTag(kDetectionListTag)) { + cc->Inputs().Tag(kDetectionListTag).Set(); + } + if (cc->Inputs().HasTag(kTrackingBoxesTag)) { + cc->Inputs().Tag(kTrackingBoxesTag).Set(); + } + + if (cc->Outputs().HasTag(kCancelObjectIdTag)) { + cc->Outputs().Tag(kCancelObjectIdTag).Set(); + } + if (cc->Outputs().HasTag(kDetectionsTag)) { + cc->Outputs().Tag(kDetectionsTag).Set>(); + } + if (cc->Outputs().HasTag(kDetectionBoxesTag)) { + cc->Outputs().Tag(kDetectionBoxesTag).Set>(); + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status TrackedDetectionManagerCalculator::Process( + CalculatorContext* cc) { + if (cc->Inputs().HasTag("TRACKING_BOXES")) { + if (!cc->Inputs().Tag("TRACKING_BOXES").IsEmpty()) { + const TimedBoxProtoList& tracked_boxes = + cc->Inputs().Tag("TRACKING_BOXES").Get(); + + // Collect all detections that are removed. + auto removed_detection_ids = absl::make_unique>(); + for (const TimedBoxProto& tracked_box : tracked_boxes.box()) { + NormalizedRect bounding_box; + bounding_box.set_x_center((tracked_box.left() + tracked_box.right()) / + 2.f); + bounding_box.set_y_center((tracked_box.bottom() + tracked_box.top()) / + 2.f); + bounding_box.set_height(tracked_box.bottom() - tracked_box.top()); + bounding_box.set_width(tracked_box.right() - tracked_box.left()); + bounding_box.set_rotation(tracked_box.rotation()); + // First check if this box updates a detection that's waiting for + // update from the tracker. 
+ auto waiting_for_update_detectoin_ptr = + waiting_for_update_detections_.find(tracked_box.id()); + if (waiting_for_update_detectoin_ptr != + waiting_for_update_detections_.end()) { + // Add the detection and remove duplicated detections. + auto removed_ids = tracked_detection_manager_.AddDetection( + std::move(waiting_for_update_detectoin_ptr->second)); + MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + + waiting_for_update_detections_.erase( + waiting_for_update_detectoin_ptr); + } + auto removed_ids = tracked_detection_manager_.UpdateDetectionLocation( + tracked_box.id(), bounding_box, tracked_box.time_msec()); + MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + } + // TODO: Should be handled automatically in detection manager. + auto removed_ids = tracked_detection_manager_.RemoveObsoleteDetections( + GetInputTimestampMs(cc) - kDetectionUpdateTimeOutMS); + MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + + // TODO: Should be handled automatically in detection manager. + removed_ids = tracked_detection_manager_.RemoveOutOfViewDetections(); + MoveIds(removed_detection_ids.get(), std::move(removed_ids)); + + if (!removed_detection_ids->empty() && + cc->Outputs().HasTag(kCancelObjectIdTag)) { + auto timestamp = cc->InputTimestamp(); + for (int box_id : *removed_detection_ids) { + // The timestamp is incremented (by 1 us) because currently the box + // tracker calculator only accepts one cancel object ID for any given + // timestamp. + cc->Outputs() + .Tag(kCancelObjectIdTag) + .AddPacket(mediapipe::MakePacket(box_id).At(timestamp++)); + } + } + + // Output detections and corresponding bounding boxes. 
+ const auto& all_detections = + tracked_detection_manager_.GetAllTrackedDetections(); + auto output_detections = absl::make_unique>(); + auto output_boxes = absl::make_unique>(); + + for (const auto& detection_ptr : all_detections) { + const auto& detection = *detection_ptr.second; + // Only output detections that are synced. + if (detection.last_updated_timestamp() < + cc->InputTimestamp().Microseconds() / 1000) { + continue; + } + output_detections->emplace_back( + GetAxisAlignedDetectionFromTrackedDetection(detection)); + output_boxes->emplace_back(detection.bounding_box()); + } + if (cc->Outputs().HasTag(kDetectionsTag)) { + cc->Outputs() + .Tag(kDetectionsTag) + .Add(output_detections.release(), cc->InputTimestamp()); + } + + if (cc->Outputs().HasTag(kDetectionBoxesTag)) { + cc->Outputs() + .Tag(kDetectionBoxesTag) + .Add(output_boxes.release(), cc->InputTimestamp()); + } + } + } + + if (cc->Inputs().HasTag(kDetectionsTag) && + !cc->Inputs().Tag(kDetectionsTag).IsEmpty()) { + const auto detections = + cc->Inputs().Tag(kDetectionsTag).Get>(); + AddDetections(detections, cc); + } + + if (cc->Inputs().HasTag(kDetectionListTag) && + !cc->Inputs().Tag(kDetectionListTag).IsEmpty()) { + const auto detection_list = + cc->Inputs().Tag(kDetectionListTag).Get(); + AddDetectionList(detection_list, cc); + } + + return ::mediapipe::OkStatus(); +} + +void TrackedDetectionManagerCalculator::AddDetectionList( + const DetectionList& detection_list, CalculatorContext* cc) { + for (const auto& detection : detection_list.detection()) { + // Convert from microseconds to milliseconds. 
+ std::unique_ptr new_detection = + GetTrackedDetectionFromDetection( + detection, cc->InputTimestamp().Microseconds() / 1000); + + const int id = new_detection->unique_id(); + waiting_for_update_detections_[id] = std::move(new_detection); + } +} + +void TrackedDetectionManagerCalculator::AddDetections( + const std::vector& detections, CalculatorContext* cc) { + for (const auto& detection : detections) { + // Convert from microseconds to milliseconds. + std::unique_ptr new_detection = + GetTrackedDetectionFromDetection( + detection, cc->InputTimestamp().Microseconds() / 1000); + + const int id = new_detection->unique_id(); + waiting_for_update_detections_[id] = std::move(new_detection); + } +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/video/tracking_graph_test.cc b/mediapipe/calculators/video/tracking_graph_test.cc new file mode 100644 index 000000000..fc04ee6e8 --- /dev/null +++ b/mediapipe/calculators/video/tracking_graph_test.cc @@ -0,0 +1,709 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include + +#include "mediapipe/calculators/video/box_tracker_calculator.pb.h" +#include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/advanced_proto_inc.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/proto_ns.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_matchers.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" +#include "mediapipe/util/tracking/tracking.pb.h" + +#ifdef __APPLE__ +#include +#endif // defined(__APPLE__) + +namespace mediapipe { +namespace { +using ::testing::FloatNear; +using ::testing::Test; + +std::string GetTestDir() { +#ifdef __APPLE__ + char path[1024]; + CFURLRef bundle_url = CFBundleCopyBundleURL(CFBundleGetMainBundle()); + CFURLGetFileSystemRepresentation( + bundle_url, true, reinterpret_cast(path), sizeof(path)); + CFRelease(bundle_url); + return ::mediapipe::file::JoinPath(path, "testdata"); +#elif defined(__ANDROID__) + char path[1024]; + getcwd(path, sizeof(path)); + return ::mediapipe::file::JoinPath(path, + "mediapipe/calculators/video/testdata"); +#else + return ::mediapipe::file::JoinPath( + "./", + // This should match the path of the output files + // of the genrule() that generates test model files. 
+ "mediapipe/calculators/video/testdata");
+#endif // defined(__APPLE__)
+}
+
+bool LoadBinaryTestGraph(const std::string& graph_path,
+ CalculatorGraphConfig* config) {
+ std::ifstream ifs;
+ ifs.open(graph_path.c_str());
+ proto_ns::io::IstreamInputStream in_stream(&ifs);
+ bool success = config->ParseFromZeroCopyStream(&in_stream);
+ ifs.close();
+ if (!success) {
+ LOG(ERROR) << "could not parse test graph: " << graph_path;
+ }
+ return success;
+}
+
+class TrackingGraphTest : public Test {
+ protected:
+ TrackingGraphTest() {}
+
+ void SetUp() override {
+ test_dir_ = GetTestDir();
+ const auto graph_path = file::JoinPath(test_dir_, "tracker.binarypb");
+ ASSERT_TRUE(LoadBinaryTestGraph(graph_path, &config_));
+
+ original_image_ = cv::imread(file::JoinPath(test_dir_, "lenna.png"));
+ CreateInputFramesFromOriginalImage(kNumImages, kTranslationStep,
+ &input_frames_packets_);
+
+ const auto& first_input_img = input_frames_packets_[0].Get<ImageFrame>();
+ const int img_width = first_input_img.Width();
+ const int img_height = first_input_img.Height();
+ translation_step_x_ = kTranslationStep / static_cast<float>(img_width);
+ translation_step_y_ = kTranslationStep / static_cast<float>(img_height);
+
+ // Create new config and packet dump vector to store output.
+ mediapipe::CalculatorGraphConfig config_copy = config_;
+ mediapipe::tool::AddVectorSink("boxes", &config_copy, &output_packets_);
+ mediapipe::tool::AddVectorSink("ra_boxes", &config_copy,
+ &random_access_results_packets_);
+
+ // Initialize graph
+ MP_ASSERT_OK(graph_.Initialize(config_copy));
+
+ const auto parallel_graph_path =
+ file::JoinPath(test_dir_, "parallel_tracker.binarypb");
+ CalculatorGraphConfig parallel_config;
+ ASSERT_TRUE(LoadBinaryTestGraph(parallel_graph_path, &parallel_config));
+ mediapipe::tool::AddVectorSink("boxes", &parallel_config, &output_packets_);
+ mediapipe::tool::AddVectorSink("ra_boxes", &parallel_config,
+ &random_access_results_packets_);
+ MP_ASSERT_OK(parallel_graph_.Initialize(parallel_config));
+ }
+
+ void CreateInputFramesFromOriginalImage(
+ int num_images, int translation_step,
+ std::vector<Packet>* input_frames_packets);
+
+ void TearDown() override {
+ output_packets_.clear();
+ random_access_results_packets_.clear();
+ }
+
+ std::unique_ptr<TimedBoxProtoList> MakeBoxList(
+ const Timestamp& timestamp, const std::vector<bool>& is_quad_tracking,
+ const std::vector<bool>& is_pnp_tracking,
+ const std::vector<bool>& reacquisition) const;
+
+ void RunGraphWithSidePacketsAndInputs(
+ const std::map<std::string, Packet>& side_packets,
+ const mediapipe::Packet& start_pos_packet);
+
+ // Utility functions used to judge if a given quad or box is near to the
+ // groundtruth location at a given frame.
+ // Examine box.reacquisition() field equals to `reacquisition`.
+ // `frame` can be float number to account for inter-frame interpolation.
+ void ExpectBoxAtFrame(const TimedBoxProto& box, float frame,
+ bool reacquisition);
+
+ // Examine box.aspect_ratio() field equals to `aspect_ratio` if aspect_ratio
+ // is positive.
+ void ExpectQuadAtFrame(const TimedBoxProto& box, float frame,
+ float aspect_ratio, bool reacquisition);
+
+ // Utility function to judge if two quads are near to each other.
+ void ExpectQuadNear(const TimedBoxProto& box1, const TimedBoxProto& box2); + + std::unique_ptr CreateRandomAccessTrackingBoxList( + const std::vector& start_timestamps, + const std::vector& end_timestamps) const; + + CalculatorGraph graph_; + CalculatorGraph parallel_graph_; + CalculatorGraphConfig config_; + std::string test_dir_; + cv::Mat original_image_; + std::vector input_frames_packets_; + std::vector output_packets_; + std::vector random_access_results_packets_; + float translation_step_x_; // normalized translation step in x direction + float translation_step_y_; // normalized translation step in y direction + static constexpr float kInitialBoxHalfWidthNormalized = 0.25f; + static constexpr float kInitialBoxHalfHeightNormalized = 0.25f; + static constexpr float kImageAspectRatio = 1.0f; // for lenna.png + static constexpr float kInitialBoxLeft = + 0.5f - kInitialBoxHalfWidthNormalized; + static constexpr float kInitialBoxRight = + 0.5f + kInitialBoxHalfWidthNormalized; + static constexpr float kInitialBoxTop = + 0.5f - kInitialBoxHalfHeightNormalized; + static constexpr float kInitialBoxBottom = + 0.5f + kInitialBoxHalfHeightNormalized; + static constexpr int kFrameIntervalUs = 30000; + static constexpr int kNumImages = 8; + // Each image is shifted to the right and bottom by kTranslationStep + // pixels compared with the previous image. 
+ static constexpr int kTranslationStep = 10; + static constexpr float kEqualityTolerance = 3e-4f; +}; + +void TrackingGraphTest::ExpectBoxAtFrame(const TimedBoxProto& box, float frame, + bool reacquisition) { + EXPECT_EQ(box.reacquisition(), reacquisition); + EXPECT_TRUE(box.has_rotation()); + EXPECT_THAT(box.rotation(), FloatNear(0, kEqualityTolerance)); + EXPECT_THAT(box.left(), + FloatNear(kInitialBoxLeft - frame * translation_step_x_, + kEqualityTolerance)); + EXPECT_THAT(box.top(), FloatNear(kInitialBoxTop - frame * translation_step_y_, + kEqualityTolerance)); + EXPECT_THAT(box.bottom(), + FloatNear(kInitialBoxBottom - frame * translation_step_y_, + kEqualityTolerance)); + EXPECT_THAT(box.right(), + FloatNear(kInitialBoxRight - frame * translation_step_x_, + kEqualityTolerance)); +} + +void TrackingGraphTest::ExpectQuadAtFrame(const TimedBoxProto& box, float frame, + float aspect_ratio, + bool reacquisition) { + EXPECT_TRUE(box.has_quad()) << "quad must exist!"; + if (aspect_ratio > 0) { + EXPECT_TRUE(box.has_aspect_ratio()); + EXPECT_NEAR(box.aspect_ratio(), aspect_ratio, kEqualityTolerance); + } + + EXPECT_EQ(box.reacquisition(), reacquisition); + + const auto& quad = box.quad(); + EXPECT_EQ(8, quad.vertices_size()) + << "quad has only " << box.quad().vertices_size() << " vertices"; + EXPECT_THAT(quad.vertices(0), + FloatNear(kInitialBoxLeft - frame * translation_step_x_, + kEqualityTolerance)); + EXPECT_THAT(quad.vertices(1), + FloatNear(kInitialBoxTop - frame * translation_step_y_, + kEqualityTolerance)); + EXPECT_THAT(quad.vertices(3), + FloatNear(kInitialBoxBottom - frame * translation_step_y_, + kEqualityTolerance)); + EXPECT_THAT(quad.vertices(4), + FloatNear(kInitialBoxRight - frame * translation_step_x_, + kEqualityTolerance)); +} + +void TrackingGraphTest::ExpectQuadNear(const TimedBoxProto& box1, + const TimedBoxProto& box2) { + EXPECT_TRUE(box1.has_quad()); + EXPECT_TRUE(box2.has_quad()); + EXPECT_EQ(8, box1.quad().vertices_size()) + << "quad has 
only " << box1.quad().vertices_size() << " vertices"; + EXPECT_EQ(8, box2.quad().vertices_size()) + << "quad has only " << box2.quad().vertices_size() << " vertices"; + for (int j = 0; j < box1.quad().vertices_size(); ++j) { + EXPECT_NEAR(box1.quad().vertices(j), box2.quad().vertices(j), + kEqualityTolerance); + } +} + +std::unique_ptr TrackingGraphTest::MakeBoxList( + const Timestamp& timestamp, const std::vector& is_quad_tracking, + const std::vector& is_pnp_tracking, + const std::vector& reacquisition) const { + auto box_list = absl::make_unique(); + int box_id = 0; + for (int j = 0; j < is_quad_tracking.size(); ++j) { + TimedBoxProto* box = box_list->add_box(); + if (is_quad_tracking[j]) { + box->mutable_quad()->add_vertices(kInitialBoxLeft); + box->mutable_quad()->add_vertices(kInitialBoxTop); + box->mutable_quad()->add_vertices(kInitialBoxLeft); + box->mutable_quad()->add_vertices(kInitialBoxBottom); + box->mutable_quad()->add_vertices(kInitialBoxRight); + box->mutable_quad()->add_vertices(kInitialBoxBottom); + box->mutable_quad()->add_vertices(kInitialBoxRight); + box->mutable_quad()->add_vertices(kInitialBoxTop); + + if (is_pnp_tracking[j]) { + box->set_aspect_ratio(kImageAspectRatio); + } + } else { + box->set_left(kInitialBoxLeft); + box->set_right(kInitialBoxRight); + box->set_top(kInitialBoxTop); + box->set_bottom(kInitialBoxBottom); + } + box->set_id(box_id++); + box->set_time_msec(timestamp.Value() / 1000); + box->set_reacquisition(reacquisition[j]); + } + + return box_list; +} + +void TrackingGraphTest::CreateInputFramesFromOriginalImage( + int num_images, int translation_step, + std::vector* input_frames_packets) { + const int crop_width = original_image_.cols - num_images * translation_step; + const int crop_height = original_image_.rows - num_images * translation_step; + for (int i = 0; i < num_images; ++i) { + cv::Rect roi(i * translation_step, i * translation_step, crop_width, + crop_height); + cv::Mat cropped_img = cv::Mat(original_image_, 
roi); + auto cropped_image_frame = absl::make_unique( + ImageFormat::SRGB, crop_width, crop_height, cropped_img.step[0], + cropped_img.data, ImageFrame::PixelDataDeleter::kNone); + Timestamp curr_timestamp = Timestamp(i * kFrameIntervalUs); + Packet image_packet = + Adopt(cropped_image_frame.release()).At(curr_timestamp); + input_frames_packets->push_back(image_packet); + } +} + +void TrackingGraphTest::RunGraphWithSidePacketsAndInputs( + const std::map& side_packets, + const mediapipe::Packet& start_pos_packet) { + // Start running the graph + MP_EXPECT_OK(graph_.StartRun(side_packets)); + + MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_pos_packet)); + + for (auto frame_packet : input_frames_packets_) { + MP_EXPECT_OK( + graph_.AddPacketToInputStream("image_cpu_frames", frame_packet)); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + } + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +std::unique_ptr +TrackingGraphTest::CreateRandomAccessTrackingBoxList( + const std::vector& start_timestamps, + const std::vector& end_timestamps) const { + CHECK_EQ(start_timestamps.size(), end_timestamps.size()); + auto ra_boxes = absl::make_unique(); + for (int i = 0; i < start_timestamps.size(); ++i) { + auto start_box_list = + MakeBoxList(start_timestamps[i], std::vector{true}, + std::vector{true}, std::vector{false}); + auto end_box_list = + MakeBoxList(end_timestamps[i], std::vector{true}, + std::vector{true}, std::vector{false}); + *(ra_boxes->add_box()) = (*start_box_list).box(0); + *(ra_boxes->add_box()) = (*end_box_list).box(0); + } + return ra_boxes; +} + +TEST_F(TrackingGraphTest, BasicBoxTrackingSanityCheck) { + // Create input side packets. 
+ std::map side_packets; + side_packets.insert(std::make_pair("analysis_downsample_factor", + mediapipe::MakePacket(1.0f))); + side_packets.insert(std::make_pair( + "calculator_options", + mediapipe::MakePacket(CalculatorOptions()))); + + // Run the graph with input side packets, start_pos, and input image frames. + Timestamp start_box_time = input_frames_packets_[0].Timestamp(); + // is_quad_tracking is used to indicate whether to track quad for each + // individual box. + std::vector is_quad_tracking{false}; + // is_pnp_tracking is used to indicate whether to use perspective transform to + // track quad. + std::vector is_pnp_tracking{false}; + // is_reacquisition is used to indicate whether to enable reacquisition for + // the box. + std::vector is_reacquisition{false}; + auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking, + is_pnp_tracking, is_reacquisition); + Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time); + RunGraphWithSidePacketsAndInputs(side_packets, start_pos_packet); + + EXPECT_EQ(input_frames_packets_.size(), output_packets_.size()); + + for (int i = 0; i < output_packets_.size(); ++i) { + const TimedBoxProtoList& boxes = + output_packets_[i].Get(); + EXPECT_EQ(is_quad_tracking.size(), boxes.box_size()); + ExpectBoxAtFrame(boxes.box(0), i, false); + } +} + +TEST_F(TrackingGraphTest, BasicQuadTrackingSanityCheck) { + // Create input side packets. 
+ std::map side_packets; + side_packets.insert(std::make_pair("analysis_downsample_factor", + mediapipe::MakePacket(1.0f))); + CalculatorOptions calculator_options; + calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext) + ->mutable_tracker_options() + ->mutable_track_step_options() + ->set_tracking_degrees( + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE); + side_packets.insert(std::make_pair( + "calculator_options", + mediapipe::MakePacket(calculator_options))); + + Timestamp start_box_time = input_frames_packets_[0].Timestamp(); + // Box id 0 use quad tracking with 8DoF homography transform. + // Box id 1 use quad tracking with 6DoF perspective transform. + // Box id 2 use box tracking with 4DoF similarity transform. + std::vector is_quad_tracking{true, true, false}; + std::vector is_pnp_tracking{false, true, false}; + std::vector is_reacquisition{true, false, true}; + auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking, + is_pnp_tracking, is_reacquisition); + Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time); + RunGraphWithSidePacketsAndInputs(side_packets, start_pos_packet); + + EXPECT_EQ(input_frames_packets_.size(), output_packets_.size()); + for (int i = 0; i < output_packets_.size(); ++i) { + const TimedBoxProtoList& boxes = + output_packets_[i].Get(); + EXPECT_EQ(is_quad_tracking.size(), boxes.box_size()); + for (int j = 0; j < boxes.box_size(); ++j) { + const TimedBoxProto& box = boxes.box(j); + if (is_quad_tracking[box.id()]) { + ExpectQuadAtFrame(box, i, + is_pnp_tracking[box.id()] ? kImageAspectRatio : -1.0f, + is_reacquisition[box.id()]); + } else { + ExpectBoxAtFrame(box, i, is_reacquisition[box.id()]); + } + } + } +} + +TEST_F(TrackingGraphTest, TestRandomAccessTrackingResults) { + // Create input side packets. 
+ std::map side_packets; + side_packets.insert(std::make_pair("analysis_downsample_factor", + mediapipe::MakePacket(1.0f))); + CalculatorOptions calculator_options; + calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext) + ->mutable_tracker_options() + ->mutable_track_step_options() + ->set_tracking_degrees( + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE); + side_packets.insert(std::make_pair( + "calculator_options", + mediapipe::MakePacket(calculator_options))); + + ASSERT_GT(input_frames_packets_.size(), 2); // at least 3 frames + ASSERT_TRUE(input_frames_packets_[2].Timestamp() - + input_frames_packets_[1].Timestamp() > + TimestampDiff(1000)); + + constexpr int start_frame = 0; + Timestamp start_box_time = input_frames_packets_[start_frame].Timestamp(); + auto start_box_list = + MakeBoxList(start_box_time, std::vector{true}, + std::vector{true}, std::vector{false}); + constexpr int end_frame = 2; + Timestamp end_box_time = input_frames_packets_[end_frame].Timestamp(); + + // Also test reverse random access tracking. + // This offset of 1ms is simulating the case where the start query timestamp + // to be not any existing frame timestamp. In reality, it's highly encouraged + // to have the start query timestamp be aligned with frame timestamp. + constexpr int reverse_start_frame = 1; + Timestamp reverse_start_box_time = + input_frames_packets_[reverse_start_frame].Timestamp() + 1000; + + auto ra_boxes = CreateRandomAccessTrackingBoxList( + {start_box_time, reverse_start_box_time}, {end_box_time, start_box_time}); + + Packet ra_packet = Adopt(ra_boxes.release()).At(start_box_time); + Packet start_packet = Adopt(start_box_list.release()).At(start_box_time); + + // Start running the ordinary graph, verify random access produce same result + // as normal tracking. 
+ MP_EXPECT_OK(graph_.StartRun(side_packets));
+ MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_packet));
+ for (auto frame_packet : input_frames_packets_) {
+ MP_EXPECT_OK(
+ graph_.AddPacketToInputStream("image_cpu_frames", frame_packet));
+ Packet track_time_packet = Adopt(new int(0)).At(frame_packet.Timestamp());
+ MP_EXPECT_OK(
+ graph_.AddPacketToInputStream("track_time", track_time_packet));
+ MP_EXPECT_OK(graph_.WaitUntilIdle());
+ }
+ MP_EXPECT_OK(graph_.AddPacketToInputStream("ra_track", ra_packet));
+ MP_EXPECT_OK(graph_.CloseAllInputStreams());
+ MP_EXPECT_OK(graph_.WaitUntilDone());
+
+ EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
+ const TimedBoxProtoList tracking_result =
+ output_packets_[end_frame].Get<TimedBoxProtoList>();
+ EXPECT_EQ(1, tracking_result.box_size());
+
+ // Should have 1 random access packet.
+ EXPECT_EQ(1, random_access_results_packets_.size());
+ const TimedBoxProtoList& ra_result =
+ random_access_results_packets_[0].Get<TimedBoxProtoList>();
+ // Two box tracking results. One for comparison with normal tracking. The
+ // other for reverse random access tracking.
+ EXPECT_EQ(2, ra_result.box_size());
+
+ // Check if random access tracking has same result with normal tracking.
+ ExpectQuadNear(tracking_result.box(0), ra_result.box(0));
+ ExpectQuadAtFrame(ra_result.box(0), end_frame - start_frame,
+ kImageAspectRatio, false);
+ ExpectQuadAtFrame(ra_result.box(1), start_frame - reverse_start_frame - 1,
+ kImageAspectRatio, false);
+
+ // Clear output and ra result packet vector before test parallel graph.
+ TearDown();
+
+ // Start running the parallel graph, verify random access produce same result
+ // as normal tracking.
+  MP_EXPECT_OK(parallel_graph_.StartRun(side_packets));
+  MP_EXPECT_OK(
+      parallel_graph_.AddPacketToInputStream("start_pos", start_packet));
+  for (auto frame_packet : input_frames_packets_) {
+    MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("image_cpu_frames",
+                                                        frame_packet));
+    MP_EXPECT_OK(parallel_graph_.WaitUntilIdle());
+  }
+  MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("ra_track", ra_packet));
+  MP_EXPECT_OK(parallel_graph_.CloseAllInputStreams());
+  MP_EXPECT_OK(parallel_graph_.WaitUntilDone());
+
+  EXPECT_EQ(input_frames_packets_.size(), output_packets_.size());
+  const TimedBoxProtoList parallel_tracking_result =
+      output_packets_[end_frame].Get();
+  EXPECT_EQ(1, parallel_tracking_result.box_size());
+
+  // Should have only 1 random access packet.
+  EXPECT_EQ(1, random_access_results_packets_.size());
+  const TimedBoxProtoList& parallel_ra_result =
+      random_access_results_packets_[0].Get();
+  EXPECT_EQ(2, parallel_ra_result.box_size());
+
+  // Check that random access tracking has the same result as normal tracking.
+  ExpectQuadNear(parallel_tracking_result.box(0), parallel_ra_result.box(0));
+  ExpectQuadAtFrame(parallel_ra_result.box(0), end_frame - start_frame,
+                    kImageAspectRatio, false);
+  ExpectQuadAtFrame(parallel_ra_result.box(1),
+                    start_frame - reverse_start_frame - 1, kImageAspectRatio,
+                    false);
+}
+
+// Tests what happens when random access request timestamps are
+// outside of cache.
+TEST_F(TrackingGraphTest, TestRandomAccessTrackingTimestamps) {
+  // Create input side packets.
+ std::map side_packets; + side_packets.insert(std::make_pair("analysis_downsample_factor", + mediapipe::MakePacket(1.0f))); + CalculatorOptions calculator_options; + calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext) + ->mutable_tracker_options() + ->mutable_track_step_options() + ->set_tracking_degrees( + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE); + // We intentionally don't cache all frames, to see what happens when + // random access tracking request time falls outside cache range. + calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext) + ->set_streaming_track_data_cache_size(input_frames_packets_.size() - 1); + side_packets.insert(std::make_pair( + "calculator_options", + mediapipe::MakePacket(calculator_options))); + + // Set up random access boxes + const int num_frames = input_frames_packets_.size(); + const int64 usec_in_sec = 1000000; + std::vector start_timestamps{ + input_frames_packets_[0].Timestamp() - usec_in_sec, // forward + input_frames_packets_[0].Timestamp(), // forward + input_frames_packets_[1].Timestamp(), // forward + input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec, // fwd + input_frames_packets_[0].Timestamp(), // backward + input_frames_packets_[num_frames - 1].Timestamp(), // backward + input_frames_packets_[num_frames - 1].Timestamp(), // backward + input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec // back + }; + std::vector end_timestamps{ + input_frames_packets_[num_frames - 1].Timestamp(), + input_frames_packets_[num_frames - 1].Timestamp(), + input_frames_packets_[num_frames - 1].Timestamp() + usec_in_sec, + input_frames_packets_[num_frames - 1].Timestamp() + 2 * usec_in_sec, + input_frames_packets_[0].Timestamp() - usec_in_sec, + input_frames_packets_[0].Timestamp(), + input_frames_packets_[0].Timestamp() - usec_in_sec, + input_frames_packets_[1].Timestamp()}; + auto ra_boxes = + CreateRandomAccessTrackingBoxList(start_timestamps, end_timestamps); + Packet 
ra_packet = + Adopt(ra_boxes.release()).At(input_frames_packets_[0].Timestamp()); + + // Run the graph and check if the outside-cache request have no results. + // Start running the parallel graph, verify random access produce same result + // as normal tracking. + MP_EXPECT_OK(parallel_graph_.StartRun(side_packets)); + for (auto frame_packet : input_frames_packets_) { + MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("image_cpu_frames", + frame_packet)); + MP_EXPECT_OK(parallel_graph_.WaitUntilIdle()); + } + MP_EXPECT_OK(parallel_graph_.AddPacketToInputStream("ra_track", ra_packet)); + MP_EXPECT_OK(parallel_graph_.CloseAllInputStreams()); + MP_EXPECT_OK(parallel_graph_.WaitUntilDone()); + + // should have 1 random access packet with 0 result boxes + EXPECT_EQ(1, random_access_results_packets_.size()); + const auto& ra_returned_boxes = + random_access_results_packets_[0].Get(); + const int num_returned_ra_boxes = ra_returned_boxes.box_size(); + EXPECT_EQ(0, num_returned_ra_boxes); +} + +TEST_F(TrackingGraphTest, TestTransitionFramesForReacquisition) { + // Create input side packets. + std::map side_packets; + side_packets.insert(std::make_pair("analysis_downsample_factor", + mediapipe::MakePacket(1.0f))); + CalculatorOptions calculator_options; + calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext) + ->mutable_tracker_options() + ->mutable_track_step_options() + ->set_tracking_degrees( + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE); + constexpr int kTransitionFrames = 3; + calculator_options.MutableExtension(BoxTrackerCalculatorOptions::ext) + ->set_start_pos_transition_frames(kTransitionFrames); + + side_packets.insert(std::make_pair( + "calculator_options", + mediapipe::MakePacket(calculator_options))); + + Timestamp start_box_time = input_frames_packets_[0].Timestamp(); + // Box id 0 use quad tracking with 8DoF homography transform. + // Box id 1 use quad tracking with 6DoF perspective transform. 
+ // Box id 2 use box tracking with 4DoF similarity transform. + std::vector is_quad_tracking{true, true, false}; + std::vector is_pnp_tracking{false, true, false}; + std::vector is_reacquisition{true, true, true}; + auto start_box_list = MakeBoxList(start_box_time, is_quad_tracking, + is_pnp_tracking, is_reacquisition); + Packet start_pos_packet = Adopt(start_box_list.release()).At(start_box_time); + + // Setting box pos restart from initial position (frame 0's position). + constexpr int kRestartFrame = 3; + Timestamp restart_box_time = input_frames_packets_[kRestartFrame].Timestamp(); + auto restart_box_list = MakeBoxList(restart_box_time, is_quad_tracking, + is_pnp_tracking, is_reacquisition); + Packet restart_pos_packet = + Adopt(restart_box_list.release()).At(restart_box_time); + MP_EXPECT_OK(graph_.StartRun(side_packets)); + MP_EXPECT_OK(graph_.AddPacketToInputStream("start_pos", start_pos_packet)); + + for (int j = 0; j < input_frames_packets_.size(); ++j) { + // Add TRACK_TIME stream queries in between 2 frames. 
+ if (j > 0) { + Timestamp track_time = Timestamp((j - 0.5f) * kFrameIntervalUs); + LOG(INFO) << track_time.Value(); + Packet track_time_packet = Adopt(new Timestamp).At(track_time); + MP_EXPECT_OK( + graph_.AddPacketToInputStream("track_time", track_time_packet)); + } + + MP_EXPECT_OK(graph_.AddPacketToInputStream("image_cpu_frames", + input_frames_packets_[j])); + Packet track_time_packet = + Adopt(new int(0)).At(input_frames_packets_[j].Timestamp()); + MP_EXPECT_OK( + graph_.AddPacketToInputStream("track_time", track_time_packet)); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + + if (j == kRestartFrame) { + MP_EXPECT_OK( + graph_.AddPacketToInputStream("restart_pos", restart_pos_packet)); + } + } + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); + + EXPECT_EQ(input_frames_packets_.size() * 2 - 1, output_packets_.size()); + for (int i = 0; i < output_packets_.size(); ++i) { + const TimedBoxProtoList& boxes = + output_packets_[i].Get(); + EXPECT_EQ(is_quad_tracking.size(), boxes.box_size()); + float frame_id = i / 2.0f; + float expected_frame_id; + if (frame_id <= kRestartFrame) { + // before transition + expected_frame_id = frame_id; + } else { + float transition_frames = frame_id - kRestartFrame; + if (transition_frames <= kTransitionFrames) { + // transitioning. + expected_frame_id = + kRestartFrame - + transition_frames / kTransitionFrames * kRestartFrame + + transition_frames; + } else { + // after transition. + expected_frame_id = transition_frames; + } + } + + for (int j = 0; j < boxes.box_size(); ++j) { + const TimedBoxProto& box = boxes.box(j); + if (is_quad_tracking[box.id()]) { + ExpectQuadAtFrame(box, expected_frame_id, + is_pnp_tracking[box.id()] ? kImageAspectRatio : -1.0f, + is_reacquisition[box.id()]); + } else { + ExpectBoxAtFrame(box, expected_frame_id, is_reacquisition[box.id()]); + } + } + } +} + +// TODO: Add test for reacquisition. 
+ +} // namespace +} // namespace mediapipe diff --git a/mediapipe/docs/examples.md b/mediapipe/docs/examples.md index 846f4f7e7..0f3077a5d 100644 --- a/mediapipe/docs/examples.md +++ b/mediapipe/docs/examples.md @@ -39,6 +39,11 @@ pipeline. the same TFLite model in a CPU-based pipeline. This example highlights how graphs can be easily adapted to run on CPU v.s. GPU. +### Object Detection and Tracking with GPU + +[Object Detection and Tracking with GPU](./object_tracking_mobile_gpu.md) illustrates how to +use MediaPipe for object detection and tracking. + ### Face Detection with GPU [Face Detection with GPU](./face_detection_mobile_gpu.md) illustrates how to use diff --git a/mediapipe/docs/images/mobile/box_tracking_subgraph.png b/mediapipe/docs/images/mobile/box_tracking_subgraph.png new file mode 100644 index 000000000..fccfc65eb Binary files /dev/null and b/mediapipe/docs/images/mobile/box_tracking_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/object_detection_gpu_subgraph.png b/mediapipe/docs/images/mobile/object_detection_gpu_subgraph.png new file mode 100644 index 000000000..8c4a24f95 Binary files /dev/null and b/mediapipe/docs/images/mobile/object_detection_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif b/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif new file mode 100644 index 000000000..cb8b4f6e3 Binary files /dev/null and b/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif b/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif new file mode 100644 index 000000000..9fd53144f Binary files /dev/null and b/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/object_tracking_mobile_gpu.png b/mediapipe/docs/images/mobile/object_tracking_mobile_gpu.png new file mode 100644 index 000000000..de09514c1 Binary 
files /dev/null and b/mediapipe/docs/images/mobile/object_tracking_mobile_gpu.png differ diff --git a/mediapipe/docs/images/mobile/object_tracking_renderer_gpu_subgraph.png b/mediapipe/docs/images/mobile/object_tracking_renderer_gpu_subgraph.png new file mode 100644 index 000000000..b164643a6 Binary files /dev/null and b/mediapipe/docs/images/mobile/object_tracking_renderer_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/object_tracking_subgraph.png b/mediapipe/docs/images/mobile/object_tracking_subgraph.png new file mode 100644 index 000000000..8b7aa2143 Binary files /dev/null and b/mediapipe/docs/images/mobile/object_tracking_subgraph.png differ diff --git a/mediapipe/docs/install.md b/mediapipe/docs/install.md index 97eb8ca0b..cdee42746 100644 --- a/mediapipe/docs/install.md +++ b/mediapipe/docs/install.md @@ -63,6 +63,7 @@ To build and run iOS apps: ```bash $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \ + libopencv-calib3d-dev libopencv-features2d-dev \ libopencv-imgproc-dev libopencv-video-dev ``` @@ -411,6 +412,7 @@ To build and run iOS apps: ```bash username@DESKTOP-TMVLBJ1:~/mediapipe$ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \ + libopencv-calib3d-dev libopencv-features2d-dev \ libopencv-imgproc-dev libopencv-video-dev ``` diff --git a/mediapipe/docs/object_tracking_mobile_gpu.md b/mediapipe/docs/object_tracking_mobile_gpu.md new file mode 100644 index 000000000..aacd1f288 --- /dev/null +++ b/mediapipe/docs/object_tracking_mobile_gpu.md @@ -0,0 +1,475 @@ +# Object Detection and Tracking + +This doc focuses on the +[below example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt) +that performs object detection and tracking. +Note that object detection is using TensorFlow Lite on GPU while tracking is using CPU. 
+
+For overall context on object detection and tracking, please read this
+[Google Developer Blog](https://mediapipe.page.link/objecttrackingblog).
+
+![object_tracking_android_gpu_gif](images/mobile/object_tracking_android_gpu.gif)
+
+## Android
+
+[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu)
+
+To build and install the app:
+
+```bash
+bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu
+```
+
+Once the app is built, install it on Android device with:
+
+```bash
+adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/objecttrackinggpu.apk
+```
+
+## Graph
+The object detection and tracking [main graph](#main-graph) internally utilizes an
+[object detection subgraph](#object-detection-subgraph), an
+[object tracking subgraph](#object-tracking-subgraph) and a
+[renderer subgraph](#renderer-subgraph).
+
+
+The subgraphs show up in the main graph visualization as nodes colored in
+purple, and the subgraph itself can also be visualized just like a regular
+graph. For more information on how to visualize a graph that includes subgraphs,
+see the Visualizing Subgraphs section in the
+[visualizer documentation](./visualizer.md).
+
+### Main Graph
+
+
+![object_detection_mobile_gpu_graph](images/mobile/object_tracking_mobile_gpu.png)
+
+[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt)
+
+```bash
+# MediaPipe graph that performs object detection and tracking.
+# Used in the examples in
+# mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu
+
+# Images on GPU coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Resamples the images by specific frame rate. 
This calculator is used to
+# control the frequency of subsequent calculators/subgraphs, e.g. less power
+# consumption for expensive processes.
+node {
+  calculator: "PacketResamplerCalculator"
+  input_stream: "DATA:input_video"
+  output_stream: "DATA:throttled_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
+      frame_rate: 0.5
+    }
+  }
+}
+
+# Subgraph that detects objects (see object_detection_gpu.pbtxt).
+node {
+  calculator: "ObjectDetectionSubgraph"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "DETECTIONS:output_detections"
+}
+
+# Subgraph that tracks objects (see object_tracking.pbtxt).
+node {
+  calculator: "ObjectTrackingSubgraph"
+  input_stream: "VIDEO:input_video"
+  input_stream: "DETECTIONS:output_detections"
+  output_stream: "DETECTIONS:tracked_detections"
+}
+
+# Subgraph that renders annotations and overlays them on top of the input
+# images (see renderer_gpu.pbtxt).
+node {
+  calculator: "RendererSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "DETECTIONS:tracked_detections"
+  output_stream: "IMAGE:output_video"
+}
+```
+
+### Object Detection Subgraph
+
+![object_detection_gpu_subgraph](images/mobile/object_detection_gpu_subgraph.png)
+
+[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt)
+
+```bash
+# MediaPipe object detection subgraph.
+
+type: "ObjectDetectionSubgraph"
+
+input_stream: "IMAGE:input_video"
+output_stream: "DETECTIONS:output_detections"
+
+# Transforms the input image on GPU to a 320x320 image. To scale the image, by
+# default it uses the STRETCH scale mode that maps the entire input image to the
+# entire transformed image. As a result, image aspect ratio may be changed and
+# objects in the image may be deformed (stretched or squeezed), but the object
+# detection model used in this graph is agnostic to that deformation. 
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 320 + output_height: 320 + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "TENSORS_GPU:image_tensor" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:image_tensor" + output_stream: "TENSORS_GPU:detection_tensors" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/ssdlite_object_detection.tflite" + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + node_options: { + [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] { + num_layers: 6 + min_scale: 0.2 + max_scale: 0.95 + input_size_height: 320 + input_size_width: 320 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 16 + strides: 32 + strides: 64 + strides: 128 + strides: 256 + strides: 512 + aspect_ratios: 1.0 + aspect_ratios: 2.0 + aspect_ratios: 0.5 + aspect_ratios: 3.0 + aspect_ratios: 0.3333 + reduce_boxes_in_lowest_layer: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. 
+node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { + num_classes: 91 + num_boxes: 2034 + num_coords: 4 + ignore_classes: 0 + sigmoid_score: true + apply_exponential_on_box_size: true + x_scale: 10.0 + y_scale: 10.0 + h_scale: 5.0 + w_scale: 5.0 + min_score_thresh: 0.6 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "detections" + output_stream: "filtered_detections" + node_options: { + [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { + min_suppression_threshold: 0.4 + max_num_detections: 3 + overlap_type: INTERSECTION_OVER_UNION + return_empty_detections: true + } + } +} + +# Maps detection label IDs to the corresponding label text. The label map is +# provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "filtered_detections" + output_stream: "output_detections" + node_options: { + [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] { + label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt" + } + } +} +``` + +### Object Tracking Subgraph + +Object tracking subgraph uses a box tracking subgraph which is a generic +tracking library that can be used for other use cases. + +![object_tracking_subgraph](images/mobile/object_tracking_subgraph.png) + +[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt) + +```bash +# MediaPipe object tracking subgraph. + +type: "ObjectTrackingSubgraph" + +input_stream: "VIDEO:input_video" +input_stream: "DETECTIONS:new_detections" +output_stream: "DETECTIONS:tracked_detections" + +# Assigns an unique id for each new detection. 
+node {
+  calculator: "DetectionUniqueIdCalculator"
+  input_stream: "DETECTIONS:new_detections"
+  output_stream: "DETECTIONS:detections_with_id"
+}
+
+# Converts detections to TimedBox protos which are used as initial location
+# for tracking.
+node {
+  calculator: "DetectionsToTimedBoxListCalculator"
+  input_stream: "DETECTIONS:detections_with_id"
+  output_stream: "BOXES:start_pos"
+}
+
+# Subgraph that tracks boxes (see box_tracking.pbtxt).
+node {
+  calculator: "BoxTrackingSubgraph"
+  input_stream: "VIDEO:input_video"
+  input_stream: "BOXES:start_pos"
+  input_stream: "CANCEL_ID:cancel_object_id"
+  output_stream: "BOXES:boxes"
+}
+
+# Manages newly detected objects and objects that are being tracked.
+# It associates duplicated detections and updates the locations of
+# detections from tracking.
+node: {
+  calculator: "TrackedDetectionManagerCalculator"
+  input_stream: "DETECTIONS:detections_with_id"
+  input_stream: "TRACKING_BOXES:boxes"
+  output_stream: "DETECTIONS:tracked_detections"
+  output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
+
+  input_stream_handler {
+    input_stream_handler: "SyncSetInputStreamHandler"
+    options {
+      [mediapipe.SyncSetInputStreamHandlerOptions.ext] {
+        sync_set {
+          tag_index: "TRACKING_BOXES"
+        }
+        sync_set {
+          tag_index: "DETECTIONS"
+        }
+      }
+    }
+  }
+}
+
+```
+
+### Box Tracking Subgraph
+
+![box_tracking_subgraph](images/mobile/box_tracking_subgraph.png)
+
+[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt)
+
+```bash
+# MediaPipe box tracking subgraph. 
+ +type: "BoxTrackingSubgraph" + +input_stream: "VIDEO:input_video" +input_stream: "BOXES:start_pos" +input_stream: "CANCEL_ID:cancel_object_id" +output_stream: "BOXES:boxes" + +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:downscaled_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 240 + output_height: 320 + } + } +} + +# Converts GPU buffer to ImageFrame for processing tracking. +node: { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "downscaled_input_video" + output_stream: "downscaled_input_video_cpu" +} + +# Performs motion analysis on an incoming video stream. +node: { + calculator: "MotionAnalysisCalculator" + input_stream: "VIDEO:downscaled_input_video_cpu" + output_stream: "CAMERA:camera_motion" + output_stream: "FLOW:region_flow" + + node_options: { + [type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: { + analysis_options { + analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE + flow_options { + fast_estimation_min_block_size: 100 + top_inlier_sets: 1 + frac_inlier_error_threshold: 3e-3 + downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE + verification_distance: 5.0 + verify_long_feature_acceleration: true + verify_long_feature_trigger_ratio: 0.1 + tracking_options { + max_features: 500 + adaptive_extraction_levels: 2 + min_eig_val_settings { + adaptive_lowest_quality_level: 2e-4 + } + klt_tracker_implementation: KLT_OPENCV + } + } + } + } + } +} + +# Reads optical flow fields defined in +# mediapipe/framework/formats/motion/optical_flow_field.h, +# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized +# to 0-255. 
+node: { + calculator: "FlowPackagerCalculator" + input_stream: "FLOW:region_flow" + input_stream: "CAMERA:camera_motion" + output_stream: "TRACKING:tracking_data" + + node_options: { + [type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: { + flow_packager_options: { + binary_tracking_data_support: false + } + } + } +} + +# Tracks box positions over time. +node: { + calculator: "BoxTrackerCalculator" + input_stream: "TRACKING:tracking_data" + input_stream: "TRACK_TIME:input_video" + input_stream: "START_POS:start_pos" + input_stream: "CANCEL_OBJECT_ID:cancel_object_id" + input_stream_info: { + tag_index: "CANCEL_OBJECT_ID" + back_edge: true + } + output_stream: "BOXES:boxes" + + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "TRACKING" + tag_index: "TRACK_TIME" + } + sync_set { + tag_index: "START_POS" + } + sync_set { + tag_index: "CANCEL_OBJECT_ID" + } + } + } + } + + node_options: { + [type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: { + tracker_options: { + track_step_options { + track_object_and_camera: true + tracking_degrees: TRACKING_DEGREE_OBJECT_SCALE + inlier_spring_force: 0.0 + static_motion_temporal_ratio: 3e-2 + } + } + visualize_tracking_data: false + streaming_track_data_cache_size: 100 + } + } +} +``` + +### Renderer Subgraph + +![hand_renderer_gpu_subgraph.pbtxt](images/mobile/object_tracking_renderer_gpu_subgraph.png) + +[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt) + +```bash +# MediaPipe object tracking rendering subgraph. + +type: "RendererSubgraph" + +input_stream: "IMAGE:input_image" +input_stream: "DETECTIONS:detections" +output_stream: "IMAGE:output_image" + +# Converts the detections to drawing primitives for annotation overlay. 
+node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTIONS:detections" + output_stream: "RENDER_DATA:detections_render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 255 g: 0 b: 0 } + render_detection_id: true + } + } +} + +# Draws annotations and overlays them on top of the input images. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME_GPU:input_image" + input_stream: "detections_render_data" + output_stream: "OUTPUT_FRAME_GPU:output_image" +} + +``` diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/AndroidManifest.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/AndroidManifest.xml new file mode 100644 index 000000000..2c3693a8f --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/AndroidManifest.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD new file mode 100644 index 000000000..be0fc0cf2 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/BUILD @@ -0,0 +1,81 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/tracking:mobile_calculators", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be +# easily incorporated into the app via, for example, +# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb". +genrule( + name = "binary_graph", + srcs = ["//mediapipe/graphs/tracking:mobile_gpu_binary_graph"], + outs = ["objecttrackinggpu.binarypb"], + cmd = "cp $< $@", +) + +android_library( + name = "mediapipe_lib", + srcs = glob(["*.java"]), + assets = [ + ":binary_graph", + "//mediapipe/models:ssdlite_object_detection.tflite", + "//mediapipe/models:ssdlite_object_detection_labelmap.txt", + ], + assets_dir = "", + manifest = "AndroidManifest.xml", + resource_files = glob(["res/**"]), + deps = [ + ":mediapipe_jni_lib", + "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper", + "//mediapipe/java/com/google/mediapipe/components:android_components", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + "//mediapipe/java/com/google/mediapipe/glutil", + "//third_party:androidx_appcompat", + "//third_party:androidx_constraint_layout", + "//third_party:androidx_legacy_support_v4", + "//third_party:androidx_recyclerview", + "//third_party:opencv", + "@androidx_concurrent_futures//jar", + "@androidx_lifecycle//jar", + "@com_google_guava_android//jar", + ], +) + +android_binary( + name = "objecttrackinggpu", + manifest = "AndroidManifest.xml", + manifest_values = {"applicationId": "com.google.mediapipe.apps.objecttrackinggpu"}, + multidex = "native", + deps = [ + ":mediapipe_lib", + ], +) diff --git 
a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/MainActivity.java new file mode 100644 index 000000000..c08e7055e --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/MainActivity.java @@ -0,0 +1,167 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.objecttrackinggpu; + +import android.graphics.SurfaceTexture; +import android.os.Bundle; +import androidx.appcompat.app.AppCompatActivity; +import android.util.Size; +import android.view.SurfaceHolder; +import android.view.SurfaceView; +import android.view.View; +import android.view.ViewGroup; +import com.google.mediapipe.components.CameraHelper; +import com.google.mediapipe.components.CameraXPreviewHelper; +import com.google.mediapipe.components.ExternalTextureConverter; +import com.google.mediapipe.components.FrameProcessor; +import com.google.mediapipe.components.PermissionHelper; +import com.google.mediapipe.framework.AndroidAssetUtil; +import com.google.mediapipe.glutil.EglManager; + +/** Main activity of MediaPipe example apps. 
*/ +public class MainActivity extends AppCompatActivity { + private static final String TAG = "MainActivity"; + + private static final String BINARY_GRAPH_NAME = "objecttrackinggpu.binarypb"; + private static final String INPUT_VIDEO_STREAM_NAME = "input_video"; + private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video"; + private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK; + + // Flips the camera-preview frames vertically before sending them into FrameProcessor to be + // processed in a MediaPipe graph, and flips the processed frames back when they are displayed. + // This is needed because OpenGL represents images assuming the image origin is at the bottom-left + // corner, whereas MediaPipe in general assumes the image origin is at top-left. + private static final boolean FLIP_FRAMES_VERTICALLY = true; + + static { + // Load all native libraries needed by the app. + System.loadLibrary("mediapipe_jni"); + System.loadLibrary("opencv_java3"); + } + + // {@link SurfaceTexture} where the camera-preview frames can be accessed. + private SurfaceTexture previewFrameTexture; + // {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph. + private SurfaceView previewDisplayView; + + // Creates and manages an {@link EGLContext}. + private EglManager eglManager; + // Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed + // frames onto a {@link Surface}. + private FrameProcessor processor; + // Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be + // consumed by {@link FrameProcessor} and the underlying MediaPipe graph. + private ExternalTextureConverter converter; + + // Handles camera access via the {@link CameraX} Jetpack support library. 
+ private CameraXPreviewHelper cameraHelper; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + + previewDisplayView = new SurfaceView(this); + setupPreviewDisplayView(); + + // Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g., + // binary graphs. + AndroidAssetUtil.initializeNativeAssetManager(this); + + eglManager = new EglManager(null); + processor = + new FrameProcessor( + this, + eglManager.getNativeContext(), + BINARY_GRAPH_NAME, + INPUT_VIDEO_STREAM_NAME, + OUTPUT_VIDEO_STREAM_NAME); + processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY); + + PermissionHelper.checkAndRequestCameraPermissions(this); + } + + @Override + protected void onResume() { + super.onResume(); + converter = new ExternalTextureConverter(eglManager.getContext()); + converter.setFlipY(FLIP_FRAMES_VERTICALLY); + converter.setConsumer(processor); + if (PermissionHelper.cameraPermissionsGranted(this)) { + startCamera(); + } + } + + @Override + protected void onPause() { + super.onPause(); + converter.close(); + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults); + PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults); + } + + private void setupPreviewDisplayView() { + previewDisplayView.setVisibility(View.GONE); + ViewGroup viewGroup = findViewById(R.id.preview_display_layout); + viewGroup.addView(previewDisplayView); + + previewDisplayView + .getHolder() + .addCallback( + new SurfaceHolder.Callback() { + @Override + public void surfaceCreated(SurfaceHolder holder) { + processor.getVideoSurfaceOutput().setSurface(holder.getSurface()); + } + + @Override + public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) { + // (Re-)Compute the ideal 
size of the camera-preview display (the area that the + // camera-preview frames get rendered onto, potentially with scaling and rotation) + // based on the size of the SurfaceView that contains the display. + Size viewSize = new Size(width, height); + Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize); + + // Connect the converter to the camera-preview frames as its input (via + // previewFrameTexture), and configure the output width and height as the computed + // display size. + converter.setSurfaceTextureAndAttachToGLContext( + previewFrameTexture, displaySize.getWidth(), displaySize.getHeight()); + } + + @Override + public void surfaceDestroyed(SurfaceHolder holder) { + processor.getVideoSurfaceOutput().setSurface(null); + } + }); + } + + private void startCamera() { + cameraHelper = new CameraXPreviewHelper(); + cameraHelper.setOnCameraStartedListener( + surfaceTexture -> { + previewFrameTexture = surfaceTexture; + // Make the display view visible to start showing the preview. This triggers the + // SurfaceHolder.Callback added to (the holder of) previewDisplayView. 
+ previewDisplayView.setVisibility(View.VISIBLE); + }); + cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null); + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/layout/activity_main.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/layout/activity_main.xml new file mode 100644 index 000000000..c19d7e628 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/layout/activity_main.xml @@ -0,0 +1,20 @@ + + + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/colors.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/colors.xml new file mode 100644 index 000000000..69b22338c --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/colors.xml @@ -0,0 +1,6 @@ + + + #008577 + #00574B + #D81B60 + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/strings.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/strings.xml new file mode 100644 index 000000000..577828cb4 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/strings.xml @@ -0,0 +1,4 @@ + + Object Tracking GPU + Please grant camera permissions. 
+ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/styles.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/styles.xml new file mode 100644 index 000000000..5885930df --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objecttrackinggpu/res/values/styles.xml @@ -0,0 +1,11 @@ + + + + + + diff --git a/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py b/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py index 93c62d54e..b2bb0fa55 100644 --- a/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py +++ b/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py @@ -93,15 +93,15 @@ FILEPATTERN = "kinetics_700_%s_25fps_rgb_flow" SPLITS = { "train": { "shards": 1000, - "examples": 541490 + "examples": 541279 }, "validate": { "shards": 100, - "examples": 34715 + "examples": 34688 }, "test": { "shards": 100, - "examples": 69321 + "examples": 69278 }, "custom": { "csv": None, # Add a CSV for your own data here. diff --git a/mediapipe/framework/calculator_graph_event_loop_test.cc b/mediapipe/framework/calculator_graph_event_loop_test.cc index 599d079a1..915b02e4a 100644 --- a/mediapipe/framework/calculator_graph_event_loop_test.cc +++ b/mediapipe/framework/calculator_graph_event_loop_test.cc @@ -541,5 +541,90 @@ TEST_F(CalculatorGraphEventLoopTest, WaitToAddPacketToInputStream) { ASSERT_EQ(kNumInputPackets, output_packets_.size()); } +// Captures log messages during testing. +class TextMessageLogSink : public LogSink { + public: + std::vector messages; + void Send(const LogEntry& entry) { + messages.push_back(std::string(entry.text_message())); + } +}; + +// Verifies that CalculatorGraph::UnthrottleSources does not run repeatedly +// in a "busy-loop" while the graph is throttled due to a graph-output stream. 
+TEST_F(CalculatorGraphEventLoopTest, UnthrottleSources) { + CalculatorGraphConfig graph_config; + ASSERT_TRUE(proto_ns::TextFormat::ParseFromString( + R"( + node { + calculator: "PassThroughCalculator" + input_stream: "input_numbers" + output_stream: "output_numbers" + } + input_stream: "input_numbers" + output_stream: "output_numbers" + num_threads: 2 + max_queue_size: 5 + )", + &graph_config)); + constexpr int kQueueSize = 5; + + // Initialize and start the mediapipe graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + graph.SetGraphInputStreamAddMode( + CalculatorGraph::GraphInputStreamAddMode::ADD_IF_NOT_FULL); + auto poller_status = graph.AddOutputStreamPoller("output_numbers"); + MP_ASSERT_OK(poller_status.status()); + mediapipe::OutputStreamPoller& poller = poller_status.ValueOrDie(); + poller.SetMaxQueueSize(kQueueSize); + MP_ASSERT_OK(graph.StartRun({})); + + // Lambda that adds a packet to the calculator graph. + auto add_packet = [&graph](std::string s, int i) { + return graph.AddPacketToInputStream(s, MakePacket(i).At(Timestamp(i))); + }; + + // Start capturing VLOG messages from the mediapipe::Scheduler. + TextMessageLogSink log_listener; + mediapipe::AddLogSink(&log_listener); + SetVLOGLevel("scheduler", 3); + + // Add just enough packets to fill the output stream queue. + std::vector out_packets; + for (int i = 0; i < kQueueSize; ++i) { + MP_EXPECT_OK(add_packet("input_numbers", i)); + MP_EXPECT_OK(graph.WaitUntilIdle()); + } + + // The graph is throttled due to the full output stream. + EXPECT_FALSE(add_packet("input_numbers", kQueueSize).ok()); + + // CalculatorGraph::UnthrottleSources should be called just one time. + absl::SleepFor(absl::Milliseconds(100)); + + // Read all packets from the output stream queue and close the graph. 
+ for (int i = 0; i < kQueueSize; ++i) { + Packet packet; + EXPECT_TRUE(poller.Next(&packet)); + out_packets.push_back(packet); + } + MP_EXPECT_OK(graph.CloseAllInputStreams()); + MP_EXPECT_OK(graph.WaitUntilDone()); + EXPECT_EQ(kQueueSize, out_packets.size()); + + // Stop capturing VLOG messages. + SetVLOGLevel("scheduler", 0); + mediapipe::RemoveLogSink(&log_listener); + + // Count and validate the calls to UnthrottleSources. + int loop_count = 0; + for (auto& message : log_listener.messages) { + loop_count += (message == "HandleIdle: unthrottling") ? 1 : 0; + } + EXPECT_GE(loop_count, 1); + EXPECT_LE(loop_count, 2); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/framework/formats/BUILD b/mediapipe/framework/formats/BUILD index f831bb3fd..c42621748 100644 --- a/mediapipe/framework/formats/BUILD +++ b/mediapipe/framework/formats/BUILD @@ -241,7 +241,9 @@ cc_test( proto_library( name = "rect_proto", srcs = ["rect.proto"], - visibility = ["//visibility:public"], + visibility = [ + "//visibility:public", + ], ) mediapipe_cc_proto_library( @@ -253,10 +255,21 @@ mediapipe_cc_proto_library( deps = [":rect_proto"], ) +java_lite_proto_library( + name = "rect_java_proto_lite", + strict_deps = 0, + visibility = [ + "//mediapipe:__subpackages__", + ], + deps = [":landmark_proto"], +) + proto_library( name = "landmark_proto", srcs = ["landmark.proto"], - visibility = ["//visibility:public"], + visibility = [ + "//visibility:public", + ], ) mediapipe_cc_proto_library( @@ -269,7 +282,9 @@ mediapipe_cc_proto_library( java_lite_proto_library( name = "landmark_java_proto_lite", strict_deps = 0, - visibility = ["//mediapipe:__subpackages__"], + visibility = [ + "//mediapipe:__subpackages__", + ], deps = [":landmark_proto"], ) diff --git a/mediapipe/framework/port/BUILD b/mediapipe/framework/port/BUILD index 18d46e1c3..3e0fa8630 100644 --- a/mediapipe/framework/port/BUILD +++ b/mediapipe/framework/port/BUILD @@ -176,6 +176,7 @@ cc_library( deps = [ 
"//mediapipe/framework:port", "@com_github_glog_glog//:glog", + "@com_google_absl//absl/time", ], ) diff --git a/mediapipe/framework/port/logging.h b/mediapipe/framework/port/logging.h index a603c3761..526c045ec 100644 --- a/mediapipe/framework/port/logging.h +++ b/mediapipe/framework/port/logging.h @@ -15,6 +15,50 @@ #ifndef MEDIAPIPE_PORT_LOGGING_H_ #define MEDIAPIPE_PORT_LOGGING_H_ +#include "absl/time/time.h" #include "glog/logging.h" +namespace mediapipe { +using LogSeverity = google::LogSeverity; +const auto SetVLOGLevel = google::SetVLOGLevel; +class LogEntry { + public: + LogEntry(LogSeverity severity, const struct ::tm* tm_time, + absl::string_view message) + : severity_(severity), + timestamp_(absl::FromTM(*tm_time, absl::LocalTimeZone())), + text_message_(message) {} + LogSeverity log_severity() const { return severity_; } + absl::Time timestamp() const { return timestamp_; } + absl::string_view text_message() const { return text_message_; } + + private: + LogSeverity severity_; + absl::Time timestamp_; + absl::string_view text_message_; +}; +class LogSink : public google::LogSink { + public: + virtual ~LogSink() = default; + virtual void Send(const LogEntry& entry) = 0; + virtual void WaitTillSent() {} + + private: + virtual void send(LogSeverity severity, const char* full_filename, + const char* base_filename, int line, + const struct ::tm* tm_time, const char* message, + size_t message_len) { + LogEntry log_entry(severity, tm_time, + absl::string_view(message, message_len)); + Send(log_entry); + } +}; +inline void AddLogSink(LogSink* destination) { + google::AddLogSink(destination); +} +inline void RemoveLogSink(LogSink* destination) { + google::RemoveLogSink(destination); +} +} // namespace mediapipe + #endif // MEDIAPIPE_PORT_LOGGING_H_ diff --git a/mediapipe/graphs/object_detection/object_detection_desktop_lstd_tflite_graph.pbtxt b/mediapipe/graphs/object_detection/object_detection_desktop_lstd_tflite_graph.pbtxt new file mode 100644 index 
000000000..1cbba49d0 --- /dev/null +++ b/mediapipe/graphs/object_detection/object_detection_desktop_lstd_tflite_graph.pbtxt @@ -0,0 +1,176 @@ +# MediaPipe graph that performs LSTM based object detection on desktop with +# TensorFlow Lite on CPU. + +# max_queue_size limits the number of packets enqueued on any input stream +# by throttling inputs to the graph. This makes the graph only process one +# frame per time. +max_queue_size: 1 + +# Decodes an input video file into images and a video header. +node { + calculator: "OpenCvVideoDecoderCalculator" + input_side_packet: "INPUT_FILE_PATH:input_video_path" + output_stream: "VIDEO:input_video" + output_stream: "VIDEO_PRESTREAM:input_video_header" +} + +# Transforms the input image on CPU to a 256x256 image. To scale the image, by +# default it uses the STRETCH scale mode that maps the entire input image to the +# entire transformed image. As a result, image aspect ratio may be changed and +# objects in the image may be deformed (stretched or squeezed), but the object +# detection model used in this graph is agnostic to that deformation. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:input_video" + output_stream: "IMAGE:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 256 + output_height: 256 + } + } +} + +# Converts the transformed input image on CPU into an image tensor as a +# TfLiteTensor. The zero_center option is set to true to normalize the +# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f]. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_input_video" + output_stream: "TENSORS:image_tensor" + node_options: { + [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] { + zero_center: true + use_quantized_tensors: true + } + } +} + +# Joins the input image_tensor and previous lstm state tensors into a single +# output vector of tensors. 
+node { + calculator: "ConcatenateTfLiteTensorVectorCalculator" + input_stream: "image_tensor" + input_stream: "prev_lstm_tensors" + output_stream: "concatenated_tensors" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and state +# tensors, then outputs a vector of tensors representing, for instance, +# detection boxes/keypoints and scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:concatenated_tensors" + output_stream: "TENSORS:inference_tensors" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/lstdlite_object_detection.tflite" + } + } +} + +# Splits up the output tensors into two streams [detection_boxes, +# detection_classes, detection_scores, num_detections], and [lstm_c, lstm_h]. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "inference_tensors" + output_stream: "detection_tensors" + output_stream: "lstm_tensors" + options { + [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] { + ranges: { begin: 0 end: 4 } + ranges: { begin: 4 end: 6 } + } + } +} + +# Waits for the LSTM state output from the TfLite inferencing to feed back in as +# an input tensor for the next frame. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:transformed_input_video" + input_stream: "LOOP:lstm_tensors" + input_stream_info: { tag_index: "LOOP" back_edge: true } + output_stream: "PREV_LOOP:prev_lstm_tensors" +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the specification in the options, into a vector of detections. Each detection +# describes a detected object. 
+node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { + num_classes: 7 + num_boxes: 40 + num_coords: 4 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "detections" + output_stream: "filtered_detections" + node_options: { + [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { + min_suppression_threshold: 0.4 + min_score_threshold: 0.2 + max_num_detections: 5 + overlap_type: INTERSECTION_OVER_UNION + } + } +} + +# Maps detection label IDs to the corresponding label text. The label map is +# provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "filtered_detections" + output_stream: "output_detections" + node_options: { + [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] { + label_map_path: "mediapipe/models/lstdlite_object_detection_labelmap.txt" + } + } +} + +# Converts the detections to drawing primitives for annotation overlay. +node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTIONS:output_detections" + output_stream: "RENDER_DATA:render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 2.0 + color { r: 255 g: 255 b: 255 } + } + } +} + +# Draws annotations and overlays them on top of the original image coming into +# the graph. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME:input_video" + input_stream: "render_data" + output_stream: "OUTPUT_FRAME:output_video" +} + +# Encodes the annotated images into a video file, adopting properties specified +# in the input video header, e.g., video framerate. 
+node { + calculator: "OpenCvVideoEncoderCalculator" + input_stream: "VIDEO:output_video" + input_stream: "VIDEO_PRESTREAM:input_video_header" + input_side_packet: "OUTPUT_FILE_PATH:output_video_path" + node_options: { + [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: { + codec: "avc1" + video_format: "mp4" + } + } +} diff --git a/mediapipe/graphs/tracking/BUILD b/mediapipe/graphs/tracking/BUILD new file mode 100644 index 000000000..0a8054a81 --- /dev/null +++ b/mediapipe/graphs/tracking/BUILD @@ -0,0 +1,39 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "mobile_calculators", + deps = [ + "//mediapipe/calculators/core:packet_resampler_calculator", + "//mediapipe/graphs/tracking/subgraphs:object_detection_gpu", + "//mediapipe/graphs/tracking/subgraphs:object_tracking", + "//mediapipe/graphs/tracking/subgraphs:renderer_gpu", + ], +) + +mediapipe_binary_graph( + name = "mobile_gpu_binary_graph", + graph = "object_detection_tracking_mobile_gpu.pbtxt", + output_name = "mobile_gpu.binarypb", + deps = [":mobile_calculators"], +) diff --git a/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt b/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt new file mode 100644 index 000000000..5aebfc1f2 --- /dev/null +++ b/mediapipe/graphs/tracking/object_detection_tracking_mobile_gpu.pbtxt @@ -0,0 +1,46 @@ +# MediaPipe graph that performs object detection and tracking with TensorFlow +# Lite on GPU. +# Used in the examples in +# mediapipie/examples/android/src/java/com/mediapipe/apps/objecttrackinggpu + +# Images on GPU coming into and out of the graph. +input_stream: "input_video" +output_stream: "output_video" + +# Resamples the images by specific frame rate. This calculator is used to +# control the frequecy of subsequent calculators/subgraphs, e.g. less power +# consumption for expensive process. +node { + calculator: "PacketResamplerCalculator" + input_stream: "DATA:input_video" + output_stream: "DATA:throttled_input_video" + node_options: { + [type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] { + frame_rate: 0.5 + } + } +} + +# Subgraph that detections objects (see object_detection_gpu.pbtxt). 
+node { + calculator: "ObjectDetectionSubgraph" + input_stream: "IMAGE:throttled_input_video" + output_stream: "DETECTIONS:output_detections" +} + +# Subgraph that tracks objects (see object_tracking.pbtxt). +node { + calculator: "ObjectTrackingSubgraph" + input_stream: "VIDEO:input_video" + input_stream: "DETECTIONS:output_detections" + output_stream: "DETECTIONS:tracked_detections" +} + +# Subgraph that renders annotations and overlays them on top of the input +# images (see renderer_gpu.pbtxt). +node { + calculator: "RendererSubgraph" + input_stream: "IMAGE:input_video" + input_stream: "DETECTIONS:tracked_detections" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/tracking/subgraphs/BUILD b/mediapipe/graphs/tracking/subgraphs/BUILD new file mode 100644 index 000000000..357463d9e --- /dev/null +++ b/mediapipe/graphs/tracking/subgraphs/BUILD @@ -0,0 +1,76 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "box_tracking", + graph = "box_tracking.pbtxt", + register_as = "BoxTrackingSubgraph", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/video:box_tracker_calculator", + "//mediapipe/calculators/video:flow_packager_calculator", + "//mediapipe/calculators/video:motion_analysis_calculator", + "//mediapipe/framework/stream_handler:immediate_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + "//mediapipe/gpu:gpu_buffer_to_image_frame_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "object_tracking", + graph = "object_tracking.pbtxt", + register_as = "ObjectTrackingSubgraph", + deps = [ + "//mediapipe/calculators/util:detection_unique_id_calculator", + "//mediapipe/calculators/util:detections_to_timed_box_list_calculator", + "//mediapipe/calculators/video:tracked_detection_manager_calculator", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + "//mediapipe/graphs/tracking/subgraphs:box_tracking", + ], +) + +mediapipe_simple_subgraph( + name = "object_detection_gpu", + graph = "object_detection_gpu.pbtxt", + register_as = "ObjectDetectionSubgraph", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_label_id_to_text_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "renderer_gpu", + graph = "renderer_gpu.pbtxt", + 
register_as = "RendererSubgraph", + deps = [ + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_data_calculator", + ], +) diff --git a/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt b/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt new file mode 100644 index 000000000..f4830fad9 --- /dev/null +++ b/mediapipe/graphs/tracking/subgraphs/box_tracking.pbtxt @@ -0,0 +1,126 @@ +# MediaPipe box tracking subgraph. + +type: "BoxTrackingSubgraph" + +input_stream: "VIDEO:input_video" +input_stream: "BOXES:start_pos" +input_stream: "CANCEL_ID:cancel_object_id" +output_stream: "BOXES:boxes" + +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:downscaled_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 240 + output_height: 320 + } + } +} + +# Converts GPU buffer to ImageFrame for processing tracking. +node: { + calculator: "GpuBufferToImageFrameCalculator" + input_stream: "downscaled_input_video" + output_stream: "downscaled_input_video_cpu" +} + +# Performs motion analysis on an incoming video stream. 
+node: { + calculator: "MotionAnalysisCalculator" + input_stream: "VIDEO:downscaled_input_video_cpu" + output_stream: "CAMERA:camera_motion" + output_stream: "FLOW:region_flow" + + node_options: { + [type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: { + analysis_options { + analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE + flow_options { + fast_estimation_min_block_size: 100 + top_inlier_sets: 1 + frac_inlier_error_threshold: 3e-3 + downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE + verification_distance: 5.0 + verify_long_feature_acceleration: true + verify_long_feature_trigger_ratio: 0.1 + tracking_options { + max_features: 500 + adaptive_extraction_levels: 2 + min_eig_val_settings { + adaptive_lowest_quality_level: 2e-4 + } + klt_tracker_implementation: KLT_OPENCV + } + } + } + } + } +} + +# Reads optical flow fields defined in +# mediapipe/framework/formats/motion/optical_flow_field.h, +# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized +# to 0-255. +node: { + calculator: "FlowPackagerCalculator" + input_stream: "FLOW:region_flow" + input_stream: "CAMERA:camera_motion" + output_stream: "TRACKING:tracking_data" + + node_options: { + [type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: { + flow_packager_options: { + binary_tracking_data_support: false + } + } + } +} + +# Tracks box positions over time. 
+node: { + calculator: "BoxTrackerCalculator" + input_stream: "TRACKING:tracking_data" + input_stream: "TRACK_TIME:input_video" + input_stream: "START_POS:start_pos" + input_stream: "CANCEL_OBJECT_ID:cancel_object_id" + input_stream_info: { + tag_index: "CANCEL_OBJECT_ID" + back_edge: true + } + output_stream: "BOXES:boxes" + + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "TRACKING" + tag_index: "TRACK_TIME" + } + sync_set { + tag_index: "START_POS" + } + sync_set { + tag_index: "CANCEL_OBJECT_ID" + } + } + } + } + + node_options: { + [type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: { + tracker_options: { + track_step_options { + track_object_and_camera: true + tracking_degrees: TRACKING_DEGREE_OBJECT_SCALE + inlier_spring_force: 0.0 + static_motion_temporal_ratio: 3e-2 + } + } + visualize_tracking_data: false + streaming_track_data_cache_size: 100 + } + } +} diff --git a/mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt b/mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt new file mode 100644 index 000000000..1b40f434d --- /dev/null +++ b/mediapipe/graphs/tracking/subgraphs/object_detection_gpu.pbtxt @@ -0,0 +1,128 @@ +# MediaPipe object detection subgraph. + +type: "ObjectDetectionSubgraph" + +input_stream: "IMAGE:input_video" +output_stream: "DETECTIONS:output_detections" + +# Transforms the input image on GPU to a 320x320 image. To scale the image, by +# default it uses the STRETCH scale mode that maps the entire input image to the +# entire transformed image. As a result, image aspect ratio may be changed and +# objects in the image may be deformed (stretched or squeezed), but the object +# detection model used in this graph is agnostic to that deformation. 
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 320 + output_height: 320 + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "TENSORS_GPU:image_tensor" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:image_tensor" + output_stream: "TENSORS_GPU:detection_tensors" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/ssdlite_object_detection.tflite" + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + node_options: { + [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] { + num_layers: 6 + min_scale: 0.2 + max_scale: 0.95 + input_size_height: 320 + input_size_width: 320 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 16 + strides: 32 + strides: 64 + strides: 128 + strides: 256 + strides: 512 + aspect_ratios: 1.0 + aspect_ratios: 2.0 + aspect_ratios: 0.5 + aspect_ratios: 3.0 + aspect_ratios: 0.3333 + reduce_boxes_in_lowest_layer: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. 
+node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { + num_classes: 91 + num_boxes: 2034 + num_coords: 4 + ignore_classes: 0 + sigmoid_score: true + apply_exponential_on_box_size: true + x_scale: 10.0 + y_scale: 10.0 + h_scale: 5.0 + w_scale: 5.0 + min_score_thresh: 0.6 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "detections" + output_stream: "filtered_detections" + node_options: { + [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { + min_suppression_threshold: 0.4 + max_num_detections: 3 + overlap_type: INTERSECTION_OVER_UNION + return_empty_detections: true + } + } +} + +# Maps detection label IDs to the corresponding label text. The label map is +# provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "filtered_detections" + output_stream: "output_detections" + node_options: { + [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] { + label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt" + } + } +} diff --git a/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt b/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt new file mode 100644 index 000000000..a7cf87fe1 --- /dev/null +++ b/mediapipe/graphs/tracking/subgraphs/object_tracking.pbtxt @@ -0,0 +1,56 @@ +# MediaPipe object tracking subgraph. + +type: "ObjectTrackingSubgraph" + +input_stream: "VIDEO:input_video" +input_stream: "DETECTIONS:new_detections" +output_stream: "DETECTIONS:tracked_detections" + +# Assigns an unique id for each new detection. 
+node { + calculator: "DetectionUniqueIdCalculator" + input_stream: "DETECTIONS:new_detections" + output_stream: "DETECTIONS:detections_with_id" +} + +# Converts detections to TimedBox protos which are used as initial location +# for tracking. +node { + calculator: "DetectionsToTimedBoxListCalculator" + input_stream: "DETECTIONS:detections_with_id" + output_stream: "BOXES:start_pos" +} + +# Subgraph that tracks boxes (see box_tracking.pbtxt). +node { + calculator: "BoxTrackingSubgraph" + input_stream: "VIDEO:input_video" + input_stream: "BOXES:start_pos" + input_stream: "CANCEL_ID:cancel_object_id" + output_stream: "BOXES:boxes" +} + +# Manages new detected objects and objects that are being tracked. +# It associates the duplicated detections and updates the locations of +# detections from tracking. +node: { + calculator: "TrackedDetectionManagerCalculator" + input_stream: "DETECTIONS:detections_with_id" + input_stream: "TRACKING_BOXES:boxes" + output_stream: "DETECTIONS:tracked_detections" + output_stream: "CANCEL_OBJECT_ID:cancel_object_id" + + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "TRACKING_BOXES" + } + sync_set { + tag_index: "DETECTIONS" + } + } + } + } +} diff --git a/mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt b/mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt new file mode 100644 index 000000000..9640ccabc --- /dev/null +++ b/mediapipe/graphs/tracking/subgraphs/renderer_gpu.pbtxt @@ -0,0 +1,29 @@ +# MediaPipe object tracking rendering subgraph. + +type: "RendererSubgraph" + +input_stream: "IMAGE:input_image" +input_stream: "DETECTIONS:detections" +output_stream: "IMAGE:output_image" + +# Converts the detections to drawing primitives for annotation overlay. 
+node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTIONS:detections" + output_stream: "RENDER_DATA:detections_render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 255 g: 0 b: 0 } + render_detection_id: true + } + } +} + +# Draws annotations and overlays them on top of the input images. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME_GPU:input_image" + input_stream: "detections_render_data" + output_stream: "OUTPUT_FRAME_GPU:output_image" +} diff --git a/mediapipe/java/com/google/mediapipe/framework/MediaPipeRunner.java b/mediapipe/java/com/google/mediapipe/framework/MediaPipeRunner.java index 39a527372..466c18915 100644 --- a/mediapipe/java/com/google/mediapipe/framework/MediaPipeRunner.java +++ b/mediapipe/java/com/google/mediapipe/framework/MediaPipeRunner.java @@ -50,4 +50,16 @@ public abstract class MediaPipeRunner extends Graph { * Stops the running graph and releases the resource. Call this in Activity onDestroy callback. */ public abstract void release(); + /** + * Stops the running graph and releases the resource. Call this in Activity onDestroy callback. + * + *

Like {@link #release()} but with a timeout. The default behavior is to call + * {@link #release()}. The implementation can override this method if it cares about timing out + * releasing resources. + * + * @param timeoutMillis the time it takes to force timeout from releasing mff context. + */ + public void release(long timeoutMillis) { + release(); + } } diff --git a/mediapipe/util/tracking/BUILD b/mediapipe/util/tracking/BUILD new file mode 100644 index 000000000..6eb4a16a4 --- /dev/null +++ b/mediapipe/util/tracking/BUILD @@ -0,0 +1,742 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +proto_library( + name = "tone_models_proto", + srcs = ["tone_models.proto"], +) + +proto_library( + name = "tone_estimation_proto", + srcs = ["tone_estimation.proto"], + deps = [":tone_models_proto"], +) + +proto_library( + name = "region_flow_computation_proto", + srcs = ["region_flow_computation.proto"], + deps = [ + ":tone_estimation_proto", + ], +) + +proto_library( + name = "motion_saliency_proto", + srcs = ["motion_saliency.proto"], +) + +proto_library( + name = "motion_estimation_proto", + srcs = ["motion_estimation.proto"], +) + +proto_library( + name = "motion_analysis_proto", + srcs = ["motion_analysis.proto"], + deps = [ + ":motion_estimation_proto", + ":motion_saliency_proto", + ":region_flow_computation_proto", + ], +) + +proto_library( + name = "region_flow_proto", + srcs = ["region_flow.proto"], +) + +proto_library( + name = "motion_models_proto", + srcs = ["motion_models.proto"], +) + +proto_library( + name = "camera_motion_proto", + srcs = ["camera_motion.proto"], + deps = [":motion_models_proto"], +) + +proto_library( + name = "push_pull_filtering_proto", + srcs = ["push_pull_filtering.proto"], +) + +proto_library( + name = "frame_selection_solution_evaluator_proto", + srcs = ["frame_selection_solution_evaluator.proto"], +) + +proto_library( + name = "frame_selection_proto", + srcs = ["frame_selection.proto"], + deps = [ + ":camera_motion_proto", + ":frame_selection_solution_evaluator_proto", + ":region_flow_proto", + ], +) + +proto_library( + name = "flow_packager_proto", + srcs = ["flow_packager.proto"], + deps = [ + ":motion_models_proto", + ":region_flow_proto", + ], +) + +proto_library( + name = "tracking_proto", + srcs = ["tracking.proto"], + deps = [ + ":motion_models_proto", + ], +) + +proto_library( + name = "box_tracker_proto", + srcs = 
["box_tracker.proto"], + deps = [":tracking_proto"], +) + +mediapipe_cc_proto_library( + name = "tone_models_cc_proto", + srcs = ["tone_models.proto"], + visibility = ["//visibility:public"], + deps = [":tone_models_proto"], +) + +mediapipe_cc_proto_library( + name = "tone_estimation_cc_proto", + srcs = ["tone_estimation.proto"], + cc_deps = [":tone_models_cc_proto"], + visibility = ["//visibility:public"], + deps = [":tone_estimation_proto"], +) + +mediapipe_cc_proto_library( + name = "region_flow_computation_cc_proto", + srcs = ["region_flow_computation.proto"], + cc_deps = [ + ":tone_estimation_cc_proto", + ":tone_models_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":region_flow_computation_proto"], +) + +mediapipe_cc_proto_library( + name = "motion_saliency_cc_proto", + srcs = ["motion_saliency.proto"], + visibility = ["//visibility:public"], + deps = [":motion_saliency_proto"], +) + +mediapipe_cc_proto_library( + name = "motion_estimation_cc_proto", + srcs = ["motion_estimation.proto"], + visibility = ["//visibility:public"], + deps = [":motion_estimation_proto"], +) + +mediapipe_cc_proto_library( + name = "motion_analysis_cc_proto", + srcs = ["motion_analysis.proto"], + cc_deps = [ + ":motion_estimation_cc_proto", + ":motion_saliency_cc_proto", + ":region_flow_computation_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":motion_analysis_proto"], +) + +mediapipe_cc_proto_library( + name = "region_flow_cc_proto", + srcs = ["region_flow.proto"], + cc_deps = [":motion_models_cc_proto"], + visibility = ["//visibility:public"], + deps = [":region_flow_proto"], +) + +mediapipe_cc_proto_library( + name = "motion_models_cc_proto", + srcs = ["motion_models.proto"], + visibility = ["//visibility:public"], + deps = [":motion_models_proto"], +) + +mediapipe_cc_proto_library( + name = "camera_motion_cc_proto", + srcs = ["camera_motion.proto"], + cc_deps = [":motion_models_cc_proto"], + visibility = ["//visibility:public"], + deps = 
[":camera_motion_proto"], +) + +mediapipe_cc_proto_library( + name = "push_pull_filtering_cc_proto", + srcs = ["push_pull_filtering.proto"], + visibility = ["//visibility:public"], + deps = [":push_pull_filtering_proto"], +) + +mediapipe_cc_proto_library( + name = "frame_selection_solution_evaluator_cc_proto", + srcs = ["frame_selection_solution_evaluator.proto"], + visibility = ["//visibility:public"], + deps = [":frame_selection_solution_evaluator_proto"], +) + +mediapipe_cc_proto_library( + name = "frame_selection_cc_proto", + srcs = ["frame_selection.proto"], + cc_deps = [ + ":camera_motion_cc_proto", + ":frame_selection_solution_evaluator_cc_proto", + ":region_flow_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":frame_selection_proto"], +) + +mediapipe_cc_proto_library( + name = "flow_packager_cc_proto", + srcs = ["flow_packager.proto"], + cc_deps = [ + ":motion_models_cc_proto", + ":region_flow_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":flow_packager_proto"], +) + +mediapipe_cc_proto_library( + name = "tracking_cc_proto", + srcs = ["tracking.proto"], + cc_deps = [":motion_models_cc_proto"], + visibility = ["//visibility:public"], + deps = [":tracking_proto"], +) + +mediapipe_cc_proto_library( + name = "box_tracker_cc_proto", + srcs = ["box_tracker.proto"], + cc_deps = [":tracking_cc_proto"], + visibility = ["//visibility:public"], + deps = [":box_tracker_proto"], +) + +cc_library( + name = "motion_models", + srcs = ["motion_models.cc"], + hdrs = ["motion_models.h"], + deps = [ + ":camera_motion_cc_proto", + ":motion_models_cc_proto", + ":region_flow_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:singleton", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/strings:str_format", + "@eigen_archive//:eigen", + ], +) + +cc_library( + name = "motion_models_cv", + srcs = ["motion_models_cv.cc"], + hdrs = 
["motion_models_cv.h"], + deps = [ + ":camera_motion_cc_proto", + ":motion_models", + ":motion_models_cc_proto", + "//mediapipe/framework/port:opencv_core", + ], +) + +cc_library( + name = "region_flow", + srcs = ["region_flow.cc"], + hdrs = ["region_flow.h"], + deps = [ + ":measure_time", + ":motion_models", + ":parallel_invoker", + ":region_flow_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "camera_motion", + srcs = ["camera_motion.cc"], + hdrs = ["camera_motion.h"], + deps = [ + ":camera_motion_cc_proto", + ":motion_models", + ":region_flow", + ":region_flow_cc_proto", + "@com_google_absl//absl/strings:str_format", + ], +) + +PARALLEL_COPTS = ["-DPARALLEL_INVOKER_ACTIVE"] + select({ + "//mediapipe:apple": [], + "//mediapipe:android": [], + "//conditions:default": [], +}) + +PARALLEL_LINKOPTS = [] + +cc_library( + name = "measure_time", + srcs = ["measure_time.cc"], + hdrs = ["measure_time.h"], + deps = [ + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "parallel_invoker", + srcs = ["parallel_invoker.cc"], + hdrs = ["parallel_invoker.h"], + copts = PARALLEL_COPTS, + linkopts = PARALLEL_LINKOPTS, + deps = [ + ":parallel_invoker_forbid_mixed_active", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:threadpool", + "@com_google_absl//absl/synchronization", + ], +) + +cc_library( + name = "parallel_invoker_forbid_mixed_active", + srcs = ["parallel_invoker_forbid_mixed.cc"], + copts = PARALLEL_COPTS, + alwayslink = 1, # Forces all symbols to be included. 
+) + +cc_library( + name = "image_util", + srcs = ["image_util.cc"], + hdrs = ["image_util.h"], + deps = [ + ":motion_models", + ":motion_models_cc_proto", + ":region_flow", + ":region_flow_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:vector", + ], +) + +cc_library( + name = "streaming_buffer", + srcs = ["streaming_buffer.cc"], + hdrs = ["streaming_buffer.h"], + deps = [ + "//mediapipe/framework/port:logging", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:any", + ], +) + +cc_library( + name = "motion_estimation", + srcs = ["motion_estimation.cc"], + hdrs = ["motion_estimation.h"], + copts = PARALLEL_COPTS, + linkopts = PARALLEL_LINKOPTS, + deps = [ + ":camera_motion", + ":camera_motion_cc_proto", + ":measure_time", + ":motion_estimation_cc_proto", + ":motion_models", + ":motion_models_cc_proto", + ":parallel_invoker", + ":region_flow", + ":region_flow_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/strings", + "@eigen_archive//:eigen", + ], +) + +cc_library( + name = "motion_saliency", + srcs = ["motion_saliency.cc"], + hdrs = ["motion_saliency.h"], + deps = [ + ":camera_motion", + ":measure_time", + ":motion_saliency_cc_proto", + ":region_flow", + ":region_flow_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:vector", + ], +) + +cc_library( + name = "push_pull_filtering", + hdrs = ["push_pull_filtering.h"], + deps = [ + ":image_util", + ":push_pull_filtering_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_highgui", + ], +) + +cc_library( + 
name = "tone_models", + srcs = ["tone_models.cc"], + hdrs = ["tone_models.h"], + deps = [ + ":tone_models_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "tone_estimation", + srcs = ["tone_estimation.cc"], + hdrs = ["tone_estimation.h"], + deps = [ + ":motion_models_cc_proto", + ":region_flow", + ":region_flow_cc_proto", + ":tone_estimation_cc_proto", + ":tone_models", + ":tone_models_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:vector", + ], +) + +cc_library( + name = "region_flow_computation", + srcs = ["region_flow_computation.cc"], + hdrs = ["region_flow_computation.h"], + copts = PARALLEL_COPTS, + linkopts = PARALLEL_LINKOPTS, + deps = [ + ":camera_motion_cc_proto", + ":image_util", + ":measure_time", + ":motion_estimation", + ":motion_estimation_cc_proto", + ":motion_models", + ":motion_models_cc_proto", + ":parallel_invoker", + ":region_flow", + ":region_flow_cc_proto", + ":region_flow_computation_cc_proto", + ":tone_estimation", + ":tone_estimation_cc_proto", + ":tone_models", + ":tone_models_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_features2d", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_video", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_absl//absl/memory", + "@eigen_archive//:eigen", + ], +) + +cc_library( + name = "region_flow_visualization", + srcs = ["region_flow_visualization.cc"], + hdrs = ["region_flow_visualization.h"], + deps = [ + 
":measure_time", + ":motion_models", + ":parallel_invoker", + ":region_flow", + ":region_flow_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "motion_analysis", + srcs = ["motion_analysis.cc"], + hdrs = ["motion_analysis.h"], + deps = [ + ":camera_motion", + ":camera_motion_cc_proto", + ":image_util", + ":measure_time", + ":motion_analysis_cc_proto", + ":motion_estimation", + ":motion_estimation_cc_proto", + ":motion_saliency", + ":motion_saliency_cc_proto", + ":push_pull_filtering", + ":region_flow", + ":region_flow_cc_proto", + ":region_flow_computation", + ":region_flow_computation_cc_proto", + ":region_flow_visualization", + ":streaming_buffer", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_library( + name = "flow_packager", + srcs = ["flow_packager.cc"], + hdrs = ["flow_packager.h"], + deps = [ + ":camera_motion", + ":camera_motion_cc_proto", + ":flow_packager_cc_proto", + ":motion_estimation", + ":motion_estimation_cc_proto", + ":motion_models", + ":motion_models_cc_proto", + ":region_flow", + ":region_flow_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "tracking", + srcs = ["tracking.cc"], + hdrs = ["tracking.h"], + copts = PARALLEL_COPTS, + linkopts = PARALLEL_LINKOPTS, + deps = [ + ":flow_packager_cc_proto", + ":measure_time", + ":motion_models", + ":motion_models_cc_proto", + 
":motion_models_cv", + ":parallel_invoker", + ":region_flow", + ":tracking_cc_proto", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_calib3d", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/memory", + "@eigen_archive//:eigen", + ], + alwayslink = 1, +) + +cc_library( + name = "box_tracker", + srcs = ["box_tracker.cc"], + hdrs = ["box_tracker.h"], + deps = [ + ":box_tracker_cc_proto", + ":flow_packager_cc_proto", + ":measure_time", + ":tracking", + ":tracking_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:threadpool", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + ], +) + +cc_library( + name = "tracking_visualization_utilities", + srcs = ["tracking_visualization_utilities.cc"], + hdrs = ["tracking_visualization_utilities.h"], + deps = [ + ":box_tracker", + ":box_tracker_cc_proto", + ":flow_packager_cc_proto", + ":tracking", + ":tracking_cc_proto", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "@com_google_absl//absl/strings:str_format", + ], +) + +cc_test( + name = "parallel_invoker_test", + srcs = ["parallel_invoker_test.cc"], + copts = PARALLEL_COPTS, + linkopts = PARALLEL_LINKOPTS, + deps = [ + ":parallel_invoker", + "//mediapipe/framework/port:gtest_main", + "@com_google_absl//absl/synchronization", + ], +) + +cc_test( + name = "image_util_test", + srcs = [ + "image_util_test.cc", + ], + deps = [ + ":image_util", + "//mediapipe/framework/port:gtest_main", + ], +) + +cc_test( + name = "motion_models_test", + srcs = ["motion_models_test.cc"], + deps = [ + ":motion_estimation", + ":motion_models", + 
"//mediapipe/framework/deps:message_matchers", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:vector", + ], +) + +cc_test( + name = "region_flow_computation_test", + srcs = ["region_flow_computation_test.cc"], + copts = PARALLEL_COPTS, + data = ["testdata/stabilize_test.png"], + linkopts = PARALLEL_LINKOPTS, + deps = [ + ":region_flow", + ":region_flow_cc_proto", + ":region_flow_computation", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/port:commandlineflags", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/time", + ], +) + +cc_test( + name = "box_tracker_test", + timeout = "short", + srcs = ["box_tracker_test.cc"], + data = glob(["testdata/box_tracker/*"]), + deps = [ + ":box_tracker", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/port:gtest_main", + ], +) + +cc_library( + name = "tracked_detection", + srcs = [ + "tracked_detection.cc", + ], + hdrs = [ + "tracked_detection.h", + ], + deps = [ + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:vector", + "@com_google_absl//absl/container:node_hash_map", + ], +) + +cc_test( + name = "tracked_detection_test", + srcs = [ + "tracked_detection_test.cc", + ], + deps = [ + ":tracked_detection", + "//mediapipe/framework/port:gtest_main", + ], +) + +cc_library( + name = "tracked_detection_manager", + srcs = [ + "tracked_detection_manager.cc", + ], + hdrs = [ + "tracked_detection_manager.h", + ], + deps = [ + ":tracked_detection", + "//mediapipe/framework/formats:rect_cc_proto", + "@com_google_absl//absl/container:node_hash_map", + ], +) diff --git 
a/mediapipe/util/tracking/OWNERS b/mediapipe/util/tracking/OWNERS new file mode 100644 index 000000000..a5c705e31 --- /dev/null +++ b/mediapipe/util/tracking/OWNERS @@ -0,0 +1,2 @@ +jianingwei +yegenzhi diff --git a/mediapipe/util/tracking/box_tracker.cc b/mediapipe/util/tracking/box_tracker.cc new file mode 100644 index 000000000..2111eac4b --- /dev/null +++ b/mediapipe/util/tracking/box_tracker.cc @@ -0,0 +1,1062 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/box_tracker.h" + +#include + +#include +#include + +#include "absl/strings/str_cat.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/tracking.pb.h" + +namespace mediapipe { + +// Time within which close checkpoints are removed. 
+static constexpr int kSnapMs = 1000; +static constexpr int kInitCheckpoint = -1; + +void MotionBoxStateQuadToVertices(const MotionBoxState::Quad& quad, + std::vector<Vector2_f>* vertices) { + CHECK_EQ(TimedBox::kNumQuadVertices * 2, quad.vertices_size()); + CHECK(vertices != nullptr); + vertices->clear(); + for (int i = 0; i < TimedBox::kNumQuadVertices; ++i) { + vertices->push_back( + Vector2_f(quad.vertices(i * 2), quad.vertices(i * 2 + 1))); + } +} + +void VerticesToMotionBoxStateQuad(const std::vector<Vector2_f>& vertices, + MotionBoxState::Quad* quad) { + CHECK_EQ(TimedBox::kNumQuadVertices, vertices.size()); + CHECK(quad != nullptr); + for (const Vector2_f& vertex : vertices) { + quad->add_vertices(vertex.x()); + quad->add_vertices(vertex.y()); + } +} + +void MotionBoxStateFromTimedBox(const TimedBox& box, MotionBoxState* state) { + CHECK(state); + state->set_pos_x(box.left); + state->set_pos_y(box.top); + state->set_width(box.right - box.left); + state->set_height(box.bottom - box.top); + state->set_rotation(box.rotation); + state->set_request_grouping(box.request_grouping); + if (box.quad_vertices.size() == TimedBox::kNumQuadVertices) { + VerticesToMotionBoxStateQuad(box.quad_vertices, state->mutable_quad()); + + if (box.aspect_ratio > 0.0f) { + state->set_aspect_ratio(box.aspect_ratio); + } + + // set pos_x and pos_y to be the top-left vertex x and y coordinates + // set width and height to be max - min of x and y. 
+ float min_x = std::numeric_limits<float>::max(); + float max_x = std::numeric_limits<float>::lowest(); + float min_y = std::numeric_limits<float>::max(); + float max_y = std::numeric_limits<float>::lowest(); + for (const auto& vertex : box.quad_vertices) { + min_x = std::min(min_x, vertex.x()); + max_x = std::max(max_x, vertex.x()); + min_y = std::min(min_y, vertex.y()); + max_y = std::max(max_y, vertex.y()); + } + state->set_pos_x(min_x); + state->set_pos_y(min_y); + state->set_width(max_x - min_x); + state->set_height(max_y - min_y); + } +} + +void TimedBoxFromMotionBoxState(const MotionBoxState& state, TimedBox* box) { + CHECK(box); + const float scale_dx = state.width() * (state.scale() - 1.0f) * 0.5f; + const float scale_dy = state.height() * (state.scale() - 1.0f) * 0.5f; + box->left = state.pos_x() - scale_dx; + box->top = state.pos_y() - scale_dy; + box->right = state.pos_x() + state.width() + scale_dx; + box->bottom = state.pos_y() + state.height() + scale_dy; + box->rotation = state.rotation(); + box->confidence = state.tracking_confidence(); + box->request_grouping = state.request_grouping(); + if (state.has_quad()) { + MotionBoxStateQuadToVertices(state.quad(), &(box->quad_vertices)); + + if (state.has_aspect_ratio()) { + box->aspect_ratio = state.aspect_ratio(); + } + } +} + +namespace { + +TimedBox BlendTimedBoxes(const TimedBox& lhs, const TimedBox& rhs, + int64 time_msec) { + CHECK_LT(lhs.time_msec, rhs.time_msec); + const double alpha = + (time_msec - lhs.time_msec) * 1.0 / (rhs.time_msec - lhs.time_msec); + return TimedBox::Blend(lhs, rhs, alpha); +} + +} // namespace. + +TimedBox TimedBox::Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha, + double beta) { + // Due to large timestamps alpha beta should be in double. 
+ TimedBox result; + result.top = alpha * lhs.top + beta * rhs.top; + result.left = alpha * lhs.left + beta * rhs.left; + result.bottom = alpha * lhs.bottom + beta * rhs.bottom; + result.right = alpha * lhs.right + beta * rhs.right; + result.rotation = alpha * lhs.rotation + beta * rhs.rotation; + result.time_msec = std::round(alpha * lhs.time_msec + beta * rhs.time_msec); + result.confidence = alpha * lhs.confidence + beta * rhs.confidence; + if (lhs.quad_vertices.size() == kNumQuadVertices && + rhs.quad_vertices.size() == kNumQuadVertices) { + result.quad_vertices.clear(); + for (int i = 0; i < lhs.quad_vertices.size(); ++i) { + result.quad_vertices.push_back(alpha * lhs.quad_vertices[i] + + beta * rhs.quad_vertices[i]); + } + + // Since alpha and beta are not necessarily sum to 1, aspect ratio can not + // be derived with alpha and beta. Here we are simply averaging the + // aspect_ratio as the blended box aspect ratio. + if (lhs.aspect_ratio > 0 && rhs.aspect_ratio > 0) { + result.aspect_ratio = 0.5f * lhs.aspect_ratio + 0.5f * rhs.aspect_ratio; + } + } + return result; +} + +TimedBox TimedBox::Blend(const TimedBox& lhs, const TimedBox& rhs, + double alpha) { + return Blend(lhs, rhs, 1.0 - alpha, alpha); +} + +std::array<Vector2_f, 4> TimedBox::Corners(float width, float height) const { + if (quad_vertices.size() == kNumQuadVertices) { + std::array<Vector2_f, 4> corners{{ + Vector2_f(quad_vertices[0].x() * width, quad_vertices[0].y() * height), + Vector2_f(quad_vertices[1].x() * width, quad_vertices[1].y() * height), + Vector2_f(quad_vertices[2].x() * width, quad_vertices[2].y() * height), + Vector2_f(quad_vertices[3].x() * width, quad_vertices[3].y() * height), + }}; + return corners; + } else { + // Rotate 4 corners w.r.t. center. 
+ const Vector2_f center(0.5f * (left + right) * width, + 0.5f * (top + bottom) * height); + const std::array<Vector2_f, 4> corners{{ + Vector2_f(left * width, top * height), + Vector2_f(left * width, bottom * height), + Vector2_f(right * width, bottom * height), + Vector2_f(right * width, top * height), + }}; + + const float cos_a = std::cos(rotation); + const float sin_a = std::sin(rotation); + std::array<Vector2_f, 4> transformed_corners; + for (int k = 0; k < 4; ++k) { + // Scale and rotate w.r.t. center. + const Vector2_f rad = corners[k] - center; + const Vector2_f rot_rad(cos_a * rad.x() - sin_a * rad.y(), + sin_a * rad.x() + cos_a * rad.y()); + transformed_corners[k] = center + rot_rad; + } + return transformed_corners; + } +} + +TimedBox TimedBox::FromProto(const TimedBoxProto& proto) { + TimedBox box; + box.top = proto.top(); + box.left = proto.left(); + box.bottom = proto.bottom(); + box.right = proto.right(); + box.rotation = proto.rotation(); + box.time_msec = proto.time_msec(); + box.request_grouping = proto.request_grouping(); + if (proto.has_quad() && + proto.quad().vertices_size() == kNumQuadVertices * 2) { + MotionBoxStateQuadToVertices(proto.quad(), &(box.quad_vertices)); + + if (proto.has_aspect_ratio()) { + box.aspect_ratio = proto.aspect_ratio(); + } + } + return box; +} + +TimedBoxProto TimedBox::ToProto() const { + TimedBoxProto proto; + proto.set_top(top); + proto.set_left(left); + proto.set_bottom(bottom); + proto.set_right(right); + proto.set_rotation(rotation); + proto.set_time_msec(time_msec); + proto.set_confidence(confidence); + proto.set_request_grouping(request_grouping); + if (quad_vertices.size() == kNumQuadVertices) { + VerticesToMotionBoxStateQuad(quad_vertices, proto.mutable_quad()); + + if (aspect_ratio > 0.0f) { + proto.set_aspect_ratio(aspect_ratio); + } + } + return proto; +} + +BoxTracker::BoxTracker(const std::string& cache_dir, + const BoxTrackerOptions& options) + : options_(options), cache_dir_(cache_dir) { + tracking_workers_.reset(new 
ThreadPool(options_.num_tracking_workers())); + tracking_workers_->StartWorkers(); +} + +BoxTracker::BoxTracker( + const std::vector& tracking_data, bool copy_data, + const BoxTrackerOptions& options) + : BoxTracker("", options) { + AddTrackingDataChunks(tracking_data, copy_data); +} + +void BoxTracker::AddTrackingDataChunk(const TrackingDataChunk* chunk, + bool copy_data) { + CHECK_GT(chunk->item_size(), 0) << "Empty chunk."; + int64 chunk_time_msec = chunk->item(0).timestamp_usec() / 1000; + int chunk_idx = ChunkIdxFromTime(chunk_time_msec); + CHECK_GE(chunk_idx, tracking_data_.size()) << "Chunk is out of order."; + if (chunk_idx > tracking_data_.size()) { + LOG(INFO) << "Resize tracking_data_ to " << chunk_idx; + tracking_data_.resize(chunk_idx); + } + if (copy_data) { + tracking_data_buffer_.emplace_back(new TrackingDataChunk(*chunk)); + tracking_data_.push_back(tracking_data_buffer_.back().get()); + } else { + tracking_data_.emplace_back(chunk); + } +} + +void BoxTracker::AddTrackingDataChunks( + const std::vector& tracking_data, + bool copy_data) { + for (const auto item : tracking_data) { + AddTrackingDataChunk(item, copy_data); + } +} + +void BoxTracker::NewBoxTrack(const TimedBox& initial_pos, int id, + int64 min_msec, int64 max_msec) { + VLOG(1) << "New box track: " << id << " : " << initial_pos.ToString() + << " from " << min_msec << " to " << max_msec; + + // Mark initialization with checkpoint -1. + absl::MutexLock lock(&status_mutex_); + + if (canceling_) { + LOG(WARNING) << "Box Tracker is in cancel state. 
Refusing request."; + return; + } + ++track_status_[id][kInitCheckpoint].tracks_ongoing; + + auto operation = [this, initial_pos, id, min_msec, max_msec]() { + this->NewBoxTrackAsync(initial_pos, id, min_msec, max_msec); + }; + + tracking_workers_->Schedule(operation); +} + +std::pair BoxTracker::TrackInterval(int id) { + absl::MutexLock lock(&path_mutex_); + const Path& path = paths_[id]; + if (path.empty()) { + return std::make_pair(-1, -1); + } + + auto first_interval = path.begin()->second; + auto last_interval = path.rbegin()->second; + + return std::make_pair(first_interval.front().time_msec, + last_interval.back().time_msec); +} + +void BoxTracker::NewBoxTrackAsync(const TimedBox& initial_pos, int id, + int64 min_msec, int64 max_msec) { + VLOG(1) << "Async track for id: " << id << " from " << min_msec << " to " + << max_msec; + + // Determine start position and track forward and backward. + int chunk_idx = ChunkIdxFromTime(initial_pos.time_msec); + + VLOG(1) << "Starting at chunk " << chunk_idx; + + AugmentedChunkPtr tracking_chunk(ReadChunk(id, kInitCheckpoint, chunk_idx)); + + if (!tracking_chunk.first) { + absl::MutexLock lock(&status_mutex_); + --track_status_[id][kInitCheckpoint].tracks_ongoing; + LOG(ERROR) << "Could not read tracking chunk from file: " << chunk_idx + << " for start position: " << initial_pos.ToString(); + return; + } + + // Grab ownership here, to avoid any memory leaks due to early return. + std::unique_ptr chunk_owned; + if (tracking_chunk.second) { + chunk_owned.reset(tracking_chunk.first); + } + + const int start_frame = + ClosestFrameIndex(initial_pos.time_msec, *tracking_chunk.first); + + VLOG(1) << "Local start frame: " << start_frame; + + // Update starting position to coincide with a frame. 
+ TimedBox start_pos = initial_pos; + start_pos.time_msec = + tracking_chunk.first->item(start_frame).timestamp_usec() / 1000; + + VLOG(1) << "Request at " << initial_pos.time_msec << " revised to " + << start_pos.time_msec; + + const int checkpoint = start_pos.time_msec; + + // TODO: + // Compute min and max for tracking based on existing check points. + if (!WaitToScheduleId(id)) { + // Could not schedule, id already being canceled. + return; + } + + // If another checkpoint is close by, cancel that one. + VLOG(1) << "Removing close checkpoints"; + absl::MutexLock lock(&status_mutex_); + RemoveCloseCheckpoints(id, checkpoint); + + VLOG(1) << "Cancel existing tracks"; + CancelTracking(id, checkpoint); + + // Remove checkpoint results (to be replaced with current one). + ClearCheckpoint(id, checkpoint); + + MotionBoxState start_state; + MotionBoxStateFromTimedBox(start_pos, &start_state); + + VLOG(1) << "Adding initial result"; + AddBoxResult(start_pos, id, checkpoint, start_state); + + // Perform forward and backward tracking and add to current PathSegment. + // Track forward. + track_status_[id][checkpoint].tracks_ongoing += 2; + + VLOG(1) << "Starting tracking workers ... "; + + AugmentedChunkPtr forward_chunk = tracking_chunk; + AugmentedChunkPtr backward_chunk = tracking_chunk; + + if (tracking_chunk.second) { // We have ownership, need a copy here. + forward_chunk = std::make_pair(new TrackingDataChunk(*chunk_owned), true); + backward_chunk = std::make_pair(chunk_owned.release(), true); + } + + auto forward_operation = [this, forward_chunk, start_state, start_frame, + chunk_idx, id, checkpoint, min_msec, max_msec]() { + this->TrackingImpl(TrackingImplArgs(forward_chunk, start_state, start_frame, + chunk_idx, id, checkpoint, true, true, + min_msec, max_msec)); + }; + + tracking_workers_->Schedule(forward_operation); + + // Track backward. 
+ auto backward_operation = [this, backward_chunk, start_state, start_frame, + chunk_idx, id, checkpoint, min_msec, max_msec]() { + this->TrackingImpl(TrackingImplArgs(backward_chunk, start_state, + start_frame, chunk_idx, id, checkpoint, + false, true, min_msec, max_msec)); + }; + + tracking_workers_->Schedule(backward_operation); + + DoneSchedulingId(id); + + // Tell a waiting request that we are done scheduling. + status_condvar_.SignalAll(); + VLOG(1) << "Scheduling done for " << id; +} + +void BoxTracker::RemoveCloseCheckpoints(int id, int checkpoint) { + if (track_status_[id].empty()) { + return; + } + + auto pos = track_status_[id].lower_bound(checkpoint); + // Test current and previous location (if possible). + int num_turns = 1; + if (pos != track_status_[id].begin()) { + --pos; + ++num_turns; + } + + for (int k = 0; k < num_turns; ++k, ++pos) { + if (pos != track_status_[id].end()) { + const int check_pos = pos->first; + // Ignore marker init checkpoint from track_status_. + if (check_pos > kInitCheckpoint && + std::abs(check_pos - checkpoint) < kSnapMs) { + CancelTracking(id, check_pos); + ClearCheckpoint(id, check_pos); + } + } else { + break; + } + } +} + +bool BoxTracker::WaitToScheduleId(int id) { + VLOG(1) << "Wait to schedule id: " << id; + absl::MutexLock lock(&status_mutex_); + while (new_box_track_[id]) { + // Box tracking is currently ongoing for this id. + if (track_status_[id][kInitCheckpoint].canceled) { + // Canceled, remove myself from ongoing tracks. + --track_status_[id][kInitCheckpoint].tracks_ongoing; + status_condvar_.SignalAll(); + return false; + } + + // Only one request can be processing in the section till end of the + // function NewBoxTrackAsync at a time. + status_condvar_.Wait(&status_mutex_); + } + + // We got canceled already, don't proceed. 
+ if (track_status_[id][kInitCheckpoint].canceled) { + --track_status_[id][kInitCheckpoint].tracks_ongoing; + status_condvar_.SignalAll(); + return false; + } + + // Signal we are about to schedule new tracking. + new_box_track_[id] = true; + VLOG(1) << "Ready to schedule id: " << id; + return true; +} + +void BoxTracker::DoneSchedulingId(int id) { + new_box_track_[id] = false; + --track_status_[id][kInitCheckpoint].tracks_ongoing; +} + +void BoxTracker::CancelTracking(int id, int checkpoint) { + // Wait for ongoing requests to terminate. + while (track_status_[id][checkpoint].tracks_ongoing != 0) { + // Cancel all ongoing requests. + track_status_[id][checkpoint].canceled = true; + status_condvar_.Wait(&status_mutex_); + } + + track_status_[id][checkpoint].canceled = false; +} + +bool BoxTracker::GetTimedPosition(int id, int64 time_msec, TimedBox* result, + std::vector* states) { + CHECK(result); + + MotionBoxState* lhs_box_state = nullptr; + MotionBoxState* rhs_box_state = nullptr; + if (states) { + CHECK(options_.record_path_states()) + << "Requesting corresponding tracking states requires option " + << "record_path_states to be set"; + states->resize(1); + lhs_box_state = rhs_box_state = &states->at(0); + } + + VLOG(1) << "Obtaining result at " << time_msec; + + absl::MutexLock lock(&path_mutex_); + const Path& path = paths_[id]; + if (path.size() < 1) { + LOG(ERROR) << "Empty path!"; + return false; + } + + // Find corresponding checkpoint. + auto check_pos = path.lower_bound(time_msec); + if (check_pos == path.begin()) { + VLOG(1) << "To left"; + // We are to the left of the earliest checkpoint. + return TimedBoxAtTime(check_pos->second, time_msec, result, lhs_box_state); + } + if (check_pos == path.end()) { + VLOG(1) << "To right"; + --check_pos; + // We are to the right of the lastest checkpoint. 
+ return TimedBoxAtTime(check_pos->second, time_msec, result, rhs_box_state); + } + + VLOG(1) << "Blending ..."; + + // We are inbetween checkpoints, get result for each, then blend. + const PathSegment& rhs = check_pos->second; + const int check_rhs = check_pos->first; + --check_pos; + const PathSegment& lhs = check_pos->second; + const int check_lhs = check_pos->first; + + TimedBox lhs_box; + TimedBox rhs_box; + if (states) { + states->resize(2); + lhs_box_state = &states->at(0); + rhs_box_state = &states->at(1); + } + + if (!TimedBoxAtTime(lhs, time_msec, &lhs_box, lhs_box_state)) { + return false; + } + + if (!TimedBoxAtTime(rhs, time_msec, &rhs_box, rhs_box_state)) { + return false; + } + + VLOG(1) << "Blending: " << lhs_box.ToString() << " and " + << rhs_box.ToString(); + const double alpha = (time_msec - check_lhs) * 1.0 / (check_rhs - check_lhs); + *result = TimedBox::Blend(lhs_box, rhs_box, alpha); + + return true; +} + +bool BoxTracker::IsTrackingOngoingForId(int id) { + absl::MutexLock lock(&status_mutex_); + for (const auto& item : track_status_[id]) { + if (item.second.tracks_ongoing > 0) { + return true; + } + } + return false; +} + +bool BoxTracker::IsTrackingOngoing() { + absl::MutexLock lock(&status_mutex_); + return IsTrackingOngoingMutexHeld(); +} + +bool BoxTracker::IsTrackingOngoingMutexHeld() { + for (const auto& id : track_status_) { + for (const auto& item : id.second) { + if (item.second.tracks_ongoing > 0) { + return true; + } + } + } + return false; +} + +BoxTracker::AugmentedChunkPtr BoxTracker::ReadChunk(int id, int checkpoint, + int chunk_idx) { + VLOG(1) << __FUNCTION__ << " id=" << id << " chunk_idx=" << chunk_idx; + if (cache_dir_.empty() && !tracking_data_.empty()) { + if (chunk_idx < tracking_data_.size()) { + return std::make_pair(tracking_data_[chunk_idx], false); + } else { + LOG(ERROR) << "chunk_idx >= tracking_data_.size()"; + return std::make_pair(nullptr, false); + } + } else { + std::unique_ptr chunk_data( + 
ReadChunkFromCache(id, checkpoint, chunk_idx)); + return std::make_pair(chunk_data.release(), true); + } +} + +std::unique_ptr BoxTracker::ReadChunkFromCache( + int id, int checkpoint, int chunk_idx) { + VLOG(1) << __FUNCTION__ << " id=" << id << " chunk_idx=" << chunk_idx; + + auto format_runtime = + absl::ParsedFormat<'d'>::New(options_.cache_file_format()); + + std::string chunk_file; + if (format_runtime) { + chunk_file = cache_dir_ + "/" + absl::StrFormat(*format_runtime, chunk_idx); + } else { + LOG(ERROR) << "chache_file_format wrong. fall back to chunk_%04d."; + chunk_file = cache_dir_ + "/" + absl::StrFormat("chunk_%04d", chunk_idx); + } + + VLOG(1) << "Reading chunk from cache: " << chunk_file; + std::unique_ptr chunk_data(new TrackingDataChunk()); + + struct stat tmp; + if (stat(chunk_file.c_str(), &tmp)) { + if (!WaitForChunkFile(id, checkpoint, chunk_file)) { + return nullptr; + } + } + + VLOG(1) << "File exists, reading ..."; + + std::ifstream in(chunk_file, std::ios::in | std::ios::binary); + if (!in) { + LOG(ERROR) << "Could not read chunk file: " << chunk_file; + return nullptr; + } + + std::string data; + in.seekg(0, std::ios::end); + data.resize(in.tellg()); + in.seekg(0, std::ios::beg); + in.read(&data[0], data.size()); + in.close(); + + chunk_data->ParseFromString(data); + + VLOG(1) << "Read success"; + return chunk_data; +} + +bool BoxTracker::WaitForChunkFile(int id, int checkpoint, + const std::string& chunk_file) { + VLOG(1) << "Chunk no exists, waiting for file: " << chunk_file; + + const int timeout_msec = options_.read_chunk_timeout_msec(); + + // Exponential backoff sleep till file exists. + int wait_time_msec = 20; + + VLOG(1) << "In wait for chunk ...: " << chunk_file; + + // Maximum wait time. + const int kMaxWaitPeriod = 5000; + int total_wait_msec = 0; + bool file_exists = false; + + while (!file_exists && total_wait_msec < timeout_msec) { + // Check if we got canceled. 
+ { + absl::MutexLock lock(&status_mutex_); + if (track_status_[id][checkpoint].canceled) { + return false; + } + } + + usleep(wait_time_msec * 1000); + total_wait_msec += wait_time_msec; + + struct stat tmp; + file_exists = stat(chunk_file.c_str(), &tmp) == 0; + + if (file_exists) { + VLOG(1) << "Sucessfully waited on " << chunk_file << " for " + << total_wait_msec; + break; + } + if (wait_time_msec < kMaxWaitPeriod) { + wait_time_msec *= 1.5; + } + } + + return file_exists; +} + +int BoxTracker::ClosestFrameIndex(int64 msec, + const TrackingDataChunk& chunk) const { + CHECK_GT(chunk.item_size(), 0); + typedef TrackingDataChunk::Item Item; + Item item_to_find; + item_to_find.set_timestamp_usec(msec * 1000); + int pos = + std::lower_bound(chunk.item().begin(), chunk.item().end(), item_to_find, + [](const Item& lhs, const Item& rhs) -> bool { + return lhs.timestamp_usec() < rhs.timestamp_usec(); + }) - + chunk.item().begin(); + + // Skip end. + if (pos == chunk.item_size()) { + return pos - 1; + } else if (pos == 0) { + // Nothing smaller exists. + return 0; + } + + // Determine closest timestamp. + const int64 lhs_diff = msec - chunk.item(pos - 1).timestamp_usec() / 1000; + const int64 rhs_diff = chunk.item(pos).timestamp_usec() / 1000 - msec; + + if (std::min(lhs_diff, rhs_diff) >= 67) { + LOG(ERROR) << "No frame found within 67ms, probably using wrong chunk."; + } + + if (lhs_diff < rhs_diff) { + return pos - 1; + } else { + return pos; + } +} + +void BoxTracker::AddBoxResult(const TimedBox& box, int id, int checkpoint, + const MotionBoxState& state) { + absl::MutexLock lock(&path_mutex_); + PathSegment& segment = paths_[id][checkpoint]; + auto insert_pos = std::lower_bound(segment.begin(), segment.end(), box); + const bool store_state = options_.record_path_states(); + + // Don't overwrite an existing box. + if (insert_pos == segment.end() || insert_pos->time_msec != box.time_msec) { + segment.insert(insert_pos, + InternalTimedBox( + box, store_state ? 
new MotionBoxState(state) : nullptr)); + } +} + +void BoxTracker::ClearCheckpoint(int id, int checkpoint) { + absl::MutexLock lock(&path_mutex_); + PathSegment& segment = paths_[id][checkpoint]; + segment.clear(); +} + +void BoxTracker::TrackingImpl(const TrackingImplArgs& a) { + TrackStepOptions track_step_options = options_.track_step_options(); + ChangeTrackingDegreesBasedOnStartPos(a.start_state, &track_step_options); + MotionBox motion_box(track_step_options); + const int chunk_data_size = a.chunk_data->item_size(); + + CHECK_GE(a.start_frame, 0); + CHECK_LT(a.start_frame, chunk_data_size); + + VLOG(1) << " a.start_frame = " << a.start_frame << " @" + << a.chunk_data->item(a.start_frame).timestamp_usec() << " with " + << chunk_data_size << " items"; + motion_box.ResetAtFrame(a.start_frame, a.start_state); + + auto cleanup_func = [&a, this]() -> void { + if (a.first_call) { + // Signal we are done processing in this direction. + absl::MutexLock lock(&status_mutex_); + --track_status_[a.id][a.checkpoint].tracks_ongoing; + status_condvar_.SignalAll(); + } + }; + + if (a.forward) { + // TrackingData at frame f, contains tracking information from + // frame f to f - 1. Get information at frame f + 1 and invert: + // Tracking from f to f + 1. + for (int f = a.start_frame; f + 1 < chunk_data_size; ++f) { + // Note: we use / 1000 instead of * 1000 to avoid overflow. + if (a.chunk_data->item(f + 1).timestamp_usec() / 1000 > a.max_msec) { + VLOG(2) << "Reached maximum tracking timestamp @" << a.max_msec; + break; + } + VLOG(1) << "Track forward from " << f; + MotionVectorFrame mvf; + MotionVectorFrameFromTrackingData( + a.chunk_data->item(f + 1).tracking_data(), &mvf); + const int track_duration_ms = + TrackingDataDurationMs(a.chunk_data->item(f + 1)); + if (track_duration_ms > 0) { + mvf.duration_ms = track_duration_ms; + } + + // If this is the first frame in a chunk, there might be an unobserved + // chunk boundary at the first frame. 
+ if (f == 0 && a.chunk_data->item(0).tracking_data().frame_flags() & + TrackingData::FLAG_CHUNK_BOUNDARY) { + mvf.is_chunk_boundary = true; + } + + MotionVectorFrame mvf_inverted; + InvertMotionVectorFrame(mvf, &mvf_inverted); + + const bool forward_tracking = true; + if (!motion_box.TrackStep(f, mvf_inverted, forward_tracking)) { + VLOG(1) << "Failed forward at frame: " << f; + break; + } else { + // Test if current request is canceled. + { + absl::MutexLock lock(&status_mutex_); + if (track_status_[a.id][a.checkpoint].canceled) { + --track_status_[a.id][a.checkpoint].tracks_ongoing; + status_condvar_.SignalAll(); + return; + } + } + + TimedBox result; + const MotionBoxState& result_state = motion_box.StateAtFrame(f + 1); + TimedBoxFromMotionBoxState(result_state, &result); + result.time_msec = a.chunk_data->item(f + 1).timestamp_usec() / 1000; + AddBoxResult(result, a.id, a.checkpoint, result_state); + } + + if (f + 2 == chunk_data_size && !a.chunk_data->last_chunk()) { + // Last frame, successful track, continue; + AugmentedChunkPtr next_chunk( + ReadChunk(a.id, a.checkpoint, a.chunk_idx + 1)); + + if (next_chunk.first != nullptr) { + TrackingImplArgs next_args(next_chunk, motion_box.StateAtFrame(f + 1), + 0, a.chunk_idx + 1, a.id, a.checkpoint, + a.forward, false, a.min_msec, a.max_msec); + + TrackingImpl(next_args); + } else { + cleanup_func(); + LOG(ERROR) << "Can't read expected chunk file!"; + } + } + } + } else { + // Backward tracking. + // Don't attempt to track from the very first frame backwards. + const int first_frame = a.chunk_data->first_chunk() ? 
1 : 0; + + for (int f = a.start_frame; f >= first_frame; --f) { + if (a.chunk_data->item(f).timestamp_usec() / 1000 < a.min_msec) { + VLOG(2) << "Reached minimum tracking timestamp @" << a.min_msec; + break; + } + VLOG(1) << "Track backward from " << f; + MotionVectorFrame mvf; + MotionVectorFrameFromTrackingData(a.chunk_data->item(f).tracking_data(), + &mvf); + const int64 track_duration_ms = + TrackingDataDurationMs(a.chunk_data->item(f)); + if (track_duration_ms > 0) { + mvf.duration_ms = track_duration_ms; + } + const bool forward_tracking = false; + if (!motion_box.TrackStep(f, mvf, forward_tracking)) { + VLOG(1) << "Failed backward at frame: " << f; + break; + } else { + // Test if current request is canceled. + { + absl::MutexLock lock(&status_mutex_); + if (track_status_[a.id][a.checkpoint].canceled) { + --track_status_[a.id][a.checkpoint].tracks_ongoing; + status_condvar_.SignalAll(); + return; + } + } + + TimedBox result; + const MotionBoxState& result_state = motion_box.StateAtFrame(f - 1); + TimedBoxFromMotionBoxState(result_state, &result); + result.time_msec = a.chunk_data->item(f).prev_timestamp_usec() / 1000; + AddBoxResult(result, a.id, a.checkpoint, result_state); + } + + if (f == first_frame && !a.chunk_data->first_chunk()) { + VLOG(1) << "Read next chunk: " << f << "==" << first_frame << " in " + << a.chunk_idx; + // First frame, successful track, continue. + AugmentedChunkPtr prev_chunk( + ReadChunk(a.id, a.checkpoint, a.chunk_idx - 1)); + if (prev_chunk.first != nullptr) { + const int last_frame = prev_chunk.first->item_size() - 1; + TrackingImplArgs prev_args(prev_chunk, motion_box.StateAtFrame(f - 1), + last_frame, a.chunk_idx - 1, a.id, + a.checkpoint, a.forward, false, a.min_msec, + a.max_msec); + + TrackingImpl(prev_args); + } else { + cleanup_func(); + LOG(ERROR) << "Can't read expected chunk file! 
" << a.chunk_idx - 1 + << " while tracking @" + << a.chunk_data->item(f).timestamp_usec() / 1000 + << " with cutoff " << a.min_msec; + return; + } + } + } + } + + cleanup_func(); +} + +bool TimedBoxAtTime(const PathSegment& segment, int64 time_msec, TimedBox* box, + MotionBoxState* state) { + CHECK(box); + + if (segment.empty()) { + return false; + } + + TimedBox to_find; + to_find.time_msec = time_msec; + auto pos = std::lower_bound(segment.begin(), segment.end(), to_find); + if (pos != segment.end() && pos->time_msec == time_msec) { + *box = *pos; + if (state) { + *state = *pos->state; + } + return true; + } + + constexpr int kMaxDiff = 67; + + if (pos == segment.begin()) { + if (pos->time_msec - time_msec < kMaxDiff) { + *box = *pos; + if (state && pos->state) { + *state = *pos->state; + } + return true; + } else { + return false; + } + } + + if (pos == segment.end()) { + if (time_msec - pos[-1].time_msec < kMaxDiff) { + *box = pos[-1]; + if (state && pos[-1].state) { + *state = *pos[-1].state; + } + return true; + } else { + return false; + } + } + + // Interpolation necessary. + *box = BlendTimedBoxes(pos[-1], pos[0], time_msec); + if (state) { + // Grab closest state. 
+ if (std::abs(pos[-1].time_msec - time_msec) < + std::abs(pos[0].time_msec - time_msec)) { + if (pos[-1].state) { + *state = *pos[-1].state; + } + } else { + if (pos[0].state) { + *state = *pos[0].state; + } + } + } + return true; +} + +void BoxTracker::ResumeTracking() { + absl::MutexLock lock(&status_mutex_); + canceling_ = false; +} + +void BoxTracker::CancelAllOngoingTracks() { + // Get a list of items to be canceled (id, checkpoint) + absl::MutexLock lock(&status_mutex_); + canceling_ = true; + + std::vector> to_be_canceled; + for (auto& id : track_status_) { + for (auto& checkpoint : id.second) { + if (checkpoint.second.tracks_ongoing > 0) { + checkpoint.second.canceled = true; + to_be_canceled.push_back(std::make_pair(id.first, checkpoint.first)); + } + } + } + + // Wait for ongoing requests to terminate. + auto on_going_test = [&to_be_canceled, this]() -> bool { + status_mutex_.AssertHeld(); + for (const auto& item : to_be_canceled) { + if (track_status_[item.first][item.second].tracks_ongoing > 0) { + return true; + } + } + return false; + }; + + while (on_going_test()) { + status_condvar_.Wait(&status_mutex_); + } + + // Indicate we are done canceling. + for (const auto& item : to_be_canceled) { + track_status_[item.first][item.second].canceled = false; + } +} + +bool BoxTracker::WaitForAllOngoingTracks(int timeout_us) { + MEASURE_TIME << "Tracking time ..."; + absl::MutexLock lock(&status_mutex_); + + // Infinite wait for timeout <= 0. + absl::Duration timeout = timeout_us > 0 ? 
absl::Microseconds(timeout_us) + : absl::InfiniteDuration(); + + while (timeout > absl::ZeroDuration() && IsTrackingOngoingMutexHeld()) { + absl::Time start_wait = absl::Now(); + status_condvar_.WaitWithTimeout(&status_mutex_, timeout); + + absl::Duration elapsed = absl::Now() - start_wait; + timeout -= elapsed; + } + + return !IsTrackingOngoingMutexHeld(); +} + +bool BoxTracker::GetTrackingData(int id, int64 request_time_msec, + TrackingData* tracking_data, + int* tracking_data_msec) { + CHECK(tracking_data); + + int chunk_idx = ChunkIdxFromTime(request_time_msec); + + AugmentedChunkPtr tracking_chunk(ReadChunk(id, kInitCheckpoint, chunk_idx)); + if (!tracking_chunk.first) { + absl::MutexLock lock(&status_mutex_); + --track_status_[id][kInitCheckpoint].tracks_ongoing; + LOG(ERROR) << "Could not read tracking chunk from file."; + return false; + } + + std::unique_ptr owned_chunk; + if (tracking_chunk.second) { + owned_chunk.reset(tracking_chunk.first); + } + + const int closest_frame = + ClosestFrameIndex(request_time_msec, *tracking_chunk.first); + + *tracking_data = tracking_chunk.first->item(closest_frame).tracking_data(); + if (tracking_data_msec) { + *tracking_data_msec = + tracking_chunk.first->item(closest_frame).timestamp_usec() / 1000; + } + return true; +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/box_tracker.h b/mediapipe/util/tracking/box_tracker.h new file mode 100644 index 000000000..72022bf45 --- /dev/null +++ b/mediapipe/util/tracking/box_tracker.h @@ -0,0 +1,372 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_ +#define MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_ + +#include + +#include +#include +#include + +#include "absl/strings/str_format.h" +#include "absl/synchronization/mutex.h" +#include "mediapipe/framework/port/threadpool.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" +#include "mediapipe/util/tracking/flow_packager.pb.h" +#include "mediapipe/util/tracking/tracking.h" +#include "mediapipe/util/tracking/tracking.pb.h" + +namespace mediapipe { + +// Describes a rectangle box at an instance of time. +struct TimedBox { + static const int kNumQuadVertices = 4; + // Dimensions are in normalized coordinates [0, 1]. + float top = 0; + float left = 0; + float bottom = 0; + float right = 0; + // Rotation of box w.r.t. center in radians. + float rotation = 0; + int64 time_msec = 0; + // Confidence of the tracked box in range [0, 1]. + float confidence = 0; + std::vector quad_vertices; + // Aspect ratio (width / height) for the tracked rectangle in physical space. + float aspect_ratio = -1.0; + // Whether we want this box to be potentially grouped with other boxes + // to track together. This is useful for tracking small boxes that lie + // on a plane. For example, when we detect a plane, + // track the plane, then all boxes within the plane can share the same + // homography transform. 
+ bool request_grouping = false; + + bool operator<(const TimedBox& rhs) const { + return time_msec < rhs.time_msec; + } + + // Returns (1.0 - alpha) * lhs + alpha * rhs; + static TimedBox Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha); + + // Returns lhs * alpha + rhs * beta; + static TimedBox Blend(const TimedBox& lhs, const TimedBox& rhs, double alpha, + double beta); + + std::string ToString() const { + return absl::StrFormat( + "top: %.3f left: %.3f bottom: %.3f right: %.3f " + "rot: %.3f t: %d", + top, left, bottom, right, rotation, static_cast(time_msec)); + } + + // Returns corners of TimedBox in the requested domain. + std::array Corners(float width, float height) const; + + static TimedBox FromProto(const TimedBoxProto& proto); + TimedBoxProto ToProto() const; +}; + +// TimedBox augment with internal states. +struct InternalTimedBox : public TimedBox { + InternalTimedBox() = default; + // Convenience constructor. + InternalTimedBox(const TimedBox& box, const MotionBoxState* state_) + : TimedBox(box), state(state_) {} + + // Corresponding MotionBoxState a TimedBox. + std::shared_ptr state; +}; + +// Initializes beginning tracking state from a TimedBox. +void MotionBoxStateFromTimedBox(const TimedBox& box, MotionBoxState* state); + +// Retrieves box position and time from a tracking state. +void TimedBoxFromMotionBoxState(const MotionBoxState& state, TimedBox* box); + +// Downgrade tracking degrees of TrackStepOptions to +// TRACKING_DEGREE_OBJECT_ROTATION_SCALE if originally +// specified TRACKING_DEGREE_OBJECT_PERSPECTIVE, but start pos +// does not contain quad or contains invalid quad. 
+template +void ChangeTrackingDegreesBasedOnStartPos( + const T& start_pos, TrackStepOptions* track_step_options) { + if (track_step_options->tracking_degrees() == + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE && + (!start_pos.has_quad() || start_pos.quad().vertices_size() != 8)) { + track_step_options->set_tracking_degrees( + TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE); + VLOG(1) << "Originally specified TRACKING_DEGREE_OBJECT_PERSPECTIVE, but " + "changed to TRACKING_DEGREE_OBJECT_ROTATION_SCALE"; + } +} + +// Set of boxes for a particular checkpoint. +typedef std::deque PathSegment; + +// Stores the PathSegment for each checkpoint time. +typedef std::map Path; + +// Returns box at specified time for a specific path segment. +// Returns true on success, otherwise box is left untouched. +// Optionally returns closest known MotionBoxState if state is not null. +bool TimedBoxAtTime(const PathSegment& segment, int64 time_msec, TimedBox* box, + MotionBoxState* state = nullptr); + +// Tracks timed boxes from cached TrackingDataChunks created by +// FlowPackagerCalculator. For usage see accompanying test. +class BoxTracker { + public: + // Initializes a new BoxTracker to work on cached TrackingData from a chunk + // directory. + BoxTracker(const std::string& cache_dir, const BoxTrackerOptions& options); + + // Initializes a new BoxTracker to work on the passed TrackingDataChunks. + // If copy_data is true, BoxTracker will retain its own copy of the data; + // otherwise the passed pointer need to be valid for the lifetime of the + // BoxTracker. + BoxTracker(const std::vector& tracking_data, + bool copy_data, const BoxTrackerOptions& options); + + // Add single TrackingDataChunk. This chunk must be correctly aligned with + // existing chunks. If chunk starting timestamp is larger than next valid + // chunk timestamp, empty chunks will be added to fill the gap. 
If copy_data + // is true, BoxTracker will retain its own copy of the data; otherwise the + // passed pointers need to be valid for the lifetime of the BoxTracker. + void AddTrackingDataChunk(const TrackingDataChunk* chunk, bool copy_data); + // Add new TrackingDataChunks. This method allows to streamline BoxTracker + // usage. Instead of collecting all tracking chunks and then calling the + // constructor, it makes possible to pass only minimally required chunks to + // constructor, and then append new chunks when available. + // Caller is responsible to guarantee that all chunks that are necessary for + // tracking are at the disposal of BoxTracker before calling NewBoxTrack, i.e. + // min_msec and max_msec parameters must be in the range of chunks passed to + // BoxTracker. + // Chunks will be added in the order they are specified in tracking_data. Each + // chunk must be correctly aligned with existing chunks. If chunk starting + // timestamp is larger than next valid chunk timestamp, empty chunks will be + // added to fill the gap. If copy_data is true, BoxTracker will retain its own + // copy of the data; otherwise the passed pointers need to be valid for the + // lifetime of the BoxTracker. + void AddTrackingDataChunks( + const std::vector& tracking_data, + bool copy_data); + + // Starts a new track for the specified box until tracking terminates or + // until track length achieves max_length. + // Does not block caller, returns immediately. + // Note: Use positive integers for id, we reserve negative ones for debugging + // and visualization purposes. + void NewBoxTrack(const TimedBox& initial_pos, int id, int64 min_msec = 0, + int64 max_msec = std::numeric_limits::max()); + + // Returns interval for which the state of the specified box is known. + // (Returns -1, -1 if id is missing or no tracking has been done). + std::pair TrackInterval(int id); + + // Returns box position for requested box at specified time. + // Returns false if no such box exists. 
+ // Optional, if record_path_states is enabled in options outputs the + // corresponding states for the resulting TimedBox. Note, as TimedBoxes are + // generally interpolated from the underlying data and blended across + // checkpoints, the returned states are the closest (snapped) known tracking + // states for the left and right checkpoint path. Therefore the size of states + // is either two or one (if only one checkpoint exists). + bool GetTimedPosition(int id, int64 time_msec, TimedBox* result, + std::vector* states = nullptr); + + // Returns chunk index for specified time. + int ChunkIdxFromTime(int64 msec) const { + return msec / options_.caching_chunk_size_msec(); + } + + // Returns true if any tracking is ongoing for the specified id. + bool IsTrackingOngoingForId(int id) LOCKS_EXCLUDED(status_mutex_); + + // Returns true if any tracking is ongoing. + bool IsTrackingOngoing() LOCKS_EXCLUDED(status_mutex_); + + // Cancels all ongoing tracks. To avoid race conditions all NewBoxTrack's in + // flight will also be canceled. Future NewBoxTrack's will be canceled. + // NOTE: To resume execution, you have to call ResumeTracking() before + // issuing more NewBoxTrack calls. + void CancelAllOngoingTracks() LOCKS_EXCLUDED(status_mutex_); + void ResumeTracking() LOCKS_EXCLUDED(status_mutex_); + + // Waits for all ongoing tracks to complete. + // Optionally accepts a timeout in microseconds (== 0 for infinite wait). + // Returns true on success, false if timeout is reached. + // NOTE: If WaitForAllOngoingTracks timed out, CancelAllOngoingTracks() must + // be called before destructing the BoxTracker object or dangeling running + // threads might try to access invalid data. + bool WaitForAllOngoingTracks(int timeout_us = 0) + LOCKS_EXCLUDED(status_mutex_); + + // Debug function to obtain raw TrackingData closest to the specified + // timestamp. This call will read from disk on every invocation so it is + // expensive. 
+ // To not interfere with other tracking requests it is recommended that you + // use a unique id here. + // Returns true on success. + bool GetTrackingData(int id, int64 request_time_msec, + TrackingData* tracking_data, + int* tracking_data_msec = nullptr); + + private: + // Asynchronous implementation function for box tracking. Schedules forward + // and backward tracking. + void NewBoxTrackAsync(const TimedBox& initial_pos, int id, int64 min_msec, + int64 max_msec); + + typedef std::pair AugmentedChunkPtr; + // Attempts to read chunk at chunk_idx if it exists. Reads from cache + // directory or from in memory cache. + // Important: 2nd part of return value indicates if returned tracking data + // will be owned by the caller (if true). In that case caller is responsible + // for releasing the returned chunk. + AugmentedChunkPtr ReadChunk(int id, int checkpoint, int chunk_idx); + + // Attempts to read specified chunk from caching directory. Blocks and waits + // until chunk is available or internal time out is reached. + // Returns nullptr if data could not be read. + std::unique_ptr ReadChunkFromCache(int id, int checkpoint, + int chunk_idx); + + // Waits with timeout for chunkfile to become available. Returns true on + // success, false if waited till timeout or when canceled. + bool WaitForChunkFile(int id, int checkpoint, const std::string& chunk_file) + LOCKS_EXCLUDED(status_mutex_); + + // Determines closest index in passed TrackingDataChunk + int ClosestFrameIndex(int64 msec, const TrackingDataChunk& chunk) const; + + // Adds new TimedBox to specified checkpoint with state. + void AddBoxResult(const TimedBox& box, int id, int checkpoint, + const MotionBoxState& state); + + // Callback can only handle 5 args max. + // Set own_data to true for args to assume ownership. 
+  struct TrackingImplArgs {
+    TrackingImplArgs(AugmentedChunkPtr chunk_ptr,
+                     const MotionBoxState& start_state_, int start_frame_,
+                     int chunk_idx_, int id_, int checkpoint_, bool forward_,
+                     bool first_call_, int64 min_msec_, int64 max_msec_)
+        : start_state(start_state_),
+          start_frame(start_frame_),
+          chunk_idx(chunk_idx_),
+          id(id_),
+          checkpoint(checkpoint_),
+          forward(forward_),
+          first_call(first_call_),
+          min_msec(min_msec_),
+          max_msec(max_msec_) {
+      if (chunk_ptr.second) {
+        chunk_data_buffer.reset(chunk_ptr.first);
+      }
+
+      chunk_data = chunk_ptr.first;
+    }
+
+    TrackingImplArgs(const TrackingImplArgs&) = default;
+
+    // Storage for tracking data.
+    std::shared_ptr<const TrackingDataChunk> chunk_data_buffer;
+
+    // Pointer to the actual tracking data. Usually points to the buffer
+    // but can also point to external data for performance reasons.
+    const TrackingDataChunk* chunk_data;
+
+    MotionBoxState start_state;
+    int start_frame;
+    int chunk_idx;
+    int id;
+    int checkpoint;
+    bool forward = true;
+    bool first_call = true;
+    int64 min_msec;  // minimum timestamp to track to
+    int64 max_msec;  // maximum timestamp to track to
+  };
+
+  // Actual tracking algorithm.
+  void TrackingImpl(const TrackingImplArgs& args);
+
+  // Ids are scheduled exclusively, run this method to acquire lock.
+  // Returns false if id could not be scheduled (e.g. id got canceled during
+  // waiting).
+  bool WaitToScheduleId(int id) LOCKS_EXCLUDED(status_mutex_);
+
+  // Signals end of scheduling phase. Requires status mutex to be held.
+  void DoneSchedulingId(int id) EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
+
+  // Removes all checkpoints within vicinity of new checkpoint.
+  void RemoveCloseCheckpoints(int id, int checkpoint)
+      EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
+
+  // Removes specific checkpoint.
+  void ClearCheckpoint(int id, int checkpoint)
+      EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
+
+  // Terminates tracking for specific id and checkpoint.
+  void CancelTracking(int id, int checkpoint)
+      EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
+
+  // Implementation function for IsTrackingOngoing assuming mutex is already
+  // held.
+  bool IsTrackingOngoingMutexHeld() EXCLUSIVE_LOCKS_REQUIRED(status_mutex_);
+
+  // Captures tracking status for each checkpoint
+  struct TrackStatus {
+    // Indicates that all current tracking processes should be canceled.
+    bool canceled = false;
+
+    // Number of tracking requests that are currently ongoing.
+    int tracks_ongoing = 0;
+  };
+
+ private:
+  // Stores computed tracking paths_ for all boxes.
+  std::unordered_map<int, Path> paths_ GUARDED_BY(path_mutex_);
+  absl::Mutex path_mutex_;
+
+  // For each id and each checkpoint stores current tracking status.
+  std::unordered_map<int, std::map<int, TrackStatus>> track_status_
+      GUARDED_BY(status_mutex_);
+
+  // Keeps track which ids are currently processing in NewBoxTrack.
+  std::unordered_map<int, int> new_box_track_ GUARDED_BY(status_mutex_);
+  absl::Mutex status_mutex_;
+
+  bool canceling_ GUARDED_BY(status_mutex_) = false;
+
+  // Use to signal changes to status_condvar_;
+  absl::CondVar status_condvar_ GUARDED_BY(status_mutex_);
+
+  BoxTrackerOptions options_;
+
+  // Caching directory for TrackingData stored on disk.
+  std::string cache_dir_;
+
+  // Pointers to tracking data stored in memory.
+  std::vector<const TrackingDataChunk*> tracking_data_;
+  // Buffer for tracking data in case we retain a deep copy.
+  std::vector<std::unique_ptr<TrackingDataChunk>> tracking_data_buffer_;
+
+  // Workers that run the tracking algorithm.
+  std::unique_ptr<ThreadPool> tracking_workers_;
+};
+
+}  // namespace mediapipe
+
+#endif  // MEDIAPIPE_UTIL_TRACKING_BOX_TRACKER_H_
diff --git a/mediapipe/util/tracking/box_tracker.proto b/mediapipe/util/tracking/box_tracker.proto
new file mode 100644
index 000000000..1d778da1c
--- /dev/null
+++ b/mediapipe/util/tracking/box_tracker.proto
@@ -0,0 +1,87 @@
+// Copyright 2019 The MediaPipe Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/tracking.proto"; + +option java_package = "com.google.mediapipe.tracking"; +option java_outer_classname = "BoxTrackerProto"; + +message BoxTrackerOptions { + // Chunk size for caching files. Should be equal to those + // written by the FlowPackagerCalculator. + optional int32 caching_chunk_size_msec = 1 [default = 2500]; + + // Chunk file format. + optional string cache_file_format = 2 [default = "chunk_%04d"]; + + // Number of simultaneous tracking requests. + optional int32 num_tracking_workers = 3 [default = 8]; + + // Maximum waiting time for next chunk, till function times out. + optional int32 read_chunk_timeout_msec = 4 [default = 60000]; + + // If set, box tracker will record the state for each computed TimedBox + // across all paths. + optional bool record_path_states = 5 [default = false]; + + // Actual tracking options to be used for every step. + optional TrackStepOptions track_step_options = 6; +} + +// Next tag: 13 +// Proto equivalent of struct TimedBox. +message TimedBoxProto { + // Normalized coords - in [0, 1] + optional float top = 1; + optional float left = 2; + optional float bottom = 3; + optional float right = 4; + // Rotation of box w.r.t. center in radians. 
+ optional float rotation = 7; + optional MotionBoxState.Quad quad = 9; + optional int64 time_msec = 5 [default = 0]; + + // Unique per object id to disambiguate boxes. + optional int32 id = 6 [default = -1]; + + // Confidence of box tracked in the range [0, 1], with 0 being least + // confident, and 1 being most confident. A reasonable threshold is 0.5 + // to filter out unconfident boxes. + optional float confidence = 8; + + // Aspect ratio (width / height) for the tracked rectangle in physical space. + // If this field is provided, quad tracking will be performed using + // 6 degrees of freedom perspective transform between physical rectangle and + // frame quad. Otherwise, 8 degrees of freedom homography tracking between + // adjacent frames will be used. + optional float aspect_ratio = 10; + + // Whether or not to enable reacquisition functionality for this specific box. + optional bool reacquisition = 11 [default = false]; + + // Whether we want this box to be potentially grouped with other boxes + // to track together. This is useful for tracking small boxes that lie + // on a plane. For example, when we detect a plane, + // track the plane, then all boxes within the plane can share the same + // homography transform. + optional bool request_grouping = 12 [default = false]; +} + +message TimedBoxProtoList { + repeated TimedBoxProto box = 1; +} diff --git a/mediapipe/util/tracking/box_tracker_test.cc b/mediapipe/util/tracking/box_tracker_test.cc new file mode 100644 index 000000000..558daa91d --- /dev/null +++ b/mediapipe/util/tracking/box_tracker_test.cc @@ -0,0 +1,117 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/util/tracking/box_tracker.h"
+
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/port/gtest.h"
+
+namespace mediapipe {
+namespace {
+
+constexpr double kWidth = 1280.0;
+constexpr double kHeight = 720.0;
+
+// Ground truth test; testing tracking accuracy and multi-thread load testing.
+TEST(BoxTrackerTest, MovingBoxTest) {
+  const std::string cache_dir =
+      file::JoinPath("./", "/mediapipe/util/tracking/testdata/box_tracker");
+  BoxTracker box_tracker(cache_dir, BoxTrackerOptions());
+
+  // Ground truth positions of the overlay (linear in between).
+  // @ 0: (50, 100)
+  // @ 3000: (50, 400)
+  // @ 6000: (500, 400)
+  // @ 9000: (1000, 50)
+  // @ 12000: (50, 100)
+  // @ 15000: (1000, 400)
+  //
+  // size of overlay: 220 x 252
+  std::vector<Vector2_d> positions{
+      {50.0 / kWidth, 100.0 / kHeight}, {50.0 / kWidth, 400.0 / kHeight},
+      {500.0 / kWidth, 400.0 / kHeight}, {1000.0 / kWidth, 50.0 / kHeight},
+      {50.0 / kWidth, 100.0 / kHeight}, {1000.0 / kWidth, 400.0 / kHeight},
+  };
+
+  const Vector2_d overlay_sz(220.0 / kWidth, 252.0 / kHeight);
+
+  TimedBox initial_pos;
+  initial_pos.left = positions[1].x();
+  initial_pos.top = positions[1].y();
+  initial_pos.right = initial_pos.left + overlay_sz.x();
+  initial_pos.bottom = initial_pos.top + overlay_sz.y();
+  initial_pos.time_msec = 3000;
+
+  // Test multithreading under load, ensure this does not crash or stall.
+  box_tracker.NewBoxTrack(initial_pos, 0);
+  // Cancel right after issuing.
+  box_tracker.CancelAllOngoingTracks();
+
+  // Should not be scheduled.
+ box_tracker.NewBoxTrack(initial_pos, 0); + EXPECT_FALSE(box_tracker.IsTrackingOngoing()); + box_tracker.ResumeTracking(); + + box_tracker.NewBoxTrack(initial_pos, 0); + // Two cancelations in a row should not block. + box_tracker.CancelAllOngoingTracks(); + box_tracker.CancelAllOngoingTracks(); + box_tracker.ResumeTracking(); + + // Start again for real this time. + box_tracker.NewBoxTrack(initial_pos, 0); + + // Wait to terminate. + box_tracker.WaitForAllOngoingTracks(); + + // Check that tracking did not abort. + EXPECT_EQ(0, box_tracker.TrackInterval(0).first); + EXPECT_GT(box_tracker.TrackInterval(0).second, 15000); + + auto boxes_equal = [](const TimedBox& lhs, const TimedBox& rhs) { + constexpr float kAccuracy = 0.015f; + return (lhs.time_msec == rhs.time_msec && + std::abs(lhs.top - rhs.top) < kAccuracy && + std::abs(lhs.left - rhs.left) < kAccuracy && + std::abs(lhs.right - rhs.right) < kAccuracy && + std::abs(lhs.bottom - rhs.bottom) < kAccuracy); + }; + + for (int k = 0; k < 15000; k += 33) { + TimedBox box; + EXPECT_TRUE(box_tracker.GetTimedPosition(0, k, &box)); + + // One groundtruth position every 3s, linear in between. + const int rect_pos = k / 3000; + const int dt = k - rect_pos * 3000; + const int next_rect_pos = dt == 0 ? rect_pos : (rect_pos + 1); + + // Blend to get initial position. 
+ const double alpha = dt / 3000.0; + const Vector2_d gt_pos = + (1.0 - alpha) * positions[rect_pos] + alpha * positions[next_rect_pos]; + + TimedBox gt_box; + gt_box.time_msec = k; + gt_box.top = gt_pos.y(); + gt_box.left = gt_pos.x(); + gt_box.right = gt_box.left + overlay_sz.x(); + gt_box.bottom = gt_box.top + overlay_sz.y(); + EXPECT_TRUE(boxes_equal(gt_box, box)); + } +} + +} // namespace + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/camera_motion.cc b/mediapipe/util/tracking/camera_motion.cc new file mode 100644 index 000000000..e753be712 --- /dev/null +++ b/mediapipe/util/tracking/camera_motion.cc @@ -0,0 +1,408 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/util/tracking/camera_motion.h" + +#include + +#include "absl/strings/str_format.h" +#include "mediapipe/util/tracking/region_flow.h" + +namespace mediapipe { + +void CameraMotionToTranslation(const CameraMotion& camera_motion, + TranslationModel* model) { + if (camera_motion.has_translation()) { + model->CopyFrom(camera_motion.translation()); + } else { + model->Clear(); + } +} + +void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion, + LinearSimilarityModel* model) { + if (camera_motion.has_linear_similarity()) { + model->CopyFrom(camera_motion.linear_similarity()); + } else { + TranslationModel translation; + CameraMotionToTranslation(camera_motion, &translation); + model->CopyFrom(LinearSimilarityAdapter::Embed(translation)); + } +} + +void CameraMotionToAffine(const CameraMotion& camera_motion, + AffineModel* model) { + if (camera_motion.has_affine()) { + model->CopyFrom(camera_motion.affine()); + } else { + LinearSimilarityModel similarity; + CameraMotionToLinearSimilarity(camera_motion, &similarity); + model->CopyFrom(AffineAdapter::Embed(similarity)); + } +} + +void CameraMotionToHomography(const CameraMotion& camera_motion, + Homography* model) { + if (camera_motion.has_homography()) { + model->CopyFrom(camera_motion.homography()); + } else { + AffineModel affine; + CameraMotionToAffine(camera_motion, &affine); + model->CopyFrom(HomographyAdapter::Embed(affine)); + } +} + +void CameraMotionToMixtureHomography(const CameraMotion& camera_motion, + MixtureHomography* model) { + if (camera_motion.has_mixture_homography()) { + model->CopyFrom(camera_motion.mixture_homography()); + } else { + Homography homography; + CameraMotionToHomography(camera_motion, &homography); + model->CopyFrom(MixtureHomographyAdapter::Embed(homography, 1)); + } +} + +CameraMotion ComposeCameraMotion(const CameraMotion& lhs, + const CameraMotion& rhs) { + CHECK_EQ(lhs.frame_width(), rhs.frame_width()); + CHECK_EQ(lhs.frame_height(), 
rhs.frame_height()); + + CameraMotion result = rhs; + if (lhs.has_translation() || rhs.has_translation()) { + *result.mutable_translation() = + ModelCompose2(lhs.translation(), rhs.translation()); + } + + if (lhs.has_similarity() || rhs.has_similarity()) { + *result.mutable_similarity() = + ModelCompose2(lhs.similarity(), rhs.similarity()); + } + + if (lhs.has_linear_similarity() || rhs.has_linear_similarity()) { + *result.mutable_linear_similarity() = + ModelCompose2(lhs.linear_similarity(), rhs.linear_similarity()); + } + + if (lhs.has_affine() || rhs.has_affine()) { + *result.mutable_affine() = ModelCompose2(lhs.affine(), rhs.affine()); + } + + if (lhs.has_homography() || rhs.has_homography()) { + *result.mutable_homography() = + ModelCompose2(lhs.homography(), rhs.homography()); + } + + if (rhs.has_mixture_homography()) { + if (lhs.has_mixture_homography()) { + LOG(ERROR) << "Mixture homographies are not closed under composition, " + << "Only rhs mixtures composed with lhs homographies " + << "are supported."; + } else if (lhs.type() <= CameraMotion::UNSTABLE_SIM) { + // We only composit base model when stability is sufficient. + *result.mutable_mixture_homography() = + MixtureHomographyAdapter::ComposeLeft(rhs.mixture_homography(), + lhs.homography()); + } + } else if (lhs.has_mixture_homography()) { + LOG(ERROR) << "Only rhs mixtures supported."; + } + + // Select max unstable type. 
+ result.set_type(std::max(lhs.type(), rhs.type())); + result.set_average_magnitude(lhs.average_magnitude() + + rhs.average_magnitude()); + result.set_translation_variance( + std::max(lhs.translation_variance(), rhs.translation_variance())); + result.set_similarity_inlier_ratio( + std::min(lhs.similarity_inlier_ratio(), rhs.similarity_inlier_ratio())); + + result.set_similarity_strict_inlier_ratio( + std::min(lhs.similarity_strict_inlier_ratio(), + rhs.similarity_strict_inlier_ratio())); + + result.set_average_homography_error( + std::max(lhs.average_homography_error(), rhs.average_homography_error())); + result.set_homography_inlier_coverage(std::min( + lhs.homography_inlier_coverage(), rhs.homography_inlier_coverage())); + result.set_homography_strict_inlier_coverage( + std::min(lhs.homography_strict_inlier_coverage(), + rhs.homography_strict_inlier_coverage())); + + // TODO: Overlay stuff. + + result.set_flags(lhs.flags() | rhs.flags()); + result.set_timestamp_usec( + std::max(lhs.timestamp_usec(), rhs.timestamp_usec())); + result.set_match_frame(lhs.match_frame() + rhs.match_frame()); + + // TODO: Rest. 
+  return result;
+}
+
+CameraMotion InvertCameraMotion(const CameraMotion& motion) {
+  CameraMotion inverted = motion;
+  if (motion.has_translation()) {
+    *inverted.mutable_translation() = ModelInvert(motion.translation());
+  }
+
+  if (motion.has_similarity()) {
+    *inverted.mutable_similarity() = ModelInvert(motion.similarity());
+  }
+
+  if (motion.has_linear_similarity()) {
+    *inverted.mutable_linear_similarity() =
+        ModelInvert(motion.linear_similarity());
+  }
+
+  if (motion.has_affine()) {
+    *inverted.mutable_affine() = ModelInvert(motion.affine());
+  }
+
+  if (motion.has_homography()) {
+    *inverted.mutable_homography() = ModelInvert(motion.homography());
+  }
+
+  if (motion.has_mixture_homography()) {
+    LOG(ERROR) << "Mixture homographies are not closed under inversion.";
+  }
+
+  return inverted;
+}
+
+void SubtractCameraMotionFromFeatures(
+    const std::vector<CameraMotion>& camera_motions,
+    std::vector<RegionFlowFeatureList*>* feature_lists) {
+  CHECK(feature_lists != nullptr);
+  CHECK_GE(camera_motions.size(), feature_lists->size());
+  if (feature_lists->empty()) {
+    return;
+  }
+
+  bool use_mixtures = camera_motions[0].has_mixture_homography();
+
+  std::unique_ptr<MixtureRowWeights> row_weights;
+  if (use_mixtures) {
+    row_weights.reset(MixtureRowWeightsFromCameraMotion(
+        camera_motions[0], (*feature_lists)[0]->frame_height()));
+  }
+
+  for (int k = 0; k < feature_lists->size(); ++k) {
+    Homography background_model;
+    MixtureHomography background_model_mixture;
+    if (use_mixtures) {
+      CameraMotionToMixtureHomography(camera_motions[k],
+                                      &background_model_mixture);
+    } else {
+      CameraMotionToHomography(camera_motions[k], &background_model);
+    }
+
+    // Remove motion due to camera motion, leaving only foreground motion.
+    for (auto& feature : *(*feature_lists)[k]->mutable_feature()) {
+      const Vector2_f background_motion =
+          (!use_mixtures ?
HomographyAdapter::TransformPoint( + background_model, FeatureLocation(feature)) + : MixtureHomographyAdapter::TransformPoint( + background_model_mixture, *row_weights, + FeatureLocation(feature))) - + FeatureLocation(feature); + const Vector2_f object_motion = FeatureFlow(feature) - background_motion; + feature.set_dx(object_motion.x()); + feature.set_dy(object_motion.y()); + } + } +} + +float ForegroundMotion(const CameraMotion& camera_motion, + const RegionFlowFeatureList& feature_list) { + if (camera_motion.has_mixture_homography()) { + LOG(WARNING) << "Mixture homographies are present but function is only " + << "using homographies. Truncation error likely."; + } + + Homography background_motion; + CameraMotionToHomography(camera_motion, &background_motion); + + float foreground_motion = 0; + for (auto& feature : feature_list.feature()) { + const float error = (FeatureMatchLocation(feature) - + HomographyAdapter::TransformPoint( + background_motion, FeatureLocation(feature))) + .Norm(); + foreground_motion += error; + } + + if (feature_list.feature_size() > 0) { + foreground_motion *= 1.0f / feature_list.feature_size(); + } + + return foreground_motion; +} + +void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list, + CameraMotion* camera_motion) { + camera_motion->set_blur_score(feature_list.blur_score()); + camera_motion->set_frac_long_features_rejected( + feature_list.frac_long_features_rejected()); + camera_motion->set_timestamp_usec(feature_list.timestamp_usec()); + camera_motion->set_match_frame(feature_list.match_frame()); + camera_motion->set_frame_width(feature_list.frame_width()); + camera_motion->set_frame_height(feature_list.frame_height()); +} + +std::string CameraMotionFlagToString(const CameraMotion& camera_motion) { + std::string text; + if (camera_motion.flags() & CameraMotion::FLAG_SHOT_BOUNDARY) { + text += "SHOT_BOUNDARY|"; + } + + if (camera_motion.flags() & CameraMotion::FLAG_BLURRY_FRAME) { + text += 
+        absl::StrFormat("BLURRY_FRAME %.2f|", camera_motion.bluriness());
+  }
+
+  if (camera_motion.flags() & CameraMotion::FLAG_MAJOR_OVERLAY) {
+    text += "MAJOR_OVERLAY|";
+  }
+
+  if (camera_motion.flags() & CameraMotion::FLAG_SHARP_FRAME) {
+    text += "SHARP_FRAME|";
+  }
+
+  if (camera_motion.flags() & CameraMotion::FLAG_SHOT_FADE) {
+    text += "SHOT_FADE|";
+  }
+
+  if (camera_motion.flags() & CameraMotion::FLAG_DUPLICATED) {
+    text += "DUPLICATED|";
+  }
+  return text;
+}
+
+std::string CameraMotionTypeToString(const CameraMotion& motion) {
+  switch (motion.type()) {
+    case CameraMotion::VALID:
+      return "VALID";
+    case CameraMotion::UNSTABLE_HOMOG:
+      return "UNSTABLE_HOMOG";
+    case CameraMotion::UNSTABLE_SIM:
+      return "UNSTABLE_SIM";
+    case CameraMotion::UNSTABLE:
+      return "UNSTABLE";
+    case CameraMotion::INVALID:
+      return "INVALID";
+  }
+
+  return "NEVER HAPPENS WITH CLANG";
+}
+
+float InlierCoverage(const CameraMotion& camera_motion,
+                     bool use_homography_coverage) {
+  const int num_block_coverages = camera_motion.mixture_inlier_coverage_size();
+  if (num_block_coverages == 0 || use_homography_coverage) {
+    if (camera_motion.has_homography()) {
+      return camera_motion.homography_inlier_coverage();
+    } else {
+      return 1.0f;
+    }
+  } else {
+    return std::accumulate(camera_motion.mixture_inlier_coverage().begin(),
+                           camera_motion.mixture_inlier_coverage().end(),
+                           0.0f) *
+           (1.0f / num_block_coverages);
+  }
+}
+
+template <class CameraMotionContainer>
+CameraMotion FirstCameraMotionForLooping(
+    const CameraMotionContainer& camera_motions) {
+  if (camera_motions.size() < 2) {
+    LOG(ERROR) << "Not enough camera motions for refinement.";
+    return CameraMotion();
+  }
+
+  CameraMotion loop_motion = camera_motions[0];
+
+  // Only update motions present in first camera motion.
+ bool has_translation = loop_motion.has_translation(); + bool has_similarity = loop_motion.has_linear_similarity(); + bool has_homography = loop_motion.has_homography(); + + TranslationModel translation; + LinearSimilarityModel similarity; + Homography homography; + + for (int i = 1; i < camera_motions.size(); ++i) { + const CameraMotion& motion = camera_motions[i]; + if (motion.has_mixture_homography()) { + // TODO: Implement + LOG(WARNING) << "This function does not validly apply mixtures; " + << "which are currently not closed under composition. "; + } + + switch (motion.type()) { + case CameraMotion::INVALID: + has_translation = false; + has_similarity = false; + has_homography = false; + break; + case CameraMotion::UNSTABLE: + has_similarity = false; + has_homography = false; + break; + case CameraMotion::UNSTABLE_SIM: + has_homography = false; + break; + case CameraMotion::VALID: + case CameraMotion::UNSTABLE_HOMOG: + break; + default: + LOG(FATAL) << "Unknown CameraMotion::type."; + } + + // Only accumulate motions which are valid for the entire chain, otherwise + // keep the pre-initialized motions. + has_translation = + has_translation && motion.has_translation() && + AccumulateInvertedModel(motion.translation(), &translation); + has_similarity = + has_similarity && motion.has_linear_similarity() && + AccumulateInvertedModel(motion.linear_similarity(), &similarity); + has_homography = has_homography && motion.has_homography() && + AccumulateInvertedModel(motion.homography(), &homography); + } + + if (has_translation) { + *loop_motion.mutable_translation() = translation; + } + if (has_similarity && IsInverseStable(similarity)) { + *loop_motion.mutable_linear_similarity() = similarity; + } + if (has_homography && IsInverseStable(homography)) { + *loop_motion.mutable_homography() = homography; + } + + VLOG(1) << "Looping camera motion refinement for " + << " translation:" << (has_translation ? "successful" : "failed") + << " similarity:" << (has_similarity ? 
"successful" : "failed") + << " homography:" << (has_homography ? "successful" : "failed"); + + return loop_motion; +} + +// Explicit instantiation. +template CameraMotion FirstCameraMotionForLooping>( + const std::vector&); +template CameraMotion FirstCameraMotionForLooping>( + const std::deque&); +} // namespace mediapipe diff --git a/mediapipe/util/tracking/camera_motion.h b/mediapipe/util/tracking/camera_motion.h new file mode 100644 index 000000000..98b28abbf --- /dev/null +++ b/mediapipe/util/tracking/camera_motion.h @@ -0,0 +1,336 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_ +#define MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_ + +#include + +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +// Helper functions to extract specific models from CameraMotion. +// Returned is always the requested model. In case, model is not present (i.e. +// has_ fails), the highest degree of freedom model +// (lower or equal to the requested model) that is present is embedded in the +// requested model. +// Presence of the model depends on wich models were requesteded to be +// estimated (via MotionEstimationOptions, to initialize requested models to +// identity, use ResetMotionModels above). 
For example, assume linear similarity +// was not requested to be estimated, but affine was requested. If +// CameraMotionToLinearSimilarity is called, has_linear_similarity would be +// false and the function fall back returning a translation model. +void CameraMotionToTranslation(const CameraMotion& camera_motion, + TranslationModel* model); +void CameraMotionToLinearSimilarity(const CameraMotion& camera_motion, + LinearSimilarityModel* model); +void CameraMotionToAffine(const CameraMotion& camera_motion, + AffineModel* model); +void CameraMotionToHomography(const CameraMotion& camera_motion, + Homography* homography); +void CameraMotionToMixtureHomography(const CameraMotion& camera_motion, + MixtureHomography* mixture); + +// TODO: Under development ... +// Returns camera motion lhs * rhs. Initial camera motion is set to rhs +// before composition. +CameraMotion ComposeCameraMotion(const CameraMotion& lhs, + const CameraMotion& rhs); + +// Inverts every motion model that is set in CameraMotion. +CameraMotion InvertCameraMotion(const CameraMotion& motion); + +// Templated wrapper for above calls. +template +Model CameraMotionToModel(const CameraMotion& camera_motion); + +// Returns model from passed CameraMotion specified by unstable_type +// (which must name a type != VALID, CHECK-ed) and embeds it in the specified +// Model. +template +Model UnstableCameraMotionToModel(const CameraMotion& camera_motion, + CameraMotion::Type unstable_type); + +// Projects passed model to lower degree of freedom model (embedded in original +// type), as specified type. In case type is valid, function is effectively +// identity function. 
+// Only implemented for the following models: +// - Translation +// - LinearSimilarity +// - AffineModel +template +Model ProjectToTypeModel(const Model& model, float frame_width, + float frame_height, CameraMotion::Type type); + +// Substract camera motion (specifically highest, degree of freedom model, +// that has been estimated reliably) from feature lists. Operates on vectors +// for improved performance. Size of camera_motions can be larger than +// feature_lists, in this case last camera motions are ignored. +void SubtractCameraMotionFromFeatures( + const std::vector& camera_motions, + std::vector* feature_lists); + +// Returns average motion magnitude after subtracting camera motion. +float ForegroundMotion(const CameraMotion& camera_motion, + const RegionFlowFeatureList& feature_list); + +// Initializes a CameraMotion with its corresponding fields from a +// RegionFlowFeatureList. +void InitCameraMotionFromFeatureList(const RegionFlowFeatureList& feature_list, + CameraMotion* camera_motion); + +// Converts Camera motion flag to std::string. +std::string CameraMotionFlagToString(const CameraMotion& motion); + +// Converts Camera motion type to std::string. Used instead of builtin proto +// function for mobile support. +std::string CameraMotionTypeToString(const CameraMotion& motion); + +// Returns inlier coverage either based on mixture (if present, in this case +// return mean of block coverages) or else homography. +// If neither is present, returns 0 to signal insufficient inliers. +// If use_homography_coverage is set, uses homography even when mixture is +// present. +float InlierCoverage(const CameraMotion& camera_motion, + bool use_homography_coverage); + +// Downsamples passed motion models temporally by specified downsample_scale, +// i.e. for models F_0, F_1, F_2, F_3, F_4 and downsample_scale of 2, models: +// F_0 * F_1, F_2 * F_3 and F_4 are returned. 
+// Optionally also performs downsampling of corresponding model_type returning +// the least unstable for each composition. +template +void DownsampleMotionModels( + const std::vector& models, + const std::vector* model_type, // optional. + int downsample_scale, std::vector* downsampled_models, + std::vector* downsampled_types); + +// Compatible subsampling method to above DownsampleMotionModels. +// Note, when downsampling for example: +// F_0, F_1, F_2, F_3, F_4 by factor 3 via above function, downsampled result +// will be F_0 * F_1 * F_2, F_3 * F_4 +// so we would need to pick entities at F_2 and F_4. +// Template class Container must be SequenceContainer, like +// std::vector, std::deque. +template +void SubsampleEntities(const Container& input, int downsample_scale, + Container* output); + +// For perfect looping, this function computes the motion in the first frame +// to be the inverse of the accumulated motion from frame 1 to N. +// If a particular motion type is not available or not invertible at any +// frame pair, the original motion for that type is retained. +// Does not work if mixtures are present. +template // STL container of CameraMotion's +CameraMotion FirstCameraMotionForLooping( + const CameraMotionContainer& container); + +// Template implementation functions. + +template +Model UnstableCameraMotionToModel(const CameraMotion& camera_motion, + CameraMotion::Type unstable_type) { + switch (unstable_type) { + case CameraMotion::INVALID: + return Model(); // Identity. 
+ + case CameraMotion::UNSTABLE: { + return ModelAdapter::Embed( + CameraMotionToModel(camera_motion)); + } + + case CameraMotion::UNSTABLE_SIM: { + return ModelAdapter::Embed( + CameraMotionToModel(camera_motion)); + } + + case CameraMotion::UNSTABLE_HOMOG: { + return ModelAdapter::Embed( + CameraMotionToModel(camera_motion)); + } + + case CameraMotion::VALID: + LOG(FATAL) << "Specify a type != VALID"; + return Model(); + } +} + +template <> +inline TranslationModel ProjectToTypeModel(const TranslationModel& model, + float frame_width, + float frame_height, + CameraMotion::Type type) { + switch (type) { + case CameraMotion::INVALID: + return TranslationModel(); // Identity. + default: + return model; + } +} + +template <> +inline LinearSimilarityModel ProjectToTypeModel( + const LinearSimilarityModel& model, float frame_width, float frame_height, + CameraMotion::Type type) { + switch (type) { + case CameraMotion::INVALID: + return LinearSimilarityModel(); // Identity. + + case CameraMotion::UNSTABLE: + return LinearSimilarityAdapter::Embed( + TranslationAdapter::ProjectFrom(model, frame_width, frame_height)); + + default: + return model; + } +} + +template +Model ProjectToTypeModel(const Model& model, float frame_width, + float frame_height, CameraMotion::Type type) { + switch (type) { + case CameraMotion::INVALID: + return Model(); // Identity. + + case CameraMotion::UNSTABLE: + return ModelAdapter::Embed( + TranslationAdapter::ProjectFrom(model, frame_width, frame_height)); + + case CameraMotion::UNSTABLE_SIM: + return ModelAdapter::Embed(LinearSimilarityAdapter::ProjectFrom( + model, frame_width, frame_height)); + + // case UNSTABLE_HOMOG does not occur except for mixtures. 
+
+    default:
+      return model;
+  }
+}
+
+template <>
+inline MixtureHomography ProjectToTypeModel(const MixtureHomography&, float,
+                                            float, CameraMotion::Type) {
+  LOG(FATAL) << "Projection not supported for mixtures.";
+  return MixtureHomography();
+}
+
+template <class Model>
+void DownsampleMotionModels(
+    const std::vector<Model>& models,
+    const std::vector<CameraMotion::Type>* model_type, int downsample_scale,
+    std::vector<Model>* downsampled_models,
+    std::vector<CameraMotion::Type>* downsampled_types) {
+  if (model_type) {
+    CHECK_EQ(models.size(), model_type->size());
+    CHECK(downsampled_models) << "Expecting output models.";
+  }
+
+  CHECK(downsampled_models);
+  downsampled_models->clear();
+  if (downsampled_types) {
+    downsampled_types->clear();
+  }
+
+  const int num_models = models.size();
+
+  for (int model_idx = 0; model_idx < num_models;
+       model_idx += downsample_scale) {
+    const int last_idx =
+        std::min(model_idx + downsample_scale, num_models) - 1;
+
+    CameraMotion::Type sampled_type = CameraMotion::VALID;
+    if (model_type) {
+      // Get least stable model within downsample window (max operation).
+      for (int i = model_idx; i <= last_idx; ++i) {
+        sampled_type = std::max(sampled_type, model_type->at(i));
+      }
+      downsampled_types->push_back(sampled_type);
+    }
+
+    // Concatenate models.
+    Model composed = models[last_idx];
+
+    for (int i = last_idx - 1; i >= model_idx; --i) {
+      composed = ModelCompose2(models[i], composed);
+    }
+
+    downsampled_models->push_back(composed);
+  }
+}
+
+template <class Container>
+void SubsampleEntities(const Container& input, int downsample_factor,
+                       Container* output) {
+  CHECK(output);
+  output->clear();
+
+  if (input.empty()) {
+    return;
+  }
+
+  for (int k = downsample_factor - 1; k < input.size();
+       k += downsample_factor) {
+    output->push_back(input[k]);
+  }
+
+  if (input.size() % downsample_factor != 0) {
+    // We need to add last constraint as termination.
+ output->push_back(input.back()); + } +} + +template <> +inline TranslationModel CameraMotionToModel(const CameraMotion& camera_motion) { + TranslationModel model; + CameraMotionToTranslation(camera_motion, &model); + return model; +} + +template <> +inline LinearSimilarityModel CameraMotionToModel( + const CameraMotion& camera_motion) { + LinearSimilarityModel model; + CameraMotionToLinearSimilarity(camera_motion, &model); + return model; +} + +template <> +inline AffineModel CameraMotionToModel(const CameraMotion& camera_motion) { + AffineModel model; + CameraMotionToAffine(camera_motion, &model); + return model; +} + +template <> +inline Homography CameraMotionToModel(const CameraMotion& camera_motion) { + Homography model; + CameraMotionToHomography(camera_motion, &model); + return model; +} + +template <> +inline MixtureHomography CameraMotionToModel( + const CameraMotion& camera_motion) { + MixtureHomography model; + CameraMotionToMixtureHomography(camera_motion, &model); + return model; +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_CAMERA_MOTION_H_ diff --git a/mediapipe/util/tracking/camera_motion.proto b/mediapipe/util/tracking/camera_motion.proto new file mode 100644 index 000000000..9e19f4ad8 --- /dev/null +++ b/mediapipe/util/tracking/camera_motion.proto @@ -0,0 +1,198 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Describes camera motion between two frames with various degree of freedom +// parametric motion models. +// In addition, stores features describing how reliable the estimated motion +// model is. +// Flags indicate several properties derived from the camera motion, e.g. if a +// frame is sharp, blurry or contains overlays. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/motion_models.proto"; + +// Next tag: 33 +message CameraMotion { + // Background motion expressed in various models. + // These are per-frame pair motions (from current to previous frame). + // Models are expressed in the un-normalized domain frame_width x frame_height + // that is passed to MotionEstimation (storred below). + optional TranslationModel translation = 1; + optional SimilarityModel similarity = 2; + optional LinearSimilarityModel linear_similarity = 3; + optional AffineModel affine = 4; + optional Homography homography = 5; + + optional MixtureHomography mixture_homography = 8; + + // Frame dimensions camera motion was computed over. + optional float frame_width = 31; + optional float frame_height = 32; + + // Mixture homographies computed w.r.t. exponentially increasing + // regularizers. Above mixture_homography member is selected from spectrum + // based on amount of rolling shutter present in the video. + repeated MixtureHomography mixture_homography_spectrum = 23; + + // Relative row sigma w.r.t. frame_height for mixture models. + optional float mixture_row_sigma = 10; + + // Average of all motion vector magnitudes (without accounting for any motion + // model), within 10th to 90th percentile (to remove outliers). + optional float average_magnitude = 24 [default = 0.0]; + + // Inlier-weighted variance of the translation model. + // Specified, w.r.t. unnormalized video domain that motion models + // are computed for. + optional float translation_variance = 25 [default = 0.0]; + + // Ratio of inliers w.r.t. regular and stricter thresholds. 
In [0, 1]. + optional float similarity_inlier_ratio = 29 [default = 0]; + optional float similarity_strict_inlier_ratio = 30 [default = 0]; + + // Average registration error of homography in pixels. + // Note: These two parameters default to zero in-case homographies have not + // been estimated. + optional float average_homography_error = 11; + + // Fraction, in [0,1], of homography inliers. + optional float homography_inlier_coverage = 12; + + // Same as above but with stricter threshold. + // (For details, see: MotionEstimationOptions::strict_coverage_scale). + // Coverage is designed to measure the amount of significant outliers, + // which can affect the validity of the estimated homography. + // However, it does not discount small outliers, which occur in case + // of small rolling shutter wobbles. For this a stricter version of coverage + // is needed, which is essential for computing the rolling_shutter_guess, + // i.e. the increase in coverage by using mixtures vs. homographies. + optional float homography_strict_inlier_coverage = 22; + + // Per-block inlier fraction for mixtures. + repeated float mixture_inlier_coverage = 13; + + // Set based on stability analysis indicating if frame is likely to originate + // from a rolling shutter camera. (-1 is used to indicate frame was not + // tested, e.g. due to mixture deemed unstable for analysis). + // Guess is a scaler indicating by how much the mixture models (suitable for + // rolling shutter distortions) increased inlier coverage compared to a single + // homography. For example a value, of 1.3 indicates, that the mixture models + // increased inlier coverage by 30%. + // If not -1, range is in [0, inf] (values slightly smaller than 1 are + // possible due to suppression of noisy feature tracks during estimation). 
+ optional float rolling_shutter_guess = 14; + + // Indicating if CameraMotion is deemed to originate from rolling + // shutter camera (index >= 0), and if so, denotes the index in the + // mixture_homography_spectrum, where higher indices correspond to heavier + // regularized motions. If motion is not deemed to originate from a rolling + // shutter camera, index is set to -1. + optional int32 rolling_shutter_motion_index = 16 [default = -1]; + + // List of overlay indices (cell locations in column major format) over domain + // of size overlay_domain x overlay_domain, where + // overlay_domain is set by MotionEstimation to + // MotionEstimationOptions::OverlayDetectionOptions::analysis_mask_size. + // Overlay analysis is performed over chunk of frames, as specified by + // MotionEstimationOptions::overlay_analysis_chunk_size, with the resulting + // overlay indices being assigned to each frame of the chunk. + // Consequently it suffices to store the result only for the first frame + // of every chunk. Subsequent frames store a single negative index relative + // to the first chunk frame indicating where to locate the overlay indicies. + // Specifically if for frame f, overlay_indices(0) == -2, overlay indices for + // corresponding chunk can be found at frame f - 2. + // For details about how overlay indices are used to flag a frame to contain + // an overlay, see MotionFilterOptions::OverlayOptions. + repeated int32 overlay_indices = 17; + optional int32 overlay_domain = 18 [default = 10]; + + // CameraMotion type indicates whether highest degree of freedom (DOF) + // model estimation was deemed stable, in which case CameraMotion::Type is set + // to VALID. + // If a model was deemed not stable (according to *StabilityBounds in + // MotionEstimationOptions), it is set to the lower dof type which was deemed + // stable. + enum Type { + VALID = 0; // All requested motion models estimated reliably. 
+ UNSTABLE_HOMOG = 1; // Fallback to homographies, mixture unreliable. + UNSTABLE_SIM = 2; // Fallback to similarity model, homography + // unreliable. + UNSTABLE = 3; // Fallback to translation model, similarity + // unreliable, legacy naming. + INVALID = 4; // Identity model, translation unreliable. + } + + optional Type type = 6 [default = VALID]; + + // If set, stores original type in case it was overriden (by filtering + // functions, etc.). + optional Type overridden_type = 15 [default = VALID]; + + // Set of optional *bit* flags set for various purposes. + enum Flags { + FLAG_SHOT_BOUNDARY = 1; // Set to indicate presence of a + // shot boundary. + FLAG_BLURRY_FRAME = 2; + FLAG_MAJOR_OVERLAY = 4; + FLAG_SHARP_FRAME = 8; // Set if frame is considered sharp + // in a neighborhood of frames. + FLAG_SINGULAR_ESTIMATION = 16; // Indicates that estimation resulted + // in singular optimization problem. + // Used internally by MotionEstimation. + // Indicates if shot boundary is part of a fade. If so, all frames of the + // fade will be labeled with the FLAG but only the begin and end of the fade + // will have the FLAG_SHOT_BOUNDARY set. + FLAG_SHOT_FADE = 32; + + FLAG_DUPLICATED = 64; // Set if frame is exact duplicate of + // previous frame. + FLAG_CENTER_FRAME = 128; // Indicates this frame is at the + // center of the sequence. Currently + // used to constrain stabilizing crop + // transform. + } + + optional int32 flags = 19 [default = 0]; + + // Same as in RegionFlowFeatureList (from region_flow.proto), measures blur + // as average cornerness over textured areas. As it depends on the image + // content, should only be used relative. + optional float blur_score = 20; + + // Quanitifies amount of blur. Specified as ratio w.r.t. sharpest matching + // frame, i.e. 1 indicates no blur, values > 1 amount of blur w.r.t. sharpest + // frame. + optional float bluriness = 21 [default = 0.0]; + + // Same as in RegionFlowFeatureList (from region_flow.proto). 
Stores fraction + // of long feature tracks that got rejected for this frame. + optional float frac_long_features_rejected = 26; + + // Same as in RegionFlowFeatureList (from region_flow.proto). + // Timestamp in micro seconds of the underlying frame. + optional int64 timestamp_usec = 27 [default = 0]; + + // Same as in RegionFlowFeatureList (from region_flow.proto). + // Denotes frame that motion was computed w.r.t. to, locally to the current + // frame. Values < 0 indicate backward tracking, while values > 0 indicate + // forward tracking. For example, match_frame = -1, indicates tracking is + // from current to previous frame. + optional int32 match_frame = 28 [default = 0]; + + // Deprecated fields. + extensions 9; +} diff --git a/mediapipe/util/tracking/flow_packager.cc b/mediapipe/util/tracking/flow_packager.cc new file mode 100644 index 000000000..3a570d85b --- /dev/null +++ b/mediapipe/util/tracking/flow_packager.cc @@ -0,0 +1,1023 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/util/tracking/flow_packager.h" + +#include + +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/camera_motion.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/motion_estimation.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +FlowPackager::FlowPackager(const FlowPackagerOptions& options) + : options_(options) { + if (options_.binary_tracking_data_support()) { + CHECK_LE(options.domain_width(), 256); + CHECK_LE(options.domain_height(), 256); + } +} + +namespace { + +// Performs rounding of float vector position to int. +class FeatureIntegerPosition { + public: + // Scales a feature's location in x and y by scale_x and scale_y respectively. + // Limits feature position to the integer domain + // [0, width - 1] x [0, height - 1] + FeatureIntegerPosition(float scale_x, float scale_y, int width, int height) + : scale_x_(scale_x), scale_y_(scale_y), width_(width), height_(height) {} + + Vector2_i ToIntPosition(const RegionFlowFeature& feature) const { + return Vector2_i( + std::max(0, std::min(width_ - 1, feature.x() * scale_x_ + 0.5f)), + std::max(0, std::min(height_ - 1, feature.y() * scale_y_ + 0.5f))); + } + + private: + float scale_x_; + float scale_y_; + int width_; + int height_; +}; + +// Lexicographic compare (first in x, then in y) under scaled integer rounding +// as specified by FeatureIntegerPosition. 
+class IntegerColumnComparator {
+ public:
+  IntegerColumnComparator(float scale_x, float scale_y, int width, int height)
+      : integer_pos_(scale_x, scale_y, width, height) {}
+
+  bool operator()(const RegionFlowFeature& lhs,
+                  const RegionFlowFeature& rhs) const {
+    const Vector2_i vec_lhs = integer_pos_.ToIntPosition(lhs);
+    const Vector2_i vec_rhs = integer_pos_.ToIntPosition(rhs);
+    return (vec_lhs.x() < vec_rhs.x()) ||
+           (vec_lhs.x() == vec_rhs.x() && vec_lhs.y() < vec_rhs.y());
+  }
+
+ private:
+  const FeatureIntegerPosition integer_pos_;
+};
+
+template <class T>
+inline std::string EncodeToString(const T& value) {
+  std::string s(sizeof(T), 0);
+  memcpy(&s[0], &value, sizeof(T));
+  return s;
+}
+
+template <class T>
+inline std::string EncodeVectorToString(const std::vector<T>& vec) {
+  std::string s(vec.size() * sizeof(T), 0);
+  typename std::vector<T>::const_iterator iter;
+  char* ptr;
+  for (iter = vec.begin(), ptr = &s[0]; iter != vec.end();
+       ++iter, ptr += sizeof(T)) {
+    memcpy(ptr, &(*iter), sizeof(T));
+  }
+  return s;
+}
+
+template <class T>
+inline bool DecodeFromStringView(absl::string_view str, T* result) {
+  CHECK(result != nullptr);
+  if (sizeof(*result) != str.size()) {
+    return false;
+  }
+  memcpy(result, str.data(), sizeof(T));
+  return true;
+}
+
+template <class T>
+inline bool DecodeVectorFromStringView(absl::string_view str,
+                                       std::vector<T>* result) {
+  CHECK(result != nullptr);
+  if (str.size() % sizeof(T) != 0) return false;
+  result->clear();
+  result->reserve(str.size() / sizeof(T));
+  T value;
+  const char* begin = str.data();
+  const char* end = str.data() + str.size();
+  for (const char* ptr = begin; ptr != end; ptr += sizeof(T)) {
+    memcpy(&value, ptr, sizeof(T));
+    result->push_back(value);
+  }
+  return true;
+}
+}  // namespace.
+ +void FlowPackager::PackFlow(const RegionFlowFeatureList& feature_list, + const CameraMotion* camera_motion, + TrackingData* tracking_data) const { + CHECK(tracking_data); + CHECK_GT(feature_list.frame_width(), 0); + CHECK_GT(feature_list.frame_height(), 0); + + // Scale flow to output domain. + const float dim_x_scale = + options_.domain_width() * (1.0f / feature_list.frame_width()); + const float dim_y_scale = + options_.domain_height() * (1.0f / feature_list.frame_height()); + + const bool long_tracks = feature_list.long_tracks(); + + // Sort features lexicographically. + RegionFlowFeatureList sorted_feature_list(feature_list); + + SortRegionFlowFeatureList(dim_x_scale, dim_y_scale, &sorted_feature_list); + + tracking_data->set_domain_width(options_.domain_width()); + tracking_data->set_domain_height(options_.domain_height()); + tracking_data->set_frame_aspect(feature_list.frame_width() * 1.0f / + feature_list.frame_height()); + tracking_data->set_global_feature_count(feature_list.feature_size()); + int flags = 0; + + if (camera_motion == nullptr || + camera_motion->type() > CameraMotion::UNSTABLE_SIM) { + flags |= TrackingData::FLAG_BACKGROUND_UNSTABLE; + } else { + Homography transform; + CameraMotionToHomography(*camera_motion, &transform); + Homography normalization = HomographyAdapter::Embed( + AffineAdapter::FromArgs(0, 0, dim_x_scale, 0, 0, dim_y_scale)); + Homography inv_normalization = + HomographyAdapter::Embed(AffineAdapter::FromArgs( + 0, 0, 1.0f / dim_x_scale, 0, 0, 1.0f / dim_y_scale)); + *tracking_data->mutable_background_model() = + ModelCompose3(normalization, transform, inv_normalization); + } + + if (camera_motion != nullptr) { + tracking_data->set_average_motion_magnitude( + camera_motion->average_magnitude()); + } + + if (feature_list.is_duplicated()) { + flags |= TrackingData::FLAG_DUPLICATED; + } + tracking_data->set_frame_flags(flags); + + const int num_vectors = sorted_feature_list.feature_size(); + TrackingData::MotionData* data = 
tracking_data->mutable_motion_data(); + data->set_num_elements(num_vectors); + + // Initialize col starts with "unseen" marker. + std::vector col_start(options_.domain_width() + 1, -1); + + int last_col = -1; + int last_row = -1; + FeatureIntegerPosition integer_pos(dim_x_scale, dim_y_scale, + options_.domain_width(), + options_.domain_height()); + + // Store feature and corresponding motion (minus camera motion) in + // compressed sparse column format: + // https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29 + for (const auto& feature : sorted_feature_list.feature()) { + float flow_x = feature.dx() * dim_x_scale; + float flow_y = feature.dy() * dim_y_scale; + Vector2_i loc = integer_pos.ToIntPosition(feature); + + // Convert back to float for accurate background model computation. + Vector2_f loc_f = Vector2_f::Cast(loc); + + if (camera_motion) { + Vector2_f residual = HomographyAdapter::TransformPoint( + tracking_data->background_model(), loc_f) - + loc_f; + flow_x -= residual.x(); + flow_y -= residual.y(); + } + + data->add_vector_data(flow_x); + data->add_vector_data(flow_y); + data->add_row_indices(loc.y()); + + if (feature.has_binary_feature_descriptor()) { + data->add_feature_descriptors()->set_data( + feature.binary_feature_descriptor().data()); + } + + if (long_tracks) { + data->add_track_id(feature.track_id()); + } + + const int curr_col = loc.x(); + + if (curr_col != last_col) { + CHECK_LT(last_col, curr_col); + CHECK_EQ(-1, col_start[curr_col]); + col_start[curr_col] = data->row_indices_size() - 1; + last_col = curr_col; + } else { + CHECK_LE(last_row, loc.y()); + } + last_row = loc.y(); + } + + col_start[0] = 0; + col_start[options_.domain_width()] = num_vectors; + + // Fill unset values with previously set value. Propagate end value. 
+ for (int i = options_.domain_width() - 1; i > 0; --i) { + if (col_start[i] < 0) { + DCHECK_GE(col_start[i + 1], 0); + col_start[i] = col_start[i + 1]; + } + } + + for (const auto& col_idx : col_start) { + data->add_col_starts(col_idx); + } + + // Check monotonicity of the row indices. + for (int c = 0; c < options_.domain_width(); ++c) { + const int r_start = data->col_starts(c); + const int r_end = data->col_starts(c + 1); + for (int r = r_start; r < r_end - 1; ++r) { + CHECK_LE(data->row_indices(r), data->row_indices(r + 1)); + } + } + + CHECK_EQ(data->vector_data_size(), 2 * data->row_indices_size()); +} + +void FlowPackager::EncodeTrackingData(const TrackingData& tracking_data, + BinaryTrackingData* binary_data) const { + CHECK(options_.binary_tracking_data_support()); + CHECK(binary_data != nullptr); + + int32 frame_flags = 0; + const bool high_profile = options_.use_high_profile(); + if (high_profile) { + frame_flags |= TrackingData::FLAG_PROFILE_HIGH; + } else { + frame_flags |= TrackingData::FLAG_PROFILE_BASELINE; // No op. + } + + if (options_.high_fidelity_16bit_encode()) { + frame_flags |= TrackingData::FLAG_HIGH_FIDELITY_VECTORS; + } + + // Copy background flag. + frame_flags |= + tracking_data.frame_flags() & TrackingData::FLAG_BACKGROUND_UNSTABLE; + + const TrackingData::MotionData& motion_data = tracking_data.motion_data(); + int32 num_vectors = motion_data.num_elements(); + + // Compute maximum vector or delta vector value. + float max_vector_value = 0; + if (high_profile) { + for (int k = 2; k < 2 * num_vectors; ++k) { + max_vector_value = std::max( + max_vector_value, + fabs(motion_data.vector_data(k) - motion_data.vector_data(k - 2)) * + 1.02f); // Expand by 2% to account for + // rounding issues. 
+ } + } else { + for (const float vector_value : motion_data.vector_data()) { + max_vector_value = std::max(max_vector_value, fabs(vector_value)); + } + } + + const int32 domain_width = tracking_data.domain_width(); + const int32 domain_height = tracking_data.domain_height(); + CHECK_LT(domain_height, 256) << "Only heights below 256 are supported."; + const float frame_aspect = tracking_data.frame_aspect(); + + // Limit vector value from above (to 20% frame diameter) and below (small + // eps). + const float max_vector_threshold = hypot(domain_width, domain_height) * 0.2f; + // Warn if too much truncation. + if (max_vector_value > max_vector_threshold * 1.5f) { + LOG(WARNING) << "A lot of truncation will occur during encoding. " + << "Vector magnitudes are larger than 20% of the " + << "frame diameter."; + } + + max_vector_value = + std::min(max_vector_threshold, std::max(1e-4f, max_vector_value)); + + // Compute scales for 16bit and 8bit float -> int conversion. + // Use highest bit for sign. + const int kByteMax16 = (1 << 15) - 1; + const int kByteMax8 = (1 << 7) - 1; + + // Scale such that highest vector value is mapped to kByteMax + int scale_16 = std::ceil(kByteMax16 / max_vector_value); + int scale_8 = std::ceil(kByteMax8 / max_vector_value); + + const int32 scale = + options_.high_fidelity_16bit_encode() ? scale_16 : scale_8; + const float inv_scale = 1.0f / scale; + const int kByteMax = + options_.high_fidelity_16bit_encode() ? kByteMax16 : kByteMax8; + + // Compressed flow to be encoded in binary format. + std::vector flow_compressed_16; + std::vector flow_compressed_8; + + flow_compressed_16.reserve(num_vectors); + flow_compressed_8.reserve(num_vectors); + + std::vector row_idx; + row_idx.reserve(num_vectors); + + float average_error = 0; + std::vector col_starts(motion_data.col_starts().begin(), + motion_data.col_starts().end()); + + // Separate both implementations for easier readability. + // For details please refer to description in proto. 
+ // Low profile: + // * Encode vectors by scaling to integer format. + // * Keep sparse matrix format as is + // High profile: + // * Encode deltas between vectors scaling them to integers + // * Re-use encoded vectors if delta is small, use ADVANCE flag in row + // index. + // * Delta encode row indices to reduce magnitude. + // * If two row deltas are small (< 8), encode in one byte + if (!high_profile) { + // Traverse columns. + for (int c = 0; c < col_starts.size() - 1; ++c) { + const int r_start = col_starts[c]; + const int r_end = col_starts[c + 1]; + for (int r = r_start; r < r_end; ++r) { + const float flow_x_32f = motion_data.vector_data(2 * r); + const float flow_y_32f = motion_data.vector_data(2 * r + 1); + + const int flow_x = + std::max(-kByteMax, std::min(kByteMax, flow_x_32f * scale)); + const int flow_y = + std::max(-kByteMax, std::min(kByteMax, flow_y_32f * scale)); + average_error += 0.5f * (fabs(flow_x * inv_scale - flow_x_32f) + + fabs(flow_y * inv_scale - flow_y_32f)); + + if (options_.high_fidelity_16bit_encode()) { + flow_compressed_16.push_back(flow_x); + flow_compressed_16.push_back(flow_y); + } else { + flow_compressed_8.push_back(flow_x); + flow_compressed_8.push_back(flow_y); + } + + DCHECK_LT(motion_data.row_indices(r), 256); + row_idx.push_back(motion_data.row_indices(r)); + } + } + } else { + // Compress flow. + int prev_flow_x = 0; + int prev_flow_y = 0; + const float reuse_threshold = options_.high_profile_reuse_threshold(); + int compressible = 0; + + std::vector compressions_per_column(domain_width, 0); + const int kAdvanceFlag = FlowPackagerOptions::ADVANCE_FLAG; + const int kDoubleIndexEncode = FlowPackagerOptions::DOUBLE_INDEX_ENCODE; + const int kIndexMask = FlowPackagerOptions::INDEX_MASK; + + // Traverse columns. 
+ for (int c = 0; c < motion_data.col_starts().size() - 1; ++c) { + const int r_start = col_starts[c]; + const int r_end = col_starts[c + 1]; + for (int r = r_start; r < r_end; ++r) { + int flow_x = 0; + int flow_y = 0; + bool advance = true; + const float flow_x_32f = motion_data.vector_data(2 * r); + const float flow_y_32f = motion_data.vector_data(2 * r + 1); + + // Delta coding of vectors. + const float diff_x = flow_x_32f - prev_flow_x * inv_scale; + const float diff_y = flow_y_32f - prev_flow_y * inv_scale; + + // Determine if previous flow can be re-used. + if (fabs(diff_x) < reuse_threshold && fabs(diff_y) < reuse_threshold) { + advance = false; + } else { + flow_x = std::max(-kByteMax, std::min(kByteMax, diff_x * scale)); + + flow_y = std::max(-kByteMax, std::min(kByteMax, diff_y * scale)); + + prev_flow_x += flow_x; + prev_flow_y += flow_y; + } + + average_error += 0.5f * (fabs(prev_flow_x * inv_scale - flow_x_32f) + + fabs(prev_flow_y * inv_scale - flow_y_32f)); + + // Combine into one 32 or 16 bit value (clear sign bits for the + // right part before combining). + if (advance) { + if (options_.high_fidelity_16bit_encode()) { + flow_compressed_16.push_back(flow_x); + flow_compressed_16.push_back(flow_y); + } else { + flow_compressed_8.push_back(flow_x); + flow_compressed_8.push_back(flow_y); + } + } + + // Delta code row indices in high profile mode and use two top bits + // for status: + // 10: single row encode, use next vector data. + // (ADVANCE_FLAG) + // + // 11: double row encode: (3 bit + 3 bit = maximum of 7 + 7 row delta), + // use next vector data for each. + // (ADVANCE_FLAG | DOUBLE_INDEX_ENCODE) + // + // 00: single row encode + no advance (re-use previous vector data). + // (no flags set) + // + // 01: double row encode + no advance (re-use previous vector data for + // each). + // (DOUBLE_INDEX_ENCODE) + + // Delta compress. + int delta_row = motion_data.row_indices(r) - + (r == r_start ? 
0 : motion_data.row_indices(r - 1)); + CHECK_GE(delta_row, 0); + + bool combined = false; + if (r > r_start) { + int prev_row_idx = row_idx.back(); + if (!(prev_row_idx & kDoubleIndexEncode) && // Single encode. + (prev_row_idx & kAdvanceFlag) == advance) { // Same advance flag. + // Both compressible (each index fits in 3 bit). + if (delta_row < 8 && (prev_row_idx & kIndexMask) < 8) { + // Encode two deltas into 6 bit. + prev_row_idx = ((prev_row_idx & 0x07) << 3) | delta_row | + kDoubleIndexEncode | (advance ? kAdvanceFlag : 0); + + row_idx.back() = prev_row_idx; + // Record as one compression for this column. + ++compressions_per_column[c]; + ++compressible; + combined = true; + } + } + } + + if (!combined) { + while (delta_row > kIndexMask) { + // Special case of large displacement. Duplicate vector until sum of + // deltas reaches target delta). + row_idx.push_back(kIndexMask | (advance ? kAdvanceFlag : 0)); + delta_row -= kIndexMask; + advance = false; // Store same vector again, re-use previously + // encoded vector data. + + // Record as one addition for the column. + --compressions_per_column[c]; + ++num_vectors; + } + + row_idx.push_back(delta_row | (advance ? kAdvanceFlag : 0)); + } + } + } + + // Count number of advance flags encoded. + int encoded = 0; + for (int idx : row_idx) { + if (idx & kAdvanceFlag) { + encoded += (idx & kDoubleIndexEncode) ? 2 : 1; + } + } + + if (options_.high_fidelity_16bit_encode()) { + CHECK_EQ(2 * encoded, flow_compressed_16.size()); + } else { + CHECK_EQ(2 * encoded, flow_compressed_8.size()); + } + + // Adjust column start by compressions. + int curr_adjust = 0; + for (int k = 0; k < domain_width; ++k) { + curr_adjust -= compressions_per_column[k]; + col_starts[k + 1] += curr_adjust; + CHECK_LE(col_starts[k], col_starts[k + 1]); + } + + CHECK_EQ(row_idx.size(), col_starts.back()); + CHECK_EQ(num_vectors, row_idx.size() + compressible); + } + + // Delta compress col_starts. 
+ std::vector col_start_delta(domain_width + 1, 0); + col_start_delta[0] = col_starts[0]; + for (int k = 1; k < domain_width + 1; ++k) { + const int delta = col_starts[k] - col_starts[k - 1]; + CHECK_LT(delta, 256) << "Only up to 255 items per column supported."; + col_start_delta[k] = delta; + } + + VLOG(1) << "error: " << average_error / (num_vectors + 1) + << " additions: " << num_vectors - motion_data.num_elements(); + const Homography& background_model = tracking_data.background_model(); + + const float scale_x = 1.0f / tracking_data.domain_width(); + const float scale_y = 1.0f / tracking_data.domain_height(); + + Homography homog_scale = HomographyAdapter::Embed( + AffineAdapter::FromArgs(0, 0, scale_x, 0, 0, scale_y)); + + Homography inv_homog_scale = HomographyAdapter::Embed( + AffineAdapter::FromArgs(0, 0, 1.0f / scale_x, 0, 0, 1.0f / scale_y)); + + // Might be just the identity if not set. + const Homography background_model_scaled = + ModelCompose3(homog_scale, background_model, inv_homog_scale); + + std::string background_model_string = + absl::StrCat(EncodeToString(background_model.h_00()), + EncodeToString(background_model.h_01()), + EncodeToString(background_model.h_02()), + EncodeToString(background_model.h_10()), + EncodeToString(background_model.h_11()), + EncodeToString(background_model.h_12()), + EncodeToString(background_model.h_20()), + EncodeToString(background_model.h_21())); + + std::string* data = binary_data->mutable_data(); + data->clear(); + int32 vector_size = options_.high_fidelity_16bit_encode() + ? 
flow_compressed_16.size() + : flow_compressed_8.size(); + int32 row_idx_size = row_idx.size(); + + absl::StrAppend( + data, + absl::StrCat(EncodeToString(frame_flags), EncodeToString(domain_width), + EncodeToString(domain_height), EncodeToString(frame_aspect), + background_model_string, EncodeToString(scale), + EncodeToString(num_vectors), + EncodeVectorToString(col_start_delta), + EncodeToString(row_idx_size), EncodeVectorToString(row_idx), + EncodeToString(vector_size), + (options_.high_fidelity_16bit_encode() + ? EncodeVectorToString(flow_compressed_16) + : EncodeVectorToString(flow_compressed_8)))); + VLOG(1) << "Binary data size: " << data->size() << " for " << num_vectors + << " (" << vector_size << ")"; +} + +std::string PopSubstring(int len, absl::string_view* piece) { + std::string result = std::string(piece->substr(0, len)); + piece->remove_prefix(len); + return result; +} + +void FlowPackager::DecodeTrackingData(const BinaryTrackingData& container_data, + TrackingData* tracking_data) const { + CHECK(tracking_data != nullptr); + + absl::string_view data(container_data.data()); + int32 frame_flags = 0; + int32 domain_width = 0; + int32 domain_height = 0; + std::vector background_model; + int32 scale = 0; + int32 num_vectors = 0; + float frame_aspect = 0.0f; + + DecodeFromStringView(PopSubstring(4, &data), &frame_flags); + DecodeFromStringView(PopSubstring(4, &data), &domain_width); + DecodeFromStringView(PopSubstring(4, &data), &domain_height); + DecodeFromStringView(PopSubstring(4, &data), &frame_aspect); + + CHECK_LE(domain_width, 256); + CHECK_LE(domain_height, 256); + + DecodeVectorFromStringView( + PopSubstring(4 * HomographyAdapter::NumParameters(), &data), + &background_model); + DecodeFromStringView(PopSubstring(4, &data), &scale); + DecodeFromStringView(PopSubstring(4, &data), &num_vectors); + + tracking_data->set_frame_flags(frame_flags); + tracking_data->set_domain_width(domain_width); + tracking_data->set_domain_height(domain_height); + 
tracking_data->set_frame_aspect(frame_aspect); + *tracking_data->mutable_background_model() = + HomographyAdapter::FromFloatPointer(&background_model[0], false); + + TrackingData::MotionData* motion_data = tracking_data->mutable_motion_data(); + motion_data->set_num_elements(num_vectors); + + const bool high_profile = frame_flags & TrackingData::FLAG_PROFILE_HIGH; + const bool high_fidelity = + frame_flags & TrackingData::FLAG_HIGH_FIDELITY_VECTORS; + const float flow_denom = 1.0f / scale; + + std::vector col_starts_delta; + DecodeVectorFromStringView(PopSubstring(domain_width + 1, &data), + &col_starts_delta); + + // Delta decompress. + std::vector col_starts; + col_starts.reserve(domain_width + 1); + + int column = 0; + for (auto col : col_starts_delta) { + column += col; + col_starts.push_back(column); + } + + std::vector row_idx; + int32 row_idx_size; + DecodeFromStringView(PopSubstring(4, &data), &row_idx_size); + + // Should not have more row indices than vectors. (One for each in baseline + // profile, less in high profile). + CHECK_LE(row_idx_size, num_vectors); + DecodeVectorFromStringView(PopSubstring(row_idx_size, &data), &row_idx); + + // Records for each vector whether to advance pointer in the vector data array + // or re-use previously read data. + std::vector advance(num_vectors, true); + + if (high_profile) { + // Unpack row indices, populate advance. + const int kAdvanceFlag = FlowPackagerOptions::ADVANCE_FLAG; + const int kDoubleIndexEncode = FlowPackagerOptions::DOUBLE_INDEX_ENCODE; + const int kIndexMask = FlowPackagerOptions::INDEX_MASK; + + std::vector column_expansions(domain_width, 0); + std::vector row_idx_unpacked; + row_idx_unpacked.reserve(num_vectors); + advance.clear(); + + for (int c = 0; c < col_starts.size() - 1; ++c) { + const int r_start = col_starts[c]; + const int r_end = col_starts[c + 1]; + uint8 prev_row_idx = 0; + for (int r = r_start; r < r_end; ++r) { + // Use top bit as indicator to advance. 
+ advance.push_back(row_idx[r] & kAdvanceFlag); + + // Double encode? + if (row_idx[r] & kDoubleIndexEncode) { + // Indices are encoded as each 3 bit offset within kIndexMask. + prev_row_idx += (row_idx[r] >> 3) & 0x7; + row_idx_unpacked.push_back(prev_row_idx); + prev_row_idx += row_idx[r] & 0x7; + row_idx_unpacked.push_back(prev_row_idx); + + // Duplicate advance setting. + advance.push_back(advance.back()); + ++column_expansions[c]; + } else { + // Single encode. + prev_row_idx += row_idx[r] & kIndexMask; // Clear status. + row_idx_unpacked.push_back(prev_row_idx); + } + } + } + row_idx.swap(row_idx_unpacked); + CHECK_EQ(num_vectors, row_idx.size()); + + // Adjust column start by expansions. + int curr_adjust = 0; + for (int k = 0; k < domain_width; ++k) { + curr_adjust += column_expansions[k]; + col_starts[k + 1] += curr_adjust; + } + } + + CHECK_EQ(num_vectors, col_starts.back()); + + int vector_data_size; + DecodeFromStringView(PopSubstring(4, &data), &vector_data_size); + + int prev_flow_x = 0; + int prev_flow_y = 0; + if (high_fidelity) { + std::vector vector_data; + DecodeVectorFromStringView( + PopSubstring(sizeof(vector_data[0]) * vector_data_size, &data), + &vector_data); + int counter = 0; + for (int k = 0; k < num_vectors; ++k) { + if (advance[k]) { // Read new vector data. + int flow_x = vector_data[counter++]; + int flow_y = vector_data[counter++]; + + if (high_profile) { // Delta decode in high profile. + flow_x += prev_flow_x; + flow_y += prev_flow_y; + prev_flow_x = flow_x; + prev_flow_y = flow_y; + } + + motion_data->add_vector_data(flow_x * flow_denom); + motion_data->add_vector_data(flow_y * flow_denom); + } else { // Re-use previous vector data. 
+ motion_data->add_vector_data(prev_flow_x * flow_denom); + motion_data->add_vector_data(prev_flow_y * flow_denom); + } + } + CHECK_EQ(vector_data_size, counter); + } else { + std::vector vector_data; + DecodeVectorFromStringView( + PopSubstring(sizeof(vector_data[0]) * vector_data_size, &data), + &vector_data); + int counter = 0; + for (int k = 0; k < num_vectors; ++k) { + if (advance[k]) { // Read new vector data. + int flow_x = vector_data[counter++]; + int flow_y = vector_data[counter++]; + + if (high_profile) { // Delta decode in high profile. + flow_x += prev_flow_x; + flow_y += prev_flow_y; + prev_flow_x = flow_x; + prev_flow_y = flow_y; + } + + motion_data->add_vector_data(flow_x * flow_denom); + motion_data->add_vector_data(flow_y * flow_denom); + } else { // Re-use previous vector data. + motion_data->add_vector_data(prev_flow_x * flow_denom); + motion_data->add_vector_data(prev_flow_y * flow_denom); + } + } + CHECK_EQ(vector_data_size, counter); + } + + for (auto idx : row_idx) { + motion_data->add_row_indices(idx); + } + + for (auto column : col_starts) { + motion_data->add_col_starts(column); + } +} + +void FlowPackager::BinaryTrackingDataToContainer( + const BinaryTrackingData& binary_data, TrackingContainer* container) const { + CHECK(container != nullptr); + container->Clear(); + container->set_header("TRAK"); + container->set_version(1); + container->set_size(binary_data.data().size()); + *container->mutable_data() = binary_data.data(); +} + +void FlowPackager::BinaryTrackingDataFromContainer( + const TrackingContainer& container, BinaryTrackingData* binary_data) const { + CHECK_EQ("TRAK", container.header()); + CHECK_EQ(1, container.version()) << "Unsupported version."; + *binary_data->mutable_data() = container.data(); +} + +void FlowPackager::DecodeMetaData(const TrackingContainer& container_data, + MetaData* meta_data) const { + CHECK(meta_data != nullptr); + + CHECK_EQ("META", container_data.header()); + CHECK_EQ(1, container_data.version()) 
<< "Unsupported version."; + + absl::string_view data(container_data.data()); + + int32 num_frames; + DecodeFromStringView(PopSubstring(4, &data), &num_frames); + meta_data->set_num_frames(num_frames); + + for (int k = 0; k < num_frames; ++k) { + int32 msec; + int32 stream_offset; + + DecodeFromStringView(PopSubstring(4, &data), &msec); + DecodeFromStringView(PopSubstring(4, &data), &stream_offset); + + MetaData::TrackOffset* track_offset = meta_data->add_track_offsets(); + track_offset->set_msec(msec); + track_offset->set_stream_offset(stream_offset); + } +} + +void FlowPackager::FinalizeTrackingContainerFormat( + std::vector* timestamps, + TrackingContainerFormat* container_format) { + CHECK(container_format != nullptr); + + // Compute binary sizes of track_data. + const int num_frames = container_format->track_data_size(); + + std::vector msecs(num_frames, 0); + if (timestamps) { + CHECK_EQ(num_frames, timestamps->size()); + msecs = *timestamps; + } + std::vector sizes(num_frames, 0); + + for (int f = 0; f < num_frames; ++f) { + // Default size of container: 12 bytes + binary data size (see comment for + // TrackingContainer in flow_packager.proto). + sizes[f] = container_format->track_data(f).data().size() + 12; + } + + // Store relative offsets w.r.t. end of MetaData. + MetaData meta_data; + InitializeMetaData(num_frames, msecs, sizes, &meta_data); + + // Serialize metadata to binary. + TrackingContainer* meta = container_format->mutable_meta_data(); + meta->Clear(); + meta->set_header("META"); + + std::string* binary_metadata = meta->mutable_data(); + absl::StrAppend(binary_metadata, EncodeToString(meta_data.num_frames())); + for (auto& track_offset : *meta_data.mutable_track_offsets()) { + absl::StrAppend(binary_metadata, + absl::StrCat(EncodeToString(track_offset.msec()), + EncodeToString(track_offset.stream_offset()))); + } + + meta->set_size(binary_metadata->size()); + + // Add term header. 
+ TrackingContainer* term = container_format->mutable_term_data(); + term->set_header("TERM"); + term->set_size(0); +} + +void FlowPackager::FinalizeTrackingContainerProto( + std::vector* timestamps, TrackingContainerProto* proto) { + CHECK(proto != nullptr); + + // Compute binary sizes of track_data. + const int num_frames = proto->track_data_size(); + + std::vector msecs(num_frames, 0); + if (timestamps) { + CHECK_EQ(num_frames, timestamps->size()); + msecs = *timestamps; + } + + std::vector sizes(num_frames, 0); + + TrackingContainerProto temp_proto; + BinaryTrackingData* temp_track_data = temp_proto.add_track_data(); + for (int f = 0; f < num_frames; ++f) { + // Swap current track data in and out of temp_track_data to determine total + // encoding size with proto preamble. + proto->mutable_track_data(f)->Swap(temp_track_data); + sizes[f] = temp_proto.ByteSize(); + proto->mutable_track_data(f)->Swap(temp_track_data); + } + + proto->clear_meta_data(); + InitializeMetaData(num_frames, msecs, sizes, proto->mutable_meta_data()); +} + +void FlowPackager::InitializeMetaData(int num_frames, + const std::vector& msecs, + const std::vector& data_sizes, + MetaData* meta_data) const { + meta_data->set_num_frames(num_frames); + CHECK_EQ(num_frames, msecs.size()); + CHECK_EQ(num_frames, data_sizes.size()); + + int curr_offset = 0; + for (int f = 0; f < num_frames; ++f) { + MetaData::TrackOffset* track_offset = meta_data->add_track_offsets(); + track_offset->set_msec(msecs[f]); + track_offset->set_stream_offset(curr_offset); + curr_offset += data_sizes[f]; + } +} + +void FlowPackager::AddContainerToString(const TrackingContainer& container, + std::string* binary_data) { + CHECK(binary_data != nullptr); + std::string header_string(container.header()); + CHECK_EQ(4, header_string.size()); + + std::vector header{header_string[0], header_string[1], header_string[2], + header_string[3]}; + absl::StrAppend(binary_data, EncodeVectorToString(header), + 
EncodeToString(container.version()), + EncodeToString(container.size()), container.data()); +} + +std::string FlowPackager::SplitContainerFromString( + absl::string_view* binary_data, TrackingContainer* container) { + CHECK(binary_data != nullptr); + CHECK(container != nullptr); + CHECK_GE(binary_data->size(), 12) << "Data does not contain " + << "valid container"; + + container->set_header(PopSubstring(4, binary_data)); + + int version; + DecodeFromStringView(PopSubstring(4, binary_data), &version); + + int size; + DecodeFromStringView(PopSubstring(4, binary_data), &size); + + container->set_version(version); + container->set_size(size); + + if (size > 0) { + container->set_data(PopSubstring(size, binary_data)); + } + + return container->header(); +} + +void FlowPackager::TrackingContainerFormatToBinary( + const TrackingContainerFormat& container_format, std::string* binary) { + CHECK(binary != nullptr); + binary->clear(); + + AddContainerToString(container_format.meta_data(), binary); + for (const auto& track_data : container_format.track_data()) { + AddContainerToString(track_data, binary); + } + + AddContainerToString(container_format.term_data(), binary); +} + +void FlowPackager::TrackingContainerFormatFromBinary( + const std::string& binary, TrackingContainerFormat* container_format) { + CHECK(container_format != nullptr); + container_format->Clear(); + + absl::string_view data(binary); + + CHECK_EQ("META", SplitContainerFromString( + &data, container_format->mutable_meta_data())); + MetaData meta_data; + DecodeMetaData(container_format->meta_data(), &meta_data); + + for (int f = 0; f < meta_data.num_frames(); ++f) { + TrackingContainer* container = container_format->add_track_data(); + CHECK_EQ("TRAK", SplitContainerFromString(&data, container)); + } + + CHECK_EQ("TERM", SplitContainerFromString( + &data, container_format->mutable_term_data())); +} + +void FlowPackager::SortRegionFlowFeatureList( + float scale_x, float scale_y, RegionFlowFeatureList* 
feature_list) const { + CHECK(feature_list != nullptr); + // Sort features lexicographically. + std::sort(feature_list->mutable_feature()->begin(), + feature_list->mutable_feature()->end(), + IntegerColumnComparator(scale_x, scale_y, options_.domain_width(), + options_.domain_height())); +} + +bool FlowPackager::CompatibleForEncodeWithoutDuplication( + const TrackingData& tracking_data) const { + const TrackingData::MotionData& motion_data = tracking_data.motion_data(); + for (int c = 0; c < motion_data.col_starts_size() - 1; ++c) { + const int r_start = motion_data.col_starts(c); + const int r_end = motion_data.col_starts(c + 1); + for (int r = r_start; r < r_end; ++r) { + if (motion_data.row_indices(r) - + (r == r_start ? 0 : motion_data.row_indices(r - 1)) >= + 64) { + return false; + } + } + } + return true; +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/flow_packager.h b/mediapipe/util/tracking/flow_packager.h new file mode 100644 index 000000000..03b67facf --- /dev/null +++ b/mediapipe/util/tracking/flow_packager.h @@ -0,0 +1,168 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_ +#define MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/util/tracking/flow_packager.pb.h" +#include "mediapipe/util/tracking/motion_estimation.pb.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +// Usage (output): +// FlowPackager flow_packager((FlowPackagerOptions())); +// +// // Input: Feature lists and optional camera motion +// vector input_features; // Externally supplied. +// vector input_motions; +// +// const int num_frames = input_features.size(); +// +// // Can encode to either TrackingContainerFormat or use protos. +// TrackingContainerFormat container; +// TrackingContainerProto proto; +// +// for (int f = 0; f < num_frames; ++f) { +// // Obtain tracking data. +// TrackingData track_data; +// flow_packager.PackFlow(input_features[f], +// input_motions[f], +// &track_data); +// +// // Encode tracking data. +// BinaryTrackingData binary_data; +// flow_packager.EncodeTrackingData(track_data, +// &binary_data); +// +// // Add to either container format or proto. +// // Container: +// TrackingContainer* track_data_encoded = container.add_track_data(); +// flow_packager.BinaryTrackingDataToContainer(binary_data, +// track_data_encoded); +// // Proto: +// proto.add_track_data()->CopyFrom(binary_data); +// } +// +// // Write meta and term containers. +// flow_packager.FinalizeTrackingContainerFormat(&container); +// flow_packager.FinalizeTrackingProto(&proto); +// +// // Convert to binary std::string to stream out. 
+// std::string output; +// flow_packager.TrackingContainerFormatToBinary(container, &output); +// // OR: +// proto.SerializeToString(&output); + +// Usage (input): +// std::string input; +// FlowPackager flow_packager((FlowPackagerOptions())); +// TrackingContainerFormat container; +// flow_packager.TrackingContainerFormatFromBinary(input, &container); +// +// // Obtain Track data. +// vector tracking_data; // To be used by tracking. +// for (const auto& track_data_encoded : container.track_data()) { +// tracking_data.push_back(TrackingData()); +// flow_packager.DecodeTrackingData(track_data_encoded, +// &tracking_data.back()); +// } +// +// // Use tracking_data with Tracker. + +class CameraMotion; +class RegionFlowFeatureList; + +class FlowPackager { + public: + explicit FlowPackager(const FlowPackagerOptions& options); + FlowPackager(const FlowPackager&) = delete; + FlowPackager& operator=(const FlowPackager&) = delete; + + void PackFlow(const RegionFlowFeatureList& feature_list, + const CameraMotion* camera_motion, // optional. + TrackingData* tracking_data) const; + + // Converts TrackingData to condensed binary representation. + void EncodeTrackingData(const TrackingData& tracking_data, + BinaryTrackingData* binary_data) const; + + void DecodeTrackingData(const BinaryTrackingData& data, + TrackingData* tracking_data) const; + + void BinaryTrackingDataToContainer(const BinaryTrackingData& binary_data, + TrackingContainer* container) const; + + void BinaryTrackingDataFromContainer(const TrackingContainer& container, + BinaryTrackingData* binary_data) const; + + void DecodeMetaData(const TrackingContainer& data, MetaData* meta_ata) const; + + // Fills in meta (first container) and termination data (last container). + // Optionally, pass timestamps for each frame. + void FinalizeTrackingContainerFormat( + std::vector* timestamps, // optional, can be null. 
+ TrackingContainerFormat* container_fromat); + void FinalizeTrackingContainerProto( + std::vector* timestamps, // optional, can be null. + TrackingContainerProto* proto); + + // Fast encode to binary representation. + void TrackingContainerFormatToBinary( + const TrackingContainerFormat& container_format, std::string* binary); + + // Fast decode from binary representation. + void TrackingContainerFormatFromBinary( + const std::string& binary, TrackingContainerFormat* container_format); + + // Checks whether tracking data can be encoded in high profile mode without + // duplicating any features. This occurs if the horizonal distance between two + // features is less than 64. + bool CompatibleForEncodeWithoutDuplication( + const TrackingData& tracking_data) const; + + // Helper function for test. Sorts according to scaled, integer based + // lexicographical ordering. + // Declared public for access by test. + void SortRegionFlowFeatureList(float scale_x, float scale_y, + RegionFlowFeatureList* feature_list) const; + + // Removes binary encoded container from std::string and parses it to + // container. Returns header std::string of the parsed container. Useful for + // random seek. + std::string SplitContainerFromString(absl::string_view* binary_data, + TrackingContainer* container); + + private: + // Sets meta data for a set + void InitializeMetaData(int num_frames, const std::vector& msecs, + const std::vector& data_sizes, + MetaData* meta_data) const; + + // Serializes container to binary std::string and adds it to binary_data. 
+ void AddContainerToString(const TrackingContainer& container, + std::string* binary_data); + + private: + FlowPackagerOptions options_; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_FLOW_PACKAGER_H_ diff --git a/mediapipe/util/tracking/flow_packager.proto b/mediapipe/util/tracking/flow_packager.proto new file mode 100644 index 000000000..c4f4797a9 --- /dev/null +++ b/mediapipe/util/tracking/flow_packager.proto @@ -0,0 +1,319 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/motion_models.proto"; +import "mediapipe/util/tracking/region_flow.proto"; + +// Messages encapsulating compressed and uncompressed TrackingData. +// +// Uncompressed tracking data can be aggregated via an TrackingDataChunk +// (e.g. to be cached to file per chunk). The whole chunk persists in memory +// after reading. +// +// Compressed tracking data can be aggregated as binary encoded TrackingData +// messages into two container formats (with support for random seeking): +// 1) TrackingContainerProto: +// Encoding using proto buffer wire format, using default proto +// serialization and de-serialization to binary string. +// The container uses the MetaData message to store the stream offsets and +// milliseconds for each frame of TrackingData. 
TrackingData itself is custom +// encoded to binary using FlowPackager::EncodeTrackingData and the resulting +// binary blob wrapped in a BinaryTrackingData message. +// 2) TrackingContainerFormat: +// Encoding without any dependencies to protobuffers, for clients without +// proto buffer support. +// Encoding is based on encoding binary blobs of data wrapped into repeated +// containers. The layout of a container is described by the message +// TrackingContainer and serialized to binary data as described below +// (without using proto encoding). Therefore, message TrackingContainer is +// mostly for documentation purposes than for direct use. +// The format is described by the proto message TrackingContainerFormat (used +// internally by FlowPackager) however serialization and de-serialization +// to binary string is performed using custom methods supplied by +// FlowPackager (TrackingContainerFormatToBinary and +// TrackingContainerFormatFromBinary). +// The format stores the MetaData first as above, although using custom +// encoding. TrackingData is encoded to binary as above using +// FlowPackager::EncodeTrackingData and the resulting binary blob is storred +// within a TrackingContainer. + +// Next flag: 9 +message TrackingData { + enum FrameFlags { + FLAG_PROFILE_BASELINE = 0; + FLAG_PROFILE_HIGH = 1; + FLAG_HIGH_FIDELITY_VECTORS = 2; + FLAG_BACKGROUND_UNSTABLE = 4; // Background model could not be estimated. + FLAG_DUPLICATED = 8; // Frame is duplicated, i.e. identical to + // previous one. + // Indicates the beginning of a new chunk. In this case the track_id's + // are not compatible w.r.t. previous one. + FLAG_CHUNK_BOUNDARY = 16; + } + + optional int32 frame_flags = 1 [default = 0]; + + // Tracking data is resolution independent specified w.r.t. + // specified domain. + optional int32 domain_width = 2; + optional int32 domain_height = 3; + + // Aspect ratio (w/h) of the original frame tracking data was computed from. 
+ optional float frame_aspect = 6 [default = 1.0]; + + optional Homography background_model = 4; + + // Stores num_elements vectors of motion data. (x,y) position encoded via + // row_indices and col_starts, as compressed sparse column matrix storage + // format: + // (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29), + // Vector data is stored as (dx, dy) position. Optionally we store the fitting + // error and track id for each feature. + message MotionData { + optional int32 num_elements = 1; + + // #num_elements pairs (flow_x, flow_y) densely packed. + repeated float vector_data = 2 [packed = true]; + + // Stores corresponding track index for each feature. Features belonging + // to the same track over time are assigned the same id. + // NOTE: Due to size, tracking ids are never stored as compressed binary + // tracking data. + repeated int32 track_id = 3 [packed = true]; + + // # num_elements row indices. + repeated int32 row_indices = 4 [packed = true]; + + // Start index in above array for each column (#domain_width + 1 entries). + repeated int32 col_starts = 5 [packed = true]; + + // Feature descriptors for num_elements feature points. + repeated BinaryFeatureDescriptor feature_descriptors = 6; + } + + optional MotionData motion_data = 5; + + // Total number of features in our analysis + optional uint32 global_feature_count = 7; + + // Average of all motion vector magnitudes (without accounting for any motion + // model), within 10th to 90th percentile (to remove outliers). + optional float average_motion_magnitude = 8; +} + +message TrackingDataChunk { + message Item { + optional TrackingData tracking_data = 1; + // Global frame index. + optional int32 frame_idx = 2; + // Corresponding timestamp. + optional int64 timestamp_usec = 3; + // Previous frame timestamp. + optional int64 prev_timestamp_usec = 4; + } + + repeated Item item = 1; + + // Set as marker for last chunk. 
+ optional bool last_chunk = 2 [default = false]; + + // Set as marker for first chunk. + optional bool first_chunk = 3 [default = false]; +} + +// TrackingData in compressed binary format. Obtainable via +// FlowPackager::EncodeTrackingData. Details of binary encode are below. +message BinaryTrackingData { // TrackingContainer::header = "TRAK" + optional bytes data = 1; +} + +// Detailed explanation of binary Tracking data encode (LITTLE ENDIAN encode!) +// TrackingData is stored in binary as a struct of the above fields and the +// compressed motion data in sparse column matrix storage format. +// (https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29) +// Specifically, TrackingData is encoded as: +// { frame_flags : 32 bit int (from member) +// domain_width : 32 bit int (from member) +// domain_height : 32 bit int (from member) +// frame_aspect : 32 bit float (from member) +// +// background_model : 6 * 32 bit float (dx, dy, a, b, c, d of AffineModel) +// scale : 32 bit float (scale vectors are multiplied with) +// num_vectors : 32 bit int (from member num_elements) +// +// col_start_delta : (domain_width + 1) * 8 bit uint (col starts delta +// encoded) +// row_idx_size : 32 bit int (size of row_idx array <= num_vectors) +// row_idx : row_idx_size * 8 bit uint +// vector_size : 32 bit int (size of vector_data) +// vector_data : vector_size * [8 bit | 16 bit] int +// (depending on FLAG_HIGH_FIDELITY_VECTORS) +// } +// +// >> Baseline encode << +// Scale is determined such that maximum vector value (maximum across x and y) +// is mapped to highest 8 bit or 16 bit SIGNED int +// (i.e. 7 or 15 bit resolution respectively). +// Vector values are multiplied by this scale (storring float in int with +// truncation) and (dx, dy) is packed as [dy | dx] into a 16bit or 32 bit word. +// Unpacking requires therefore dividing the vector values by scale. 
+//
+// Column starts are delta compressed, that is, col_start_delta[i] stores
+// col_starts(i) - col_starts(i - 1) from MotionData.
+//
+// Row indices are directly stored as 8 bit uints, that is row_idx_size ==
+// num_vectors in this case.
+//
+//
+// >> High profile encode <<
+// Scale is determined as above but for maximum vector deltas (maximum across x
+// and y of magnitude in difference between two adjacent vectors). Vector value
+// deltas are multiplied by this scale before encoding.
+//
+// Encoding is more complex compared to baseline. Instead of vector value, delta
+// vector values (difference in dx = ddx, difference in dy = ddy)
+// are multiplied by scale and stored packed as [ddy | ddx] into a 16bit or
+// 32bit word. Compression algorithm accounts for error accumulation, so
+// unpacking should first add deltas in integer domain (for x and y separately)
+// and then divide by scale to yield (an approximation of) the
+// original vector value.
+// Most importantly, not every vector value is stored, but only if the delta is
+// above the FlowPackagerOptions::high_profile_reuse_threshold, in which case we
+// advance to the next vector data. Otherwise the previous vector is used.
+//
+// The information whether to advance is stored for each vector in the
+// highest bit of the row index (FlowPackagerOptions::ADVANCE_FLAG). Row
+// indices are not stored as in the baseline profile directly, but as deltas
+// (reset at the beginning of every column). As deltas are small it is often
+// possible to store two deltas (if both are < 8) in a single byte. This is
+// indicated by the second highest flag in the row index
+// (FlowPackagerOptions::DOUBLE_INDEX_ENCODE). If set, row index stores
+// [row_delta_1 | row_delta_2] in the lower 6 bit. Note that the advance flag
+// applies uniformly to both deltas in this case.
+// Sidenote (edge case): Due to the use of the top 2 bits as flags,
+// at times we cannot store the full row delta in the lower 6 bits.
+// In this case the vector is duplicated (using the ADVANCE_FLAG)
+// until the delta sum of duplicated vectors reaches the original delta.
+// Consequently, the compressed vector field in high profile may contain a few
+// vectors more than the original.
+//
+// Column starts are delta compressed as in baseline, but account for double
+// index encodes. Therefore each column delta is reduced by the number of double
+// index encodes occurring for this column. This has to be replicated on the
+// decoding side; each delta needs to be increased by the number of double index
+// encodes encountered during encoding.
+
+// Stores offsets for random seek and time offsets for each frame of
+// TrackingData. Stream offsets are specified relative w.r.t. end of metadata
+// blob.
+// Offsets specify start of the corresponding binary encoded TrackingContainer
+// (for TrackingContainerFormat) or BinaryTrackingData proto (for
+// TrackingContainerProto).
+message MetaData {  // TrackingContainer::header = "META"
+  optional fixed32 num_frames = 2;
+
+  message TrackOffset {
+    optional fixed32 msec = 1;  // Time offset of the metadata in msec.
+    optional fixed32 stream_offset = 2;  // Offset of TrackingContainer or
+                                         // respectively BinaryTrackingData
+                                         // in stream.
+                                         // Specified w.r.t. end of the Metadata.
+  }
+
+  repeated TrackOffset track_offsets = 3;
+}
+
+// TrackingContainer is a self-describing container format to store arbitrary
+// chunks of binary data. Each container is typed via its 4 character header,
+// versioned via an int, and followed by the size of the binary data and the
+// actual data. Designed for clients without availability of protobuffer
+// support.
+// Note: This message is mainly used for documentation purposes and uses custom
+// encoding as specified by FlowPackager::TrackingContainerFormatToBinary.
+// Default binary size of a TrackingContainer (DO NOT CHANGE!):
+// header: 4 byte +
+// version: 4 byte +
+// size: 4 byte +
+// data #size
+// SUM: 12 + #size.
+message TrackingContainer {
+  optional string header = 1;  // 4 character header.
+  optional fixed32 version = 2 [default = 1];  // Version information.
+  optional fixed32 size = 3;  // Size of binary data held by container
+  optional bytes data = 4;  // Binary data encoded.
+
+  // DO NOT alter layout of TrackingContainer.
+  // Use version to extend or alter encoded binary data.
+}
+
+// Container format for clients without proto support (written via
+// FlowPackager::TrackingContainerFormatToBinary and read via
+// FlowPackager::TrackingContainerFormatFromBinary).
+// Proto here is intermediate format for documentation and internal use.
+// Stores multiple TrackingContainers of different types.
+// Meta data is stored first, to facilitate random seek (via stream offset
+// positions) to arbitrary binary TrackingData. Termination container signals end
+// of stream.
+message TrackingContainerFormat {
+  optional TrackingContainer meta_data = 1;  // Wraps binary meta data, via
+                                             // custom encode.
+  repeated TrackingContainer track_data = 2;  // Wraps BinaryTrackingData.
+
+  // Add new TrackingContainers above before end of stream indicator.
+  // Zero sized termination container with TrackingContainer::header = "TERM".
+  optional TrackingContainer term_data = 3;
+}
+
+// Simplified proto format of above TrackingContainerFormat. Instead of using
+// self-describing TrackingContainer's, we simply use the proto wire format for
+// encoding and decoding (proto format is typed and versioned via ids).
+message TrackingContainerProto {
+  optional MetaData meta_data = 1;
+  repeated BinaryTrackingData track_data = 2;
+}
+
+// Options controlling compression and encoding.
+message FlowPackagerOptions {
+  // Tracking data is resolution independent specified w.r.t.
+  // specified domain. Only values <= 256 are supported if binary tracking data
+  // is requested to be supported (see below).
+  optional int32 domain_width = 1 [default = 256];
+  optional int32 domain_height = 2 [default = 192];
+
+  // Needs to be set for calls to FlowPackager::EncodeTrackingData. If encoding
+  // is not required, can be set to false in which case a higher domain_width
+  // can be used.
+  optional bool binary_tracking_data_support = 6 [default = true];
+
+  optional bool use_high_profile = 3 [default = false];
+
+  // If set uses 16 bit encode for vector data, in BinaryTrackingData,
+  // otherwise only 8 bits are used.
+  optional bool high_fidelity_16bit_encode = 4 [default = true];
+
+  // In high profile encode, re-use previously encoded vector when absolute
+  // difference to current vector is below threshold.
+  optional float high_profile_reuse_threshold = 5 [default = 0.5];
+
+  // High profile encoding flags.
+  enum HighProfileEncoding {
+    ADVANCE_FLAG = 0x80;
+    DOUBLE_INDEX_ENCODE = 0x40;
+    INDEX_MASK = 0x3F;
+  }
+}
diff --git a/mediapipe/util/tracking/frame_selection.proto b/mediapipe/util/tracking/frame_selection.proto
new file mode 100644
index 000000000..35712c20d
--- /dev/null
+++ b/mediapipe/util/tracking/frame_selection.proto
@@ -0,0 +1,93 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/camera_motion.proto"; +import "mediapipe/util/tracking/frame_selection_solution_evaluator.proto"; +import "mediapipe/util/tracking/region_flow.proto"; + +// Stores selected timestamps and corresponding frame index. +message FrameSelectionTimestamp { + optional int64 timestamp = 1; // Timestamp of the selected frame. + + // Frame index of the selected frame in the initial video stream. If this + // timestamp was manufactured, this will be the index of the initial frame. + optional int32 frame_idx = 2; + + // If this timestamp was manufactured, the timestamp of the original frame. + optional int64 processed_from_timestamp = 3 [default = -1]; +} + +// Stores the result of the frame selection, with composited features. +// Next index: 6 +message FrameSelectionResult { + optional int64 timestamp = 1; // Timestamp of the selected frame. + + // Frame index of the selected frame in the initial video stream. If this + // timestamp was manufactured, this will be the index of the initial frame. + optional int32 frame_idx = 2; + + // CameraMotion from selected item to previous selected item. + optional CameraMotion camera_motion = 3; + + // Features from selected item to previous selected item. + optional RegionFlowFeatureList features = 4; + + // If this FrameSelectionResult was the result of processing a previous one, + // the timestamp of the original frame. + optional int64 processed_from_timestamp = 5 [default = -1]; +} + +// Next index: 7 +message FrameSelectionCriterion { + // Interval at which frames should be sampled; set to zero if sampling should + // not be enforced (i.e. selection is performed w.r.t. other criteria). + optional int32 sampling_rate = 1 [default = 0]; + + // Bandwidth used during dynamic programming. The larger the bandwidth the + // more accurate the result w.r.t. the specified sampling rate. 
Smaller + // bandwidth's bias the solution suboptimally to center around the mean + // frame numbers of the sampling rate. + // If in (0, 1), assumed to specify fraction of total number of input frames, + // otherwise must be an integer. + optional float bandwidth_frames = 2 [default = 50]; + + // Search radius for dynamic programming (how many frames you are allowed to + // search around the previous frame). + optional int32 search_radius_frames = 3 [default = 1]; + + // Allows one to specify custom solution selection criteria (i.e. different + // way to choose the best row of the computed cost matrix). + optional FrameSelectionSolutionEvaluatorType solution_evaluator = 4; + + // Outputs a fixed number of frames and automatically sets the appropriate + // sampling rate. Set to 0 by default (i.e. not enabled). + optional int32 max_output_frames = 5 [default = 0]; +} + +// Options for computing frame selection. +// TODO: Support multiple criteria if required. Currently uses only the +// first one. +message FrameSelectionOptions { + repeated FrameSelectionCriterion criterion = 1; + + // FrameSelection buffers incoming CameraMotions for specified chunk size + // and creates cost matrices upon reaching the limit. + // TODO: Implement if necessary (currently nothing is cleared upon + // reaching the limit). + optional int32 chunk_size = 2 [default = 100]; +} diff --git a/mediapipe/util/tracking/frame_selection_solution_evaluator.proto b/mediapipe/util/tracking/frame_selection_solution_evaluator.proto new file mode 100644 index 000000000..b2b92c995 --- /dev/null +++ b/mediapipe/util/tracking/frame_selection_solution_evaluator.proto @@ -0,0 +1,27 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +message FrameSelectionSolutionEvaluatorOptions { + extensions 20000 to max; +} + +message FrameSelectionSolutionEvaluatorType { + // Class of type FrameSelectionSolution that computes the best row. + optional string class_name = 1 [default = "FrameSelectionSolutionEvaluator"]; + optional FrameSelectionSolutionEvaluatorOptions options = 2; +} diff --git a/mediapipe/util/tracking/image_util.cc b/mediapipe/util/tracking/image_util.cc new file mode 100644 index 000000000..cafdef079 --- /dev/null +++ b/mediapipe/util/tracking/image_util.cc @@ -0,0 +1,116 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/util/tracking/image_util.h" + +#include +#include + +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/region_flow.h" + +namespace mediapipe { + +// Returns median of the L1 color distance between img_1 and img_2 +float FrameDifferenceMedian(const cv::Mat& img_1, const cv::Mat& img_2) { + CHECK(img_1.size() == img_2.size()); + CHECK_EQ(img_1.channels(), img_2.channels()); + + std::vector color_diffs; + color_diffs.reserve(img_1.cols * img_1.rows); + const int channels = img_1.channels(); + for (int j = 0; j < img_1.rows; ++j) { + const uint8* src_1 = img_1.ptr(j); + const uint8* src_2 = img_2.ptr(j); + const int end_i = channels * img_1.cols; + const float inverse = 1.0f / channels; + for (int i = 0; i < end_i;) { + float color_diff = 0.0f; + for (int k = 0; k < channels; ++k, ++i) { + color_diff += + std::abs(static_cast(src_1[i]) - static_cast(src_2[i])); + } + color_diffs.push_back(color_diff * inverse); + } + } + + auto median = color_diffs.begin() + color_diffs.size() / 2; + std::nth_element(color_diffs.begin(), median, color_diffs.end()); + return *median; +} + +void JetColoring(int steps, std::vector* color_map) { + CHECK(color_map != nullptr); + color_map->resize(steps); + for (int i = 0; i < steps; ++i) { + const float frac = 2.0f * (i * (1.0f / steps) - 0.5f); + if (frac < -0.8f) { + (*color_map)[i] = Vector3_f(0, 0, 0.6f + (frac + 1.0) * 2.0f) * 255.0f; + } else if (frac >= -0.8f && frac < -0.25f) { + (*color_map)[i] = Vector3_f(0, (frac + 0.8f) * 1.82f, 1.0f) * 255.0f; + } else if (frac >= -0.25f && frac < 0.25f) { + (*color_map)[i] = + Vector3_f((frac + 0.25f) * 2.0f, 1.0f, 1.0 + (frac + 0.25f) * -2.0f) * + 255.0f; + } else if (frac >= 0.25f && frac < 0.8f) { + (*color_map)[i] = + Vector3_f(1.0f, 1.0f + (frac - 0.25f) * -1.81f, 0.0f) * 255.0f; + } else if (frac >= 0.8f) { + (*color_map)[i] = + Vector3_f(1.0f + (frac - 0.8f) * -2.0f, 0.0f, 0.0f) * 
255.0f; + } else { + LOG(ERROR) << "Out of bound value. Should not occur."; + } + } +} + +void RenderSaliency(const SalientPointFrame& salient_points, + const cv::Scalar& line_color, int line_thickness, + bool render_bounding_box, cv::Mat* image) { + // Visualize salient points. + for (const auto& point : salient_points.point()) { + if (point.weight() > 0) { + SalientPoint copy = point; + ScaleSalientPoint(image->cols, image->rows, ©); + Vector2_f pt(copy.norm_point_x(), copy.norm_point_y()); + + cv::ellipse(*image, cv::Point(pt.x(), pt.y()), + cv::Size(copy.norm_major(), copy.norm_minor()), + copy.angle() / M_PI * 180.0, + 0, // start angle + 360, // end angle + line_color, line_thickness); + + if (render_bounding_box) { + std::vector ellipse_bounding_box; + BoundingBoxFromEllipse(pt, copy.norm_major(), copy.norm_minor(), + copy.angle(), &ellipse_bounding_box); + + std::vector corners; + corners.reserve(4); + for (const Vector2_f& corner : ellipse_bounding_box) { + corners.emplace_back(cv::Point(corner.x(), corner.y())); + } + + for (int k = 0; k < 4; ++k) { + cv::line(*image, corners[k], corners[(k + 1) % 4], line_color, + line_thickness, CV_AA); + } + } + } + } +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/image_util.h b/mediapipe/util/tracking/image_util.h new file mode 100644 index 000000000..ba58d343b --- /dev/null +++ b/mediapipe/util/tracking/image_util.h @@ -0,0 +1,192 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_ +#define MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_ + +#include + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +// Returns median of the L1 color distance between img_1 and img_2. +float FrameDifferenceMedian(const cv::Mat& img_1, const cv::Mat& img_2); + +// Matlab's jet color map (returned assuming RGB channel order in [0, 1] +// normalized intensity domain). For details: http://goo.gl/gmHKZ +// Returned as map with num_entries entries. +void JetColoring(int num_entries, std::vector* color_map); + +// Draws a saliency point frame to a single frame. +// Optionally renders axis aligned bounding box for each SalientPointFrame. +void RenderSaliency(const SalientPointFrame& salient_points, + const cv::Scalar& line_color, int line_thickness, + bool render_bounding_box, cv::Mat* image); + +// Templated CopyBorder methods for increased speed. In-place border copy +// for specified Mat of type T with channels. Passed matrix is assumed to be of +// full size, that is we copy the content at [border, cols - 2 * border] x +// [border, rows - 2 * border] to the full size. +template +void CopyMatBorder(cv::Mat* mat); + +// Same as above for copying border only in X or Y +template +void CopyMatBorderX(cv::Mat* mat); +template +void CopyMatBorderY(cv::Mat* mat); + +template +void CopyMatBorder(cv::Mat* mat) { + const int width = mat->cols - 2 * border; + const int height = mat->rows - 2 * border; + + // Maximum values we clamp at to avoid going out of bound small images. + const int max_w = width - 1; + const int max_h = height - 1; + + // Top rows. 
+ for (int r = 0; r < border; ++r) { + const T* src_ptr = + mat->ptr(border + std::min(r, max_h)) + border * channels; + T* dst_ptr = mat->ptr(border - 1 - r); + + // Top left elems. + for (int i = 0; i < border; ++i, dst_ptr += channels) { + for (int j = 0; j < channels; ++j) { + dst_ptr[j] = src_ptr[std::min(max_w, border - 1 - i) * channels + j]; + } + } + + // src and dst should point to same column from here. + DCHECK_EQ(0, (src_ptr - dst_ptr) * sizeof(T) % mat->step[0]); + + // Top row copy. + memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0])); + src_ptr += width * channels; // Points one behind end. + dst_ptr += width * channels; + + // Top right elems. + for (int i = 0; i < border; ++i, dst_ptr += channels) { + if (i <= max_w) { + src_ptr -= channels; + } + for (int j = 0; j < channels; ++j) { + dst_ptr[j] = src_ptr[j]; + } + } + } + + // Left and right border. + for (int r = 0; r < height; ++r) { + // Get pointers to left most and right most column within image. + T* left_ptr = mat->ptr(r + border) + border * channels; + T* right_ptr = left_ptr + (width - 1) * channels; + for (int i = 0; i < border; ++i) { + for (int j = 0; j < channels; ++j) { + left_ptr[-(i + 1) * channels + j] = + left_ptr[std::min(max_w, i) * channels + j]; + right_ptr[(i + 1) * channels + j] = + right_ptr[-std::min(max_w, i) * channels + j]; + } + } + } + + // Bottom rows. + for (int r = 0; r < border; ++r) { + const T* src_ptr = mat->ptr(border + height - 1 - std::min(r, max_h)) + + border * channels; + T* dst_ptr = mat->ptr(border + height + r); + + // First elems. + for (int i = 0; i < border; ++i, dst_ptr += channels) { + for (int j = 0; j < channels; ++j) { + dst_ptr[j] = src_ptr[(border - 1 - std::min(max_w, i)) * channels + j]; + } + } + + // src and dst should point to same column from here. 
+ DCHECK_EQ(0, (dst_ptr - src_ptr) * sizeof(T) % mat->step[0]); + memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0])); + src_ptr += width * channels; // Points one behind the end. + dst_ptr += width * channels; + + // Top right elems. + for (int i = 0; i < border; ++i, dst_ptr += channels) { + if (i <= max_w) { + src_ptr -= channels; + } + for (int j = 0; j < channels; ++j) { + dst_ptr[j] = src_ptr[j]; + } + } + } +} + +template +void CopyMatBorderX(cv::Mat* mat) { + const int width = mat->cols - 2 * border; + const int height = mat->rows - 2 * border; + + // Maximum values we clamp at to avoid going out of bound small images. + const int max_w = width - 1; + + // Left and right border. + for (int r = 0; r < height; ++r) { + T* left_ptr = mat->ptr(r + border) + border * channels; + T* right_ptr = left_ptr + (width - 1) * channels; + for (int i = 0; i < border; ++i) { + for (int j = 0; j < channels; ++j) { + left_ptr[-(i + 1) * channels + j] = + left_ptr[std::min(i, max_w) * channels + j]; + right_ptr[(i + 1) * channels + j] = + right_ptr[-std::min(max_w, i) * channels + j]; + } + } + } +} + +template +void CopyMatBorderY(cv::Mat* mat) { + const int width = mat->cols - 2 * border; + const int height = mat->rows - 2 * border; + + // Maximum values we clamp at to avoid going out of bound small images. + const int max_h = height - 1; + + // Top rows. + for (int r = 0; r < border; ++r) { + const T* src_ptr = + mat->ptr(border + std::min(max_h, r)) + border * channels; + T* dst_ptr = mat->ptr(border - 1 - r) + border * channels; + memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0])); + } + + // Bottom rows. 
+ for (int r = 0; r < border; ++r) { + const T* src_ptr = mat->ptr(border + height - 1 - std::min(max_h, r)) + + border * channels; + T* dst_ptr = mat->ptr(border + height + r) + border * channels; + memcpy(dst_ptr, src_ptr, width * channels * sizeof(dst_ptr[0])); + } +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_IMAGE_UTIL_H_ diff --git a/mediapipe/util/tracking/image_util_test.cc b/mediapipe/util/tracking/image_util_test.cc new file mode 100644 index 000000000..a4c4e0cc8 --- /dev/null +++ b/mediapipe/util/tracking/image_util_test.cc @@ -0,0 +1,93 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/image_util.h" + +#include "mediapipe/framework/port/gtest.h" + +namespace mediapipe { + +template +void TestCopyBorder(cv::Mat* full_size) { + full_size->setTo(0); + + const int width = full_size->cols - 2 * border; + // Fill center with per pixel value [1, 2, 3]. + for (int y = border; y < full_size->rows - border; ++y) { + float* row_ptr = full_size->ptr(y); + for (int x = border; x < full_size->cols - border; ++x) { + int value = x - border; + row_ptr[3 * x] = value + 0.1f; + row_ptr[3 * x + 1] = value + 0.2f; + row_ptr[3 * x + 2] = value + 0.3f; + } + } + + CopyMatBorder(full_size); + + // Main part should not be modified. 
+ for (int y = border; y < full_size->rows - border; ++y) { + float* row_ptr = full_size->ptr(y); + for (int x = border; x < full_size->cols - border; ++x) { + int value = x - border; + EXPECT_EQ(row_ptr[3 * x], value + 0.1f); + EXPECT_EQ(row_ptr[3 * x + 1], value + 0.2f); + EXPECT_EQ(row_ptr[3 * x + 2], value + 0.3f); + } + } + + for (int y = 0; y < full_size->rows; ++y) { + const float* row_ptr = full_size->ptr(y); + for (int x = 0; x < full_size->cols; ++x) { + // Memory copy, expect exact floating point equality. + int value = x - border; + if (x < border) { + // Cap to valid area. + value = std::min(width - 1, (border - 1) - x); + } else if (x >= full_size->cols - border) { + // Last column minus distance from frame boundary, capped + // to valid area. + value = std::max(0, width - 1 - (x - full_size->cols + border)); + } + EXPECT_EQ(row_ptr[3 * x], value + 0.1f); + EXPECT_EQ(row_ptr[3 * x + 1], value + 0.2f); + EXPECT_EQ(row_ptr[3 * x + 2], value + 0.3f); + } + } +} + +TEST(PushPullFilteringTest, CopyBorder) { + cv::Mat full_size(100, 50, CV_32FC3); + TestCopyBorder<1>(&full_size); + TestCopyBorder<2>(&full_size); + TestCopyBorder<3>(&full_size); + TestCopyBorder<4>(&full_size); + TestCopyBorder<5>(&full_size); +} + +TEST(PushPullFilteringTest, CopyBorderSmallFrame) { + cv::Mat full_size(3, 3, CV_32FC3); + TestCopyBorder<1>(&full_size); + + full_size.create(5, 5, CV_32FC3); + TestCopyBorder<2>(&full_size); + + full_size.create(7, 7, CV_32FC3); + TestCopyBorder<3>(&full_size); + + full_size.create(9, 9, CV_32FC3); + TestCopyBorder<4>(&full_size); +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/measure_time.cc b/mediapipe/util/tracking/measure_time.cc new file mode 100644 index 000000000..e628dff0f --- /dev/null +++ b/mediapipe/util/tracking/measure_time.cc @@ -0,0 +1,22 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/measure_time.h" + +#ifdef SET_FLAG_MEASURE_TIME +// If set to true, outputs time measurements to LOG(INFO). +bool flags_measure_time = true; +#else +bool flags_measure_time = false; +#endif diff --git a/mediapipe/util/tracking/measure_time.h b/mediapipe/util/tracking/measure_time.h new file mode 100644 index 000000000..0351f4652 --- /dev/null +++ b/mediapipe/util/tracking/measure_time.h @@ -0,0 +1,169 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Helper class and macro to take time measurements within current scope. +// Takes time measurement within current scope. Outputs to LOG(INFO) if +// flag --measure_time is set or if build flag SET_FLAG_MEASURE_TIME is +// defined (add --copt=-DSET_FLAG_MEASURE_TIME to your build command). 
+// Additionally you can limit time measurements to specific files, +// via the flag +// --measure_time_filter="" +// Example: +// { // Scope to be measured +// MEASURE_TIME << "Some additional logging and answers : " << 42; +// ... // instructions. +// } + +#ifndef MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_ +#define MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_ + +#include +#include + +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" + +extern bool flags_measure_time; + +namespace mediapipe { +class MeasureTimeFilter; +} // namespace mediapipe + +#define MEASURE_TIME \ + MEASURE_TIME_PRE_IMPL(flags_measure_time, __LINE__, __FILE__) + +// Add level of indirection to trigger __LINE__ macro expansion. +#define MEASURE_TIME_PRE_IMPL(show_output, line, file) \ + MEASURE_TIME_IMPL(show_output, line, file) + +#define MEASURE_TIME_IMPL(show_output, line, file) \ + std::unique_ptr scoped_wall_timer_##line; \ + const bool activated##line = show_output; \ + if (activated##line) { \ + static mediapipe::ScopedWallTimer::Accumulator* \ + scoped_wall_timer_accum_##line = \ + new mediapipe::ScopedWallTimer::Accumulator; \ + scoped_wall_timer_##line.reset(new mediapipe::ScopedWallTimer( \ + file, line, show_output, scoped_wall_timer_accum_##line)); \ + } \ + if (activated##line) /* NOLINT */ \ + scoped_wall_timer_##line->stream() + +namespace mediapipe { + +class ScopedWallTimer { + public: + // Helper class for accumulating time across multiple calls to a scoped wall + // timer. Thread-safe (except on ANDROID, for which Mutex is mocked out). + class Accumulator { + public: + Accumulator() : accum_time_(0.0f), count_(0) {} + Accumulator(const Accumulator&) = delete; + Accumulator& operator=(const Accumulator&) = delete; + + // Accumulates passed_time into accumulators. 
Returns total time and number + // of calls made. + void Accumulate(double passed_time, double* accum_time, int* count) { + absl::MutexLock lock(&mutex_); + accum_time_ += passed_time; + ++count_; + *accum_time = accum_time_; + *count = count_; + } + + private: + double accum_time_; + int count_; + absl::Mutex mutex_; + }; + + // Creates a new ScopedWallTimer for current file and line. LogMessage is only + // initialized if show_output is set to true. + ScopedWallTimer(const char* file, int line, bool show_output, + Accumulator* accumulator) + : file_(file), + line_(line), + show_output_(show_output), + accumulator_(accumulator) { + if (show_output_) { + CHECK(accumulator_); + start_time_ = GetWallTime(); + } + } + ScopedWallTimer(const ScopedWallTimer&) = delete; + ScopedWallTimer& operator=(const ScopedWallTimer&) = delete; + + // Destructor measures time and outputs to stream. + ~ScopedWallTimer() { + if (show_output_) { + double passed_time = GetWallTime() - start_time_; + double accum_time = 0.0; + int count = 0; + accumulator_->Accumulate(passed_time, &accum_time, &count); + LOG(INFO) << stream_.str() << " TIMES: [Curr: " << passed_time * 1e-6 + << " ms, " + << "Avg: " << accum_time * 1e-6 / std::max(1, count) << " ms, " + << count << " calls]"; + } + } + + std::ostream& stream() { return stream_; } + + private: + const char* file_; + int line_; + bool show_output_; + // We need to buffer information passed via stream operator << + // While LogMessage is adequate for this, no good equivalent exists on + // Android, so we employ a portable ostringstream for buffering. 
+ std::ostringstream stream_; + int64 start_time_; + Accumulator* accumulator_; + + int64 GetWallTime() { return absl::GetCurrentTimeNanos(); } +}; + +class MeasureTimeFilter { + public: + static const MeasureTimeFilter* get() { + static MeasureTimeFilter instance; + return &instance; + } + MeasureTimeFilter(const MeasureTimeFilter&) = delete; + MeasureTimeFilter& operator=(const MeasureTimeFilter&) = delete; + + bool Matches(const std::string& item) const { + for (const std::string& match_item : match_items_) { + if (item.find(match_item) != std::string::npos) { + return true; + } + } + return false; + } + + private: + explicit MeasureTimeFilter() {} + explicit MeasureTimeFilter(const std::string& filter) { + match_items_ = absl::StrSplit(filter, absl::ByChar(',')); + } + std::vector match_items_; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_MEASURE_TIME_H_ diff --git a/mediapipe/util/tracking/motion_analysis.cc b/mediapipe/util/tracking/motion_analysis.cc new file mode 100644 index 000000000..ff4d3d531 --- /dev/null +++ b/mediapipe/util/tracking/motion_analysis.cc @@ -0,0 +1,850 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/util/tracking/motion_analysis.h" + +#include +#include +#include +#include +#include + +#include "absl/strings/str_format.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/camera_motion.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/image_util.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/motion_saliency.pb.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" +#include "mediapipe/util/tracking/region_flow_computation.h" +#include "mediapipe/util/tracking/region_flow_computation.pb.h" +#include "mediapipe/util/tracking/region_flow_visualization.h" + +namespace mediapipe { + +MotionAnalysis::MotionAnalysis(const MotionAnalysisOptions& options, + int frame_width, int frame_height) + : options_(options), + frame_width_(frame_width), + frame_height_(frame_height) { + // Init options by policy. + InitPolicyOptions(); + // Merge back in any overriden options. + options_.MergeFrom(options); + + region_flow_computation_.reset(new RegionFlowComputation( + options_.flow_options(), frame_width_, frame_height_)); + motion_estimation_.reset(new MotionEstimation(options_.motion_options(), + frame_width_, frame_height_)); + + if (options_.compute_motion_saliency()) { + motion_saliency_.reset(new MotionSaliency(options_.saliency_options(), + frame_width_, frame_height_)); + + // Compute overlap needed between clips if filtering or smoothing of + // saliency is requested. + if (options_.select_saliency_inliers()) { + // 1.65 sigmas in each direction = 90% of variance is captured. 
+ overlap_size_ = std::max( + overlap_size_, options_.saliency_options().selection_frame_radius()); + } + + if (options_.filter_saliency()) { + overlap_size_ = std::max( + overlap_size_, + options_.saliency_options().filtering_sigma_time() * 1.65f); + } + } + + long_feature_stream_.reset(new LongFeatureStream); + + frame_num_ = 0; + + // Determine if feature descriptors need to be computed. + // Required for irls smoothing, overlay detection and mixture homographies. + const bool compute_mixtures = + options_.motion_options().mix_homography_estimation() != + MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE; + + const bool use_spatial_bias = + options_.motion_options().estimation_policy() == + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS && + options_.motion_options().long_feature_bias_options().use_spatial_bias(); + + compute_feature_descriptors_ = + options_.post_irls_smoothing() || + options_.motion_options().overlay_detection() || compute_mixtures || + use_spatial_bias; + + if (compute_feature_descriptors_) { + CHECK_EQ(RegionFlowComputationOptions::FORMAT_RGB, + options_.flow_options().image_format()) + << "Feature descriptors only support RGB currently."; + prev_frame_.reset(new cv::Mat(frame_height_, frame_width_, CV_8UC3)); + } + + // Setup streaming buffer. By default we buffer features and motion. + // If saliency is computed, also buffer saliency and filtered/smoothed + // output_saliency. + std::vector data_config{ + TaggedPointerType("features"), + TaggedPointerType("motion")}; + std::vector data_config_saliency = data_config; + data_config_saliency.push_back( + TaggedPointerType("saliency")); + data_config_saliency.push_back( + TaggedPointerType("output_saliency")); + + // Store twice the overlap. + buffer_.reset(new StreamingBuffer( + options_.compute_motion_saliency() ? 
data_config_saliency : data_config, + 2 * overlap_size_)); +} + +void MotionAnalysis::InitPolicyOptions() { + auto* flow_options = options_.mutable_flow_options(); + auto* tracking_options = flow_options->mutable_tracking_options(); + auto* motion_options = options_.mutable_motion_options(); + auto* feature_bias_options = + motion_options->mutable_long_feature_bias_options(); + auto* translation_bounds = + motion_options->mutable_stable_translation_bounds(); + auto* similarity_bounds = motion_options->mutable_stable_similarity_bounds(); + auto* homography_bounds = motion_options->mutable_stable_homography_bounds(); + + switch (options_.analysis_policy()) { + case MotionAnalysisOptions::ANALYSIS_POLICY_LEGACY: + break; + + case MotionAnalysisOptions::ANALYSIS_POLICY_VIDEO: + // Long track settings. Temporally consistent. + options_.set_estimation_clip_size(64); + + tracking_options->set_internal_tracking_direction( + TrackingOptions::FORWARD); + tracking_options->set_tracking_policy( + TrackingOptions::POLICY_LONG_TRACKS); + + feature_bias_options->set_use_spatial_bias(false); + feature_bias_options->set_seed_priors_from_bias(true); + + similarity_bounds->set_min_inlier_fraction(0.15f); + homography_bounds->set_min_inlier_coverage(0.25f); + flow_options->set_verify_long_features(false); + + // Better features. + tracking_options->set_adaptive_features_levels(3); + tracking_options->set_use_cv_tracking_algorithm(true); + + // Speed. 
+ flow_options->set_downsample_mode( + RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE); + + motion_options->set_estimation_policy( + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS); + motion_options->set_feature_density_normalization(true); + motion_options->set_overlay_detection(true); + motion_options->set_domain_limited_irls_scaling(true); + motion_options->set_irls_weights_preinitialized(true); + motion_options->mutable_irls_initialization()->set_activated(true); + motion_options->mutable_long_feature_initialization()->set_activated( + true); + + break; + + case MotionAnalysisOptions::ANALYSIS_POLICY_VIDEO_MOBILE: + // Long track settings. Temporally consistent. + options_.set_estimation_clip_size(32); + tracking_options->set_internal_tracking_direction( + TrackingOptions::FORWARD); + tracking_options->set_tracking_policy( + TrackingOptions::POLICY_LONG_TRACKS); + + motion_options->set_estimation_policy( + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS); + motion_options->set_feature_density_normalization(true); + motion_options->set_domain_limited_irls_scaling(true); + motion_options->mutable_irls_initialization()->set_activated(true); + motion_options->mutable_long_feature_initialization()->set_activated( + true); + + feature_bias_options->set_use_spatial_bias(false); + feature_bias_options->set_seed_priors_from_bias(true); + + similarity_bounds->set_inlier_threshold(2.0); + similarity_bounds->set_min_inlier_fraction(0.15f); + + // Speed for mobile. 
+ tracking_options->set_tracking_window_size(8); + tracking_options->set_tracking_iterations(8); + tracking_options->set_fractional_tracking_distance(0.1); + tracking_options->set_reuse_features_max_frame_distance(15); + tracking_options->set_max_features(500); + tracking_options->set_use_cv_tracking_algorithm(true); + + flow_options->set_downsample_mode( + RegionFlowComputationOptions::DOWNSAMPLE_TO_MIN_SIZE); + flow_options->set_round_downsample_factor(true); + flow_options->set_downsampling_size(256); + flow_options->set_pre_blur_sigma(0); + flow_options->set_top_inlier_sets(1); + flow_options->set_ransac_rounds_per_region(10); + flow_options->mutable_visual_consistency_options() + ->set_compute_consistency(false); + flow_options->set_verify_long_features(false); + + // Avoid resetting features if a large amount is flagged. + // In that case verify feature tracks to avoid pertubing the estimation. + flow_options->set_verify_long_feature_acceleration(true); + flow_options->set_verification_distance(5.0); + flow_options->set_verify_long_feature_trigger_ratio(0.1); + + motion_options->set_use_exact_homography_estimation(false); + motion_options->set_use_highest_accuracy_for_normal_equations(false); + + break; + + case MotionAnalysisOptions::ANALYSIS_POLICY_CAMERA_MOBILE: + // Long track settings. 
+ tracking_options->set_internal_tracking_direction( + TrackingOptions::FORWARD); + tracking_options->set_tracking_policy( + TrackingOptions::POLICY_LONG_TRACKS); + + motion_options->set_estimation_policy( + MotionEstimationOptions::TEMPORAL_IRLS_MASK); + motion_options->set_feature_density_normalization(true); + motion_options->set_domain_limited_irls_scaling(true); + motion_options->mutable_irls_initialization()->set_activated(true); + motion_options->mutable_irls_initialization()->set_rounds(50); + + feature_bias_options->set_use_spatial_bias(false); + feature_bias_options->set_seed_priors_from_bias(true); + + similarity_bounds->set_inlier_threshold(2.0); + similarity_bounds->set_min_inlier_fraction(0.15f); + + // Speed for mobile. + tracking_options->set_tracking_window_size(8); + tracking_options->set_tracking_iterations(8); + tracking_options->set_fractional_tracking_distance(0.1); + tracking_options->set_reuse_features_max_frame_distance(10); + tracking_options->set_reuse_features_min_survived_frac(0.6); + tracking_options->set_max_features(240); + tracking_options->set_adaptive_tracking_distance(true); + tracking_options->set_use_cv_tracking_algorithm(true); + + // Assumes downsampled input. + flow_options->set_pre_blur_sigma(0); + flow_options->set_top_inlier_sets(1); + flow_options->set_ransac_rounds_per_region(10); + flow_options->mutable_visual_consistency_options() + ->set_compute_consistency(false); + flow_options->set_verify_long_features(false); + + motion_options->set_use_exact_homography_estimation(false); + motion_options->set_use_highest_accuracy_for_normal_equations(false); + + // Low latency. + options_.set_estimation_clip_size(1); + break; + + case MotionAnalysisOptions::ANALYSIS_POLICY_HYPERLAPSE: + // Long track settings. Temporally consistent. 
+ options_.set_estimation_clip_size(64); + + tracking_options->set_internal_tracking_direction( + TrackingOptions::FORWARD); + tracking_options->set_tracking_policy( + TrackingOptions::POLICY_LONG_TRACKS); + + feature_bias_options->set_use_spatial_bias(false); + feature_bias_options->set_seed_priors_from_bias(true); + + // Relaxed stability bounds. + translation_bounds->set_max_motion_stdev(0.15); + translation_bounds->set_max_acceleration(60.0); + translation_bounds->set_frac_max_motion_magnitude(0.25); + + similarity_bounds->set_min_inlier_fraction(0.02f); + similarity_bounds->set_min_inliers(5); + similarity_bounds->set_lower_scale(0.5); + similarity_bounds->set_upper_scale(2.0); + similarity_bounds->set_limit_rotation(0.5); + + homography_bounds->set_lower_scale(0.5); + homography_bounds->set_upper_scale(2.0); + homography_bounds->set_limit_rotation(0.5); + homography_bounds->set_limit_perspective(0.004); + homography_bounds->set_min_inlier_coverage(0.1); + + // Relax constraining of homographies. + motion_options->set_lin_sim_inlier_threshold(0.02); + // Anticipate much higher registration errors. + motion_options->set_irls_motion_magnitude_fraction(0.2); + + flow_options->set_verify_long_features(false); + + // Better features. + tracking_options->set_adaptive_features_levels(3); + + // Speed. + tracking_options->set_use_cv_tracking_algorithm(true); + flow_options->set_downsample_mode( + RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE); + + // Less quality features. + flow_options->set_absolute_inlier_error_threshold(4); + flow_options->set_relative_inlier_error_threshold(0.2); + + motion_options->set_estimation_policy( + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS); + motion_options->set_feature_density_normalization(true); + + // TODO: This needs to be activated and then tighten relaxed + // thresholds above again. 
+ // motion_options->set_domain_limited_irls_scaling(true); + + motion_options->set_irls_weights_preinitialized(true); + motion_options->mutable_irls_initialization()->set_activated(true); + + // More weight towards long feature tracks. + motion_options->mutable_long_feature_initialization()->set_activated( + true); + motion_options->mutable_long_feature_initialization() + ->set_min_length_percentile(0.975); + motion_options->mutable_long_feature_initialization() + ->set_upweight_multiplier(50); + + // 2nd pass, no need to do overlay again. + motion_options->set_overlay_detection(false); + + break; + } +} + +bool MotionAnalysis::AddFrame(const cv::Mat& frame, int64 timestamp_usec, + RegionFlowFeatureList* feature_list) { + return AddFrameWithSeed(frame, timestamp_usec, Homography(), feature_list); +} + +bool MotionAnalysis::AddFrameWithSeed(const cv::Mat& frame, + int64 timestamp_usec, + const Homography& initial_transform, + RegionFlowFeatureList* feature_list) { + return AddFrameGeneric(frame, timestamp_usec, initial_transform, + nullptr, // rejection_transform + nullptr, // external features + nullptr, // feature modification function + feature_list); +} + +bool MotionAnalysis::AddFrameGeneric( + const cv::Mat& frame, int64 timestamp_usec, + const Homography& initial_transform, const Homography* rejection_transform, + const RegionFlowFeatureList* external_features, + std::function* modify_features, + RegionFlowFeatureList* output_feature_list) { + // Don't check input sizes here, RegionFlowComputation does that based + // on its internal options. + CHECK(feature_computation_) << "Calls to AddFrame* can NOT be mixed " + << "with AddFeatures"; + + // Compute RegionFlow. + { + MEASURE_TIME << "CALL RegionFlowComputation::AddImage"; + if (!region_flow_computation_->AddImageWithSeed(frame, timestamp_usec, + initial_transform)) { + LOG(ERROR) << "Error while computing region flow."; + return false; + } + } + + // Buffer features. 
+ std::unique_ptr feature_list; + { + MEASURE_TIME << "CALL RegionFlowComputation::RetrieveRegionFlowFeatureList"; + const bool compute_feature_match_descriptors = + compute_feature_descriptors_ && frame_num_ > 0; + + // Determine which region flow should be computed in case multi frame + // tracking has been requested. + int max_track_index = 0; + if (options_.flow_options().tracking_options().tracking_policy() == + TrackingOptions::POLICY_MULTI_FRAME) { + max_track_index = std::min( + options_.flow_options().tracking_options().multi_frames_to_track() - + 1, + options_.track_index()); + } + + feature_list.reset( + region_flow_computation_->RetrieveMultiRegionFlowFeatureList( + std::min(std::max(0, frame_num_ - 1), max_track_index), + compute_feature_descriptors_, compute_feature_match_descriptors, + &frame, + compute_feature_match_descriptors ? prev_frame_.get() : nullptr)); + + if (feature_list == nullptr) { + LOG(ERROR) << "Error retrieving feature list."; + return false; + } + } + + if (external_features) { + constexpr int kTrackIdShift = 1 << 20; + for (const auto& external_feat : external_features->feature()) { + auto* new_feat = feature_list->add_feature(); + *new_feat = external_feat; + if (new_feat->track_id() >= 0) { + new_feat->set_track_id(new_feat->track_id() + kTrackIdShift); + } + } + } + + if (rejection_transform) { + RegionFlowFeatureList tmp_list; + tmp_list.mutable_feature()->Swap(feature_list->mutable_feature()); + + for (const auto& feature : tmp_list.feature()) { + const Vector2_f diff = + TransformPoint(*rejection_transform, FeatureLocation(feature)) - + FeatureMatchLocation(feature); + if (diff.Norm() < options_.rejection_transform_threshold()) { + *feature_list->add_feature() = feature; + } + } + } + + if (output_feature_list) { + *output_feature_list = *feature_list; + } + + if (modify_features) { + (*modify_features)(feature_list.get()); + } + + buffer_->EmplaceDatum("features", feature_list.release()); + + // Store frame for next call. 
+ if (compute_feature_descriptors_) { + frame.copyTo(*prev_frame_); + } + + ++frame_num_; + + return true; +} + +void MotionAnalysis::AddFeatures(const RegionFlowFeatureList& features) { + feature_computation_ = false; + buffer_->EmplaceDatum("features", new RegionFlowFeatureList(features)); + + ++frame_num_; +} + +void MotionAnalysis::EnqueueFeaturesAndMotions( + const RegionFlowFeatureList& features, const CameraMotion& motion) { + feature_computation_ = false; + CHECK(buffer_->HaveEqualSize({"motion", "features"})) + << "Can not be mixed with other Add* calls"; + buffer_->EmplaceDatum("features", new RegionFlowFeatureList(features)); + buffer_->EmplaceDatum("motion", new CameraMotion(motion)); +} + +cv::Mat MotionAnalysis::GetGrayscaleFrameFromResults() { + return region_flow_computation_->GetGrayscaleFrameFromResults(); +} + +int MotionAnalysis::GetResults( + bool flush, std::vector>* features, + std::vector>* camera_motion, + std::vector>* saliency) { + MEASURE_TIME << "GetResults"; + + const int num_features_lists = buffer_->BufferSize("features"); + const int num_new_feature_lists = num_features_lists - overlap_start_; + CHECK_GE(num_new_feature_lists, 0); + + if (!flush && num_new_feature_lists < options_.estimation_clip_size()) { + // Nothing to compute, return. + return 0; + } + + const bool compute_saliency = options_.compute_motion_saliency(); + CHECK_EQ(compute_saliency, saliency != nullptr) + << "Computing saliency requires saliency output and vice versa"; + + // Estimate motions for newly buffered RegionFlowFeatureLists, which also + // computes IRLS feature weights for foreground estimation, if needed + // (otherwise could be externally added). 
+ const int num_motions_to_compute = + buffer_->BufferSize("features") - buffer_->BufferSize("motion"); + + if (num_motions_to_compute > 0) { + std::vector camera_motions; + std::vector feature_lists; + for (int k = overlap_start_; k < num_features_lists; ++k) { + feature_lists.push_back( + buffer_->GetMutableDatum("features", k)); + } + + // TODO: Result should be vector of unique_ptr. + motion_estimation_->EstimateMotionsParallel( + options_.post_irls_smoothing(), &feature_lists, &camera_motions); + + // Add solution to buffer. + for (const auto& motion : camera_motions) { + buffer_->EmplaceDatum("motion", new CameraMotion(motion)); + } + } + + CHECK(buffer_->HaveEqualSize({"features", "motion"})); + + if (compute_saliency) { + ComputeSaliency(); + } + + return OutputResults(flush, features, camera_motion, saliency); +} + +int MotionAnalysis::OutputResults( + bool flush, std::vector>* features, + std::vector>* camera_motion, + std::vector>* saliency) { + const bool compute_saliency = options_.compute_motion_saliency(); + CHECK_EQ(compute_saliency, saliency != nullptr) + << "Computing saliency requires saliency output and vice versa"; + CHECK(buffer_->HaveEqualSize({"features", "motion"})); + + // Discard prev. overlap (already output, just used for filtering here). + buffer_->DiscardData(buffer_->AllTags(), prev_overlap_start_); + prev_overlap_start_ = 0; + + // Output only frames not part of the overlap. + const int num_output_frames = + std::max(0, buffer_->MaxBufferSize() - (flush ? 0 : overlap_size_)); + + if (features) { + features->reserve(num_output_frames); + } + if (camera_motion) { + camera_motion->reserve(num_output_frames); + } + if (saliency) { + saliency->reserve(num_output_frames); + } + + // Need to retain twice the overlap, for frames in that segment we need + // to create a copy. + const int new_overlap_start = + std::max(0, num_output_frames - (flush ? 0 : overlap_size_)); + + // Populate output. 
+ for (int k = 0; k < num_output_frames; ++k) { + std::unique_ptr out_features; + std::unique_ptr out_motion; + std::unique_ptr out_saliency; + + if (k >= new_overlap_start) { + // Create copy. + out_features.reset(new RegionFlowFeatureList( + *buffer_->GetDatum("features", k))); + out_motion.reset( + new CameraMotion(*buffer_->GetDatum("motion", k))); + } else { + // Release datum. + out_features = + buffer_->ReleaseDatum("features", k); + out_motion = buffer_->ReleaseDatum("motion", k); + } + + // output_saliency is temporary so we never need to buffer it. + if (compute_saliency) { + out_saliency = + buffer_->ReleaseDatum("output_saliency", k); + } + + if (options_.subtract_camera_motion_from_features()) { + std::vector feature_view{out_features.get()}; + SubtractCameraMotionFromFeatures({*out_motion}, &feature_view); + } + + if (features != nullptr) { + features->push_back(std::move(out_features)); + } + if (camera_motion != nullptr) { + camera_motion->push_back(std::move(out_motion)); + } + if (saliency != nullptr) { + saliency->push_back(std::move(out_saliency)); + } + } + + // Reset for next chunk. + prev_overlap_start_ = num_output_frames - new_overlap_start; + CHECK_GE(prev_overlap_start_, 0); + + CHECK(buffer_->TruncateBuffer(flush)); + + overlap_start_ = buffer_->MaxBufferSize(); + return num_output_frames; +} + +void MotionAnalysis::RenderResults(const RegionFlowFeatureList& feature_list, + const CameraMotion& motion, + const SalientPointFrame* saliency, + cv::Mat* rendered_results) { +#ifndef NO_RENDERING + CHECK(rendered_results != nullptr); + CHECK_EQ(frame_width_, rendered_results->cols); + CHECK_EQ(frame_height_, rendered_results->rows); + + const auto viz_options = options_.visualization_options(); + + // Visualize flow features if requested. 
+ if (viz_options.visualize_region_flow_features()) { + cv::Scalar inlier_color(0, 255, 0); + cv::Scalar outlier_color(255, 0, 0); + if (feature_list.long_tracks()) { + long_feature_stream_->AddFeatures(feature_list, + true, // Check connectivity. + true); // Purge non present ones. + + VisualizeLongFeatureStream( + *long_feature_stream_, inlier_color, outlier_color, + viz_options.min_long_feature_track(), + viz_options.max_long_feature_points(), 1.0f, 1.0f, rendered_results); + } else { + VisualizeRegionFlowFeatures(feature_list, inlier_color, outlier_color, + true, 1.0f, 1.0f, rendered_results); + } + } + + if (saliency != nullptr && viz_options.visualize_salient_points()) { + static const cv::Scalar kColor(255, 0, 0); + RenderSaliency(*saliency, kColor, viz_options.line_thickness(), false, + rendered_results); + } + + if (viz_options.visualize_blur_analysis_region()) { + VisualizeBlurAnalysisRegions(rendered_results); + } + + if (viz_options.visualize_stats()) { + // Output general stats. 
+ std::string hud_text = absl::StrFormat( + "H-cvg %.2f | H-err %4.2f | Avg.t %3.1f | dx: %+2.1f dy: %+2.1f " + "Feat# %4d | %s | ", + motion.homography_inlier_coverage(), motion.average_homography_error(), + motion.average_magnitude(), motion.translation().dx(), + motion.translation().dy(), feature_list.feature_size(), + CameraMotionTypeToString(motion)); + hud_text += CameraMotionFlagToString(motion); + + const float text_scale = frame_width_ * 5.e-4; + cv::putText(*rendered_results, hud_text, + cv::Point(0.02 * frame_width_, 0.975 * frame_height_), + cv::FONT_HERSHEY_SIMPLEX, text_scale, cv::Scalar::all(255), + text_scale * 3, CV_AA); + + cv::putText(*rendered_results, + absl::StrFormat("%6d", motion.timestamp_usec() / 1000), + cv::Point(0.9 * frame_width_, 0.05 * frame_height_), + cv::FONT_HERSHEY_SIMPLEX, text_scale, cv::Scalar::all(255), + text_scale * 3, CV_AA); + } +#else + LOG(FATAL) << "Code stripped out because of NO_RENDERING"; +#endif +} + +void MotionAnalysis::ComputeDenseForeground( + const RegionFlowFeatureList& feature_list, + const CameraMotion& camera_motion, cv::Mat* foreground_mask) { + const auto& foreground_options = options_.foreground_options(); + + if (foreground_push_pull_ == nullptr) { + foreground_push_pull_.reset( + new PushPullFilteringC1(cv::Size(frame_width_, frame_height_), + PushPullFilteringC1::BINOMIAL_5X5, false, + nullptr, // Gaussian filter weights only. + nullptr, // No mip map visualizer. + nullptr)); // No weight adjustment. + } + + // Determine foreground weights for each features. + std::vector foreground_weights; + ForegroundWeightsFromFeatures( + feature_list, foreground_options.foreground_threshold(), + foreground_options.foreground_gamma(), + foreground_options.threshold_coverage_scaling() ? &camera_motion + : nullptr, + &foreground_weights); + + // Setup push pull map (with border). Ensure constructor used the right type. 
+ CHECK(foreground_push_pull_->filter_type() == + PushPullFilteringC1::BINOMIAL_5X5 || + foreground_push_pull_->filter_type() == + PushPullFilteringC1::GAUSSIAN_5X5); + + cv::Mat foreground_map(frame_height_ + 4, frame_width_ + 4, CV_32FC2); + std::vector feature_locations; + std::vector> feature_irls; + + for (int feat_idx = 0; feat_idx < foreground_weights.size(); ++feat_idx) { + // Skip marked outliers. + if (foreground_weights[feat_idx] == 0) { + continue; + } + + feature_locations.push_back( + FeatureLocation(feature_list.feature(feat_idx))); + feature_irls.push_back(cv::Vec(foreground_weights[feat_idx])); + } + + // Run push pull. + foreground_push_pull_->PerformPushPull(feature_locations, feature_irls, 0.2, + cv::Point2i(0, 0), + 0, // Default read out level. + nullptr, // Uniform weights. + nullptr, // No bilateral term. + &foreground_map); + + // Convert to grayscale output. + foreground_mask->create(frame_height_, frame_width_, CV_8U); + for (int i = 0; i < frame_height_; ++i) { + const float* src_ptr = foreground_map.ptr(i); + uint8* dst_ptr = foreground_mask->ptr(i); + for (int j = 0; j < frame_width_; ++j) { + // Result is in first channel (second is confidence). + dst_ptr[j] = + std::max(0, std::min(255, static_cast(src_ptr[2 * j] * 255.0f))); + } + } +} + +void MotionAnalysis::VisualizeDenseForeground(const cv::Mat& foreground_mask, + cv::Mat* output) { + CHECK(output != nullptr); + CHECK(foreground_mask.size() == output->size()); + // Map foreground measure to color (green by default). + std::vector color_map; + if (options_.visualization_options().foreground_jet_coloring()) { + JetColoring(1000, &color_map); + } else { + color_map.resize(1000, Vector3_f(0, 255, 0)); + } + + auto clamp = [](int value) -> int { + return std::max(0, std::min(255, value)); + }; + + // Burn-in alpha compositing. 
+ const float alpha = 1.3f; + for (int i = 0; i < frame_height_; ++i) { + uint8* image_ptr = output->ptr(i); + const uint8* foreground_ptr = foreground_mask.ptr(i); + + for (int j = 0; j < frame_width_; ++j) { + const float norm_foreground = foreground_ptr[j] * (1.0 / 255.0f); + const float foreground = norm_foreground * alpha; + const float alpha_denom = 1.0f / (1.0f + foreground); + + Vector3_f color = + color_map[static_cast(norm_foreground * (color_map.size() - 1))]; + + image_ptr[3 * j] = + clamp((image_ptr[3 * j] + color[0] * foreground) * alpha_denom); + image_ptr[3 * j + 1] = + clamp((image_ptr[3 * j + 1] + color[1] * foreground) * alpha_denom); + image_ptr[3 * j + 2] = + clamp((image_ptr[3 * j + 2] + color[2] * foreground) * alpha_denom); + } + } +} + +void MotionAnalysis::VisualizeBlurAnalysisRegions(cv::Mat* input_view) { + CHECK(input_view != nullptr); + + cv::Mat intensity; + cv::cvtColor(*input_view, intensity, CV_RGB2GRAY); + cv::Mat corner_values; + cv::cornerMinEigenVal(intensity, corner_values, 3); + + cv::Mat mask; + region_flow_computation_->ComputeBlurMask(*input_view, &corner_values, &mask); + + cv::Mat mask_3c; + cv::cvtColor(mask, mask_3c, CV_GRAY2RGB); + cv::addWeighted(*input_view, 0.5, mask_3c, 0.5, -128, *input_view); +} + +void MotionAnalysis::ComputeSaliency() { + MEASURE_TIME << "Saliency computation."; + CHECK_EQ(overlap_start_, buffer_->BufferSize("saliency")); + + const int num_features_lists = buffer_->BufferSize("features"); + + // Compute saliency only for newly buffered RegionFlowFeatureLists. + for (int k = overlap_start_; k < num_features_lists; ++k) { + std::vector foreground_weights; + ForegroundWeightsFromFeatures( + *buffer_->GetDatum("features", k), + options_.foreground_options().foreground_threshold(), + options_.foreground_options().foreground_gamma(), + options_.foreground_options().threshold_coverage_scaling() + ? 
buffer_->GetDatum("motion", k) + : nullptr, + &foreground_weights); + + std::unique_ptr saliency(new SalientPointFrame()); + motion_saliency_->SaliencyFromFeatures( + *buffer_->GetDatum("features", k), + &foreground_weights, saliency.get()); + + buffer_->AddDatum("saliency", std::move(saliency)); + } + + CHECK(buffer_->HaveEqualSize({"features", "motion", "saliency"})); + + // Clear output saliency and copy from saliency. + buffer_->DiscardDatum("output_saliency", + buffer_->BufferSize("output_saliency")); + + for (int k = 0; k < buffer_->BufferSize("saliency"); ++k) { + std::unique_ptr copy(new SalientPointFrame()); + *copy = *buffer_->GetDatum("saliency", k); + buffer_->AddDatum("output_saliency", std::move(copy)); + } + + // Create view. + std::vector saliency_view = + buffer_->GetMutableDatumVector("output_saliency"); + + // saliency_frames are filtered after this point and ready for output. + if (options_.select_saliency_inliers()) { + motion_saliency_->SelectSaliencyInliers(&saliency_view, false); + } + + if (options_.filter_saliency()) { + motion_saliency_->FilterMotionSaliency(&saliency_view); + } +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/motion_analysis.h b/mediapipe/util/tracking/motion_analysis.h new file mode 100644 index 000000000..d2ed44d06 --- /dev/null +++ b/mediapipe/util/tracking/motion_analysis.h @@ -0,0 +1,229 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Module for performing motion analysis on a video stream including computing +// locally filtered (robust) feature tracking, camera motion estimation, and +// dense foreground saliency estimation. +// Module buffers frames internally (using an adaptive overlap to achieve +// temporal consistency): +// +// Usage example: +// +// MotionAnalysisOptions options(); +// // Should be always be a multiple 8 for optimal parallel performance +// options.set_estimation_clip_size(16); +// MotionAnalysis motion_analysis(options, 960, 540); + +// std::vector input_frames(N); +// // Define output vectors. +// std::vector> features; +// std::vector> camera_motion; +// std::vector> saliency; +// std::vector rendered_results); // Should to be initialized with +// // frame. +// +// for (int k = 0; k < N; ++k) { +// motion_analysis.AddFrame(input_frames[k], 0); +// // Outputs results, if new ones are available. +// // Output will be all of the same lengths (Length returned by function). +// if (motion_analysis.GetResults(k + 1 == N, // Flush, force output. +// &features, +// &camera_motion, +// &saliency) > 0) { // Optional. +// // Optionally render at i'th frame. +// motion_analysis.RenderResults(*features[i], +// *camera_motion[i], +// saliency[i].get(), +// &rendered_results[i]); +// +// // Output results... 
+// } +// } + +#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_ +#define MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_ + +#include +#include +#include + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/motion_analysis.pb.h" +#include "mediapipe/util/tracking/motion_estimation.h" +#include "mediapipe/util/tracking/motion_estimation.pb.h" +#include "mediapipe/util/tracking/motion_saliency.h" +#include "mediapipe/util/tracking/push_pull_filtering.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" +#include "mediapipe/util/tracking/region_flow_computation.h" +#include "mediapipe/util/tracking/streaming_buffer.h" + +namespace mediapipe { + +typedef PushPullFiltering<1, FilterWeightMultiplierOne> PushPullFlowC1; + +class MotionAnalysis { + public: + MotionAnalysis(const MotionAnalysisOptions& options, int frame_width, + int frame_height); + ~MotionAnalysis() = default; + MotionAnalysis(const MotionAnalysis&) = delete; + MotionAnalysis& operator=(const MotionAnalysis&) = delete; + + // Call with every frame. Timestamp is optional (set to zero if not needed). + // Optionally outputs list of features extracted from this frame. + // Returns true on success. + bool AddFrame(const cv::Mat& frame, int64 timestamp_usec, + RegionFlowFeatureList* feature_list = nullptr); + + // Same as above, but uses specified initial transform to seed + // feature locations. + bool AddFrameWithSeed(const cv::Mat& frame, int64 timestamp_usec, + const Homography& initial_transform, + RegionFlowFeatureList* feature_list = nullptr); + + // Generic function to perform motion analysis on the passed frame, + // with initial_transform used as seed. 
+ // Optionally accepts external_feature to be added to the computed ones, + // and to reject all features that do not agree with the rejection transform + // within a specified threshold (rejection_transform_threshold in options). + // Also allows to modify feature locations before motion estimation by + // supplying appropiate callback - this is invoked *after* the rejection + // transform. + // Returns list of features extracted from this frame, *before* any + // modification is applied. To yield modified features, simply + // apply modify_features function to returned result. + bool AddFrameGeneric( + const cv::Mat& frame, int64 timestamp_usec, + const Homography& initial_transform, + const Homography* rejection_transform = nullptr, + const RegionFlowFeatureList* external_features = nullptr, + std::function* modify_features = nullptr, + RegionFlowFeatureList* feature_list = nullptr); + + // Instead of tracking passed frames, uses result directly as supplied by + // features. Can not be mixed with above AddFrame* calls. + void AddFeatures(const RegionFlowFeatureList& features); + + // Instead of tracking and computing camera motions, simply adds precomputed + // features and camera motions to the internal buffers. Can not be mixed + // with above Add* calls. + // This is useful for just computing saliency via GetResults. + void EnqueueFeaturesAndMotions(const RegionFlowFeatureList& features, + const CameraMotion& motion); + + // Returns motion results (features, camera motions and saliency, all + // optional). + // Call after every AddFrame for optimal performance. + // Returns number of available results. Note, this call with often return + // zero, and only return results (multiple in this case) when chunk boundaries + // are reached. The actual number returned depends on various smoothing + // settings for saliency and features. + // Set flush to true, to force output of all results (e.g. when the end of the + // video stream is reached). 
+ // Note: Passing a non-zero argument for saliency, requires + // MotionAnalysisOptions::compute_motion_saliency to be set and + // vice versa. (CHECKED) + int GetResults( + bool flush, // Forces output. + std::vector>* features = nullptr, + std::vector>* camera_motion = nullptr, + std::vector>* saliency = nullptr); + + // Exposes the grayscale image frame from the most recently created region + // flow tracking data. + cv::Mat GetGrayscaleFrameFromResults(); + + // Renders features and saliency to rendered_results based on + // VisualizationOptions onto pre-initialized rendered_results (in most cases + // you want to create a copy of the input frame). + // NOTE: + // If features are requested to be rendered, this function should be + // called serially with each frame, or wrong feature location might be + // rendered. + void RenderResults(const RegionFlowFeatureList& features, + const CameraMotion& camera_motion, + const SalientPointFrame* saliency, // Optional. + cv::Mat* rendered_results); + + // Determines dense foreground mask from features. + // Returns foreground mask as CV_8U image, indicating propability of + // foreground. + void ComputeDenseForeground(const RegionFlowFeatureList& feature_list, + const CameraMotion& camera_motion, + cv::Mat* foreground_mask); + + // Overlays foreground mask over output as green burn in, or with + // jet_coloring (based on options). + void VisualizeDenseForeground(const cv::Mat& foreground_mask, + cv::Mat* output); + + // Masks out regions from input that are not used for blur analysis. + void VisualizeBlurAnalysisRegions(cv::Mat* input); + + // Number of frames/features added so far. + int NumFrames() const { return frame_num_; } + + private: + void InitPolicyOptions(); + + // Compute saliency from buffered features and motions. + void ComputeSaliency(); + + // Outputs computed results from the streaming buffer to the optional + // output args. Also performs overlap handling. 
+ int OutputResults( + bool flush, // Forces output. + std::vector>* features = nullptr, + std::vector>* camera_motion = nullptr, + std::vector>* saliency = nullptr); + + MotionAnalysisOptions options_; + int frame_width_ = 0; + int frame_height_ = 0; + int frame_num_ = 0; + + // Internal objects for actual motion analysis. + std::unique_ptr region_flow_computation_; + std::unique_ptr motion_estimation_; + std::unique_ptr motion_saliency_; + std::unique_ptr foreground_push_pull_; + // Used for visualization if long feature tracks are present. + std::unique_ptr long_feature_stream_; + + std::unique_ptr buffer_; + + // Indicates where previous overlap in above buffers starts (earlier data is + // just to improve smoothing). + int prev_overlap_start_ = 0; + // Indicates where actual overlap starts (data after this has not been + // output). + int overlap_start_ = 0; + + // Buffers previous frame. + std::unique_ptr prev_frame_; + + bool compute_feature_descriptors_ = false; + + // Amount of overlap between clips. Determined from saliency smoothing + // and filtering options. + int overlap_size_ = 0; + + bool feature_computation_ = true; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ANALYSIS_H_ diff --git a/mediapipe/util/tracking/motion_analysis.proto b/mediapipe/util/tracking/motion_analysis.proto new file mode 100644 index 000000000..97d6d1d7b --- /dev/null +++ b/mediapipe/util/tracking/motion_analysis.proto @@ -0,0 +1,154 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package mediapipe;
+
+import "mediapipe/util/tracking/motion_estimation.proto";
+import "mediapipe/util/tracking/motion_saliency.proto";
+import "mediapipe/util/tracking/region_flow_computation.proto";
+
+// Settings for MotionAnalysis. This class computes sparse, locally consistent
+// flow (referred to as region flow), camera motions, and foreground saliency
+// (i.e. likely foreground objects moving differently from the background).
+// Next tag: 16
+message MotionAnalysisOptions {
+ // Pre-configured policies for MotionAnalysis.
+ // For general use, it is recommended to select an appropriate policy
+ // instead of customizing flow and motion options by hand.
+ // Policies are being kept up to date with appropriate settings.
+ enum AnalysisPolicy {
+ // Default legacy options. Effectively a no-op.
+ ANALYSIS_POLICY_LEGACY = 0;
+
+ // Use for video.
+ ANALYSIS_POLICY_VIDEO = 1;
+
+ // Use for video on mobile.
+ ANALYSIS_POLICY_VIDEO_MOBILE = 2;
+
+ // Use if applied to camera stream on mobile, e.g.
+ // low latency and high throughput.
+ // ASSUMES DOWNSAMPLED INPUT, e.g. from GPU.
+ ANALYSIS_POLICY_CAMERA_MOBILE = 3;
+
+ // Use for sped up video / hyperlapse when adding frames with seeds
+ // and rejection transforms. Mostly ups temporal consistency weights
+ // and relaxes stability constraints.
+ // Only recommended to be used as second pass after initial MotionAnalysis
+ // and FrameSelection.
+ ANALYSIS_POLICY_HYPERLAPSE = 4;
+ }
+
+ optional AnalysisPolicy analysis_policy = 14
+ [default = ANALYSIS_POLICY_LEGACY];
+
+ // Options for the actual motion stabilization
+ // (in order of object usage).
+ optional RegionFlowComputationOptions flow_options = 1;
+ optional MotionEstimationOptions motion_options = 2;
+ optional MotionSaliencyOptions saliency_options = 3;
+
+ // Clip-size used for (parallelized) motion estimation.
+ optional int32 estimation_clip_size = 4 [default = 16];
+
+ // If set, camera motion is subtracted from features before output.
+ // Effectively outputs residual motion w.r.t. background.
+ optional bool subtract_camera_motion_from_features = 5 [default = false];
+
+ // If flow_options().tracking_options().tracking_policy() equals
+ // POLICY_MULTI_FRAME, this flag indicates which RegionFlowFeatureList to use.
+ // Specifically, for frame C, we use the motion from C to C - 1 - track_index.
+ optional int32 track_index = 6 [default = 0];
+
+ // If set, compute motion saliency (regions of moving foreground).
+ optional bool compute_motion_saliency = 7 [default = false];
+
+ // Selects saliency inliers (only saliency locations with sufficient
+ // spatial and temporal support are kept).
+ // Only applied when compute_motion_saliency is set.
+ optional bool select_saliency_inliers = 8 [default = true];
+
+ // Performs spatio-temporal filtering of extracted foreground saliency. If
+ // used with above selection of saliency inliers, filtering is performed
+ // *after* inlier selection.
+ // Only applied when compute_motion_saliency is set.
+ optional bool filter_saliency = 9 [default = true];
+
+ // If set, irls weights of motion estimation are spatio-temporally smoothed
+ // after model estimation.
+ optional bool post_irls_smoothing = 10 [default = false];
+
+ // If a rejection_transform is passed to AddFrameGeneric, features that
+ // do not agree with the transform within below threshold are removed.
+ optional float rejection_transform_threshold = 13 [default = 20.0];
+
+ // Adapts visualization for rendered_results when passed to GetResults.
+ message VisualizationOptions {
+ // Visualizes tracked region flow features, colored w.r.t. fitting error.
+ optional bool visualize_region_flow_features = 1 [default = true];
+
+ // Visualizes salient points. Only applicable if compute_motion_saliency is
+ // set to true.
+ optional bool visualize_salient_points = 2 [default = false];
+
+ // Line thickness of ellipse when rendering salient points.
+ optional int32 line_thickness = 5 [default = 4];
+
+ // Instead of green burn in uses jet coloring to indicate magnitude of
+ // foreground motion.
+ optional bool foreground_jet_coloring = 3 [default = false];
+
+ // If set, only keeps masks of pixels that are used for blur analysis, rest
+ // is set to zero.
+ optional bool visualize_blur_analysis_region = 4 [default = false];
+
+ optional bool visualize_stats = 6 [default = true];
+
+ // Only long feature tracks with specified minimum length are rendered.
+ // Set to zero to consider all tracks.
+ optional int32 min_long_feature_track = 7 [default = 0];
+
+ // Only the last N points of a long feature track are rendered. Set to zero
+ // to render all points.
+ optional int32 max_long_feature_points = 8 [default = 0];
+ }
+
+ optional VisualizationOptions visualization_options = 11;
+
+ // Describes how to compute foreground from features.
+ message ForegroundOptions {
+ // Indicates the *inverse* registration error (i.e. the irls weight)
+ // that is deemed a complete inlier.
+ // Weights in the interval [0, foreground_threshold] (corresponding to
+ // pixel errors in the interval [1 / foreground_threshold, inf])
+ // are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero
+ // with weights below the threshold being assigned values > 0.
+ // Therefore, larger values will increase amount of detected foreground
+ // as well as noise.
+ optional float foreground_threshold = 1 [default = 0.5];
+
+ // By using foreground_gamma < 1.0 you can increase resolution of small
+ // foreground motion at the expense of the resolution of large foreground
+ // motions.
+ optional float foreground_gamma = 2 [default = 1.0];
+
+ // Threshold is scaled by coverage, i.e. for frames with large registration
+ // error less foreground is visualized.
+ optional bool threshold_coverage_scaling = 3 [default = true]; + } + + optional ForegroundOptions foreground_options = 12; +} diff --git a/mediapipe/util/tracking/motion_estimation.cc b/mediapipe/util/tracking/motion_estimation.cc new file mode 100644 index 000000000..311424dd8 --- /dev/null +++ b/mediapipe/util/tracking/motion_estimation.cc @@ -0,0 +1,6009 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/motion_estimation.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Dense" +#include "Eigen/QR" +#include "Eigen/SVD" +#include "absl/container/node_hash_map.h" +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/util/tracking/camera_motion.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/parallel_invoker.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +constexpr float kIrlsEps = 1e-4f; +constexpr float kOutlierIRLSWeight = 1e-10f; +constexpr float kMaxCondition = 1e30f; + +constexpr float kPrecision = 0.1f; + +typedef RegionFlowFrame::RegionFlow RegionFlow; +typedef RegionFlowFeature Feature; + 
+namespace { + +void GenericFit( + const RegionFlowFeatureList& features, + const std::function& est_func, + CameraMotion* motion) { + MotionEstimationOptions options; + options.set_irls_rounds(1); + options.set_use_exact_homography_estimation(false); + options.set_use_highest_accuracy_for_normal_equations(false); + + MotionEstimation motion_est(options, features.frame_width(), + features.frame_height()); + + RegionFlowFeatureList local = features; + NormalizeRegionFlowFeatureList(&local); + est_func(&motion_est, &local, motion); +} + +} // namespace. + +TranslationModel FitTranslationModel(const RegionFlowFeatureList& features) { + CameraMotion motion; + GenericFit(features, &MotionEstimation::EstimateTranslationModel, &motion); + return motion.translation(); +} + +LinearSimilarityModel FitLinearSimilarityModel( + const RegionFlowFeatureList& features) { + CameraMotion motion; + GenericFit(features, &MotionEstimation::EstimateLinearSimilarityModel, + &motion); + return motion.linear_similarity(); +} + +AffineModel FitAffineModel(const RegionFlowFeatureList& features) { + CameraMotion motion; + GenericFit(features, &MotionEstimation::EstimateAffineModel, &motion); + return motion.affine(); +} + +Homography FitHomography(const RegionFlowFeatureList& features) { + CameraMotion motion; + GenericFit(features, &MotionEstimation::EstimateHomography, &motion); + return motion.homography(); +} + +MixtureHomography FitMixtureHomography(const RegionFlowFeatureList& features) { + CameraMotion motion; + GenericFit(features, &MotionEstimation::EstimateMixtureHomography, &motion); + return motion.mixture_homography(); +} + +// Records inlier state across frames. +// Specifically records spatial position and average magnitude of inliers +// over time (motion prior). +// New sample points can be weighted w.r.t. their agreement of spatial inlier +// locations and motion prior. +class InlierMask { + public: + // Initialize mask from options for specified frame domain. 
+ InlierMask(const MotionEstimationOptions::IrlsMaskOptions& options, + int feature_mask_size, int frame_width, int frame_height) + : options_(options), + frame_width_(frame_width), + frame_height_(frame_height) { + const int num_bins = feature_mask_size * feature_mask_size; + mask_.resize(num_bins); + update_mask_.resize(num_bins); + + const LinearSimilarityModel norm_model = + LinearSimilarityAdapter::NormalizationTransform(frame_width_, + frame_height_); + const Vector2_f domain = LinearSimilarityAdapter::TransformPoint( + norm_model, Vector2_f(frame_width_, frame_height_)); + denom_x_ = 1.0f / domain.x(); + denom_y_ = 1.0f / domain.y(); + base_score_ = options_.base_score(); + } + + // Resets mask to all inliers. + void InitMask() { + mask_.assign(mask_.size(), 1.0f); + translation_prior_ = 0; + } + + // Applies update mask to mask. + void UpdateMask() { update_mask_.swap(mask_); } + + void UpdateTranslation(Vector2_f translation) { + const float alpha = options_.translation_blend_alpha() * translation_prior_; + translation_ = translation_ * alpha + (1.0 - alpha) * translation; + translation_prior_ = std::min( + 1.0f, translation_prior_ + options_.translation_prior_increase()); + } + + // Initialize update mask from current mask, by decaying each element. + void InitUpdateMask() { + const float decay = options_.decay(); + for (int k = 0; k < mask_.size(); ++k) { + update_mask_[k] = mask_[k] * decay; + } + } + + // Returns inlier score for bin index. + // Can be > 1, as we take the best inlier score compared to other iterations, + // only relative values matter. + float GetInlierScore(int idx) const { return base_score_ + mask_[idx]; } + + // Increases inlier score at bin idx. 
+ void RecordInlier(int idx, float feature_weight) { + update_mask_[idx] = std::min( + 1.0f, update_mask_[idx] + feature_weight * options_.inlier_score()); + } + + // Multiplies passed motion prior with a weight within [0, 1] for each + // feature point describing how well feature's motion agrees with previously + // estimated translation. + void MotionPrior(const RegionFlowFeatureList& feature_list, + std::vector* motion_prior) { + CHECK(motion_prior != nullptr); + const int num_features = feature_list.feature_size(); + CHECK_EQ(num_features, motion_prior->size()); + + // Return, if prior is too low. + const float kMinTranslationPrior = 0.5f; + if (translation_prior_ < kMinTranslationPrior) { + motion_prior->assign(num_features, 1.0f); + return; + } + const float prev_magnitude = translation_.Norm(); + + CHECK_EQ(num_features, motion_prior->size()); + const float inv_prev_magnitude = + prev_magnitude < options_.min_translation_norm() + ? (1.0f / options_.min_translation_norm()) + : (1.0f / prev_magnitude); + for (int k = 0; k < num_features; ++k) { + const Vector2_f flow = FeatureFlow(feature_list.feature(k)); + const float weight = + base_score_ + std::max(0, 1.0f - (flow - translation_).Norm() * + inv_prev_magnitude); + (*motion_prior)[k] *= weight; + } + } + + private: + MotionEstimationOptions::IrlsMaskOptions options_; + int frame_width_; + int frame_height_; + float denom_x_; + float denom_y_; + float base_score_; + + Vector2_f translation_; + float translation_prior_ = 0; + + std::vector mask_; + std::vector update_mask_; +}; + +// Local storage for MotionEstimation within each thread to minimize +// allocations. 
+class MotionEstimationThreadStorage { + public: + MotionEstimationThreadStorage(const MotionEstimationOptions& options, + const MotionEstimation* motion_estimation, + int max_feature_guess = 0) { + const int coverage_grid_size = options.coverage_grid_size(); + grid_coverage_irls_mask_.resize(coverage_grid_size * coverage_grid_size); + const int max_features = max_feature_guess > 0 ? max_feature_guess : 4000; + + // Allocate bins to 150% of expected features. + const int features_per_bin = + max_features * 1.5f / grid_coverage_irls_mask_.size(); + + for (auto& mask : grid_coverage_irls_mask_) { + mask.reserve(features_per_bin); + } + + // Compute gaussian weights for grid coverage. + const float scaled_width = + 1.0f / motion_estimation->normalized_domain_.x() * coverage_grid_size; + const float scaled_height = + 1.0f / motion_estimation->normalized_domain_.y() * coverage_grid_size; + + const float inv_scaled_width = 1.0f / scaled_width; + const float inv_scaled_height = 1.0f / scaled_height; + + // Compute gaussian weights for grid cells. + RegionFlowFeatureList grid_cell_features; + for (int y = 0; y < coverage_grid_size; ++y) { + for (int x = 0; x < coverage_grid_size; ++x) { + RegionFlowFeature* feature = grid_cell_features.add_feature(); + feature->set_x((x + 0.5f) * inv_scaled_width); + feature->set_y((y + 0.5f) * inv_scaled_height); + } + } + + motion_estimation->GetHomographyIRLSCenterWeights(grid_cell_features, + &grid_cell_weights_); + } + + MotionEstimationThreadStorage(const MotionEstimationThreadStorage&) = delete; + MotionEstimationThreadStorage& operator=( + const MotionEstimationThreadStorage&) = delete; + + std::vector>* EmptyGridCoverageIrlsMask() { + for (auto& mask : grid_coverage_irls_mask_) { + mask.clear(); + } + return &grid_coverage_irls_mask_; + } + + const std::vector& GridCoverageInitializationWeights() const { + return grid_cell_weights_; + } + + // Creates copy of current thread storage, caller takes ownership. 
+ std::unique_ptr Copy() const { + std::unique_ptr copy( + new MotionEstimationThreadStorage); + copy->grid_coverage_irls_mask_ = grid_coverage_irls_mask_; + copy->grid_cell_weights_ = grid_cell_weights_; + return copy; + } + + private: + // Empty constructor for Copy. + MotionEstimationThreadStorage() {} + + std::vector> grid_coverage_irls_mask_; + std::vector grid_cell_weights_; +}; + +// Holds all the data for a clip (multiple frames) of single-frame tracks. +struct MotionEstimation::SingleTrackClipData { + // Features to be processed. Can be set to point to external data, or + // point to internal storage via InitializeFromInternalStorage. + // Stores one RegionFlowFeatureList pointer per frame. + std::vector* feature_lists = nullptr; + + // Camera motions to be output. Can be set to point to external data, or + // point to internal storage via InitializeFromInternalStorage. + // Stores one camera motion per frame. + std::vector* camera_motions = nullptr; + + // Difference in frames that features and motions are computed for. + int frame_diff = 1; + + // Prior weights for each frame. + std::vector prior_weights; + + // Optional inlier mask. Used across the whole clip. + InlierMask* inlier_mask = nullptr; + + // Weights to be passed to each stage of motion estimation. + std::vector> irls_weight_input; + + // Indicates if weights in above vectors are uniform (to avoid testing on + // each loop iteration). + std::vector uniform_weight_input; + + // Indicates if non-decaying full prior should be used + // (always bias towards initialization). + std::vector use_full_prior; + + // Specific weights for homography. + std::vector> homog_irls_weight_input; + + // Storage for earlier weights, in case estimated model is unstable. + std::vector>* irls_weight_backup = nullptr; + + // If above feature_lists and camera_motions are not a view on external + // data, storage holds underlying data. 
+ std::vector feature_storage; + std::vector feature_view; + std::vector motion_storage; + std::vector> irls_backup_storage; + + // Call after populating feature_storage and motion_storage with data, to + // initialize feature_lists and camera_motions. + void InitializeFromInternalStorage() { + feature_view.reserve(feature_storage.size()); + + for (auto& feature_list : feature_storage) { + feature_view.push_back(&feature_list); + } + + feature_lists = &feature_view; + camera_motions = &motion_storage; + } + + // Call after initializing feature_lists, to allocate storage for each + // feature's irls weight. If weight_backup is set, allocates storage + // to backup and reset irls weights. + void AllocateIRLSWeightStorage(bool weight_backup) { + CHECK(feature_lists != nullptr); + const int num_frames = feature_lists->size(); + if (weight_backup) { + irls_weight_backup = &irls_backup_storage; + } + + if (num_frames == 0) { + return; + } + + irls_weight_input.resize(num_frames); + uniform_weight_input.resize(num_frames, true); + use_full_prior.resize(num_frames, false); + homog_irls_weight_input.resize(num_frames); + + if (weight_backup) { + irls_weight_backup->resize(num_frames); + } + + for (int k = 0; k < num_frames; ++k) { + const int num_features = (*feature_lists)[k]->feature_size(); + if (num_features != 0) { + irls_weight_input[k].reserve(num_features); + homog_irls_weight_input[k].reserve(num_features); + } + } + } + + // Returns number of frames in this clip. + int num_frames() const { + DCHECK(feature_lists); + return feature_lists->size(); + } + + // Returns irls weight input depending on the passed motion type. + std::vector>& IrlsWeightInput(const MotionType& type) { + switch (type) { + case MODEL_HOMOGRAPHY: + return homog_irls_weight_input; + default: + return irls_weight_input; + } + } + + // Checks that SingleTrackClipData is properly initialized. 
+ void CheckInitialization() const { + CHECK(feature_lists != nullptr); + CHECK(camera_motions != nullptr); + CHECK_EQ(feature_lists->size(), camera_motions->size()); + if (feature_lists->empty()) { + return; + } + + CHECK_EQ(num_frames(), irls_weight_input.size()); + CHECK_EQ(num_frames(), homog_irls_weight_input.size()); + if (irls_weight_backup) { + CHECK_EQ(num_frames(), irls_weight_backup->size()); + } + + for (int k = 0; k < num_frames(); ++k) { + const int num_features = (*feature_lists)[k]->feature_size(); + CHECK_EQ(num_features, irls_weight_input[k].size()); + CHECK_EQ(num_features, homog_irls_weight_input[k].size()); + } + } + + // Prepares PriorFeatureWeights structure for usage. + void SetupPriorWeights(int irls_rounds) { + prior_weights.resize(num_frames(), PriorFeatureWeights(irls_rounds)); + for (int k = 0; k < num_frames(); ++k) { + prior_weights[k].use_full_prior = use_full_prior[k]; + } + } + + // Clears the specified flag from each camera motion. + void ClearFlagFromMotion(int flag) { + for (auto& camera_motion : *camera_motions) { + camera_motion.set_flags(camera_motion.flags() & ~flag); + } + } + + // Resets feature weights from backed up ones if type is <= + // max_unstable_type. 
+ void RestoreWeightsFromBackup(CameraMotion::Type max_unstable_type) { + if (irls_weight_backup == nullptr) { + return; + } + + const int num_frames = feature_lists->size(); + for (int k = 0; k < num_frames; ++k) { + if (camera_motions->at(k).type() <= max_unstable_type) { + SetRegionFlowFeatureIRLSWeights(irls_weight_backup->at(k), + feature_lists->at(k)); + } + } + } +}; + +MotionEstimation::MotionEstimation(const MotionEstimationOptions& options, + int frame_width, int frame_height) + : frame_width_(frame_width), frame_height_(frame_height) { + normalization_transform_ = LinearSimilarityAdapter::NormalizationTransform( + frame_width_, frame_height_); + + inv_normalization_transform_ = + LinearSimilarityAdapter::Invert(normalization_transform_); + + // Cap domain to express IRLS errors to 640x360 (format used + // to calibrate thresholds on dataset). + const int max_irls_width = frame_width_ > frame_height ? 640 : 360; + const int max_irls_height = frame_width_ > frame_height ? 360 : 640; + const int irls_width = std::min(max_irls_width, frame_width_); + const int irls_height = std::min(max_irls_height, frame_height_); + irls_transform_ = ModelInvert( + LinearSimilarityAdapter::NormalizationTransform(irls_width, irls_height)); + if (!options.domain_limited_irls_scaling()) { + // Fallback to inverse normalization transform, i.e. express errors + // in image domain. + irls_transform_ = inv_normalization_transform_; + } + + normalized_domain_ = TransformPoint(normalization_transform_, + Vector2_f(frame_width_, frame_height_)); + + InitializeWithOptions(options); +} + +MotionEstimation::~MotionEstimation() {} + +void MotionEstimation::InitializeWithOptions( + const MotionEstimationOptions& options) { + // Check options, specifically if fall-back models are set to be estimated. 
+ if (options.homography_estimation() != + MotionEstimationOptions::ESTIMATION_HOMOG_NONE && + options.linear_similarity_estimation() == + MotionEstimationOptions::ESTIMATION_LS_NONE) { + LOG(FATAL) << "Invalid MotionEstimationOptions. " + << "Homography estimation requires similarity to be estimated"; + } + + if (options.mix_homography_estimation() != + MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE && + options.homography_estimation() == + MotionEstimationOptions::ESTIMATION_HOMOG_NONE) { + LOG(FATAL) << "Invalid MotionEstimationOptions. " + << "Mixture homography estimation requires homography to be " + << "estimated."; + } + + // Check for deprecated options. + CHECK_NE(options.estimate_similarity(), true) + << "Option estimate_similarity is deprecated, use static function " + << "EstimateSimilarityModelL2 instead."; + CHECK_NE(options.linear_similarity_estimation(), + MotionEstimationOptions::ESTIMATION_LS_L2_RANSAC) + << "Option ESTIMATION_LS_L2_RANSAC is deprecated, use " + << "ESTIMATION_LS_IRLS instead."; + CHECK_NE(options.linear_similarity_estimation(), + MotionEstimationOptions::ESTIMATION_LS_L1) + << "Option ESTIMATION_LS_L1 is deprecated, use static function " + << "EstimateLinearSimilarityL1 instead."; + + options_ = options; + + // (Re)-Initialize row_weights_ based on options. + if (options.mix_homography_estimation() != + MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE) { + const float row_sigma = options.mixture_row_sigma() * frame_height_; + const float y_scale = frame_height_ / normalized_domain_.y(); + + if (row_weights_ == NULL || + row_weights_->NeedsInitialization(options.num_mixtures(), row_sigma, + y_scale)) { + row_weights_.reset(new MixtureRowWeights(frame_height_, + 0, // no margin. 
+ row_sigma, y_scale, + options.num_mixtures())); + } + } + + switch (options.estimation_policy()) { + case MotionEstimationOptions::INDEPENDENT_PARALLEL: + case MotionEstimationOptions::JOINTLY_FROM_TRACKS: + break; + + case MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS: { + const auto& bias_options = options.long_feature_bias_options(); + + // Using 3x3 filters, max distance is 2 bin diagonals plus 1% room + // incase maximum value is attained. + const float max_space_diff = + 2.0f * sqrt(2.0) * bias_options.grid_size() * 1.01f; + + InitGaussLUT(bias_options.spatial_sigma(), max_space_diff, + &feature_bias_lut_.spatial_lut, + &feature_bias_lut_.spatial_scale); + + const float max_color_diff = + std::sqrt(static_cast(3.0f)) * 255.0f; // 3 channels. + InitGaussLUT(bias_options.color_sigma(), max_color_diff, + &feature_bias_lut_.color_lut, + &feature_bias_lut_.color_scale); + + // Gaussian at 2.5 (normalized) < 0.05 + const float max_weight = bias_options.bias_stdev() * 2.5 * 1.01f; + InitGaussLUT(bias_options.bias_stdev(), max_weight, + &feature_bias_lut_.bias_weight_lut, + &feature_bias_lut_.bias_weight_scale); + + break; + } + + case MotionEstimationOptions::TEMPORAL_IRLS_MASK: + CHECK(options.irls_initialization().activated()) + << "To use dependent_initialization, irls_initialization has to " + << "be activated. 
"; + inlier_mask_.reset(new InlierMask(options.irls_mask_options(), + options.feature_mask_size(), + frame_width_, frame_height_)); + inlier_mask_->InitMask(); + break; + } +} + +void MotionEstimation::EstimateMotion(const RegionFlowFrame& region_flow_frame, + const int* intensity_frame, // null + const int* prev_intensity_frame, // null + CameraMotion* camera_motion) const { + CHECK(camera_motion); + + CHECK(intensity_frame == NULL) + << "Parameter intensity_frame is deprecated, must be NULL."; + CHECK(prev_intensity_frame == NULL) + << "Parameter prev_intensity_frame is deprecated, must be NULL."; + + RegionFlowFeatureList feature_list; + GetRegionFlowFeatureList(region_flow_frame, 0, &feature_list); + std::vector feature_lists(1, &feature_list); + std::vector camera_motions(1); + + EstimateMotionsParallel(false, &feature_lists, &camera_motions); + camera_motion->CopyFrom(camera_motions[0]); +} + +bool MotionEstimation::EstimateTranslationModel( + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) { + EstimateTranslationModelIRLS(options_.irls_rounds(), false, feature_list, + nullptr, camera_motion); + return true; +} + +bool MotionEstimation::EstimateLinearSimilarityModel( + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) { + return EstimateLinearSimilarityModelIRLS( + options_.irls_rounds(), false, feature_list, nullptr, camera_motion); +} + +bool MotionEstimation::EstimateAffineModel(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion) { + return EstimateAffineModelIRLS(options_.irls_rounds(), feature_list, + camera_motion); +} + +bool MotionEstimation::EstimateHomography(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion) { + return EstimateHomographyIRLS(options_.irls_rounds(), false, nullptr, nullptr, + feature_list, camera_motion); +} + +bool MotionEstimation::EstimateMixtureHomography( + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) { + return 
EstimateMixtureHomographyIRLS( + options_.irls_rounds(), true, options_.mixture_regularizer(), + 0, // spectrum index. + nullptr, nullptr, feature_list, camera_motion); +} + +float MotionEstimation::GetIRLSResidualScale(const float avg_motion_magnitude, + float distance_fraction) const { + const float translation_magnitude = + LinearSimilarityAdapter::TransformPoint( + normalization_transform_, Vector2_f(avg_motion_magnitude, 0)) + .x(); + + // Assume 1 pixel estimation error for tracked features at 480p video. + // This serves as absolute minimum of the estimation error, so we do not + // scale translation fractions below this threshold. + const float minimum_error = 1.25e-3f; + + // Only normalize if residual (w.r.t. translation) is larger than + // estimation error. + const float translation_threshold = translation_magnitude * distance_fraction; + // Translation is above minimum error. + if (translation_threshold > minimum_error) { + return minimum_error / translation_threshold; + } else { + return 1.0f; + } +} + +// Collects various options that alter how motion models are initialized or +// estimated. Construct from MotionEstimationOptions for default values. +struct MotionEstimation::EstimateModelOptions { + explicit EstimateModelOptions(const MotionEstimationOptions& options) + : mixture_regularizer(options.mixture_regularizer()), + estimate_linear_similarity( + options.linear_similarity_estimation() != + MotionEstimationOptions::ESTIMATION_LS_NONE) {} + + // Maps each motion type to a unique index, whereas different mixtures in a + // spectrum are treated as separate types. 
+ int IndexFromType(const MotionType& type) const { + if (type != MODEL_MIXTURE_HOMOGRAPHY) { + return static_cast(type); + } else { + return static_cast(type) + mixture_spectrum_index; + } + } + + float mixture_regularizer = 0; + float mixture_inlier_threshold_scale = 0; + int mixture_spectrum_index = 0; + bool check_model_stability = true; + bool estimate_linear_similarity = true; +}; + +// Invoker for parallel execution. Thread storage is optional. +class EstimateMotionIRLSInvoker { + public: + // Performs estimation of the requested type for irls_rounds IRLS iterations. + // Only performs estimation if current motion type is less or equal to + // max_unstable_type. + // Optionally accepts list of PriorFeatureWeights and thread_storage. + // After model computation irls_weight member for each RegionFlowFeature + // in the passed feature_lists is updated with the inverse fitting error. + // Stability features for requested MotionType are computed if + // compute_stability argument is set. + EstimateMotionIRLSInvoker( + const MotionEstimation::MotionType& type, int irls_rounds, + bool compute_stability, const CameraMotion::Type& max_unstable_type, + const MotionEstimation::EstimateModelOptions& model_options, + const MotionEstimation* motion_estimation, + const std::vector* + prior_weights, // optional. + const MotionEstimationThreadStorage* thread_storage, // optional. 
+ std::vector* feature_lists, + std::vector* camera_motions) + : motion_type_(type), + irls_rounds_(irls_rounds), + compute_stability_(compute_stability), + max_unstable_type_(max_unstable_type), + model_options_(model_options), + motion_estimation_(motion_estimation), + prior_weights_(prior_weights), + feature_lists_(feature_lists), + camera_motions_(camera_motions) { + if (thread_storage != nullptr) { + std::unique_ptr tmp_storage( + thread_storage->Copy()); + thread_storage_ = std::move(tmp_storage); + } + } + + EstimateMotionIRLSInvoker(const EstimateMotionIRLSInvoker& invoker) + : motion_type_(invoker.motion_type_), + irls_rounds_(invoker.irls_rounds_), + compute_stability_(invoker.compute_stability_), + max_unstable_type_(invoker.max_unstable_type_), + model_options_(invoker.model_options_), + motion_estimation_(invoker.motion_estimation_), + prior_weights_(invoker.prior_weights_), + feature_lists_(invoker.feature_lists_), + camera_motions_(invoker.camera_motions_) { + if (invoker.thread_storage_ != nullptr) { + std::unique_ptr tmp_storage( + invoker.thread_storage_->Copy()); + thread_storage_ = std::move(tmp_storage); + } + } + + void operator()(const BlockedRange& range) const { + for (int frame = range.begin(); frame != range.end(); ++frame) { + EstimateMotion(frame, (*feature_lists_)[frame], + &(*camera_motions_)[frame]); + } + } + + private: + inline void EstimateMotion(int frame, RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion) const { + if (camera_motion->type() > max_unstable_type_) { + // Don't estimate anything, immediate return. + return; + } + + if (camera_motion->flags() & CameraMotion::FLAG_SINGULAR_ESTIMATION) { + // If motion became singular in earlier iteration, skip. + return; + } + + const MotionEstimation::PriorFeatureWeights* prior_weight = + prior_weights_ && (*prior_weights_)[frame].HasPrior() + ? 
&(*prior_weights_)[frame] + : nullptr; + + switch (motion_type_) { + case MotionEstimation::MODEL_AVERAGE_MAGNITUDE: + motion_estimation_->EstimateAverageMotionMagnitude(*feature_list, + camera_motion); + break; + + case MotionEstimation::MODEL_TRANSLATION: + motion_estimation_->EstimateTranslationModelIRLS( + irls_rounds_, compute_stability_, feature_list, prior_weight, + camera_motion); + break; + + case MotionEstimation::MODEL_LINEAR_SIMILARITY: + motion_estimation_->EstimateLinearSimilarityModelIRLS( + irls_rounds_, compute_stability_, feature_list, prior_weight, + camera_motion); + break; + + case MotionEstimation::MODEL_AFFINE: + motion_estimation_->EstimateAffineModelIRLS(irls_rounds_, feature_list, + camera_motion); + break; + + case MotionEstimation::MODEL_HOMOGRAPHY: + motion_estimation_->EstimateHomographyIRLS( + irls_rounds_, compute_stability_, prior_weight, + thread_storage_.get(), feature_list, camera_motion); + break; + + case MotionEstimation::MODEL_MIXTURE_HOMOGRAPHY: + // If one estimation fails, clear the whole spectrum. 
+ if (!motion_estimation_->EstimateMixtureHomographyIRLS( + irls_rounds_, compute_stability_, + model_options_.mixture_regularizer, + model_options_.mixture_spectrum_index, prior_weight, + thread_storage_.get(), feature_list, camera_motion)) { + camera_motion->clear_mixture_homography_spectrum(); + } + break; + + case MotionEstimation::MODEL_NUM_VALUES: + LOG(FATAL) << "Function should not be called with this value"; + break; + } + } + + private: + const MotionEstimation::MotionType motion_type_; + int irls_rounds_; + bool compute_stability_; + CameraMotion::Type max_unstable_type_; + const MotionEstimation::EstimateModelOptions& model_options_; + const MotionEstimation* motion_estimation_; + const std::vector* prior_weights_; + std::vector* feature_lists_; + std::vector* camera_motions_; + + std::unique_ptr thread_storage_; +}; + +void MotionEstimation::EstimateMotionsParallelImpl( + bool irls_weights_preinitialized, + std::vector* feature_lists, + std::vector* camera_motions) const { + MEASURE_TIME << "Estimate motions: " << feature_lists->size(); + + CHECK(feature_lists != nullptr); + CHECK(camera_motions != nullptr); + + const int num_frames = feature_lists->size(); + CHECK_EQ(num_frames, camera_motions->size()); + + // Initialize camera_motions. + for (int f = 0; f < num_frames; ++f) { + CameraMotion& camera_motion = (*camera_motions)[f]; + const RegionFlowFeatureList& feature_list = *(*feature_lists)[f]; + + // Resets every model to INVALID. + ResetMotionModels(options_, &camera_motion); + InitCameraMotionFromFeatureList(feature_list, &camera_motion); + + // Assume motions to be VALID in case they contain features. + // INVALID (= empty frames) wont be considered during motion estimation. + if (feature_list.feature_size() != 0) { + camera_motion.set_type(CameraMotion::VALID); + } + + // Flag duplicated frames. 
+ if (feature_list.is_duplicated()) { + camera_motion.set_flags(camera_motion.flags() | + CameraMotion::FLAG_DUPLICATED); + } + } + + // Backup original IRLS weights if original weights are requested to be + // output. + std::vector> original_irls_weights(num_frames); + if (!options_.output_refined_irls_weights()) { + for (int f = 0; f < num_frames; ++f) { + const RegionFlowFeatureList& feature_list = *(*feature_lists)[f]; + GetRegionFlowFeatureIRLSWeights(feature_list, &original_irls_weights[f]); + } + } + + const bool use_joint_tracks = options_.estimation_policy() == + MotionEstimationOptions::JOINTLY_FROM_TRACKS; + + // Joint frame estimation. + const int num_motion_models = + use_joint_tracks ? options_.joint_track_estimation().num_motion_models() + : 1; + CHECK_GT(num_motion_models, 0); + + // Several single track clip datas, we seek to process. + std::vector clip_datas(num_motion_models); + SingleTrackClipData* main_clip_data = &clip_datas[0]; + + // First clip data is always view on external data. + main_clip_data->feature_lists = feature_lists; + main_clip_data->camera_motions = camera_motions; + main_clip_data->inlier_mask = inlier_mask_.get(); + main_clip_data->frame_diff = 1; + + main_clip_data->AllocateIRLSWeightStorage(true); + + LongFeatureInfo long_feature_info; + + if (irls_weights_preinitialized && + options_.filter_initialized_irls_weights()) { + MinFilterIrlsWeightByTrack(main_clip_data); + } + + // Determine importance for each track length. + std::vector track_length_importance; + if (options_.long_feature_initialization().activated()) { + for (const auto feature_list_ptr : *feature_lists) { + if (feature_list_ptr->long_tracks()) { + long_feature_info.AddFeatures(*feature_list_ptr); + } + } + + const float percentile = + options_.long_feature_initialization().min_length_percentile(); + const float min_length = long_feature_info.GlobalTrackLength(percentile); + + track_length_importance.resize(num_frames + 1, 1.0f); + // Gaussian weighting. 
+ const float denom = -0.5f / (2.0f * 2.0f); // 2 frame stdev. + for (int k = 0; k <= num_frames; ++k) { + float weight = 1.0f; + if (k < min_length) { + weight = std::exp((k - min_length) * (k - min_length) * denom); + } + track_length_importance[k] = weight; + } + } + + int max_features = 0; + for (int f = 0; f < num_frames; ++f) { + // Initialize IRLS input. + const RegionFlowFeatureList& feature_list = + *(*main_clip_data->feature_lists)[f]; + + max_features = std::max(feature_list.feature_size(), max_features); + + std::vector& irls_weight_input = + main_clip_data->irls_weight_input[f]; + + if (irls_weights_preinitialized) { + GetRegionFlowFeatureIRLSWeights(feature_list, &irls_weight_input); + } else { + // Set to one. + irls_weight_input.resize(feature_list.feature_size(), 1.0f); + } + + // Note: To create visualizations of the prior stored in irls_weight_input, + // add a call to + // SetRegionFlowFeatureIRLSWeights(irls_weight_input, &feature_list); + // anywhere below and adopt irls_rounds to zero in options. + // This will effectively output identity motions with irls weights set + // to the priors here. + bool uniform_weights = !irls_weights_preinitialized; + bool use_full_prior = false; + + if (options_.long_feature_initialization().activated()) { + if (!feature_list.long_tracks()) { + LOG(ERROR) << "Requesting long feature initialization but " + << "input is not computed with long features."; + } else { + LongFeatureInitialization(feature_list, long_feature_info, + track_length_importance, &irls_weight_input); + uniform_weights = false; + use_full_prior = true; + } + } + + if (options_.feature_density_normalization()) { + FeatureDensityNormalization(feature_list, &irls_weight_input); + uniform_weights = false; + use_full_prior = true; + } + + GetHomographyIRLSCenterWeights(feature_list, + &main_clip_data->homog_irls_weight_input[f]); + + if (!uniform_weights) { + // Multiply homography weights by non-uniform irls input weights. 
+ std::vector& homg_weights = + main_clip_data->homog_irls_weight_input[f]; + + const int num_features = feature_list.feature_size(); + for (int k = 0; k < num_features; ++k) { + homg_weights[k] *= irls_weight_input[k]; + } + } + + main_clip_data->uniform_weight_input[f] = uniform_weights; + main_clip_data->use_full_prior[f] = use_full_prior; + } + + if (options_.estimation_policy() == + MotionEstimationOptions::JOINTLY_FROM_TRACKS) { + for (int k = 1; k < num_motion_models; ++k) { + SingleTrackClipData* curr_clip_data = &clip_datas[k]; + // Camera motions are simply a copy of the initialized main data. + curr_clip_data->motion_storage = *camera_motions; + curr_clip_data->feature_storage.resize(num_frames); + curr_clip_data->InitializeFromInternalStorage(); + + // No backup storage for non-main data (feature's are not output). + curr_clip_data->AllocateIRLSWeightStorage(false); + const int stride = options_.joint_track_estimation().motion_stride(); + curr_clip_data->frame_diff = stride * k; + + // Populate feature and motion storage. + for (int f = 0; f < num_frames; ++f) { + const int prev_frame = f - stride * k; + if (prev_frame < 0) { + CopyToEmptyFeatureList((*feature_lists)[f], + &curr_clip_data->feature_storage[f]); + } else { + // Determine features present in both frames along the long feature + // tracks. + std::vector source_idx; + IntersectRegionFlowFeatureList(*(*feature_lists)[prev_frame], + &FeatureLocation, (*feature_lists)[f], + &curr_clip_data->feature_storage[f], + &source_idx); + + curr_clip_data->irls_weight_input[f].reserve(source_idx.size()); + curr_clip_data->homog_irls_weight_input[f].reserve(source_idx.size()); + + // Copy weights. 
+ for (int idx : source_idx) { + curr_clip_data->irls_weight_input[f].push_back( + main_clip_data->irls_weight_input[f][idx]); + curr_clip_data->homog_irls_weight_input[f].push_back( + main_clip_data->homog_irls_weight_input[f][idx]); + } + } + } + } + } + + for (auto& clip_data : clip_datas) { + clip_data.CheckInitialization(); + } + + for (auto& clip_data : clip_datas) { + // Estimate AverageMotion magnitudes. + ParallelFor(0, num_frames, 1, + EstimateMotionIRLSInvoker( + MODEL_AVERAGE_MAGNITUDE, + 1, // Does not use irls. + true, // Compute stability. + CameraMotion::VALID, DefaultModelOptions(), this, + nullptr, // No prior weights. + nullptr, // No thread storage. + clip_data.feature_lists, clip_data.camera_motions)); + } + + // Order of estimation for motion models: + // Translation -> Linear Similarity -> Affine -> Homography -> Mixture + // Homography. + + // Estimate translations, regardless of stability of similarity. + EstimateMotionModels(MODEL_TRANSLATION, + CameraMotion::UNSTABLE, // Any but invalid. + DefaultModelOptions(), + nullptr, // No thread storage. + &clip_datas); + + // Estimate linear similarity, but only if translation was deemed stable. + EstimateMotionModels(MODEL_LINEAR_SIMILARITY, CameraMotion::VALID, + DefaultModelOptions(), + nullptr, // No thread storage. + &clip_datas); + + if (options_.project_valid_motions_down()) { + ProjectMotionsDown(MODEL_LINEAR_SIMILARITY, camera_motions); + } + + // Estimate affine, but only if similarity was deemed stable. + EstimateMotionModels(MODEL_AFFINE, CameraMotion::VALID, DefaultModelOptions(), + nullptr, // No thread storage. + &clip_datas); + + // Thread storage below is only used for homography or mixtures. + MotionEstimationThreadStorage thread_storage(options_, this, max_features); + + // Estimate homographies, only if similarity was deemed stable. 
+ EstimateMotionModels(MODEL_HOMOGRAPHY, CameraMotion::VALID, + DefaultModelOptions(), &thread_storage, &clip_datas); + + if (options_.project_valid_motions_down()) { + // If homography is unstable, then whatever was deemed stable got + // embedded here and is now down projected again. + ProjectMotionsDown(MODEL_HOMOGRAPHY, camera_motions); + } + + // Estimate mixtures. We attempt estimation as long as at least + // translation was estimated stable. + // Estimate mixtures across a spectrum a different regularizers, from the + // weakest to the most regularized one. + const int num_mixture_levels = options_.mixture_regularizer_levels(); + CHECK_LE(num_mixture_levels, 10) << "Only up to 10 mixtures are supported."; + + // Initialize to weakest regularizer. + float regularizer = options_.mixture_regularizer(); + + // Initialize with maximum of weakest regularized mixture. + float inlier_threshold_scale = + std::pow(static_cast(options_.mixture_regularizer_base()), + static_cast(options_.mixture_regularizer_levels() - 1)); + + bool base_mixture_estimated = false; + for (int m = 0; m < num_mixture_levels; ++m) { + EstimateModelOptions options = DefaultModelOptions(); + options.mixture_regularizer = regularizer; + options.mixture_inlier_threshold_scale = inlier_threshold_scale; + options.mixture_spectrum_index = m; + // Only check stability for weakest regularized mixture. + options.check_model_stability = m == 0; + // Estimate weakest mixture even if similarity was deemed unstable, higher + // mixtures only if weakest mixture was deemed stable. + const bool estimate_result = EstimateMotionModels( + MODEL_MIXTURE_HOMOGRAPHY, + m == 0 ? 
CameraMotion::UNSTABLE : CameraMotion::VALID, options, + &thread_storage, &clip_datas); + + if (m == 0) { + base_mixture_estimated = estimate_result; + } + + regularizer *= options_.mixture_regularizer_base(); + inlier_threshold_scale /= options_.mixture_regularizer_base(); + + // Preserve IRLS weights from the very first mixture (all stability is + // computed w.r.t. it). + if (base_mixture_estimated && m > 0) { + for (auto& clip_data : clip_datas) { + clip_data.RestoreWeightsFromBackup(CameraMotion::VALID); + } + } + } + + // Check that mixture spectrum has sufficient entries. + for (const CameraMotion& motion : *camera_motions) { + if (motion.mixture_homography_spectrum_size() > 0) { + CHECK_EQ(motion.mixture_homography_spectrum_size(), + options_.mixture_regularizer_levels()); + } + } + + IRLSWeightFilter(feature_lists); + + if (!options_.output_refined_irls_weights()) { + for (int f = 0; f < num_frames; ++f) { + RegionFlowFeatureList& feature_list = *(*feature_lists)[f]; + SetRegionFlowFeatureIRLSWeights(original_irls_weights[f], &feature_list); + } + } + + // Lift model type from INVALID for empty frames to VALID if requested. + if (options_.label_empty_frames_as_valid()) { + for (int f = 0; f < num_frames; ++f) { + CameraMotion& camera_motion = (*camera_motions)[f]; + if ((*feature_lists)[f]->feature_size() == 0) { + camera_motion.set_type(CameraMotion::VALID); + } + } + } +} + +MotionEstimation::EstimateModelOptions MotionEstimation::DefaultModelOptions() + const { + return EstimateModelOptions(options_); +} + +// In the following member refers to member in SingleTrackClipData. +// For each estimation invocation, irls weights of features are set from +// member irls_weight_input. +// Motion models are estimated from member feature_list and stored in +// member camera_motions. 
+bool MotionEstimation::EstimateMotionModels( + const MotionType& type, const CameraMotion::Type& max_unstable_type, + const EstimateModelOptions& model_options, + const MotionEstimationThreadStorage* thread_storage, + std::vector* clip_datas) const { + CHECK(clip_datas != nullptr); + + const int num_datas = clip_datas->size(); + if (num_datas == 0) { + return false; + } + + for (const auto& clip_data : *clip_datas) { + clip_data.CheckInitialization(); + } + + // Perform estimation across all frames for several total_rounds with each + // round having irls_per_round iterations. + int irls_per_round = 1; + int total_rounds = 1; + + PolicyToIRLSRounds(IRLSRoundsFromSettings(type), &total_rounds, + &irls_per_round); + + const int total_irls_rounds = irls_per_round * total_rounds; + + // Skip if nothing to estimate. + if (total_irls_rounds == 0) { + return false; + } + + // Setup each clip data for this estimation round. + for (auto& clip_data : *clip_datas) { + clip_data.SetupPriorWeights(irls_per_round); + // Clear flag from earlier model estimation. + clip_data.ClearFlagFromMotion(CameraMotion::FLAG_SINGULAR_ESTIMATION); + } + + if (options_.estimation_policy() != + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS) { + // Irls initialization for each list. + for (auto& clip_data : *clip_datas) { + IrlsInitialization(type, max_unstable_type, + -1, // all frames. + model_options, &clip_data); + } + + // Parallel estimation across frames. + for (int r = 0; r < total_rounds; ++r) { + // Setup, default decaying irls alphas. + std::vector irls_alphas(irls_per_round); + for (int k = 0; k < irls_per_round; ++k) { + irls_alphas[k] = + IRLSPriorWeight(r * irls_per_round + k, total_irls_rounds); + } + + for (auto& clip_data : *clip_datas) { + // Setup prior's alphas. 
+ for (auto& prior_weight : clip_data.prior_weights) { + if (prior_weight.use_full_prior) { + prior_weight.alphas.assign(irls_per_round, 1.0f); + } else { + prior_weight.alphas = irls_alphas; + } + + // Last iteration, irls_alpha is always zero to return actual error. + if (r + 1 == total_rounds) { + prior_weight.alphas.back() = 0.0; + } + } + + const bool last_round = r + 1 == total_rounds; + ParallelFor(0, clip_data.num_frames(), 1, + EstimateMotionIRLSInvoker( + type, irls_per_round, + last_round, // Compute stability on last round. + max_unstable_type, model_options, this, + &clip_data.prior_weights, thread_storage, + clip_data.feature_lists, clip_data.camera_motions)); + } + + if (options_.estimation_policy() == + MotionEstimationOptions::JOINTLY_FROM_TRACKS) { + EnforceTrackConsistency(clip_datas); + } + } + + if (model_options.check_model_stability) { + for (auto& clip_data : *clip_datas) { + CheckModelStability(type, max_unstable_type, + clip_data.irls_weight_backup, + clip_data.feature_lists, clip_data.camera_motions); + } + } + } else { + // Estimation policy == TEMPORAL_LONG_FEATURE_BIAS. + for (auto& clip_data : *clip_datas) { + EstimateMotionIRLSInvoker motion_invoker( + type, irls_per_round, + true, // Compute stability on last round. + max_unstable_type, model_options, this, &clip_data.prior_weights, + thread_storage, clip_data.feature_lists, clip_data.camera_motions); + + for (int round = 0; round < total_rounds; ++round) { + // Traverse frames in order. + for (int k = 0; k < clip_data.num_frames(); ++k) { + if (clip_data.feature_lists->at(k)->feature_size() > 0) { + CHECK(clip_data.feature_lists->at(k)->long_tracks()) + << "Estimation policy TEMPORAL_LONG_FEATURE_BIAS requires " + << "tracking with long tracks."; + } + + // First round -> initialize weights. 
+ if (round == 0) { + IrlsInitialization(type, max_unstable_type, k, model_options, + &clip_data); + + BiasLongFeatures(clip_data.feature_lists->at(k), type, + model_options, &clip_data.prior_weights[k]); + } + + if (clip_data.camera_motions->at(k).type() <= max_unstable_type) { + CHECK(clip_data.prior_weights[k].use_full_prior); + clip_data.prior_weights[k].alphas.assign(irls_per_round, 1.0f); + clip_data.prior_weights[k].alphas.back() = 0.0; + } + + // Compute per-frame motion. + motion_invoker(BlockedRange(k, k + 1, 1)); + + if (model_options.check_model_stability) { + CheckSingleModelStability(type, max_unstable_type, + clip_data.irls_weight_backup + ? &clip_data.irls_weight_backup->at(k) + : nullptr, + clip_data.feature_lists->at(k), + &clip_data.camera_motions->at(k)); + } + + if (clip_data.camera_motions->at(k).type() == CameraMotion::VALID) { + const bool remove_terminated_tracks = + total_rounds == 1 || (round == 0 && k == 0); + UpdateLongFeatureBias(type, model_options, remove_terminated_tracks, + round > 0, // Update irls observation. + clip_data.feature_lists->at(k)); + } + } + + // Update feature weights and priors for the next round. + for (int k = 0; k < clip_data.num_frames(); ++k) { + auto& feats = *clip_data.feature_lists->at(k); + auto& priors = clip_data.prior_weights[k].priors; + const int type_idx = model_options.IndexFromType(type); + const auto& bias_map = long_feature_bias_maps_[type_idx]; + + for (int l = 0; l < feats.feature_size(); ++l) { + auto iter = bias_map.find(feats.feature(l).track_id()); + if (iter != bias_map.end()) { + const float bias = iter->second.bias; + float irls = 1.0f / (bias + kIrlsEps); + if (irls < 1.0f) { + irls *= irls; // Downweight outliers even more. 
+ } + feats.mutable_feature(l)->set_irls_weight(irls); + priors[l] = irls; + } + } + } + } + } + } + return true; +} + +namespace { + +class DoesFeatureAgreeWithSimilarity { + public: + DoesFeatureAgreeWithSimilarity(const LinearSimilarityModel& similarity, + float inlier_threshold) + : similarity_(similarity), + sq_inlier_threshold_(inlier_threshold * inlier_threshold) {} + bool operator()(const RegionFlowFeature& feature) const { + Vector2_f lin_pt = LinearSimilarityAdapter::TransformPoint( + similarity_, FeatureLocation(feature)); + return (lin_pt - FeatureMatchLocation(feature)).Norm2() < + sq_inlier_threshold_; + } + + private: + LinearSimilarityModel similarity_; + float sq_inlier_threshold_; +}; + +} // namespace. + +class IrlsInitializationInvoker { + public: + IrlsInitializationInvoker( + const MotionEstimation::MotionType& type, int max_unstable_type, + const MotionEstimation::EstimateModelOptions& model_options, + const MotionEstimation* motion_estimation, + MotionEstimation::SingleTrackClipData* clip_data) + : type_(type), + max_unstable_type_(max_unstable_type), + model_options_(model_options), + motion_estimation_(motion_estimation), + clip_data_(clip_data) {} + + void operator()(const BlockedRange& range) const { + for (int frame = range.begin(); frame != range.end(); ++frame) { + const CameraMotion& camera_motion = (*clip_data_->camera_motions)[frame]; + RegionFlowFeatureList& feature_list = + *(*clip_data_->feature_lists)[frame]; + + // Don't process motions that are already deemed unstable. + // Keep last resulting weights around. + if (camera_motion.type() > max_unstable_type_) { + continue; + } + + // Backup irls weights, reset of weights occurs in CheckModelStability + // if model was deemed unstable. + if (clip_data_->irls_weight_backup) { + GetRegionFlowFeatureIRLSWeights( + feature_list, &(*clip_data_->irls_weight_backup)[frame]); + } + + // Should translation or similarity irls weights be initialized? 
+ // In this case, prior weights will be enforced during estimation. + const bool irls_initialization = + motion_estimation_->options_.irls_initialization().activated(); + + // Should (mixture) homography irls weights be initialized? + // In this case, prior weights are not necessarily enforced, to allow + // estimation to deviate from filtered result. + const bool use_only_lin_sim_inliers_for_homography = + motion_estimation_->options_ + .use_only_lin_sim_inliers_for_homography(); + + // Only seed priors if at least one round of translation estimation was + // performed, i.e. feature weights contain results of that estimation. + bool seed_priors_from_bias = + motion_estimation_->options_.estimation_policy() == + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS && + motion_estimation_->options_.long_feature_bias_options() + .seed_priors_from_bias() && + type_ >= MotionEstimation::MODEL_LINEAR_SIMILARITY; + + if (seed_priors_from_bias && + type_ == MotionEstimation::MODEL_LINEAR_SIMILARITY) { + // Check that translation variance is not too large, otherwise + // biasing from prior distorts results (think of zooming, rotation, + // etc.). + std::vector variances; + for (const auto& camera_motion : *clip_data_->camera_motions) { + variances.push_back(camera_motion.translation_variance()); + } + auto percentile = variances.begin() + variances.size() * 0.8f; + std::nth_element(variances.begin(), percentile, variances.end()); + const float variance = + *percentile / hypot(motion_estimation_->frame_width_, + motion_estimation_->frame_height_); + constexpr float kMaxTranslationVariance = 5e-3; + seed_priors_from_bias = variance < kMaxTranslationVariance; + } + + if (seed_priors_from_bias && + type_ == MotionEstimation::MODEL_HOMOGRAPHY) { + // Check strict inlier ratio; if there are sufficient inliers + // don't bias homography to better fit; otherwise bias for more + // rigidity. 
+ std::vector inlier_ratio; + for (const auto& camera_motion : *clip_data_->camera_motions) { + inlier_ratio.push_back( + camera_motion.similarity_strict_inlier_ratio()); + } + auto percentile = inlier_ratio.begin() + inlier_ratio.size() * 0.5f; + std::nth_element(inlier_ratio.begin(), percentile, inlier_ratio.end()); + // TODO: Revisit decision boundary after limiting + // motion estimation to selection. + constexpr float kMaxRatio = 0.7; + seed_priors_from_bias = *percentile < kMaxRatio; + } + + // Seed priors from previous estimation if requested. + if (seed_priors_from_bias) { + GetRegionFlowFeatureIRLSWeights( + feature_list, &clip_data_->prior_weights[frame].priors); + } + + // Initialize irls weights to their default values. + SetRegionFlowFeatureIRLSWeights(clip_data_->IrlsWeightInput(type_)[frame], + &feature_list); + + // Update weights based on type and options. + // Initialization step. + switch (type_) { + case MotionEstimation::MODEL_TRANSLATION: + if (irls_initialization) { + TranslationModel best_model; + if (motion_estimation_->GetTranslationIrlsInitialization( + &feature_list, model_options_, + camera_motion.average_magnitude(), clip_data_->inlier_mask, + &best_model)) { + // Successful initialization, update inlier mask and signal to + // use full prior. + clip_data_->prior_weights[frame].use_full_prior = true; + if (clip_data_->inlier_mask) { + clip_data_->inlier_mask->UpdateTranslation( + Vector2_f(best_model.dx(), best_model.dy())); + clip_data_->inlier_mask->UpdateMask(); + } + } else { + // Initialization failed, reset to original weights. + SetRegionFlowFeatureIRLSWeights( + clip_data_->IrlsWeightInput(type_)[frame], &feature_list); + + if (clip_data_->inlier_mask) { + // Unstable translation: Pretty bad motion here, should reset + // prior. 
+ clip_data_->inlier_mask->InitMask(); + } + } + } + break; + + case MotionEstimation::MODEL_LINEAR_SIMILARITY: + if (irls_initialization) { + LinearSimilarityModel best_model; + if (motion_estimation_->GetSimilarityIrlsInitialization( + &feature_list, model_options_, + camera_motion.average_magnitude(), clip_data_->inlier_mask, + &best_model)) { + // Successful initialization, update inlier mask and signal to + // use full prior. + clip_data_->prior_weights[frame].use_full_prior = true; + + if (clip_data_->inlier_mask) { + clip_data_->inlier_mask->UpdateMask(); + } + } else { + // Initialization failed, reset to original weights. + SetRegionFlowFeatureIRLSWeights( + clip_data_->IrlsWeightInput(type_)[frame], &feature_list); + } + } + break; + + default: + break; + } + + // Filtering step. + switch (type_) { + case MotionEstimation::MODEL_HOMOGRAPHY: + if (use_only_lin_sim_inliers_for_homography && + camera_motion.type() <= CameraMotion::UNSTABLE_SIM) { + // Threshold is set w.r.t. normalized frame diameter. + LinearSimilarityModel normalized_similarity = + ModelCompose3(motion_estimation_->normalization_transform_, + camera_motion.linear_similarity(), + motion_estimation_->inv_normalization_transform_); + FilterRegionFlowFeatureList( + DoesFeatureAgreeWithSimilarity( + normalized_similarity, + motion_estimation_->options_.lin_sim_inlier_threshold()), + kOutlierIRLSWeight, &feature_list); + } + break; + + case MotionEstimation::MODEL_MIXTURE_HOMOGRAPHY: + if (use_only_lin_sim_inliers_for_homography && + camera_motion.type() <= CameraMotion::UNSTABLE_SIM) { + // Threshold is set w.r.t. normalized frame diameter. + LinearSimilarityModel normalized_similarity = + ModelCompose3(motion_estimation_->normalization_transform_, + camera_motion.linear_similarity(), + motion_estimation_->inv_normalization_transform_); + + // Linear similarity is a rigid motion model, only reject severe + // outliers (4 times motion magnitude, 1.5 times rejection + // threshold). 
+ const float irls_residual_scale = + motion_estimation_->GetIRLSResidualScale( + camera_motion.average_magnitude(), + motion_estimation_->options_.irls_mixture_fraction_scale() * + motion_estimation_->options_ + .irls_motion_magnitude_fraction()); + + const float inlier_threshold = + model_options_.mixture_inlier_threshold_scale * + motion_estimation_->options_.lin_sim_inlier_threshold() / + irls_residual_scale; + + FilterRegionFlowFeatureList( + DoesFeatureAgreeWithSimilarity(normalized_similarity, + inlier_threshold), + kOutlierIRLSWeight, &feature_list); + } + break; + + default: + break; + } + + const bool use_prior_weights = + !clip_data_->uniform_weight_input[frame] || irls_initialization; + + // Initialize priors from irls weights. + if (use_prior_weights) { + CHECK_LT(frame, clip_data_->prior_weights.size()); + + if (clip_data_->prior_weights[frame].priors.empty()) { + clip_data_->prior_weights[frame].priors.resize( + feature_list.feature_size(), 1.0f); + } + + if (seed_priors_from_bias) { + std::vector multiply; + GetRegionFlowFeatureIRLSWeights(feature_list, &multiply); + for (int l = 0; l < multiply.size(); ++l) { + clip_data_->prior_weights[frame].priors[l] *= multiply[l]; + } + } else { + GetRegionFlowFeatureIRLSWeights( + feature_list, &clip_data_->prior_weights[frame].priors); + } + } + } + } + + private: + MotionEstimation::MotionType type_; + int max_unstable_type_; + const MotionEstimation::EstimateModelOptions& model_options_; + const MotionEstimation* motion_estimation_; + MotionEstimation::SingleTrackClipData* clip_data_; +}; + +void MotionEstimation::LongFeatureInitialization( + const RegionFlowFeatureList& feature_list, + const LongFeatureInfo& feature_info, + const std::vector& track_length_importance, + std::vector* irls_weights) const { + CHECK(irls_weights); + const int num_features = feature_list.feature_size(); + if (num_features == 0) { + return; + } + + CHECK_EQ(num_features, irls_weights->size()); + + // Determine actual scale to be 
applied to each feature. + std::vector feature_scales(num_features); + constexpr float kTrackLengthImportance = 0.5f; + int num_upweighted = 0; // Count number of upweighted features. + for (int k = 0; k < num_features; ++k) { + const int track_len = feature_info.TrackLength(feature_list.feature(k)); + const float track_len_scale = track_length_importance[track_len]; + if (track_len_scale >= kTrackLengthImportance) { + ++num_upweighted; + } + + feature_scales[k] = track_len_scale; + } + + // Use full upweighting above kMinFraction of upweighted features. + constexpr float kMinFraction = 0.1f; + const float upweight_multiplier = + options_.long_feature_initialization().upweight_multiplier() * + std::min(1.0f, num_upweighted / (num_features * kMinFraction)); + + for (int k = 0; k < num_features; ++k) { + // Never downweight. + (*irls_weights)[k] *= + std::max(1.0f, feature_scales[k] * upweight_multiplier); + } +} + +void MotionEstimation::FeatureDensityNormalization( + const RegionFlowFeatureList& feature_list, + std::vector* irls_weights) const { + CHECK(irls_weights); + const int num_features = feature_list.feature_size(); + CHECK_EQ(num_features, irls_weights->size()); + + // Compute mask index for each feature. + std::vector bin_indices; + bin_indices.reserve(num_features); + + const int mask_size = options_.feature_mask_size(); + const int max_bins = mask_size * mask_size; + + std::vector bin_normalizer(max_bins, 0.0f); + + const Vector2_f domain = NormalizedDomain(); + const float scale_x = (mask_size - 1) / domain.x(); + const float scale_y = (mask_size - 1) / domain.y(); + + // Interpolate location into adjacent bins. 
+ for (const auto& feature : feature_list.feature()) { + const float grid_y = feature.y() * scale_y; + const float grid_x = feature.x() * scale_x; + + const int int_grid_x = grid_x; + const int int_grid_y = grid_y; + + const float dx = grid_x - int_grid_x; + const float dy = grid_y - int_grid_y; + const float dxdy = dx * dy; + const float dx_plus_dy = dx + dy; + + const int inc_x = dx != 0; + const int inc_y = dy != 0; + + int bin_idx = int_grid_y * mask_size + int_grid_x; + // (1 - dx)(1 - dy) = 1 - (dx + dy) + dx*dy + bin_normalizer[bin_idx] += 1 - dx_plus_dy + dxdy; + // dx * (1 - dy) = dx - dxdy + bin_normalizer[bin_idx + inc_x] += dx - dxdy; + + bin_idx += mask_size * inc_y; + // (1 - dx) * dy = dy - dxdy + bin_normalizer[bin_idx] += dy - dxdy; + bin_normalizer[bin_idx + inc_x] += dxdy; + } + + // Get normalization for each feature. + float avg_normalizer = 0.0f; + for (int k = 0; k < num_features; ++k) { + const RegionFlowFeature& feature = feature_list.feature(k); + const float grid_y = feature.y() * scale_y; + const float grid_x = feature.x() * scale_x; + + const int int_grid_x = grid_x; + const int int_grid_y = grid_y; + + const float dx = grid_x - int_grid_x; + const float dy = grid_y - int_grid_y; + const float dxdy = dx * dy; + const float dx_plus_dy = dx + dy; + + int inc_x = dx != 0; + int inc_y = dy != 0; + + float normalizer = 0; + int bin_idx = int_grid_y * mask_size + int_grid_x; + CHECK_LT(bin_idx, max_bins); + // See above. + normalizer += bin_normalizer[bin_idx] * (1 - dx_plus_dy + dxdy); + normalizer += bin_normalizer[bin_idx + inc_x] * (dx - dxdy); + + bin_idx += mask_size * inc_y; + CHECK_LT(bin_idx, max_bins); + normalizer += bin_normalizer[bin_idx] * (dy - dxdy); + normalizer += bin_normalizer[bin_idx + inc_x] * dxdy; + + const float inv_normalizer = + normalizer > 0 ? 
1.0f / std::sqrt(static_cast(normalizer)) : 0; + avg_normalizer += inv_normalizer; + + (*irls_weights)[k] *= inv_normalizer; + } + + const float scale = num_features / (avg_normalizer + 1e-6f); + + // Normalize such that average scale is 1.0f. + for (int k = 0; k < num_features; ++k) { + (*irls_weights)[k] *= scale; + } +} + +void MotionEstimation::IrlsInitialization( + const MotionType& type, int max_unstable_type, int frame, + const EstimateModelOptions& model_options, + SingleTrackClipData* clip_data) const { + if (options_.estimation_policy() == + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS) { + CHECK_NE(frame, -1) << "Only per frame processing for this policy " + << "supported."; + } + + IrlsInitializationInvoker invoker(type, max_unstable_type, model_options, + this, clip_data); + + if (frame == -1) { + // Function pointer to select between SerialFor and ParallelFor based on + // EstimationPolicy. + typedef void (*ForFunctionPtr)(size_t, size_t, size_t, + const IrlsInitializationInvoker&); + + ForFunctionPtr for_function = &ParallelFor; + + // Inlier mask only used for translation or linear similarity. + // In that case, initialization needs to proceed serially. + if (type == MODEL_TRANSLATION || type == MODEL_LINEAR_SIMILARITY) { + if (clip_data->inlier_mask != nullptr) { + for_function = &SerialFor; + } + } + + for_function(0, clip_data->num_frames(), 1, invoker); + } else { + CHECK_GE(frame, 0); + CHECK_LT(frame, clip_data->num_frames()); + invoker(BlockedRange(frame, frame + 1, 1)); + } +} + +// Helper class for parallel irls weight filtering. +// This filters per-frame pair, across multiple models, not across time. +class TrackFilterInvoker { + public: + explicit TrackFilterInvoker( + std::vector* clip_datas) + : clip_datas_(clip_datas) {} + + void operator()(const BlockedRange& range) const { + for (int f = range.begin(); f != range.end(); ++f) { + // Gather irls weights for each track. 
+      absl::node_hash_map<int, std::vector<float>> track_weights;
+      for (auto& clip_data : *clip_datas_) {
+        for (const auto& feature : (*clip_data.feature_lists)[f]->feature()) {
+          track_weights[feature.track_id()].push_back(feature.irls_weight());
+        }
+      }
+
+      // Min filter across weights, store in first element.
+      int match_sum = 0;
+      for (auto& entry : track_weights) {
+        match_sum += entry.second.size();
+        entry.second[0] =
+            *std::min_element(entry.second.begin(), entry.second.end());
+      }
+
+      // Apply.
+      for (auto& clip_data : *clip_datas_) {
+        for (auto& feature :
+             *(*clip_data.feature_lists)[f]->mutable_feature()) {
+          feature.set_irls_weight(track_weights[feature.track_id()][0]);
+        }
+      }
+    }
+  }
+
+ private:
+  std::vector<MotionEstimation::SingleTrackClipData>* clip_datas_;
+};
+
+void MotionEstimation::MinFilterIrlsWeightByTrack(
+    SingleTrackClipData* clip_data) const {
+  // Gather irls weights for each track.
+  std::unordered_map<int, std::vector<float>> track_weights;
+
+  const int num_frames = clip_data->feature_lists->size();
+  for (int f = 0; f < num_frames; ++f) {
+    for (const auto& feature : clip_data->feature_lists->at(f)->feature()) {
+      track_weights[feature.track_id()].push_back(feature.irls_weight());
+    }
+  }
+
+  // Robust min filter across weights, store in first element; here 20th
+  // percentile.
+  for (auto& entry : track_weights) {
+    if (entry.second.size() > 1) {
+      auto robust_min =
+          entry.second.begin() + std::ceil(entry.second.size() * 0.2f);
+
+      std::nth_element(entry.second.begin(), robust_min, entry.second.end());
+      entry.second[0] = *robust_min;
+    }
+  }
+
+  // Apply.
+  for (int f = 0; f < num_frames; ++f) {
+    for (auto& feature : *clip_data->feature_lists->at(f)->mutable_feature()) {
+      feature.set_irls_weight(track_weights[feature.track_id()][0]);
+    }
+  }
+}
+
+void MotionEstimation::EnforceTrackConsistency(
+    std::vector<SingleTrackClipData>* clip_datas) const {
+  CHECK(clip_datas != nullptr);
+  if (clip_datas->empty()) {
+    return;
+  }
+
+  // Traverse each frame, filter across clip_datas.
+ const int num_frames = (*clip_datas)[0].num_frames(); + + // Map track id to weights. + SerialFor(0, num_frames, 1, TrackFilterInvoker(clip_datas)); + + if (!options_.joint_track_estimation().temporal_smoothing()) { + return; + } + + // Temporal smoothing in time for each clip data. + for (auto& clip_data : *clip_datas) { + // Bilateral 1D filter across irls weights for each track. + // Gather irls weights across all tracks. + absl::node_hash_map> track_irls_weights; + + for (RegionFlowFeatureList* feature_list : *clip_data.feature_lists) { + for (const auto& feature : feature_list->feature()) { + track_irls_weights[feature.track_id()].push_back(feature.irls_weight()); + } + } + + // Filter weights for each track, store results in map below. + for (auto& iter : track_irls_weights) { + SmoothIRLSWeights(&iter.second); + } + + // Set filtered weight. + for (RegionFlowFeatureList* feature_list : *clip_data.feature_lists) { + for (auto& feature : *feature_list->mutable_feature()) { + feature.set_irls_weight(track_irls_weights[feature.track_id()].front()); + track_irls_weights[feature.track_id()].pop_front(); + } + } + } +} + +void MotionEstimation::BiasFromFeatures( + const RegionFlowFeatureList& feature_list, MotionType type, + const EstimateModelOptions& model_options, std::vector* bias) const { + CHECK(bias); + const int num_features = feature_list.feature_size(); + bias->resize(num_features); + + int feature_idx = 0; + const int type_idx = model_options.IndexFromType(type); + auto& bias_map = long_feature_bias_maps_[type_idx]; + constexpr float kMinBias = 0.1f; + + for (const auto& feature : feature_list.feature()) { + // Is feature present? + auto iter = bias_map.find(feature.track_id()); + if (iter != bias_map.end()) { + const float current_bias_bin = + iter->second.bias * feature_bias_lut_.bias_weight_scale; + + // Never bias 100% towards old value, + // allow for new values to propagate. + // Downweight outliers but do not upweight inliers. 
+ if (current_bias_bin >= feature_bias_lut_.bias_weight_lut.size()) { + (*bias)[feature_idx] = kMinBias; + } else { + (*bias)[feature_idx] = std::max( + kMinBias, feature_bias_lut_.bias_weight_lut[current_bias_bin]); + } + } else { + // TODO: This should be some kind of average of all the other + // feature's bias; such that new features can't overpower old ones + // (e.g. on tracking errors). + (*bias)[feature_idx] = 1.0f; + } + + ++feature_idx; + } +} + +void MotionEstimation::BiasLongFeatures( + RegionFlowFeatureList* feature_list, MotionType type, + const EstimateModelOptions& model_options, + PriorFeatureWeights* prior_weights) const { + CHECK(prior_weights); + CHECK(feature_list); + + // Don't bias duplicated frames -> should be identity transform. + if (feature_list->is_duplicated()) { + return; + } + + // TODO: Rename, it is not the bias (= error), but a weight in + // [0, 1] to condition features. + std::vector bias; + BiasFromFeatures(*feature_list, type, model_options, &bias); + + // Bias along long tracks. + if (!prior_weights->use_full_prior) { + LOG_IF(WARNING, + []() { + static int k = 0; + return k++ < 2; + }()) + << "Use full prior overridden to true, no initialization used. 
" + << "Atypical usage."; + prior_weights->use_full_prior = true; + } + + const int num_features = feature_list->feature_size(); + if (prior_weights->priors.empty() && num_features > 0) { + LOG(WARNING) << "BiasLongFeatures without using IrlsOutlierInitialization " + << "or LongFeatureInitialization."; + prior_weights->priors.resize(num_features, 1.0f); + } + + CHECK_EQ(num_features, prior_weights->priors.size()); + for (int k = 0; k < num_features; ++k) { + prior_weights->priors[k] *= bias[k]; + auto* feature = feature_list->mutable_feature(k); + feature->set_irls_weight(feature->irls_weight() * bias[k]); + } +} + +void MotionEstimation::ComputeSpatialBias( + MotionType type, const EstimateModelOptions& model_options, + RegionFlowFeatureList* feature_list, SpatialBiasMap* spatial_bias) const { + const auto& bias_options = options_.long_feature_bias_options(); + const int type_idx = model_options.IndexFromType(type); + const auto& bias_map = long_feature_bias_maps_[type_idx]; + + // Select all features that are not marked to be ignored (irls weight of + // zero). + RegionFlowFeatureView feature_view; + SelectFeaturesFromList( + [](const RegionFlowFeature& feature) -> bool { + return feature.irls_weight() != 0; + }, + feature_list, &feature_view); + + const int num_features = feature_view.size(); + + std::vector> feature_taps_3; + std::vector> feature_grids; + + // Create grid to seed newly observed features with an appropiate bias. + BuildFeatureGrid(NormalizedDomain().x(), NormalizedDomain().y(), + bias_options.grid_size(), {feature_view}, FeatureLocation, + &feature_taps_3, nullptr, nullptr, &feature_grids); + CHECK_EQ(1, feature_grids.size()); + const FeatureGrid& single_grid = feature_grids[0]; + + const float long_track_threshold = bias_options.long_track_threshold(); + + // Traverse bins. + for (int k = 0; k < single_grid.size(); ++k) { + for (auto feature_ptr : single_grid[k]) { // Traverse each bin. 
+ float total_weight = 0.0f; + float weighted_bias = 0.0f; + + // Counts all neighbors considered (including itself). + int total_neighbors = 0; + + // Counts approximately how many similar looking long feature tracks + // are neighbors. + float similar_long_tracks = 0; + for (int neighbor_bin : feature_taps_3[k]) { + for (auto neighbor_ptr : single_grid[neighbor_bin]) { + ++total_neighbors; + auto iter = bias_map.find(neighbor_ptr->track_id()); + float neighbor_bias = 0; + int num_observations = 0; + + if (iter != bias_map.end()) { + neighbor_bias = iter->second.bias; + num_observations = iter->second.total_observations; + } else { + // If new track use estimated irls weight. + neighbor_bias = 1.0f / neighbor_ptr->irls_weight(); + num_observations = 1; + } + + const float distance = + (FeatureLocation(*feature_ptr) - FeatureLocation(*neighbor_ptr)) + .Norm(); + const float spatial_weight = + feature_bias_lut_ + .spatial_lut[distance * feature_bias_lut_.spatial_scale]; + + const float color_distance = + RegionFlowFeatureDistance(feature_ptr->feature_descriptor(), + neighbor_ptr->feature_descriptor()); + + const float color_weight = + feature_bias_lut_ + .color_lut[color_distance * feature_bias_lut_.color_scale]; + + if (num_observations >= long_track_threshold) { + // Count similar looking tracks (weights are normalized such that + // identical looking track has color_weight of 1.0. + // Scale by length of track (limited to kMaxTrackScale). + constexpr float kMaxTrackScale = 3.0f; + similar_long_tracks += + color_weight * + std::min(kMaxTrackScale, + num_observations / long_track_threshold); + } + + const float weight = spatial_weight * color_weight; + + total_weight += weight; + weighted_bias += neighbor_bias * weight; + } + } + + DCHECK(spatial_bias->find(feature_ptr->track_id()) == + spatial_bias->end()); + + // Threshold such that few similar tracks do not count. + // Set to 0.25% of features. 
+ if (similar_long_tracks < 2.5e-3 * num_features) { + similar_long_tracks = 0; + } + + // Cutoff for features that do not have any matching neighbors. + // In that case fallback to feature's irls weight. + if (total_weight > total_neighbors * 1e-4f) { + // Note: Considered doing minimum of bias and irls weight + // but this leads to instable IRLS weights very similar + // to independent estimation. + const float norm_bias = weighted_bias / total_weight; + (*spatial_bias)[feature_ptr->track_id()] = + std::make_pair(norm_bias, similar_long_tracks); + } else { + (*spatial_bias)[feature_ptr->track_id()] = std::make_pair( + 1.0f / feature_ptr->irls_weight(), similar_long_tracks); + } + } + } +} + +void MotionEstimation::UpdateLongFeatureBias( + MotionType type, const EstimateModelOptions& model_options, + bool remove_terminated_tracks, bool update_irls_observation, + RegionFlowFeatureList* feature_list) const { + const int type_idx = model_options.IndexFromType(type); + auto& bias_map = long_feature_bias_maps_[type_idx]; + + constexpr int kMaxDuplicatedFrames = 2; + int& model_duplicate_frames = num_duplicate_frames_[type_idx]; + if (feature_list->is_duplicated()) { + ++model_duplicate_frames; + } else { + model_duplicate_frames = 0; + } + + // Do not update bias from duplicated frames. We consider any duplicated + // frames > kMaxDuplicatedFrames as a totally static camera, in which case the + // update is desired. + if (model_duplicate_frames > 0 && + model_duplicate_frames <= kMaxDuplicatedFrames) { + for (auto& feature : *feature_list->mutable_feature()) { + auto iter = bias_map.find(feature.track_id()); + if (iter != bias_map.end() && feature.irls_weight() > 0) { + // Restore bias from last non-duplicated observation. 
+ feature.set_irls_weight(1.0f / (iter->second.bias + kIrlsEps)); + } + } + return; + } + + const auto& bias_options = options_.long_feature_bias_options(); + const int num_irls_observations = bias_options.num_irls_observations(); + + CHECK_GT(num_irls_observations, 0) << "Specify value > 0"; + const float inv_num_irls_observations = 1.0f / num_irls_observations; + + SpatialBiasMap spatial_bias; + if (bias_options.use_spatial_bias()) { + ComputeSpatialBias(type, model_options, feature_list, &spatial_bias); + } else { + // Just populate bias with inverse irls error. + for (const auto& feature : feature_list->feature()) { + spatial_bias[feature.track_id()] = + std::make_pair(1.0f / feature.irls_weight(), 0); + } + } + + // Tracks current ids in this frame. + std::unordered_set curr_track_ids; + + // Scale applied to irls weight for linear interpolation between inlier and + // outlier bias. + CHECK_GT(bias_options.inlier_irls_weight(), 0); + const float irls_scale = 1.0f / bias_options.inlier_irls_weight(); + const float long_track_scale = + 1.0f / bias_options.long_track_confidence_fraction(); + + for (auto& feature : *feature_list->mutable_feature()) { + if (remove_terminated_tracks) { + curr_track_ids.insert(feature.track_id()); + } + + // Skip features that are marked as not to be processed. + if (feature.irls_weight() == 0) { + continue; + } + + auto iter = bias_map.find(feature.track_id()); + + // Is LongFeatureBias present? + if (iter != bias_map.end()) { + // Get minimum across last k observation. + constexpr size_t lastK = 3; + + const std::vector& irls_values = iter->second.irls_values; + const float last_min = *std::min_element( + irls_values.end() - std::min(irls_values.size(), lastK), + irls_values.end()); + + const float curr_irls_weight = feature.irls_weight(); + + // Clamp weights for ratio computation (count major outliers and inliers + // as regular ones). Set to range of errors between 0.5 pixels to 25 + // pixels. 
+ const float last_min_clamped = std::max(0.04f, std::min(last_min, 2.0f)); + + const float curr_irls_clamped = + std::max(0.04f, std::min(curr_irls_weight, 2.0f)); + + // Only checking for change from outlier to inlier here. + // The reverse case inlier -> outlier is addressed by bias + // blending below. Denominator is guaranteed to be > 0. + float irls_ratio = curr_irls_clamped / last_min_clamped; + + if (irls_ratio > bias_options.max_irls_change_ratio()) { + // Reset feature and start again. + bias_map[feature.track_id()] = + LongFeatureBias(spatial_bias[feature.track_id()].first); + continue; + } + + ++iter->second.total_observations; + + // Compute median. + std::vector irls_values_copy = iter->second.irls_values; + auto median = irls_values_copy.begin() + irls_values_copy.size() / 2; + std::nth_element(irls_values_copy.begin(), median, + irls_values_copy.end()); + + // By default shorter observations are given less prior, except if + // sufficient long tracks were used during seeding. + float prior_weight = + std::max(std::min(1.0f, spatial_bias[feature.track_id()].second * + long_track_scale), + (irls_values_copy.size() * inv_num_irls_observations)); + + const float alpha = std::min(1.0f, *median * irls_scale) * prior_weight; + const float bias = (alpha * bias_options.inlier_bias() + + (1.0f - alpha) * bias_options.outlier_bias()); + + // Bias is weighted by median IRLS weight (foreground / outliers) + // can override bias faster. Similar if only few IRLS values have + // been observed, update bias faster. + const float biased_weight = + bias * iter->second.bias + + (1.0f - bias) * (1.0f / feature.irls_weight()); + + // Update weight. + iter->second.bias = biased_weight; + + std::vector& irls_values_ref = iter->second.irls_values; + if (!update_irls_observation) { + irls_values_ref.push_back(feature.irls_weight()); + if (irls_values_ref.size() > num_irls_observations) { + irls_values_ref.erase(irls_values_ref.begin()); + } + } else { + // Update. 
+ // TODO: Sure about this? This is the error after + // estimation, but also biased toward previous solution. + irls_values_ref.back() = feature.irls_weight(); + } + + // Update feature's weight as well. + feature.set_irls_weight(1.0f / (biased_weight + kIrlsEps)); + } else { + CHECK(!update_irls_observation) << "Should never happen on >= 2nd round"; + + // Not present, reset to spatial bias. + const float biased_weight = spatial_bias[feature.track_id()].first; + bias_map[feature.track_id()] = LongFeatureBias(biased_weight); + feature.set_irls_weight(1.0f / (biased_weight + kIrlsEps)); + } + } + + // Remove terminated tracks. + if (remove_terminated_tracks) { + std::vector tracks_to_be_removed; + for (const auto& entry : bias_map) { + if (curr_track_ids.find(entry.first) == curr_track_ids.end()) { + tracks_to_be_removed.push_back(entry.first); + } + } + + for (auto id : tracks_to_be_removed) { + bias_map.erase(id); + } + } +} + +void MotionEstimation::SmoothIRLSWeights(std::deque* irls) const { + CHECK(irls != nullptr); + if (irls->empty()) { + return; + } + + const float sigma_space = 7.0f; + const float sigma_signal = 0.5f; + + // Account for 90% of the data. + const int radius = 1.65f * sigma_space + 0.5f; + const int diameter = 2 * radius + 1; + const int num_irls = irls->size(); + + // Calculate spatial weights; + std::vector weights(diameter); + const float space_coeff = -0.5f / (sigma_space * sigma_space); + for (int i = -radius; i <= radius; ++i) { + weights[i + radius] = std::exp(space_coeff * i * i); + } + + // Map weights to error. + std::vector error(num_irls + 2 * radius); + for (int k = 0; k < num_irls; ++k) { + error[radius + k] = 1.0f / ((*irls)[k] + 1e-6f); + } + + // Copy border (right hand side). + std::copy(error.rbegin() + radius, error.rbegin() + 2 * radius, + error.end() - radius); + // Left hand side. + std::copy(error.begin() + radius, error.begin() + 2 * radius, error.begin()); + + // Bilateral filter. 
+ const float signal_coeff = (-0.5f / (sigma_signal * sigma_signal)); + for (int i = 0; i < num_irls; ++i) { + const float curr_val = error[i + radius]; + float val_sum = 0; + float weight_sum = 0; + for (int k = 0; k < diameter; ++k) { + const float value = error[i + k]; + const float diff = value - curr_val; + const float weight = + weights[k] * + std::exp(static_cast(diff * diff * signal_coeff)); + weight_sum += weight; + val_sum += value * weight; + } + + // Result is val_sum / weight_sum, as irls is inverse of error, result is: + // weight_sum / val_sum. + if (val_sum != 0) { + (*irls)[i] = weight_sum / val_sum; + } + } +} + +int MotionEstimation::IRLSRoundsFromSettings(const MotionType& type) const { + const int irls_rounds = options_.irls_rounds(); + switch (type) { + case MODEL_AVERAGE_MAGNITUDE: + LOG(WARNING) << "Called with irls free motion type. Returning zero."; + return 0; + + case MODEL_TRANSLATION: + if (options_.estimate_translation_irls()) { + return irls_rounds; + } else { + return 1; // Always do at least L2 for translation. 
+ } + break; + + case MODEL_LINEAR_SIMILARITY: + switch (options_.linear_similarity_estimation()) { + case MotionEstimationOptions::ESTIMATION_LS_NONE: + return 0; + + case MotionEstimationOptions::ESTIMATION_LS_L2: + return 1; + + case MotionEstimationOptions::ESTIMATION_LS_IRLS: + return irls_rounds; + + case MotionEstimationOptions::ESTIMATION_LS_L2_RANSAC: + case MotionEstimationOptions::ESTIMATION_LS_L1: + LOG(FATAL) << "Deprecated options, use ESTIMATION_LS_IRLS instead."; + return -1; + } + break; + + case MODEL_AFFINE: + switch (options_.affine_estimation()) { + case MotionEstimationOptions::ESTIMATION_AFFINE_NONE: + return 0; + + case MotionEstimationOptions::ESTIMATION_AFFINE_L2: + return 1; + + case MotionEstimationOptions::ESTIMATION_AFFINE_IRLS: + return irls_rounds; + } + break; + + case MODEL_HOMOGRAPHY: + switch (options_.homography_estimation()) { + case MotionEstimationOptions::ESTIMATION_HOMOG_NONE: + return 0; + + case MotionEstimationOptions::ESTIMATION_HOMOG_L2: + return 1; + + case MotionEstimationOptions::ESTIMATION_HOMOG_IRLS: + return irls_rounds; + } + break; + + case MODEL_MIXTURE_HOMOGRAPHY: + switch (options_.mix_homography_estimation()) { + case MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE: + return 0; + + case MotionEstimationOptions::ESTIMATION_HOMOG_MIX_L2: + return 1; + + case MotionEstimationOptions::ESTIMATION_HOMOG_MIX_IRLS: + return irls_rounds; + } + break; + + case MODEL_NUM_VALUES: + LOG(FATAL) << "Function should never be called with this value"; + break; + } + + LOG(FATAL) << "All branches above return, execution can not reach this point"; + return -1; +} + +void MotionEstimation::PolicyToIRLSRounds(int irls_rounds, int* total_rounds, + int* irls_per_round) const { + CHECK(total_rounds != nullptr); + CHECK(irls_per_round != nullptr); + + // Small optimization: irls_rounds == 0 -> total_rounds = 0 regardless of + // settings. 
+ if (irls_rounds == 0) { + *total_rounds = 0; + *irls_per_round = 0; + return; + } + + switch (options_.estimation_policy()) { + case MotionEstimationOptions::INDEPENDENT_PARALLEL: + case MotionEstimationOptions::TEMPORAL_IRLS_MASK: + *irls_per_round = irls_rounds; + *total_rounds = 1; + break; + + case MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS: + *total_rounds = options_.long_feature_bias_options().total_rounds(); + *irls_per_round = irls_rounds; + break; + + case MotionEstimationOptions::JOINTLY_FROM_TRACKS: + *irls_per_round = 1; + *total_rounds = irls_rounds; + break; + } +} + +void MotionEstimation::CheckModelStability( + const MotionType& type, const CameraMotion::Type& max_unstable_type, + const std::vector>* reset_irls_weights, + std::vector* feature_lists, + std::vector* camera_motions) const { + CHECK(feature_lists != nullptr); + CHECK(camera_motions != nullptr); + const int num_frames = feature_lists->size(); + if (reset_irls_weights) { + DCHECK_EQ(num_frames, reset_irls_weights->size()); + } + DCHECK_EQ(num_frames, camera_motions->size()); + + for (int f = 0; f < num_frames; ++f) { + CameraMotion& camera_motion = (*camera_motions)[f]; + RegionFlowFeatureList* feature_list = (*feature_lists)[f]; + + const std::vector* reset_irls_weight = + reset_irls_weights ? &reset_irls_weights->at(f) : nullptr; + CheckSingleModelStability(type, max_unstable_type, reset_irls_weight, + feature_list, &camera_motion); + } +} + +// Order of estimation is: +// Translation -> Linear Similarity -> Affine -> Homography -> Mixture +// Homography. 
+void MotionEstimation::CheckSingleModelStability( + const MotionType& type, const CameraMotion::Type& max_unstable_type, + const std::vector* reset_irls_weights, + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const { + if (camera_motion->type() > max_unstable_type) { + return; + } + + switch (type) { + case MODEL_AVERAGE_MAGNITUDE: + LOG(WARNING) << "Nothing to check for requested model type."; + return; + + case MODEL_TRANSLATION: + if (IsStableTranslation(camera_motion->translation(), + camera_motion->translation_variance(), + *feature_list)) { + // Translation can never be singular. + CHECK_EQ( + 0, camera_motion->flags() & CameraMotion::FLAG_SINGULAR_ESTIMATION); + } else { + // Invalid model. + if (reset_irls_weights) { + SetRegionFlowFeatureIRLSWeights(*reset_irls_weights, feature_list); + } + ResetMotionModels(options_, camera_motion); + } + break; + + case MODEL_LINEAR_SIMILARITY: { + const int num_inliers = + std::round(feature_list->feature_size() * + camera_motion->similarity_inlier_ratio()); + if (camera_motion->flags() & CameraMotion::FLAG_SINGULAR_ESTIMATION || + !IsStableSimilarity(camera_motion->linear_similarity(), *feature_list, + num_inliers)) { + if (reset_irls_weights) { + SetRegionFlowFeatureIRLSWeights(*reset_irls_weights, feature_list); + } + + ResetToTranslation(camera_motion->translation(), camera_motion); + } + break; + } + + case MODEL_AFFINE: + // Not implemented, nothing to check here. 
+ break; + + case MODEL_HOMOGRAPHY: + if (camera_motion->flags() & CameraMotion::FLAG_SINGULAR_ESTIMATION || + !IsStableHomography(camera_motion->homography(), + camera_motion->average_homography_error(), + camera_motion->homography_inlier_coverage())) { + if (reset_irls_weights) { + SetRegionFlowFeatureIRLSWeights(*reset_irls_weights, feature_list); + } + + ResetToSimilarity(camera_motion->linear_similarity(), camera_motion); + } + break; + + case MODEL_MIXTURE_HOMOGRAPHY: { + std::vector block_coverage( + camera_motion->mixture_inlier_coverage().begin(), + camera_motion->mixture_inlier_coverage().end()); + const float mix_min_inlier_coverage = + options_.stable_mixture_homography_bounds().min_inlier_coverage(); + + if (camera_motion->flags() & CameraMotion::FLAG_SINGULAR_ESTIMATION || + !IsStableMixtureHomography(camera_motion->mixture_homography(), + mix_min_inlier_coverage, block_coverage)) { + // Unstable homography mixture. + // Fall-back to previously estimated motion type. + // Function is only called for CameraMotion::Type <= UNSTABLE. + // In this case this means type is either UNSTABLE, UNSTABLE_SIM + // or VALID. + // (UNSTABLE_HOMOG flag is set by this function only during + // ResetToHomography below). + switch (camera_motion->type()) { + case CameraMotion::VALID: + // Homography deemed stable, fallback to it. 
+ MotionEstimation::ResetToHomography(camera_motion->homography(), + true, // flag_as_unstable_model + camera_motion); + break; + + case CameraMotion::UNSTABLE_SIM: + MotionEstimation::ResetToSimilarity( + camera_motion->linear_similarity(), camera_motion); + break; + + case CameraMotion::UNSTABLE: + MotionEstimation::ResetToTranslation(camera_motion->translation(), + camera_motion); + break; + + case CameraMotion::INVALID: + case CameraMotion::UNSTABLE_HOMOG: + LOG(FATAL) << "Unexpected CameraMotion::Type: " + << camera_motion->type(); + break; + } + + if (reset_irls_weights) { + SetRegionFlowFeatureIRLSWeights(*reset_irls_weights, feature_list); + } + + // Clear rolling shutter guess in case it was computed. + camera_motion->set_rolling_shutter_guess(-1); + camera_motion->clear_mixture_homography_spectrum(); + } else { + // Stable mixture homography can reset unstable type. + camera_motion->set_overridden_type(camera_motion->type()); + camera_motion->set_type(CameraMotion::VALID); + // Select weakest regularized mixture. + camera_motion->set_rolling_shutter_motion_index(0); + } + + break; + } + + case MODEL_NUM_VALUES: + LOG(FATAL) << "Function should not be called with this value"; + break; + } +} + +void MotionEstimation::ProjectMotionsDown( + const MotionType& type, std::vector* camera_motions) const { + CHECK(camera_motions != nullptr); + for (auto& camera_motion : *camera_motions) { + switch (type) { + case MODEL_AVERAGE_MAGNITUDE: + case MODEL_TRANSLATION: + case MODEL_MIXTURE_HOMOGRAPHY: + case MODEL_AFFINE: + LOG(WARNING) << "Nothing to project for requested model type"; + return; + + case MODEL_HOMOGRAPHY: + // Only project down if model was estimated, otherwise would propagate + // identity. 
+ if (camera_motion.has_homography() && + camera_motion.type() <= CameraMotion::UNSTABLE_HOMOG) { + LinearSimilarityModel lin_sim = + *camera_motion.mutable_linear_similarity() = + AffineAdapter::ProjectToLinearSimilarity( + HomographyAdapter::ProjectToAffine( + camera_motion.homography(), frame_width_, + frame_height_), + frame_width_, frame_height_); + } + + ABSL_FALLTHROUGH_INTENDED; + + case MODEL_LINEAR_SIMILARITY: + // Only project down if model was estimated. + if (camera_motion.has_linear_similarity() && + camera_motion.type() <= CameraMotion::UNSTABLE_SIM) { + *camera_motion.mutable_translation() = + LinearSimilarityAdapter::ProjectToTranslation( + camera_motion.linear_similarity(), frame_width_, + frame_height_); + } + break; + + case MODEL_NUM_VALUES: + LOG(FATAL) << "Function should not be called with this value"; + break; + } + } +} + +void MotionEstimation::IRLSWeightFilter( + std::vector* feature_lists) const { + CHECK(feature_lists != nullptr); + for (auto feature_ptr : *feature_lists) { + switch (options_.irls_weight_filter()) { + case MotionEstimationOptions::IRLS_FILTER_TEXTURE: + TextureFilteredRegionFlowFeatureIRLSWeights( + 0.5, // Below texturedness threshold of 0.5 + 1, // Set irls_weight to 1. + feature_ptr); + break; + + case MotionEstimationOptions::IRLS_FILTER_CORNER_RESPONSE: + CornerFilteredRegionFlowFeatureIRLSWeights( + 0.5, // Below texturedness threshold of 0.5 + 1, // Set irls_weight to 1. + feature_ptr); + break; + + case MotionEstimationOptions::IRLS_FILTER_NONE: + break; + } + } +} + +void MotionEstimation::EstimateMotionsParallel( + bool post_irls_weight_smoothing, + std::vector* feature_lists, + std::vector* camera_motions) const { + CHECK(camera_motions != nullptr); + camera_motions->clear(); + camera_motions->resize(feature_lists->size()); + + // Normalize features. 
+ for (std::vector::iterator feature_list = + feature_lists->begin(); + feature_list != feature_lists->end(); ++feature_list) { + TransformRegionFlowFeatureList(normalization_transform_, *feature_list); + } + + if (!options_.overlay_detection()) { + EstimateMotionsParallelImpl(options_.irls_weights_preinitialized(), + feature_lists, camera_motions); + } else { + DetermineOverlayIndices(options_.irls_weights_preinitialized(), + camera_motions, feature_lists); + + EstimateMotionsParallelImpl(true, feature_lists, camera_motions); + } + + if (!options_.deactivate_stable_motion_estimation()) { + CheckTranslationAcceleration(camera_motions); + } + + if (post_irls_weight_smoothing) { + PostIRLSSmoothing(*camera_motions, feature_lists); + } + + // Undo transform applied to features. + for (auto& feature_list_ptr : *feature_lists) { + TransformRegionFlowFeatureList(inv_normalization_transform_, + feature_list_ptr); + } + + DetermineShotBoundaries(*feature_lists, camera_motions); +} + +void MotionEstimation::DetermineShotBoundaries( + const std::vector& feature_lists, + std::vector* camera_motions) const { + CHECK(camera_motions != nullptr); + CHECK_EQ(feature_lists.size(), camera_motions->size()); + const auto& shot_options = options_.shot_boundary_options(); + + // Verify empty feature frames and invalid models via visual consistency. + const int num_motions = camera_motions->size(); + for (int k = 0; k < num_motions; ++k) { + auto& camera_motion = (*camera_motions)[k]; + if (camera_motion.type() == CameraMotion::INVALID || + feature_lists[k]->feature_size() == 0) { + // Potential shot boundary, verify. + if (feature_lists[k]->visual_consistency() >= 0) { + if (feature_lists[k]->visual_consistency() >= + shot_options.motion_consistency_threshold()) { + camera_motion.set_flags(camera_motion.flags() | + CameraMotion::FLAG_SHOT_BOUNDARY); + } + } else { + // No consistency present, label as shot boundary. 
+ camera_motion.set_flags(camera_motion.flags() | + CameraMotion::FLAG_SHOT_BOUNDARY); + } + } + } + + // Determine additional boundaries missed during motion estimation. + for (int k = 0; k < num_motions; ++k) { + auto& camera_motion = (*camera_motions)[k]; + if (feature_lists[k]->visual_consistency() >= + shot_options.appearance_consistency_threshold()) { + if (k + 1 == num_motions || // no next frame available. + feature_lists[k + 1]->visual_consistency() >= + shot_options.appearance_consistency_threshold()) { + // Only add boundaries if previous or next frame are not already labeled + // boundaries by above tests. + if (k > 0 && ((*camera_motions)[k - 1].flags() & + CameraMotion::FLAG_SHOT_BOUNDARY) != 0) { + continue; + } + if (k + 1 < num_motions && ((*camera_motions)[k + 1].flags() & + CameraMotion::FLAG_SHOT_BOUNDARY) != 0) { + continue; + } + + // Shot boundaries based on visual consistency measure. + camera_motion.set_flags(camera_motion.flags() | + CameraMotion::FLAG_SHOT_BOUNDARY); + } + } + } + + // LOG shot boundaries. + for (const auto& camera_motion : *camera_motions) { + if (camera_motion.flags() & CameraMotion::FLAG_SHOT_BOUNDARY) { + VLOG(1) << "Shot boundary at : " << camera_motion.timestamp_usec() * 1e-6f + << "s"; + } + } +} + +void MotionEstimation::ResetMotionModels(const MotionEstimationOptions& options, + CameraMotion* camera_motion) { + CHECK(camera_motion); + + // Clear models. + camera_motion->clear_translation(); + camera_motion->clear_similarity(); + camera_motion->clear_linear_similarity(); + camera_motion->clear_affine(); + camera_motion->clear_homography(); + camera_motion->clear_mixture_homography(); + camera_motion->clear_mixture_homography_spectrum(); + + // We need to set models explicitly for has_* tests to work. 
+ *camera_motion->mutable_translation() = TranslationModel(); + + if (options.estimate_similarity()) { + *camera_motion->mutable_similarity() = SimilarityModel(); + } + + if (options.linear_similarity_estimation() != + MotionEstimationOptions::ESTIMATION_LS_NONE) { + *camera_motion->mutable_linear_similarity() = LinearSimilarityModel(); + } + + if (options.affine_estimation() != + MotionEstimationOptions::ESTIMATION_AFFINE_NONE) { + *camera_motion->mutable_affine() = AffineModel(); + } + + if (options.homography_estimation() != + MotionEstimationOptions::ESTIMATION_HOMOG_NONE) { + *camera_motion->mutable_homography() = Homography(); + } + + if (options.mix_homography_estimation() != + MotionEstimationOptions::ESTIMATION_HOMOG_MIX_NONE) { + *camera_motion->mutable_mixture_homography() = + MixtureHomographyAdapter::IdentityModel(options.num_mixtures()); + camera_motion->set_mixture_row_sigma(options.mixture_row_sigma()); + } + + camera_motion->set_type(CameraMotion::INVALID); +} + +void MotionEstimation::ResetToIdentity(CameraMotion* camera_motion, + bool consider_valid) { + if (camera_motion->has_translation()) { + *camera_motion->mutable_translation() = TranslationModel(); + } + + if (camera_motion->has_similarity()) { + *camera_motion->mutable_similarity() = SimilarityModel(); + } + + if (camera_motion->has_linear_similarity()) { + *camera_motion->mutable_linear_similarity() = LinearSimilarityModel(); + } + + if (camera_motion->has_affine()) { + *camera_motion->mutable_affine() = AffineModel(); + } + + if (camera_motion->has_homography()) { + *camera_motion->mutable_homography() = Homography(); + } + + if (camera_motion->has_mixture_homography()) { + const int num_models = camera_motion->mixture_homography().model_size(); + for (int m = 0; m < num_models; ++m) { + *camera_motion->mutable_mixture_homography()->mutable_model(m) = + Homography(); + } + } + + if (consider_valid) { + camera_motion->set_type(CameraMotion::VALID); + } else { + 
camera_motion->set_type(CameraMotion::INVALID); + } +} + +void MotionEstimation::ResetToTranslation(const TranslationModel& model, + CameraMotion* camera_motion) { + const float dx = model.dx(); + const float dy = model.dy(); + + if (camera_motion->has_translation()) { + *camera_motion->mutable_translation() = model; + } + + if (camera_motion->has_similarity()) { + *camera_motion->mutable_similarity() = + SimilarityAdapter::FromArgs(dx, dy, 1, 0); + } + + if (camera_motion->has_linear_similarity()) { + *camera_motion->mutable_linear_similarity() = + LinearSimilarityAdapter::FromArgs(dx, dy, 1, 0); + } + + if (camera_motion->has_affine()) { + *camera_motion->mutable_affine() = TranslationAdapter::ToAffine(model); + } + + if (camera_motion->has_homography()) { + *camera_motion->mutable_homography() = + TranslationAdapter::ToHomography(model); + } + + if (camera_motion->has_mixture_homography()) { + const int num_models = camera_motion->mixture_homography().model_size(); + const Homography h = TranslationAdapter::ToHomography(model); + for (int m = 0; m < num_models; ++m) { + camera_motion->mutable_mixture_homography()->mutable_model(m)->CopyFrom( + h); + } + camera_motion->mutable_mixture_homography()->set_dof( + MixtureHomography::CONST_DOF); + } + + camera_motion->set_type(CameraMotion::UNSTABLE); +} + +void MotionEstimation::ResetToSimilarity(const LinearSimilarityModel& model, + CameraMotion* camera_motion) { + if (camera_motion->has_similarity()) { + *camera_motion->mutable_similarity() = + LinearSimilarityAdapter::ToSimilarity(model); + } + + if (camera_motion->has_linear_similarity()) { + *camera_motion->mutable_linear_similarity() = model; + } + + if (camera_motion->has_affine()) { + *camera_motion->mutable_affine() = LinearSimilarityAdapter::ToAffine(model); + } + + if (camera_motion->has_homography()) { + *camera_motion->mutable_homography() = + LinearSimilarityAdapter::ToHomography(model); + } + + if (camera_motion->has_mixture_homography()) { + const int 
num_models = camera_motion->mixture_homography().model_size(); + const Homography h = LinearSimilarityAdapter::ToHomography(model); + + for (int m = 0; m < num_models; ++m) { + camera_motion->mutable_mixture_homography()->mutable_model(m)->CopyFrom( + h); + } + camera_motion->mutable_mixture_homography()->set_dof( + MixtureHomography::CONST_DOF); + } + + camera_motion->set_type(CameraMotion::UNSTABLE_SIM); +} + +void MotionEstimation::ResetToHomography(const Homography& model, + bool flag_as_unstable_model, + CameraMotion* camera_motion) { + if (camera_motion->has_homography()) { + *camera_motion->mutable_homography() = model; + } + + if (camera_motion->has_mixture_homography()) { + const int num_models = camera_motion->mixture_homography().model_size(); + + for (int m = 0; m < num_models; ++m) { + camera_motion->mutable_mixture_homography()->mutable_model(m)->CopyFrom( + model); + } + camera_motion->mutable_mixture_homography()->set_dof( + MixtureHomography::CONST_DOF); + } + + if (flag_as_unstable_model) { + camera_motion->set_type(CameraMotion::UNSTABLE_HOMOG); + } +} + +void MotionEstimation::EstimateAverageMotionMagnitude( + const RegionFlowFeatureList& feature_list, + CameraMotion* camera_motion) const { + std::vector magnitudes; + magnitudes.reserve(feature_list.feature_size()); + for (const auto& feature : feature_list.feature()) { + magnitudes.push_back(std::hypot(feature.dy(), feature.dx())); + } + + std::sort(magnitudes.begin(), magnitudes.end()); + auto tenth = magnitudes.begin() + magnitudes.size() / 10; + auto ninetieth = magnitudes.begin() + magnitudes.size() * 9 / 10; + const int elems = ninetieth - tenth; + if (elems > 0) { + const float average_magnitude = + std::accumulate(tenth, ninetieth, 0.0f) * (1.0f / elems); + + // De-normalize translation. 
+ const float magnitude = + LinearSimilarityAdapter::TransformPoint(inv_normalization_transform_, + Vector2_f(average_magnitude, 0)) + .x(); + camera_motion->set_average_magnitude(magnitude); + } +} + +float MotionEstimation::IRLSPriorWeight(int iteration, int irls_rounds) const { + // Iteration zero -> mapped to one + // Iteration irls_rounds -> mapped to irls_prior_scale. + return 1.0f - (iteration * (1.0f / irls_rounds) * + (1.0f - options_.irls_prior_scale())); +} + +namespace { + +// Returns weighted translational model from feature_list. +Vector2_f EstimateTranslationModelFloat( + const RegionFlowFeatureList& feature_list) { + Vector2_f mean_motion(0, 0); + float weight_sum = 0; + for (const auto& feature : feature_list.feature()) { + mean_motion += FeatureFlow(feature) * feature.irls_weight(); + weight_sum += feature.irls_weight(); + } + + if (weight_sum > 0) { + mean_motion *= (1.0f / weight_sum); + } + return mean_motion; +} + +Vector2_f EstimateTranslationModelDouble( + const RegionFlowFeatureList& feature_list) { + Vector2_d mean_motion(0, 0); + double weight_sum = 0; + for (const auto& feature : feature_list.feature()) { + mean_motion += + Vector2_d::Cast(FeatureFlow(feature)) * feature.irls_weight(); + weight_sum += feature.irls_weight(); + } + + if (weight_sum > 0) { + mean_motion *= (1.0 / weight_sum); + } + + return Vector2_f::Cast(mean_motion); +} + +} // namespace. 
+
+void MotionEstimation::ComputeFeatureMask(
+    const RegionFlowFeatureList& feature_list, std::vector<int>* mask_indices,
+    std::vector<float>* bin_normalizer) const {
+  CHECK(mask_indices != nullptr);
+  CHECK(bin_normalizer != nullptr);
+
+  const int num_features = feature_list.feature_size();
+  mask_indices->clear();
+  mask_indices->reserve(num_features);
+
+  const int mask_size = options_.feature_mask_size();
+  const int max_bins = mask_size * mask_size;
+  bin_normalizer->clear();
+  bin_normalizer->resize(max_bins, 0.0f);
+
+  const Vector2_f domain = NormalizedDomain();
+  const float denom_x = 1.0f / domain.x();
+  const float denom_y = 1.0f / domain.y();
+
+  // Record index, but guard against out of bound error.
+  for (const auto& feature : feature_list.feature()) {
+    const int bin_idx = std::min<int>(
+        max_bins,
+        static_cast<int>(feature.y() * denom_y * mask_size) * mask_size +
+            feature.x() * denom_x * mask_size);
+
+    ++(*bin_normalizer)[bin_idx];
+    mask_indices->push_back(bin_idx);
+  }
+
+  for (float& bin_value : *bin_normalizer) {
+    bin_value = (bin_value == 0) ? 0 : sqrt(1.0 / bin_value);
+  }
+}
+
+bool MotionEstimation::GetTranslationIrlsInitialization(
+    RegionFlowFeatureList* feature_list,
+    const EstimateModelOptions& model_options, float avg_camera_motion,
+    InlierMask* inlier_mask, TranslationModel* best_model) const {
+  CHECK(best_model != nullptr);
+
+  const int num_features = feature_list->feature_size();
+  if (!num_features) {
+    return false;
+  }
+
+  // Bool indicator which features agree with model in each round.
+  // In case no RANSAC rounds are performed considered all features inliers.
+  std::vector<int> best_features(num_features, 1);
+  std::vector<int> curr_features(num_features);
+  float best_sum = 0;
+
+  unsigned int seed = 900913;  // = Google in leet :)
+  std::default_random_engine rand_gen(seed);
+  std::uniform_int_distribution<> distribution(0, num_features - 1);
+
+  auto& options = options_.irls_initialization();
+  const float irls_residual_scale = GetIRLSResidualScale(
+      avg_camera_motion, options_.irls_motion_magnitude_fraction());
+  const float cutoff = options.cutoff() / irls_residual_scale;
+  const float sq_cutoff = cutoff * cutoff;
+
+  // Either temporal bias or motion prior based on options.
+  std::vector<float> bias(num_features, 1.0f);
+
+  // Optionally, compute mask index for each feature.
+  std::vector<int> mask_indices;
+
+  if (options_.estimation_policy() ==
+      MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS) {
+    BiasFromFeatures(*feature_list, MODEL_TRANSLATION, model_options, &bias);
+  } else if (inlier_mask) {
+    std::vector<float> unused_bin_normalizer;
+    ComputeFeatureMask(*feature_list, &mask_indices, &unused_bin_normalizer);
+    inlier_mask->MotionPrior(*feature_list, &bias);
+  }
+
+  for (int rounds = 0; rounds < options.rounds(); ++rounds) {
+    float curr_sum = 0;
+    // Pick a random vector.
+    const int rand_idx = distribution(rand_gen);
+    const Vector2_f flow = FeatureFlow(feature_list->feature(rand_idx));
+
+    // curr_features gets set for every feature below; no need to reset.
+ for (int i = 0; i < num_features; ++i) { + const Feature& feature = feature_list->feature(i); + const Vector2_f diff = FeatureFlow(feature) - flow; + curr_features[i] = static_cast(diff.Norm2() < sq_cutoff); + if (curr_features[i]) { + float score = feature.irls_weight(); + if (inlier_mask) { + const int bin_idx = mask_indices[i]; + score *= bias[i] + inlier_mask->GetInlierScore(bin_idx); + } else { + score *= bias[i]; + } + curr_sum += score; + } + } + + if (curr_sum > best_sum) { + best_sum = curr_sum; + best_features.swap(curr_features); + best_model->set_dx(flow.x()); + best_model->set_dy(flow.y()); + } + } + + if (inlier_mask) { + inlier_mask->InitUpdateMask(); + } + + std::vector inlier_weights; + + // Score outliers low. + for (int i = 0; i < num_features; ++i) { + RegionFlowFeature* feature = feature_list->mutable_feature(i); + if (best_features[i] == 0 && feature->irls_weight() != 0) { + feature->set_irls_weight(kOutlierIRLSWeight); + } else { + inlier_weights.push_back(feature->irls_weight()); + if (inlier_mask) { + const int bin_idx = mask_indices[i]; + inlier_mask->RecordInlier(bin_idx, feature->irls_weight()); + } + } + } + + if (!inlier_weights.empty()) { + // Ensure that all selected inlier features have at least median weight. + auto median = inlier_weights.begin() + inlier_weights.size() * 0.5f; + std::nth_element(inlier_weights.begin(), median, inlier_weights.end()); + + for (int i = 0; i < num_features; ++i) { + RegionFlowFeature* feature = feature_list->mutable_feature(i); + if (best_features[i] != 0) { + feature->set_irls_weight(std::max(*median, feature->irls_weight())); + } + } + } + + // Compute translation variance as feature for stability evaluation. 
+ const float translation_variance = TranslationVariance( + *feature_list, Vector2_f(best_model->dx(), best_model->dy())); + + return IsStableTranslation(*best_model, translation_variance, *feature_list); +} + +void MotionEstimation::EstimateTranslationModelIRLS( + int irls_rounds, bool compute_stability, + RegionFlowFeatureList* flow_feature_list, + const PriorFeatureWeights* prior_weights, + CameraMotion* camera_motion) const { + if (prior_weights && !prior_weights->HasCorrectDimension( + irls_rounds, flow_feature_list->feature_size())) { + LOG(ERROR) << "Prior weights incorrectly initialized, ignoring."; + prior_weights = nullptr; + } + + // Simply average over features. + const bool irls_use_l0_norm = options_.irls_use_l0_norm(); + + const float irls_residual_scale = + GetIRLSResidualScale(camera_motion->average_magnitude(), + options_.irls_motion_magnitude_fraction()); + + const std::vector* irls_priors = nullptr; + const std::vector* irls_alphas = nullptr; + if (prior_weights && prior_weights->HasNonZeroAlpha()) { + irls_priors = &prior_weights->priors; + irls_alphas = &prior_weights->alphas; + } + + Vector2_f mean_motion; + for (int i = 0; i < irls_rounds; ++i) { + if (options_.use_highest_accuracy_for_normal_equations()) { + mean_motion = EstimateTranslationModelDouble(*flow_feature_list); + } else { + mean_motion = EstimateTranslationModelFloat(*flow_feature_list); + } + + const float alpha = irls_alphas != nullptr ? (*irls_alphas)[i] : 0.0f; + const float one_minus_alpha = 1.0f - alpha; + + // Update irls weights. + const auto feature_start = flow_feature_list->mutable_feature()->begin(); + for (auto feature = feature_start; + feature != flow_feature_list->mutable_feature()->end(); ++feature) { + if (feature->irls_weight() == 0.0f) { + continue; + } + + // Express difference in original domain. 
+ const Vector2_f diff = LinearSimilarityAdapter::TransformPoint( + irls_transform_, FeatureFlow(*feature) - mean_motion); + + const float numerator = + alpha == 0.0f ? 1.0f + : ((*irls_priors)[feature - feature_start] * alpha + + one_minus_alpha); + + if (irls_use_l0_norm) { + feature->set_irls_weight( + numerator / (diff.Norm() * irls_residual_scale + kIrlsEps)); + } else { + feature->set_irls_weight( + numerator / + (std::sqrt(static_cast(diff.Norm() * irls_residual_scale)) + + kIrlsEps)); + } + } + } + + // De-normalize translation. + Vector2_f translation = LinearSimilarityAdapter::TransformPoint( + inv_normalization_transform_, mean_motion); + + camera_motion->mutable_translation()->set_dx(translation.x()); + camera_motion->mutable_translation()->set_dy(translation.y()); + + if (compute_stability) { + camera_motion->set_translation_variance( + TranslationVariance(*flow_feature_list, translation)); + } +} + +float MotionEstimation::TranslationVariance( + const RegionFlowFeatureList& feature_list, + const Vector2_f& translation) const { + // Compute irls based variance. + float variance = 0; + double weight_sum = 0; + + for (const auto& feature : feature_list.feature()) { + weight_sum += feature.irls_weight(); + variance += (LinearSimilarityAdapter::TransformPoint( + inv_normalization_transform_, FeatureFlow(feature)) - + translation) + .Norm2() * + feature.irls_weight(); + } + + if (weight_sum > 0) { + return variance / weight_sum; + } else { + return 0.0f; + } +} + +namespace { + +// Solves for the linear similarity via normal equations, +// using only the positions specified by features from the feature list. +// Input matrix is expected to be a 4x4 matrix of type T, rhs and solution are +// both 4x1 vectors of type T. +// Template class T specifies the desired accuracy, use float or double. 
+template +LinearSimilarityModel LinearSimilarityL2SolveSystem( + const RegionFlowFeatureList& feature_list, Eigen::Matrix* matrix, + Eigen::Matrix* rhs, Eigen::Matrix* solution, + bool* success) { + CHECK(matrix != nullptr); + CHECK(rhs != nullptr); + CHECK(solution != nullptr); + + *matrix = Eigen::Matrix::Zero(); + *rhs = Eigen::Matrix::Zero(); + + // Matrix multiplications are hand-coded for speed improvements vs. + // opencv's cvGEMM calls. + for (const auto& feature : feature_list.feature()) { + const T x = feature.x(); + const T y = feature.y(); + const T w = feature.irls_weight(); + + // double J[2 * 4] = {1, 0, x, -y, + // 0, 1, y, x}; + // Compute J^t * J * w = {1, 0, x, -y + // 0, 1, y, x, + // x, y, xx+yy, 0, + // -y x, 0, xx+yy} * w; + + const T x_w = x * w; + const T y_w = y * w; + const T xx_yy_w = (x * x + y * y) * w; + + T* matrix_ptr = matrix->data(); + matrix_ptr[0] += w; + matrix_ptr[2] += x_w; + matrix_ptr[3] += -y_w; + + matrix_ptr += 4; + matrix_ptr[1] += w; + matrix_ptr[2] += y_w; + matrix_ptr[3] += x_w; + + matrix_ptr += 4; + matrix_ptr[0] += x_w; + matrix_ptr[1] += y_w; + matrix_ptr[2] += xx_yy_w; + + matrix_ptr += 4; + matrix_ptr[0] += -y_w; + matrix_ptr[1] += x_w; + matrix_ptr[3] += xx_yy_w; + + T* rhs_ptr = rhs->data(); + + // Using identity parametrization below. + const T m_x = feature.dx() * w; + const T m_y = feature.dy() * w; + + rhs_ptr[0] += m_x; + rhs_ptr[1] += m_y; + rhs_ptr[2] += x * m_x + y * m_y; + rhs_ptr[3] += -y * m_x + x * m_y; + } + + // Solution parameters p. + *solution = matrix->colPivHouseholderQr().solve(*rhs); + if (((*matrix) * (*solution)).isApprox(*rhs, kPrecision)) { + LinearSimilarityModel model; + model.set_dx((*solution)(0, 0)); + model.set_dy((*solution)(1, 0)); + model.set_a((*solution)(2, 0) + 1.0); // Identity parametrization. 
+ model.set_b((*solution)(3, 0)); + if (success) { + *success = true; + } + return model; + } + + if (success) { + *success = false; + } + return LinearSimilarityModel(); +} + +} // namespace. + +bool MotionEstimation::GetSimilarityIrlsInitialization( + RegionFlowFeatureList* feature_list, + const EstimateModelOptions& model_options, float avg_camera_motion, + InlierMask* inlier_mask, LinearSimilarityModel* best_model) const { + CHECK(best_model != nullptr); + + const int num_features = feature_list->feature_size(); + if (!num_features) { + return false; + } + + // matrix is symmetric. + Eigen::Matrix matrix = Eigen::Matrix::Zero(); + Eigen::Matrix solution = Eigen::Matrix::Zero(); + Eigen::Matrix rhs = Eigen::Matrix::Zero(); + + // Bool indicator which features agree with model in each round. + // In case no RANSAC rounds are performed considered all features inliers. + std::vector best_features(num_features, 1); + std::vector curr_features(num_features); + float best_sum = 0; + + unsigned int seed = 900913; // = Google in leet :) + std::default_random_engine rand_gen(seed); + std::uniform_int_distribution<> distribution(0, num_features - 1); + auto& options = options_.irls_initialization(); + + const float irls_residual_scale = GetIRLSResidualScale( + avg_camera_motion, options_.irls_motion_magnitude_fraction()); + const float cutoff = options.cutoff() / irls_residual_scale; + const float sq_cutoff = cutoff * cutoff; + + // Either temporal bias or motion prior based on options. + std::vector bias(num_features, 1.0f); + + // Compute mask index for each feature. 
+ std::vector mask_indices; + + if (options_.estimation_policy() == + MotionEstimationOptions::TEMPORAL_LONG_FEATURE_BIAS) { + BiasFromFeatures(*feature_list, MODEL_LINEAR_SIMILARITY, model_options, + &bias); + } else if (inlier_mask) { + std::vector unused_bin_normalizer; + ComputeFeatureMask(*feature_list, &mask_indices, &unused_bin_normalizer); + inlier_mask->MotionPrior(*feature_list, &bias); + } + + for (int rounds = 0; rounds < options.rounds(); ++rounds) { + // Pick two random vectors. + RegionFlowFeatureList to_test; + to_test.add_feature()->CopyFrom( + feature_list->feature(distribution(rand_gen))); + to_test.add_feature()->CopyFrom( + feature_list->feature(distribution(rand_gen))); + ResetRegionFlowFeatureIRLSWeights(1.0f, &to_test); + bool success = false; + LinearSimilarityModel similarity = LinearSimilarityL2SolveSystem( + to_test, &matrix, &rhs, &solution, &success); + if (!success) { + continue; + } + + float curr_sum = 0; + for (int i = 0; i < num_features; ++i) { + const Feature& feature = feature_list->feature(i); + const Vector2_f trans_location = LinearSimilarityAdapter::TransformPoint( + similarity, FeatureLocation(feature)); + const Vector2_f diff = FeatureMatchLocation(feature) - trans_location; + curr_features[i] = static_cast(diff.Norm2() < sq_cutoff); + if (curr_features[i]) { + float score = feature.irls_weight(); + if (inlier_mask) { + const int bin_idx = mask_indices[i]; + score *= (bias[i] + inlier_mask->GetInlierScore(bin_idx)); + } else { + score *= bias[i]; + } + curr_sum += score; + } + } + + if (curr_sum > best_sum) { + best_sum = curr_sum; + best_features.swap(curr_features); + best_model->Swap(&similarity); + } + } + + if (inlier_mask) { + inlier_mask->InitUpdateMask(); + } + + int num_inliers = 0; + + std::vector inlier_weights; + + // Score outliers low. 
+ for (int i = 0; i < num_features; ++i) { + RegionFlowFeature* feature = feature_list->mutable_feature(i); + if (best_features[i] == 0 && feature->irls_weight() != 0) { + feature->set_irls_weight(kOutlierIRLSWeight); + } else { + ++num_inliers; + inlier_weights.push_back(feature->irls_weight()); + if (inlier_mask) { + const int bin_idx = mask_indices[i]; + inlier_mask->RecordInlier(bin_idx, feature->irls_weight()); + } + } + } + + if (!inlier_weights.empty()) { + // Ensure that all selected inlier features have at least median weight. + auto median = inlier_weights.begin() + inlier_weights.size() * 0.5f; + std::nth_element(inlier_weights.begin(), median, inlier_weights.end()); + + for (int i = 0; i < num_features; ++i) { + RegionFlowFeature* feature = feature_list->mutable_feature(i); + if (best_features[i] != 0) { + feature->set_irls_weight(std::max(*median, feature->irls_weight())); + } + } + } + + // For stability purposes we don't need to be that strict here. + // Inflate number of actual inliers, as failing the initialization will most + // likely fail the actual estimation. That way it can recover. + num_inliers *= 2; + return IsStableSimilarity(*best_model, *feature_list, num_inliers); +} + +void MotionEstimation::ComputeSimilarityInliers( + const RegionFlowFeatureList& feature_list, int* num_inliers, + int* num_strict_inliers) const { + CHECK(num_inliers); + CHECK(num_strict_inliers); + + const auto& similarity_bounds = options_.stable_similarity_bounds(); + + // Compute IRLS weight threshold from inlier threshold expressed in pixel + // error. IRLS weight is normalized to 1 pixel error, so take reciprocal. 
+ float threshold = std::max(similarity_bounds.inlier_threshold(), + similarity_bounds.frac_inlier_threshold() * + hypot(frame_width_, frame_height_)); + CHECK_GT(threshold, 0); + + threshold = 1.0f / threshold; + float strict_threshold = similarity_bounds.strict_inlier_threshold(); + CHECK_GT(strict_threshold, 0); + strict_threshold = 1.0f / strict_threshold; + + if (!options_.irls_use_l0_norm()) { + threshold = std::sqrt(static_cast(threshold)); + } + + *num_inliers = 0; + *num_strict_inliers = 0; + for (const auto& feature : feature_list.feature()) { + if (feature.irls_weight() >= threshold) { + ++*num_inliers; + } + + if (feature.irls_weight() >= strict_threshold) { + ++*num_strict_inliers; + } + } +} + +bool MotionEstimation::EstimateLinearSimilarityModelIRLS( + int irls_rounds, bool compute_stability, + RegionFlowFeatureList* flow_feature_list, + const PriorFeatureWeights* prior_weights, + CameraMotion* camera_motion) const { + if (prior_weights && !prior_weights->HasCorrectDimension( + irls_rounds, flow_feature_list->feature_size())) { + LOG(ERROR) << "Prior weights incorrectly initialized, ignoring."; + prior_weights = nullptr; + } + + // Just declaring does not actually allocate memory + Eigen::Matrix matrix_f; + Eigen::Matrix solution_f; + Eigen::Matrix rhs_f; + Eigen::Matrix matrix_d; + Eigen::Matrix solution_d; + Eigen::Matrix rhs_d; + + if (options_.use_highest_accuracy_for_normal_equations()) { + matrix_d = Eigen::Matrix::Zero(); + solution_d = Eigen::Matrix::Zero(); + rhs_d = Eigen::Matrix::Zero(); + } else { + matrix_f = Eigen::Matrix::Zero(); + solution_f = Eigen::Matrix::Zero(); + rhs_f = Eigen::Matrix::Zero(); + } + + LinearSimilarityModel* solved_model = + camera_motion->mutable_linear_similarity(); + + const float irls_residual_scale = + GetIRLSResidualScale(camera_motion->average_magnitude(), + options_.irls_motion_magnitude_fraction()); + + const bool irls_use_l0_norm = options_.irls_use_l0_norm(); + + const std::vector* irls_priors = 
nullptr; + const std::vector* irls_alphas = nullptr; + if (prior_weights && prior_weights->HasNonZeroAlpha()) { + irls_priors = &prior_weights->priors; + irls_alphas = &prior_weights->alphas; + } + + for (int i = 0; i < irls_rounds; ++i) { + bool success; + if (options_.use_highest_accuracy_for_normal_equations()) { + *solved_model = LinearSimilarityL2SolveSystem( + *flow_feature_list, &matrix_d, &rhs_d, &solution_d, &success); + } else { + *solved_model = LinearSimilarityL2SolveSystem( + *flow_feature_list, &matrix_f, &rhs_f, &solution_f, &success); + } + + if (!success) { + VLOG(1) << "Linear similarity estimation failed."; + *camera_motion->mutable_linear_similarity() = LinearSimilarityModel(); + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + + const float alpha = irls_alphas != nullptr ? (*irls_alphas)[i] : 0.0f; + const float one_minus_alpha = 1.0f - alpha; + + const auto feature_start = flow_feature_list->mutable_feature()->begin(); + for (auto feature = feature_start; + feature != flow_feature_list->mutable_feature()->end(); ++feature) { + if (feature->irls_weight() == 0.0f) { + continue; + } + + const Vector2_f trans_location = LinearSimilarityAdapter::TransformPoint( + *solved_model, FeatureLocation(*feature)); + const Vector2_f matched_location = FeatureMatchLocation(*feature); + + // Express residual in frame coordinates. + const Vector2_f residual = LinearSimilarityAdapter::TransformPoint( + irls_transform_, trans_location - matched_location); + const float numerator = + alpha == 0.0f ? 1.0f + : ((*irls_priors)[feature - feature_start] * alpha + + one_minus_alpha); + + if (irls_use_l0_norm) { + feature->set_irls_weight( + numerator / (residual.Norm() * irls_residual_scale + kIrlsEps)); + } else { + feature->set_irls_weight( + numerator / (std::sqrt(static_cast(residual.Norm() * + irls_residual_scale)) + + kIrlsEps)); + } + } + } + + // Undo pre_transform. 
+ *solved_model = ModelCompose3(inv_normalization_transform_, *solved_model, + normalization_transform_); + + if (compute_stability) { + int num_inliers = 0; + int num_strict_inliers = 0; + + if (flow_feature_list->feature_size() > 0) { + ComputeSimilarityInliers(*flow_feature_list, &num_inliers, + &num_strict_inliers); + + const float inv_num_feat = 1.0f / flow_feature_list->feature_size(); + camera_motion->set_similarity_inlier_ratio(num_inliers * inv_num_feat); + camera_motion->set_similarity_strict_inlier_ratio(num_strict_inliers * + inv_num_feat); + } else { + camera_motion->set_similarity_inlier_ratio(1.0f); + camera_motion->set_similarity_strict_inlier_ratio(1.0f); + } + } + + return true; +} + +bool MotionEstimation::EstimateAffineModelIRLS( + int irls_rounds, RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion) const { + // Setup solution matrices in column major. + Eigen::Matrix matrix = Eigen::Matrix::Zero(); + Eigen::Matrix rhs = Eigen::Matrix::Zero(); + + AffineModel* solved_model = camera_motion->mutable_affine(); + + // Multiple rounds of weighting based L2 optimization. + for (int i = 0; i < irls_rounds; ++i) { + // Build Jacobians. + for (const auto& feature : feature_list->feature()) { + const double w = feature.irls_weight(); + const Vector2_f& pt_1 = FeatureLocation(feature); + const double x = pt_1.x() * w; + const double y = pt_1.y() * w; + + Eigen::Matrix jacobian = + Eigen::Matrix::Zero(); + jacobian(0, 0) = w; + jacobian(0, 2) = x; + jacobian(0, 3) = y; + jacobian(1, 1) = w; + jacobian(1, 4) = x; + jacobian(1, 5) = y; + + // Update A. + // Compute J^t * J, where J = jacobian. + matrix = jacobian.transpose() * jacobian + matrix; + + // Transform matched point. 
+ const Vector2_f& pt_2 = FeatureMatchLocation(feature); + Eigen::Matrix pt_2_mat(pt_2.x() * w, pt_2.y() * w); + + // Compute J^t * y_i; + rhs = jacobian.transpose() * pt_2_mat + rhs; + } + + // Solve A * p = b; + Eigen::Matrix p = Eigen::Matrix::Zero(); + p = matrix.colPivHouseholderQr().solve(rhs); + if (!(matrix * p).isApprox(rhs, kPrecision)) { + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + + // Set model. + solved_model->set_dx(p(0, 0)); + solved_model->set_dy(p(1, 0)); + solved_model->set_a(p(2, 0)); + solved_model->set_b(p(3, 0)); + solved_model->set_c(p(4, 0)); + solved_model->set_d(p(5, 0)); + + // Re-compute weights from errors. + for (auto& feature : *feature_list->mutable_feature()) { + if (feature.irls_weight() == 0.0f) { + continue; + } + + const Vector2_f trans_location = AffineAdapter::TransformPoint( + *solved_model, FeatureLocation(feature)); + const Vector2_f& matched_location = FeatureMatchLocation(feature); + + // Express residual in frame coordinates. + const Vector2_f residual = LinearSimilarityAdapter::TransformPoint( + irls_transform_, trans_location - matched_location); + + feature.set_irls_weight(sqrt(1.0 / (residual.Norm() + kIrlsEps))); + } + } + + // Express in original frame coordinate system. + *solved_model = ModelCompose3( + LinearSimilarityAdapter::ToAffine(inv_normalization_transform_), + *solved_model, + LinearSimilarityAdapter::ToAffine(normalization_transform_)); + return true; +} + +// Estimates homography via least squares (specifically QR decomposition). +// Specifically, for +// H = (a b t1) +// c d t2 +// w1 w2 1 +// H * (x, y, 1)^T = (a*x + b*y + t1 +// c*x + d*y + t2 +// w1*x + w2*y + 1 ) +// +// Therefore matching point (mx, my) is given by +// mx = (a*x + b*y + t1) / (w1*x + w2*y + 1) Eq. 1 +// my = (c*x + d*y + t2) / (w1*x + w2*y + 1) +// +// Multiply with denominator to get: +// a*x + b*y + t1 -w1*x*mx -w2*y*mx = mx Eq. 
2 +// c*x + d*y + t2 -w1*x*my -w2*y*my = my +// +// This results in the following system +// J = (x y 1 0 0 0 -x*mx -y*mx) * (a b t1 c d t2 w1 w2)^T = (mx +// 0 0 0 x y 1 -x*my -y*my my) +// +// Note, that in a linear system Eq. 1 and Eq. 2 are not equivalent, +// as the denominator varies w.r.t. the position of each feature. + +// Therefore, if the optional argument prev_solution is passed, +// each J will be scaled with the denominator w1*x + w2*y + 1. +// +// If perspective_regularizer := r != 0, an additional equation is introduced: +// (0 0 0 0 0 0 r r) * (a b t1 c d t2 w1 w2)^T = 0. +// +// Returns false if system could not be solved for. +template +bool HomographyL2QRSolve( + const RegionFlowFeatureList& feature_list, + const Homography* prev_solution, // optional. + float perspective_regularizer, + Eigen::Matrix* matrix, // tmp matrix + Eigen::Matrix* solution) { + CHECK(matrix); + CHECK(solution); + CHECK_EQ(8, matrix->cols()); + const int num_rows = + 2 * feature_list.feature_size() + (perspective_regularizer == 0 ? 0 : 1); + CHECK_EQ(num_rows, matrix->rows()); + CHECK_EQ(1, solution->cols()); + CHECK_EQ(8, solution->rows()); + + // Compute homography from features (H * location = prev_location). + *matrix = Eigen::Matrix::Zero(matrix->rows(), 8); + Eigen::Matrix rhs = + Eigen::Matrix::Zero(matrix->rows(), 1); + + if (RegionFlowFeatureIRLSSum(feature_list) > kMaxCondition) { + return false; + } + + // Create matrix and rhs (using h_33 = 1 constraint). + int feature_idx = 0; + for (auto feature = feature_list.feature().begin(); + feature != feature_list.feature().end(); ++feature, ++feature_idx) { + int feature_row = 2 * feature_idx; + + Vector2_f pt = FeatureLocation(*feature); + Vector2_f prev_pt = FeatureMatchLocation(*feature); + // Weight per feature. 
+ double scale = 1.0; + if (prev_solution) { + const double denom = + prev_solution->h_20() * pt.x() + prev_solution->h_21() * pt.y() + 1.0; + if (fabs(denom) > 1e-5) { + scale /= denom; + } else { + scale = 0; + } + } + + const float w = feature->irls_weight() * scale; + + // Scale feature with weight; + Vector2_f pt_w = pt * w; + + // Row 1 of above J: + (*matrix)(feature_row, 0) = pt_w.x(); + (*matrix)(feature_row, 1) = pt_w.y(); + (*matrix)(feature_row, 2) = w; + + // Entry 3 .. 5 equal zero. + (*matrix)(feature_row, 6) = -pt_w.x() * prev_pt.x(); + (*matrix)(feature_row, 7) = -pt_w.y() * prev_pt.x(); + rhs(feature_row, 0) = prev_pt.x() * w; + + // Row 2 of above J: + // Entry 0 .. 2 equal zero. + (*matrix)(feature_row + 1, 3) = pt_w.x(); + (*matrix)(feature_row + 1, 4) = pt_w.y(); + (*matrix)(feature_row + 1, 5) = w; + + (*matrix)(feature_row + 1, 6) = -pt_w.x() * prev_pt.y(); + (*matrix)(feature_row + 1, 7) = -pt_w.y() * prev_pt.y(); + rhs(feature_row + 1, 0) = prev_pt.y() * w; + } + + if (perspective_regularizer > 0) { + int last_row_idx = 2 * feature_list.feature_size(); + (*matrix)(last_row_idx, 6) = (*matrix)(last_row_idx, 7) = + perspective_regularizer; + } + + // TODO: Consider a faster function? + *solution = matrix->colPivHouseholderQr().solve(rhs); + return ((*matrix) * (*solution)).isApprox(rhs, kPrecision); +} + +// Same as function above, but solves for homography via normal equations, +// using only the positions specified by features from the feature list. +// Expects 8x8 matrix of type T and 8x1 rhs and solution vector of type T. +// Optional parameter is prev_solution, in which case each row is scaled by +// correct denominator (see derivation at function description +// HomographyL2QRSolve). +// Template class T specifies the desired accuracy, use float or double. +template +Homography HomographyL2NormalEquationSolve( + const RegionFlowFeatureList& feature_list, + const Homography* prev_solution, // optional. 
+ float perspective_regularizer, Eigen::Matrix* matrix, + Eigen::Matrix* rhs, Eigen::Matrix* solution, + bool* success) { + CHECK(matrix != nullptr); + CHECK(rhs != nullptr); + CHECK(solution != nullptr); + + *matrix = Eigen::Matrix::Zero(); + *rhs = Eigen::Matrix::Zero(); + + // Matrix multiplications are hand-coded for speed improvements vs. + // opencv's cvGEMM calls. + for (const auto& feature : feature_list.feature()) { + T scale = 1.0; + if (prev_solution) { + const T denom = prev_solution->h_20() * feature.x() + + prev_solution->h_21() * feature.y() + 1.0; + if (fabs(denom) > 1e-5) { + scale /= denom; + } else { + scale = 0; + } + } + const T w = feature.irls_weight() * scale; + const T x = feature.x(); + const T y = feature.y(); + const T xw = x * w; + const T yw = y * w; + const T xxw = x * x * w; + const T yyw = y * y * w; + const T xyw = x * y * w; + const T mx = feature.x() + feature.dx(); + const T my = feature.y() + feature.dy(); + + const T mxxyy = mx * mx + my * my; + // Jacobian + // double J[2 * 8] = {x, y, 1, 0, 0, 0, -x * m_x, -y * m_x, + // {0, 0, 0, x, y, 1, -x * m_y, -y * m_y} + // + // // Compute J^t * J * w = + // ( xx xy x 0 0 0 -xx*mx -xy*mx ) + // ( xy yy y 0 0 0 -xy*mx -yy*mx ) + // ( x y 1 0 0 0 -x*mx -y*mx ) + // ( 0 0 0 xx xy x -xx*my -xy*my ) + // ( 0 0 0 xy yy y -xy*my -yy*my ) + // ( 0 0 0 x y 1 -x*my -y*my ) + // ( -xx*mx -xy*mx -x*mx -xx*my -xy*my -x*my xx*mxxyy xy*mxxyy ) + // ( -xy*mx -yy*mx -y*mx -xy*my -yy*my -y*my xy*mxxyy yy*mxxyy ) * w + + // 1st row: xx xy x 0 0 0 -xx*mx -xy*mx + T* matrix_ptr = matrix->data(); + matrix_ptr[0] += xxw; + matrix_ptr[1] += xyw; + matrix_ptr[2] += xw; + matrix_ptr[6] += -xxw * mx; + matrix_ptr[7] += -xyw * mx; + + // 2nd row: xy yy y 0 0 0 -xy*mx -yy*mx + matrix_ptr += 8; + matrix_ptr[0] += xyw; + matrix_ptr[1] += yyw; + matrix_ptr[2] += yw; + matrix_ptr[6] += -xyw * mx; + matrix_ptr[7] += -yyw * mx; + + // 3rd row: x y 1 0 0 0 -x*mx -y*mx + matrix_ptr += 8; + matrix_ptr[0] += xw; + 
matrix_ptr[1] += yw; + matrix_ptr[2] += w; + matrix_ptr[6] += -xw * mx; + matrix_ptr[7] += -yw * mx; + + // 4th row: 0 0 0 xx xy x -xx*my -xy*my + matrix_ptr += 8; + matrix_ptr[3] += xxw; + matrix_ptr[4] += xyw; + matrix_ptr[5] += xw; + matrix_ptr[6] += -xxw * my; + matrix_ptr[7] += -xyw * my; + + // 5th row: 0 0 0 xy yy y -xy*my -yy*my + matrix_ptr += 8; + matrix_ptr[3] += xyw; + matrix_ptr[4] += yyw; + matrix_ptr[5] += yw; + matrix_ptr[6] += -xyw * my; + matrix_ptr[7] += -yyw * my; + + // 6th row: 0 0 0 x y 1 -x*my -y*my + matrix_ptr += 8; + matrix_ptr[3] += xw; + matrix_ptr[4] += yw; + matrix_ptr[5] += w; + matrix_ptr[6] += -xw * my; + matrix_ptr[7] += -yw * my; + + // 7th row: -xx*mx -xy*mx -x*mx -xx*my -xy*my -x*my xx*mxxyy xy*mxxyy + matrix_ptr += 8; + matrix_ptr[0] += -xxw * mx; + matrix_ptr[1] += -xyw * mx; + matrix_ptr[2] += -xw * mx; + matrix_ptr[3] += -xxw * my; + matrix_ptr[4] += -xyw * my; + matrix_ptr[5] += -xw * my; + matrix_ptr[6] += xxw * mxxyy; + matrix_ptr[7] += xyw * mxxyy; + + // 8th row: -xy*mx -yy*mx -y*mx -xy*my -yy*my -y*my xy*mxxyy yy*mxxyy + matrix_ptr += 8; + matrix_ptr[0] += -xyw * mx; + matrix_ptr[1] += -yyw * mx; + matrix_ptr[2] += -yw * mx; + matrix_ptr[3] += -xyw * my; + matrix_ptr[4] += -yyw * my; + matrix_ptr[5] += -yw * my; + matrix_ptr[6] += xyw * mxxyy; + matrix_ptr[7] += yyw * mxxyy; + + // Right hand side: + // b = ( x + // y ) + // Compute J^t * b * w = + // ( x*mx y*mx mx x*my y*my my -x*mxxyy -y*mxxyy ) * w + T* rhs_ptr = rhs->data(); + rhs_ptr[0] += xw * mx; + rhs_ptr[1] += yw * mx; + rhs_ptr[2] += mx * w; + rhs_ptr[3] += xw * my; + rhs_ptr[4] += yw * my; + rhs_ptr[5] += my * w; + rhs_ptr[6] += -xw * mxxyy; + rhs_ptr[7] += -yw * mxxyy; + } + + if (perspective_regularizer > 0) { + // Additional constraint: + // C[8] = {0, 0, 0, 0, 0, 0, r, r} + // Compute C^t * C = + // [ 0 ... 0 0 0 + // ... + // 0 ... 0 r^2 r^2 + // 0 ... 
0 r^2 r^2 ] + const T sq_r = perspective_regularizer * perspective_regularizer; + + T* matrix_ptr = matrix->row(6).data(); + matrix_ptr[6] += sq_r; + matrix_ptr[7] += sq_r; + matrix_ptr += 8; + matrix_ptr[6] += sq_r; + matrix_ptr[7] += sq_r; + // Nothing to add to RHS (zero). + } + + // Solution parameters p. + *solution = matrix->colPivHouseholderQr().solve(*rhs); + if (((*matrix) * (*solution)).isApprox(*rhs, kPrecision)) { + const T* ptr = solution->data(); + Homography model; + + model.set_h_00(ptr[0]); + model.set_h_01(ptr[1]); + model.set_h_02(ptr[2]); + model.set_h_10(ptr[3]); + model.set_h_11(ptr[4]); + model.set_h_12(ptr[5]); + model.set_h_20(ptr[6]); + model.set_h_21(ptr[7]); + + if (success) { + *success = true; + } + return model; + } + + if (success) { + *success = false; + } + return Homography(); +} + +namespace { + +float PatchDescriptorIRLSWeight(const RegionFlowFeature& feature) { + float weight = feature.irls_weight(); + + // Blend weight to combine irls weight with a feature's path standard + // deviation. + const float alpha = 0.7f; + // Inverse of maximum value of standard deviation for intensities in [0, 255]. + // Scaled such that only low textured regions are given small weight. + const float denom = 1.0f / 128.0f * 5.0f; + + const float feature_stdev_l1 = + PatchDescriptorColorStdevL1(feature.feature_descriptor()); + + if (feature_stdev_l1 >= 0.0f) { + weight *= alpha + (1.f - alpha) * std::min(1.f, feature_stdev_l1 * denom); + } + + return weight; +} + +// Extension of above function to evenly spaced row-mixture models. +bool MixtureHomographyL2DLTSolve( + const RegionFlowFeatureList& feature_list, int num_models, + const MixtureRowWeights& row_weights, float regularizer_lambda, + Eigen::MatrixXf* matrix, // least squares matrix + Eigen::MatrixXf* solution) { + CHECK(matrix); + CHECK(solution); + + // cv::solve can hang for really bad conditioned systems. 
+ const double feature_irls_sum = RegionFlowFeatureIRLSSum(feature_list); + if (feature_irls_sum > kMaxCondition) { + return false; + } + + const int num_dof = 8 * num_models; + const int num_constraints = num_dof - 8; + + CHECK_EQ(matrix->cols(), num_dof); + // 2 Rows (x,y) per feature. + CHECK_EQ(matrix->rows(), 2 * feature_list.feature_size() + num_constraints); + CHECK_EQ(solution->cols(), 1); + CHECK_EQ(solution->rows(), num_dof); + + // Compute homography from features. (H * location = prev_location) + *matrix = Eigen::MatrixXf::Zero(matrix->rows(), matrix->cols()); + Eigen::Matrix rhs = + Eigen::MatrixXf::Zero(matrix->rows(), 1); + + // Normalize feature sum to 1. + float irls_denom = 1.0 / (feature_irls_sum + 1e-6); + + // Create matrix for DLT. + int feature_idx = 0; + for (auto feature = feature_list.feature().begin(); + feature != feature_list.feature().end(); ++feature, ++feature_idx) { + float* mat_row_1 = matrix->row(2 * feature_idx).data(); + float* mat_row_2 = matrix->row(2 * feature_idx + 1).data(); + float* rhs_row_1 = rhs.row(2 * feature_idx).data(); + float* rhs_row_2 = rhs.row(2 * feature_idx + 1).data(); + + Vector2_f pt = FeatureLocation(*feature); + Vector2_f prev_pt = FeatureMatchLocation(*feature); + // Weight per feature. + const float f_w = PatchDescriptorIRLSWeight(*feature) * irls_denom; + + // Scale feature point by weight; + Vector2_f pt_w = pt * f_w; + const float* mix_weights = row_weights.RowWeightsClamped(feature->y()); + + for (int m = 0; m < num_models; ++m, mat_row_1 += 8, mat_row_2 += 8) { + const float w = mix_weights[m]; + // Entries 0 .. 2 are zero. + mat_row_1[3] = -pt_w.x() * w; + mat_row_1[4] = -pt_w.y() * w; + mat_row_1[5] = -f_w * w; + + mat_row_1[6] = pt_w.x() * prev_pt.y() * w; + mat_row_1[7] = pt_w.y() * prev_pt.y() * w; + + mat_row_2[0] = pt_w.x() * w; + mat_row_2[1] = pt_w.y() * w; + mat_row_2[2] = f_w * w; + + // Entries 3 .. 5 are zero. 
+ mat_row_2[6] = -pt_w.x() * prev_pt.x() * w; + mat_row_2[7] = -pt_w.y() * prev_pt.x() * w; + } + + // Weights sum to one (-> take out of loop). + rhs_row_1[0] = -prev_pt.y() * f_w; + rhs_row_2[0] = prev_pt.x() * f_w; + } + + // Add regularizer term. It is important to weight perspective larger + // to roughly obtain similar magnitudes across parameters. + const float param_weights[8] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 100.f, 100.f}; + + const int reg_row_start = 2 * feature_list.feature_size(); + for (int m = 0; m < num_models - 1; ++m) { + for (int p = 0; p < 8; ++p) { + const int curr_idx = m * 8 + p; + float* curr_row = matrix->row(reg_row_start + curr_idx).data(); + curr_row[curr_idx] = regularizer_lambda * param_weights[p]; + curr_row[curr_idx + 8] = -regularizer_lambda * param_weights[p]; + } + } + + // TODO: Consider a faster function? + *solution = matrix->colPivHouseholderQr().solve(rhs); + return ((*matrix) * (*solution)).isApprox(rhs, kPrecision); +} + +// Constraint mixture homography model. +// Only translation (2 DOF) varies across mixture of size num_models, with +// strictly affine and perspective part (4 + 2 = 6 DOF) being constant across +// the mixtures. +bool TransMixtureHomographyL2DLTSolve( + const RegionFlowFeatureList& feature_list, int num_models, + const MixtureRowWeights& row_weights, float regularizer_lambda, + Eigen::MatrixXf* matrix, // least squares matrix + Eigen::MatrixXf* solution) { + CHECK(matrix); + CHECK(solution); + + // cv::solve can hang for really bad conditioned systems. + const double feature_irls_sum = RegionFlowFeatureIRLSSum(feature_list); + if (feature_irls_sum > kMaxCondition) { + return false; + } + + const int num_dof = 6 + 2 * num_models; + const int num_constraints = 2 * (num_models - 1); + + CHECK_EQ(matrix->cols(), num_dof); + // 2 Rows (x,y) per feature. 
+ CHECK_EQ(matrix->rows(), 2 * feature_list.feature_size() + num_constraints); + CHECK_EQ(solution->cols(), 1); + CHECK_EQ(solution->rows(), num_dof); + + // Compute homography from features. (H * location = prev_location) + *matrix = Eigen::MatrixXf::Zero(matrix->rows(), matrix->cols()); + Eigen::Matrix rhs = + Eigen::MatrixXf::Zero(matrix->rows(), 1); + + // Create matrix for DLT. + int feature_idx = 0; + + // Normalize feature sum to 1. + float irls_denom = 1.0 / (feature_irls_sum + 1e-6); + + for (auto feature = feature_list.feature().begin(); + feature != feature_list.feature().end(); ++feature, ++feature_idx) { + float* mat_row_1 = matrix->row(2 * feature_idx).data(); + float* mat_row_2 = matrix->row(2 * feature_idx + 1).data(); + float* rhs_row_1 = rhs.row(2 * feature_idx).data(); + float* rhs_row_2 = rhs.row(2 * feature_idx + 1).data(); + + Vector2_f pt = FeatureLocation(*feature); + Vector2_f prev_pt = FeatureMatchLocation(*feature); + + // Weight per feature. + const float f_w = PatchDescriptorIRLSWeight(*feature) * irls_denom; + + // Scale feature point by weight. + Vector2_f pt_w = pt * f_w; + const float* mix_weights = row_weights.RowWeightsClamped(feature->y()); + + // Entries 0 .. 1 are zero. + mat_row_1[2] = -pt_w.x(); + mat_row_1[3] = -pt_w.y(); + + mat_row_1[4] = pt_w.x() * prev_pt.y(); + mat_row_1[5] = pt_w.y() * prev_pt.y(); + + mat_row_2[0] = pt_w.x(); + mat_row_2[1] = pt_w.y(); + + // Entries 2 .. 3 are zero. + mat_row_2[4] = -pt_w.x() * prev_pt.x(); + mat_row_2[5] = -pt_w.y() * prev_pt.x(); + + // Weights sum to one (-> take out of loop). 
+ rhs_row_1[0] = -prev_pt.y() * f_w; + rhs_row_2[0] = prev_pt.x() * f_w; + + for (int m = 0; m < num_models; ++m, mat_row_1 += 2, mat_row_2 += 2) { + const float w = mix_weights[m]; + mat_row_1[6] = 0; + mat_row_1[7] = -f_w * w; + + mat_row_2[6] = f_w * w; + mat_row_2[7] = 0; + } + } + + const int reg_row_start = 2 * feature_list.feature_size(); + int constraint_idx = 0; + for (int m = 0; m < num_models - 1; ++m) { + for (int p = 0; p < 2; ++p, ++constraint_idx) { + const int curr_idx = 6 + m * 2 + p; + float* curr_row = matrix->row(reg_row_start + constraint_idx).data(); + curr_row[curr_idx] = regularizer_lambda; + curr_row[curr_idx + 2] = -regularizer_lambda; + } + } + + // TODO: Consider a faster function + *solution = matrix->colPivHouseholderQr().solve(rhs); + return ((*matrix) * (*solution)).isApprox(rhs, kPrecision); +} + +// Constraint mixture homography model. +// Only translation and skew and rotation (2 + 2 = 4 DOF) vary across mixture +// of size num_models, with scale and perspective part (2 + 2 = 4 DOF) being +// constant across the mixtures. +bool SkewRotMixtureHomographyL2DLTSolve( + const RegionFlowFeatureList& feature_list, int num_models, + const MixtureRowWeights& row_weights, float regularizer_lambda, + Eigen::MatrixXf* matrix, // least squares matrix + Eigen::MatrixXf* solution) { + CHECK(matrix); + CHECK(solution); + + // cv::solve can hang for really bad conditioned systems. + const double feature_irls_sum = RegionFlowFeatureIRLSSum(feature_list); + if (feature_irls_sum > kMaxCondition) { + return false; + } + + const int num_dof = 4 + 4 * num_models; + const int num_constraints = 4 * (num_models - 1); + + CHECK_EQ(matrix->cols(), num_dof); + // 2 Rows (x,y) per feature. + CHECK_EQ(matrix->rows(), 2 * feature_list.feature_size() + num_constraints); + CHECK_EQ(solution->cols(), 1); + CHECK_EQ(solution->rows(), num_dof); + + // Compute homography from features. 
(H * location = prev_location) + *matrix = Eigen::MatrixXf::Zero(matrix->rows(), matrix->cols()); + Eigen::Matrix rhs = + Eigen::MatrixXf::Zero(matrix->rows(), 1); + + // Create matrix for DLT. + int feature_idx = 0; + + // Normalize feature sum to 1. + float irls_denom = 1.0 / (feature_irls_sum + 1e-6); + + for (auto feature = feature_list.feature().begin(); + feature != feature_list.feature().end(); ++feature, ++feature_idx) { + Vector2_f pt = FeatureLocation(*feature); + Vector2_f prev_pt = FeatureMatchLocation(*feature); + + // Weight per feature. + const float f_w = PatchDescriptorIRLSWeight(*feature) * irls_denom; + + // Scale feature point by weight. + Vector2_f pt_w = pt * f_w; + const float* mix_weights = row_weights.RowWeightsClamped(feature->y()); + + // Compare to MixtureHomographyDLTSolve. + // Mapping of parameters (from homography to mixture) is as follows: + // 0 1 2 3 4 5 6 7 + // --> 0 4 6 5 1 7 2 3 + + int feature_row = 2 * feature_idx; + // Entry 0 is zero. + // Skew is in mixture. + (*matrix)(feature_row, 1) = -pt_w.y(); + (*matrix)(feature_row, 2) = pt_w.x() * prev_pt.y(); + (*matrix)(feature_row, 3) = pt_w.y() * prev_pt.y(); + + (*matrix)(feature_row + 1, 0) = pt_w.x(); + // Entry 1 is zero. + (*matrix)(feature_row + 1, 2) = -pt_w.x() * prev_pt.x(); + (*matrix)(feature_row + 1, 3) = -pt_w.y() * prev_pt.x(); + + // Weights sum to one (-> take out of loop). + rhs(feature_row, 0) = -prev_pt.y() * f_w; + rhs(feature_row + 1, 0) = prev_pt.x() * f_w; + + // Is this right? + for (int m = 0; m < num_models; ++m) { + const float w = mix_weights[m]; + (*matrix)(feature_row, 4 + 4 * m) = 0.0f; + (*matrix)(feature_row, 5 + 4 * m) = -pt_w.x() * w; // Skew. + (*matrix)(feature_row, 6 + 4 * m) = 0.0f; + (*matrix)(feature_row, 7 + 4 * m) = -f_w * w; + + (*matrix)(feature_row + 1, 4 + 4 * m) = pt_w.y() * w; + (*matrix)(feature_row + 1, 5 + 4 * m) = 0.0f; // Translation. 
+ (*matrix)(feature_row + 1, 6 + 4 * m) = f_w * w; + (*matrix)(feature_row + 1, 7 + 4 * m) = 0.0f; + } + } + + const int reg_row_start = 2 * feature_list.feature_size(); + int constraint_idx = 0; + for (int m = 0; m < num_models - 1; ++m) { + for (int p = 0; p < 4; ++p, ++constraint_idx) { + const int curr_idx = 4 + m * 4 + p; + int curr_row = reg_row_start + constraint_idx; + (*matrix)(curr_row, curr_idx) = regularizer_lambda; + (*matrix)(curr_row, curr_idx + 4) = -regularizer_lambda; + } + } + + // TODO: Consider a faster function? + *solution = matrix->colPivHouseholderQr().solve(rhs); + return ((*matrix) * (*solution)).isApprox(rhs, kPrecision); +} + +} // namespace. + +// For plot example for IRLS_WEIGHT_PERIMITER_GAUSSIAN, see: goo.gl/fNzQc +// (plot assumes 3:2 ratio for width:height). +void MotionEstimation::GetHomographyIRLSCenterWeights( + const RegionFlowFeatureList& feature_list, + std::vector* weights) const { + CHECK(weights != nullptr); + + const int num_features = feature_list.feature_size(); + weights->clear(); + weights->reserve(num_features); + + // Early return for constant weight. 
+ if (options_.homography_irls_weight_initialization() == + MotionEstimationOptions::IRLS_WEIGHT_CONSTANT_ONE) { + weights->resize(num_features, 1.0f); + return; + } + + const float sigma_x = normalized_domain_.x() * 0.3f; + const float sigma_y = normalized_domain_.y() * 0.3f; + const float denom_x = 1.0f / (sigma_x * sigma_x); + const float denom_y = 1.0f / (sigma_y * sigma_y); + const Vector2_f center = normalized_domain_ * 0.5f; + + for (const auto& feature : feature_list.feature()) { + const float diff_x = feature.x() - center.x(); + const float diff_y = feature.y() - center.y(); + const float dist = diff_x * diff_x * denom_x + diff_y * diff_y * denom_y; + const float weight = std::exp(static_cast(-0.5f * dist)); + + switch (options_.homography_irls_weight_initialization()) { + case MotionEstimationOptions::IRLS_WEIGHT_CENTER_GAUSSIAN: + weights->push_back(weight); + break; + case MotionEstimationOptions::IRLS_WEIGHT_PERIMETER_GAUSSIAN: + weights->push_back(1.0f - weight * 0.5f); + break; + default: + LOG(INFO) << "Unsupported IRLS weighting."; + } + } +} + +bool MotionEstimation::IsStableTranslation( + const TranslationModel& translation, float translation_variance, + const RegionFlowFeatureList& features) const { + if (options_.deactivate_stable_motion_estimation()) { + return true; + } + + const bool sufficient_features = + features.feature_size() >= + options_.stable_translation_bounds().min_features(); + + if (!sufficient_features) { + VLOG(1) << "Translation unstable, insufficient features."; + return false; + } + + const float translation_magnitude = + std::hypot(translation.dx(), translation.dy()); + const float max_translation_magnitude = + options_.stable_translation_bounds().frac_max_motion_magnitude() * + hypot(frame_width_, frame_height_); + + const float stdev = std::sqrt(static_cast(translation_variance)) / + hypot(frame_width_, frame_height_); + + const float max_motion_stdev_threshold = + 
options_.stable_translation_bounds().max_motion_stdev_threshold(); + + // Only test for exceeding max translation magnitude if standard deviation + // of translation is not close to zero (in which case registration + // can be considered stable). + if (translation_magnitude >= max_translation_magnitude && + stdev >= max_motion_stdev_threshold) { + VLOG(1) << "Translation unstable, exceeds max translation: " + << translation_magnitude << " stdev: " << stdev; + return false; + } + + if (stdev >= options_.stable_translation_bounds().max_motion_stdev()) { + VLOG(1) << "Translation unstable, translation variance out of bound: " + << stdev; + return false; + } + + return true; +} + +void MotionEstimation::CheckTranslationAcceleration( + std::vector* camera_motions) const { + CHECK(camera_motions != nullptr); + std::vector magnitudes; + for (const auto& motion : *camera_motions) { + const float translation_magnitude = + LinearSimilarityAdapter::TransformPoint( + normalization_transform_, + Vector2_f(motion.translation().dx(), motion.translation().dy())) + .Norm(); + + magnitudes.push_back(translation_magnitude); + } + + // Determine median motion around each frame + // (really third lower percentile here). + const int median_radius = 6; + const int num_magnitudes = magnitudes.size(); + std::vector median_magnitudes; + const float kZeroMotion = 3e-4f; // 0.5 pixels @ 720p. + for (int k = 0; k < num_magnitudes; ++k) { + std::vector filter; + const auto mag_begin = + magnitudes.begin() + std::max(0, (k - median_radius)); + const auto mag_end = + magnitudes.begin() + std::min(num_magnitudes, k + median_radius + 1); + + for (auto mag = mag_begin; mag != mag_end; ++mag) { + // Ignore zero motion (empty or duplicate frames). + if (*mag > kZeroMotion) { + filter.push_back(*mag); + } + } + + const float kMinMotion = 1e-3f; // 1.5 pixels @ 720p. 
+ if (filter.empty()) { + median_magnitudes.push_back(kMinMotion); + } else { + auto median_iter = filter.begin() + filter.size() / 3; + std::nth_element(filter.begin(), median_iter, filter.end()); + median_magnitudes.push_back(std::max(kMinMotion, *median_iter)); + } + } + + const float max_acceleration = + options_.stable_translation_bounds().max_acceleration(); + for (int k = 0; k < magnitudes.size(); ++k) { + // Relative test, test for acceleration and de-acceleration (only + // in case motion is not zero, to ignore empty or duplicated frames). + if (magnitudes[k] > max_acceleration * median_magnitudes[k] || + (magnitudes[k] > kZeroMotion && + median_magnitudes[k] > max_acceleration * magnitudes[k])) { + MotionEstimation::ResetMotionModels(options_, &(*camera_motions)[k]); + } + } +} + +bool MotionEstimation::IsStableSimilarity( + const LinearSimilarityModel& model, + const RegionFlowFeatureList& feature_list, int num_inliers) const { + if (options_.deactivate_stable_motion_estimation()) { + // Only check if model is invertible. + return IsInverseStable(model); + } + + const auto& similarity_bounds = options_.stable_similarity_bounds(); + + if (similarity_bounds.only_stable_input() && feature_list.unstable()) { + VLOG(1) << "Feature list is unstable."; + return false; + } + + const float lower_scale = similarity_bounds.lower_scale(); + const float upper_scale = similarity_bounds.upper_scale(); + if (model.a() < lower_scale || model.a() > upper_scale) { + VLOG(1) << "Warning: Unstable similarity found. " + << "Scale is out of bound: " << model.a(); + return false; + } + + const float limit_rotation = similarity_bounds.limit_rotation(); + if (fabs(model.b()) > limit_rotation) { + VLOG(1) << "Warning: Unstable similarity found. 
" + << "Rotation is out of bound: " << model.b(); + return false; + } + + if (num_inliers < similarity_bounds.min_inliers()) { + VLOG(1) << "Unstable similarity, only " << num_inliers << " inliers chosen " + << "from " << feature_list.feature_size() << " features."; + return false; + } + + if (num_inliers < + similarity_bounds.min_inlier_fraction() * feature_list.feature_size()) { + VLOG(1) << "Unstable similarity, inlier fraction only " + << static_cast(num_inliers) / + (feature_list.feature_size() + 1.e-6f); + return false; + } + + return true; +} + +bool MotionEstimation::IsStableHomography(const Homography& model, + float average_homography_error, + float inlier_coverage) const { + if (options_.deactivate_stable_motion_estimation()) { + return IsInverseStable(model); + } + + const auto& homography_bounds = options_.stable_homography_bounds(); + + // Test if the inter-frame transform is stable, i.e. small enough + // that its faithful estimation is possible. + const float lower_scale = homography_bounds.lower_scale(); + const float upper_scale = homography_bounds.upper_scale(); + if (model.h_00() < lower_scale || model.h_00() > upper_scale || // Scale. + model.h_11() < lower_scale || model.h_11() > upper_scale) { + VLOG(1) << "Warning: Unstable homography found. " + << "Scale is out of bound: " << model.h_00() << " " << model.h_11(); + return false; + } + + const float limit_rotation = homography_bounds.limit_rotation(); + if (fabs(model.h_01()) > limit_rotation || // Rotation. + fabs(model.h_10()) > limit_rotation) { + VLOG(1) << "Warning: Unstable homography found. " + << "Rotation is out of bound: " << model.h_01() << " " + << model.h_10(); + return false; + } + + const float limit_perspective = homography_bounds.limit_perspective(); + if (fabs(model.h_20()) > limit_perspective || // Perspective. + fabs(model.h_21()) > limit_perspective) { + VLOG(1) << "Warning: Unstable homography found. 
" + << "Perspective is out of bound:" << model.h_20() << " " + << model.h_21(); + return false; + } + + const float min_inlier_coverage = homography_bounds.min_inlier_coverage(); + const float registration_threshold = + std::max(homography_bounds.registration_threshold(), + homography_bounds.frac_registration_threshold() * + hypot(frame_width_, frame_height_)); + + if (average_homography_error > registration_threshold && + inlier_coverage <= min_inlier_coverage) { + VLOG(1) << "Unstable homography found. " + << "Registration (actual, threshold): " << average_homography_error + << " " << registration_threshold + << " Inlier coverage (actual, threshold): " << inlier_coverage + << " " << min_inlier_coverage; + return false; + } + + return true; +} + +bool MotionEstimation::IsStableMixtureHomography( + const MixtureHomography& homography, float min_block_inlier_coverage, + const std::vector& block_inlier_coverage) const { + if (options_.deactivate_stable_motion_estimation()) { + return true; + } + + const int num_blocks = block_inlier_coverage.size(); + std::vector stable_block(num_blocks, false); + for (int k = 0; k < num_blocks; ++k) { + stable_block[k] = block_inlier_coverage[k] > min_block_inlier_coverage; + } + + int unstable_start = -1; + int empty_start = -1; + const int max_outlier_blocks = + options_.stable_mixture_homography_bounds().max_adjacent_outlier_blocks(); + const int max_empty_blocks = + options_.stable_mixture_homography_bounds().max_adjacent_empty_blocks(); + + for (int k = 0; k < num_blocks; ++k) { + const int offset = unstable_start == 0 ? 1 : 0; + // Test for outlier blocks. + if (stable_block[k]) { + if (unstable_start >= 0 && + k - unstable_start >= max_outlier_blocks - offset) { + return false; + } + unstable_start = -1; + } else { + if (unstable_start < 0) { + unstable_start = k; + } + } + + // Test for empty blocks. 
+ if (block_inlier_coverage[k] > 0) { + if (empty_start >= 0 && k - empty_start >= max_empty_blocks - offset) { + return false; + } + empty_start = -1; + } else { + if (empty_start < 0) { + empty_start = k; + } + } + } + + if (unstable_start >= 0 && + num_blocks - unstable_start >= max_outlier_blocks) { + return false; + } + + if (empty_start >= 0 && num_blocks - empty_start >= max_empty_blocks) { + return false; + } + + return true; +} + +float MotionEstimation::GridCoverage( + const RegionFlowFeatureList& feature_list, float min_inlier_score, + MotionEstimationThreadStorage* thread_storage) const { + CHECK(thread_storage != nullptr); + + // 10x10 grid for coverage estimation. + const int grid_size = options_.coverage_grid_size(); + const int mask_size = grid_size * grid_size; + + const float scaled_width = 1.0f / normalized_domain_.x() * grid_size; + const float scaled_height = 1.0f / normalized_domain_.y() * grid_size; + + const std::vector& grid_cell_weights = + thread_storage->GridCoverageInitializationWeights(); + CHECK_EQ(mask_size, grid_cell_weights.size()); + + const float max_inlier_score = 1.75f * min_inlier_score; + const float mid_inlier_score = 0.5 * (min_inlier_score + max_inlier_score); + + // Map min_inlier to 0.1 and max_inlier to 0.9 via logistic regression. + // f(x) = 1 / (1 + exp(-a(x - mid))) + // f(min) == 0.1 ==> a = ln(1 / 0.1 - 1) / (mid - min) + const float logistic_scale = 2.1972245 / // ln(1.0 / 0.1 - 1) + (mid_inlier_score - min_inlier_score); + + const int num_overlaps = 3; + + // Maximum coverage and number of features across shifted versions. 
+ std::vector max_coverage(mask_size, 0.0f); + std::vector max_features(mask_size, 0); + + for (int overlap_y = 0; overlap_y < num_overlaps; ++overlap_y) { + const float shift_y = + normalized_domain_.y() / grid_size * overlap_y / num_overlaps; + for (int overlap_x = 0; overlap_x < num_overlaps; ++overlap_x) { + const float shift_x = + normalized_domain_.x() / grid_size * overlap_x / num_overlaps; + std::vector>& irls_mask = + *thread_storage->EmptyGridCoverageIrlsMask(); + CHECK_EQ(mask_size, irls_mask.size()); + + // Bin features. + for (const auto& feature : feature_list.feature()) { + if (feature.irls_weight() > 0) { + const int x = + static_cast((feature.x() - shift_x) * scaled_width); + const int y = + static_cast((feature.y() - shift_y) * scaled_height); + // Ignore features that are out of bound in one shifted version. + if (x < 0 || y < 0 || x >= grid_size || y >= grid_size) { + continue; + } + + const int grid_bin = y * grid_size + x; + irls_mask[grid_bin].push_back(feature.irls_weight()); + } + } + + for (int k = 0; k < mask_size; ++k) { + if (irls_mask[k].size() < 2) { // At least two features present for + continue; // grid cell to be considered. 
+ } + + const int median_elem = irls_mask[k].size() / 2; + std::nth_element(irls_mask[k].begin(), + irls_mask[k].begin() + median_elem, + irls_mask[k].end()); + + const float irls_median = irls_mask[k][median_elem]; + const float inlier_score = + 1.0f / + (1.0f + std::exp(static_cast( + -logistic_scale * (irls_median - mid_inlier_score)))); + if (max_features[k] < irls_mask[k].size()) { + max_features[k] = irls_mask[k].size(); + max_coverage[k] = inlier_score; + } + } + } + } + + const float cell_weight_sum = + std::accumulate(grid_cell_weights.begin(), grid_cell_weights.end(), 0.0f); + CHECK_GT(cell_weight_sum, 0); + + return std::inner_product(max_coverage.begin(), max_coverage.end(), + grid_cell_weights.begin(), 0.0f) / + cell_weight_sum; +} + +void MotionEstimation::ComputeMixtureCoverage( + const RegionFlowFeatureList& feature_list, float min_inlier_score, + bool assume_rolling_shutter_camera, + MotionEstimationThreadStorage* thread_storage, + CameraMotion* camera_motion) const { + const int grid_size = row_weights_->NumModels(); + const int mask_size = grid_size * grid_size; + std::vector irls_mask(mask_size, 0.0f); + std::vector mask_counter(mask_size, 0.0f); + + const float scaled_width = 1.0f / normalized_domain_.x() * (grid_size - 1); + // Consider features slightly above 1 block distance away from center a block + // to vote for its coverage. + const float weight_threshold = row_weights_->WeightThreshold(1.25f); + + const float max_inlier_score = 1.75f * min_inlier_score; + const float mid_inlier_score = 0.5 * (min_inlier_score + max_inlier_score); + + // Map min_inlier to 0.1 and max_inlier to 0.9 via logistic regression. 
+ // f(x) = 1 / (1 + exp(-a(x - mid))) + // f(min) == 0.1 ==> a = ln(1 / 0.1 - 1) / (mid - min) + const float logistic_scale = 2.1972245 / // ln(1.0 / 0.1 - 1) + (mid_inlier_score - min_inlier_score); + + std::vector texturedness; + ComputeRegionFlowFeatureTexturedness(feature_list, true, &texturedness); + + int texture_idx = 0; + for (auto feature = feature_list.feature().begin(); + feature != feature_list.feature().end(); ++feature, ++texture_idx) { + float irls_weight = feature->irls_weight(); + if (irls_weight == 0) { + continue; + } + + // Account for feature texturedness -> low textured outliers do not cause + // visible artifacts. + if (assume_rolling_shutter_camera) { + // Skip low textured outliers. + if (texturedness[texture_idx] < 0.5 && irls_weight < min_inlier_score) { + continue; + } + + // Weight by texture. + irls_weight /= (texturedness[texture_idx] + 1.e-6f); + } + + // Interpolate into bins. + const float x = feature->x() * scaled_width; + const int bin_x = x; + const float dx = x - bin_x; + const int off_x = static_cast(dx != 0); + + const float* row_weights = row_weights_->RowWeights(feature->y()); + for (int k = 0, grid_bin = bin_x; k < row_weights_->NumModels(); + ++k, grid_bin += grid_size) { + if (row_weights[k] > weight_threshold) { + irls_mask[grid_bin] += irls_weight * row_weights[k] * (1.0f - dx); + mask_counter[grid_bin] += row_weights[k] * (1.0f - dx); + irls_mask[grid_bin + off_x] += irls_weight * row_weights[k] * dx; + mask_counter[grid_bin + off_x] += row_weights[k] * dx; + } + } + } + + std::vector coverage(grid_size, 0.0f); + // Record number of occupied cells per block. + std::vector occupancy(grid_size, 0); + + for (int k = 0, grid_bin = 0; k < grid_size; ++k) { + for (int l = 0; l < grid_size; ++l, ++grid_bin) { + // At least two features (at maximum distance from block center) + // should be present for this cell to be considered. 
+ if (mask_counter[grid_bin] < 2 * weight_threshold) { + continue; + } + + ++occupancy[k]; + + const float irls_average = irls_mask[grid_bin] / mask_counter[grid_bin]; + const float inlier_score = + 1.0f / + (1.0f + std::exp(static_cast( + -logistic_scale * (irls_average - mid_inlier_score)))); + + coverage[k] += inlier_score; + } + + // If block was occupied assign small eps coverage so it is not considered + // empty. + const float empty_block_eps = 1e-2; + if (occupancy[k] > 0 && coverage[k] == 0) { + coverage[k] = empty_block_eps; + } + } + + camera_motion->clear_mixture_inlier_coverage(); + + // For rolling shutter videos, grid cells without features are assumed to + // not cause visible distortions (no features -> lack of texture). + // Limit to 60% of number of cells, to avoid considering only one or two + // occupied cells stable. + for (int k = 0; k < grid_size; ++k) { + const float denom = + 1.0f / (assume_rolling_shutter_camera + ? std::max(grid_size * 0.6, occupancy[k]) + : grid_size); + camera_motion->add_mixture_inlier_coverage(coverage[k] * denom); + } +} + +bool MotionEstimation::EstimateHomographyIRLS( + int irls_rounds, bool compute_stability, + const PriorFeatureWeights* prior_weights, + MotionEstimationThreadStorage* thread_storage, + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const { + if (prior_weights && !prior_weights->HasCorrectDimension( + irls_rounds, feature_list->feature_size())) { + LOG(ERROR) << "Prior weights incorrectly initialized, ignoring."; + prior_weights = nullptr; + } + + std::unique_ptr local_storage; + if (thread_storage == nullptr) { + local_storage.reset(new MotionEstimationThreadStorage(options_, this)); + thread_storage = local_storage.get(); + } + + int num_nonzero_weights = + feature_list->feature_size() - + CountIgnoredRegionFlowFeatures(*feature_list, kOutlierIRLSWeight); + + // Use identity if not enough features found. 
+ const int min_features_for_solution = 9; + if (num_nonzero_weights < min_features_for_solution) { + VLOG(1) << "Homography estimation failed, less than " + << min_features_for_solution << " features usable for estimation."; + *camera_motion->mutable_homography() = Homography(); + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + + bool use_float = true; + // Just declaring does not use memory + Eigen::Matrix matrix_e; + Eigen::Matrix solution_e; + Eigen::Matrix rhs_e; + Eigen::Matrix matrix_d; + Eigen::Matrix solution_d; + Eigen::Matrix rhs_d; + Eigen::Matrix matrix_f; + Eigen::Matrix solution_f; + Eigen::Matrix rhs_f; + + if (options_.use_exact_homography_estimation()) { + const int num_rows = + 2 * feature_list->feature_size() + + (options_.homography_perspective_regularizer() == 0 ? 0 : 1); + matrix_e = Eigen::Matrix::Zero(num_rows, 8); + rhs_e = Eigen::Matrix::Zero(8, 1); + solution_e = Eigen::Matrix::Zero(8, 1); + } else { + if (options_.use_highest_accuracy_for_normal_equations()) { + matrix_d = Eigen::Matrix::Zero(8, 8); + rhs_d = Eigen::Matrix::Zero(); + solution_d = Eigen::Matrix::Zero(8, 1); + use_float = false; + } else { + matrix_f = Eigen::Matrix::Zero(8, 8); + rhs_f = Eigen::Matrix::Zero(); + solution_f = Eigen::Matrix::Zero(8, 1); + use_float = true; + } + } + + // Multiple rounds of weighting based L2 optimization. 
+ Homography norm_model; + const float irls_residual_scale = + GetIRLSResidualScale(camera_motion->average_magnitude(), + options_.irls_motion_magnitude_fraction()); + + const bool irls_use_l0_norm = options_.irls_use_l0_norm(); + + const std::vector* irls_priors = nullptr; + const std::vector* irls_alphas = nullptr; + if (prior_weights && prior_weights->HasNonZeroAlpha()) { + irls_priors = &prior_weights->priors; + irls_alphas = &prior_weights->alphas; + } + + Homography* prev_solution = nullptr; + if (options_.homography_exact_denominator_scaling()) { + prev_solution = &norm_model; + } + + for (int r = 0; r < irls_rounds; ++r) { + if (options_.use_exact_homography_estimation()) { + bool success = false; + + success = HomographyL2QRSolve( + *feature_list, prev_solution, + options_.homography_perspective_regularizer(), &matrix_e, + &solution_e); + if (!success) { + VLOG(1) << "Could not solve for homography."; + *camera_motion->mutable_homography() = Homography(); + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + norm_model = + HomographyAdapter::FromFloatPointer(solution_e.data(), false); + } else { + bool success = false; + if (options_.use_highest_accuracy_for_normal_equations()) { + CHECK(!use_float); + norm_model = HomographyL2NormalEquationSolve( + *feature_list, prev_solution, + options_.homography_perspective_regularizer(), &matrix_d, &rhs_d, + &solution_d, &success); + } else { + CHECK(use_float); + norm_model = HomographyL2NormalEquationSolve( + *feature_list, prev_solution, + options_.homography_perspective_regularizer(), &matrix_f, &rhs_f, + &solution_f, &success); + } + if (!success) { + VLOG(1) << "Could not solve for homography."; + *camera_motion->mutable_homography() = Homography(); + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + } + + const float alpha = irls_alphas != nullptr ? 
(*irls_alphas)[r] : 0.0f; + const float one_minus_alpha = 1.0f - alpha; + + // Compute weights from registration errors. + const auto feature_start = feature_list->mutable_feature()->begin(); + for (auto feature = feature_start; + feature != feature_list->mutable_feature()->end(); ++feature) { + // Ignored features marked as outliers. + if (feature->irls_weight() == 0.0f) { + continue; + } + + // Residual is expressed as geometric difference, that is + // for a point match (p<->q) with estimated homography p, + // geometric difference is defined as Hp x q. + Vector2_f lhs = HomographyAdapter::TransformPoint( + norm_model, FeatureLocation(*feature)); + // Map to original coordinate system to evaluate error. + lhs = LinearSimilarityAdapter::TransformPoint(irls_transform_, lhs); + const Vector3_f lhs3(lhs.x(), lhs.y(), 1); + const Vector2_f rhs = LinearSimilarityAdapter::TransformPoint( + irls_transform_, FeatureMatchLocation(*feature)); + + const Vector3_f rhs3(rhs.x(), rhs.y(), 1); + const Vector3_f cross = lhs3.CrossProd(rhs3); + // We only use the first 2 linearly independent rows. + const Vector2_f cross2(cross.x(), cross.y()); + + const float numerator = + alpha == 0.0f ? 1.0f + : ((*irls_priors)[feature - feature_start] * alpha + + one_minus_alpha); + + if (irls_use_l0_norm) { + feature->set_irls_weight( + numerator / (cross2.Norm() * irls_residual_scale + kIrlsEps)); + } else { + feature->set_irls_weight( + numerator / (std::sqrt(static_cast(cross2.Norm() * + irls_residual_scale)) + + kIrlsEps)); + } + } + } + + // Undo pre_transform. + Homography* model = camera_motion->mutable_homography(); + *model = ModelCompose3( + LinearSimilarityAdapter::ToHomography(inv_normalization_transform_), + norm_model, + LinearSimilarityAdapter::ToHomography(normalization_transform_)); + + if (compute_stability) { + // Score irls and save. + float average_homography_error = 0; + // Number of non-zero features. 
+ int nnz_features = 0; + const float kMinIrlsWeight = 1e-6f; + for (const auto& feature : feature_list->feature()) { + if (feature.irls_weight() > kMinIrlsWeight) { + if (options_.irls_use_l0_norm()) { + average_homography_error += 1.0f / feature.irls_weight(); + } else { + average_homography_error += + 1.0f / (feature.irls_weight() * feature.irls_weight()); + } + ++nnz_features; + } + } + + if (nnz_features > 0) { + average_homography_error *= 1.0f / nnz_features; + } + + camera_motion->set_average_homography_error(average_homography_error); + + // TODO: Use sqrt when use_l0_norm is false. + // Need to verify that does not break face_compositor before modifying. + float inlier_threshold = + options_.stable_homography_bounds().frac_inlier_threshold() * + hypot(frame_width_, frame_height_); + camera_motion->set_homography_inlier_coverage( + GridCoverage(*feature_list, 1.0 / inlier_threshold, thread_storage)); + camera_motion->set_homography_strict_inlier_coverage(GridCoverage( + *feature_list, options_.strict_coverage_scale() / inlier_threshold, + thread_storage)); + } + return true; +} + +bool MotionEstimation::MixtureHomographyFromFeature( + const TranslationModel& camera_translation, int irls_rounds, + float regularizer, const PriorFeatureWeights* prior_weights, + RegionFlowFeatureList* feature_list, + MixtureHomography* mix_homography) const { + if (prior_weights && !prior_weights->HasCorrectDimension( + irls_rounds, feature_list->feature_size())) { + LOG(ERROR) << "Prior weights incorrectly initialized, ignoring."; + prior_weights = nullptr; + } + + const int num_mixtures = options_.num_mixtures(); + + // Compute weights if necessary. + // Compute scale to index mixture weights from normalization. 
+ CHECK(row_weights_.get() != nullptr); + CHECK_EQ(row_weights_->YScale(), frame_height_ / normalized_domain_.y()); + CHECK_EQ(row_weights_->NumModels(), num_mixtures); + + const MotionEstimationOptions::MixtureModelMode mixture_mode = + options_.mixture_model_mode(); + int num_dof = 0; + int adjacency_constraints = 0; + switch (mixture_mode) { + case MotionEstimationOptions::FULL_MIXTURE: + num_dof = 8 * num_mixtures; + adjacency_constraints = 8 * (num_mixtures - 1); + break; + case MotionEstimationOptions::TRANSLATION_MIXTURE: + num_dof = 6 + 2 * num_mixtures; + adjacency_constraints = 2 * (num_mixtures - 1); + break; + case MotionEstimationOptions::SKEW_ROTATION_MIXTURE: + num_dof = 4 + 4 * num_mixtures; + adjacency_constraints = 4 * (num_mixtures - 1); + break; + default: + LOG(FATAL) << "Unknown MixtureModelMode specified."; + } + + Eigen::MatrixXf matrix( + 2 * feature_list->feature_size() + adjacency_constraints, num_dof); + Eigen::MatrixXf solution(num_dof, 1); + + // Multiple rounds of weighting based L2 optimization. + MixtureHomography norm_model; + + // Initialize with identity. + for (int k = 0; k < num_mixtures; ++k) { + norm_model.add_model(); + } + + const bool irls_use_l0_norm = options_.irls_use_l0_norm(); + + const std::vector* irls_priors = nullptr; + const std::vector* irls_alphas = nullptr; + if (prior_weights && prior_weights->HasNonZeroAlpha()) { + irls_priors = &prior_weights->priors; + irls_alphas = &prior_weights->alphas; + } + + for (int r = 0; r < irls_rounds; ++r) { + // Unpack solution to mixture homographies, if not full model. + std::vector solution_unpacked(8 * num_mixtures); + const float* solution_pointer = &solution_unpacked[0]; + + switch (mixture_mode) { + case MotionEstimationOptions::FULL_MIXTURE: + if (!MixtureHomographyL2DLTSolve(*feature_list, num_mixtures, + *row_weights_, regularizer, &matrix, + &solution)) { + return false; + } + // No need to unpack solution. 
+ solution_pointer = solution.data(); + break; + + case MotionEstimationOptions::TRANSLATION_MIXTURE: + if (!TransMixtureHomographyL2DLTSolve(*feature_list, num_mixtures, + *row_weights_, regularizer, + &matrix, &solution)) { + return false; + } + { + const float* sol_ptr = solution.data(); + for (int k = 0; k < num_mixtures; ++k) { + float* curr_ptr = &solution_unpacked[8 * k]; + curr_ptr[0] = sol_ptr[0]; + curr_ptr[1] = sol_ptr[1]; + curr_ptr[2] = sol_ptr[6 + 2 * k]; + curr_ptr[3] = sol_ptr[2]; + curr_ptr[4] = sol_ptr[3]; + curr_ptr[5] = sol_ptr[6 + 2 * k + 1]; + curr_ptr[6] = sol_ptr[4]; + curr_ptr[7] = sol_ptr[5]; + } + } + break; + + case MotionEstimationOptions::SKEW_ROTATION_MIXTURE: + if (!SkewRotMixtureHomographyL2DLTSolve(*feature_list, num_mixtures, + *row_weights_, regularizer, + &matrix, &solution)) { + return false; + } + { + const float* sol_ptr = solution.data(); + for (int k = 0; k < num_mixtures; ++k) { + float* curr_ptr = &solution_unpacked[8 * k]; + curr_ptr[0] = sol_ptr[0]; + curr_ptr[1] = sol_ptr[4 + 4 * k]; + curr_ptr[2] = sol_ptr[4 + 4 * k + 2]; + curr_ptr[3] = sol_ptr[4 + 4 * k + 1]; + curr_ptr[4] = sol_ptr[1]; + curr_ptr[5] = sol_ptr[4 + 4 * k + 3]; + curr_ptr[6] = sol_ptr[2]; + curr_ptr[7] = sol_ptr[3]; + } + } + break; + + default: + LOG(FATAL) << "Unknown MixtureModelMode specified."; + } + + norm_model = MixtureHomographyAdapter::FromFloatPointer( + solution_pointer, false, 0, num_mixtures); + + const float alpha = irls_alphas != nullptr ? (*irls_alphas)[r] : 0.0f; + const float one_minus_alpha = 1.0f - alpha; + + // Evaluate IRLS error. + const auto feature_start = feature_list->mutable_feature()->begin(); + for (auto feature = feature_start; + feature != feature_list->mutable_feature()->end(); ++feature) { + if (feature->irls_weight() == 0.0f) { + continue; + } + + // Residual is expressed in geometric difference, that is + // for a point match (p<->q) with estimated homography p, + // geometric difference is defined as Hp x q. 
+ Vector2_f lhs = MixtureHomographyAdapter::TransformPoint( + norm_model, row_weights_->RowWeightsClamped(feature->y()), + FeatureLocation(*feature)); + // Map to original coordinate system to evaluate error. + lhs = LinearSimilarityAdapter::TransformPoint(irls_transform_, lhs); + + const Vector3_f lhs3(lhs.x(), lhs.y(), 1); + const Vector2_f rhs = LinearSimilarityAdapter::TransformPoint( + irls_transform_, FeatureMatchLocation(*feature)); + + const Vector3_f rhs3(rhs.x(), rhs.y(), 1); + const Vector3_f cross = lhs3.CrossProd(rhs3); + + // We only use the first 2 linearly independent rows. + const Vector2_f cross2(cross.x(), cross.y()); + + const float numerator = + alpha == 0.0f ? 1.0f + : ((*irls_priors)[feature - feature_start] * alpha + + one_minus_alpha); + + if (irls_use_l0_norm) { + feature->set_irls_weight(numerator / (cross2.Norm() + kIrlsEps)); + } else { + feature->set_irls_weight( + numerator / + (std::sqrt(static_cast(cross2.Norm())) + kIrlsEps)); + } + } + } + + // Undo pre_transform. 
+ *mix_homography = MixtureHomographyAdapter::ComposeLeft( + MixtureHomographyAdapter::ComposeRight( + norm_model, + LinearSimilarityAdapter::ToHomography(normalization_transform_)), + LinearSimilarityAdapter::ToHomography(inv_normalization_transform_)); + + switch (mixture_mode) { + case MotionEstimationOptions::FULL_MIXTURE: + mix_homography->set_dof(MixtureHomography::ALL_DOF); + break; + case MotionEstimationOptions::TRANSLATION_MIXTURE: + mix_homography->set_dof(MixtureHomography::TRANSLATION_DOF); + break; + case MotionEstimationOptions::SKEW_ROTATION_MIXTURE: + mix_homography->set_dof(MixtureHomography::SKEW_ROTATION_DOF); + break; + default: + LOG(FATAL) << "Unknown MixtureModelMode specified."; + } + return true; +} + +bool MotionEstimation::EstimateMixtureHomographyIRLS( + int irls_rounds, bool compute_stability, float regularizer, + int spectrum_idx, const PriorFeatureWeights* prior_weights, + MotionEstimationThreadStorage* thread_storage, + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const { + std::unique_ptr local_storage; + if (thread_storage == NULL) { + local_storage.reset(new MotionEstimationThreadStorage(options_, this)); + thread_storage = local_storage.get(); + } + + // We bin features into 3 blocks (top, middle, bottom), requiring each to + // have sufficient features. This is a specialization of the same test for + // the homography case. The tested blocks here are not related to the + // mixture blocks in any manner. + const int min_features_for_solution = 9; + const int num_blocks = 3; + std::vector features_per_block(3, 0); + const float block_scale = num_blocks / normalized_domain_.y(); + + for (const auto& feature : feature_list->feature()) { + if (feature.irls_weight() > 0) { + ++features_per_block[feature.y() * block_scale]; + } + } + + // Require at least two blocks to have sufficient features. 
+ std::sort(features_per_block.begin(), features_per_block.end()); + if (features_per_block[1] < min_features_for_solution) { + VLOG(1) << "Mixture homography estimation not possible, less than " + << min_features_for_solution << " features present."; + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + + MixtureHomography mix_homography; + if (!MixtureHomographyFromFeature(camera_motion->translation(), irls_rounds, + regularizer, prior_weights, feature_list, + &mix_homography)) { + VLOG(1) << "Non-rigid homography estimated. " + << "CameraMotion flagged as unstable."; + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + + if (compute_stability) { + // Test if mixture is invertible for every scanline (test via grid, + // every 10 scanlines, also test one row out of frame domain). + const float test_grid_size = 10.0f / frame_height_ * normalized_domain_.y(); + bool invertible = true; + int counter = 0; + for (float y = -test_grid_size; y < normalized_domain_.y() + test_grid_size; + y += test_grid_size) { + ++counter; + const float* weights = row_weights_->RowWeightsClamped(y); + Homography test_homography = MixtureHomographyAdapter::ToBaseModel( + camera_motion->mixture_homography(), weights); + HomographyAdapter::InvertChecked(test_homography, &invertible); + if (!invertible) { + VLOG(1) << "Mixture is not invertible."; + camera_motion->set_flags(camera_motion->flags() | + CameraMotion::FLAG_SINGULAR_ESTIMATION); + return false; + } + } + } + + while (spectrum_idx >= camera_motion->mixture_homography_spectrum_size()) { + camera_motion->add_mixture_homography_spectrum(); + } + + camera_motion->mutable_mixture_homography_spectrum(spectrum_idx) + ->CopyFrom(mix_homography); + + float mixture_inlier_threshold = + options_.stable_mixture_homography_bounds().frac_inlier_threshold() * + hypot(frame_width_, frame_height_); + + // First computed 
mixture in the spectrum is stored in mixture homography + // member. Also compute coverage for it. + if (spectrum_idx == 0) { + camera_motion->mutable_mixture_homography()->CopyFrom( + camera_motion->mixture_homography_spectrum(0)); + if (compute_stability) { + ComputeMixtureCoverage(*feature_list, 1.0f / mixture_inlier_threshold, + true, thread_storage, camera_motion); + } + } + + // Cap rolling shutter analysis level to be valid level. + if (options_.mixture_rs_analysis_level() >= + options_.mixture_regularizer_levels()) { + LOG(WARNING) << "Resetting mixture_rs_analysis_level to " + << options_.mixture_regularizer_levels() - 1; + } + + const int rs_analysis_level = + std::min(options_.mixture_rs_analysis_level(), + options_.mixture_regularizer_levels() - 1); + + // We compute mixture coverage only for frames which can be safely assumed + // to be stable mixtures, comparing it to the homography coverage and + // recording the increase in coverage. + // Compute coverage assuming rigid camera. + // TODO: Use sqrt when use_l0_norm is false. Need to verify that + // does not break face_compositor before modifying. + if (compute_stability && spectrum_idx == rs_analysis_level) { + std::vector coverage_backup( + camera_motion->mixture_inlier_coverage().begin(), + camera_motion->mixture_inlier_coverage().end()); + + // Evaluate mixture coverage and compute rolling shutter guess. + ComputeMixtureCoverage(*feature_list, 1.0f / mixture_inlier_threshold, + false, thread_storage, camera_motion); + + std::vector mixture_inlier_coverage( + camera_motion->mixture_inlier_coverage().begin(), + camera_motion->mixture_inlier_coverage().end()); + + // Reset to original values. + if (!coverage_backup.empty()) { + camera_motion->clear_mixture_inlier_coverage(); + for (float item : coverage_backup) { + camera_motion->add_mixture_inlier_coverage(item); + } + } + + // Estimate rolling shutter score. 
+ // Use higher threshold on inlier coverage to only consider mixtures that + // are very reliable. + const MixtureHomography& rs_mixture = + camera_motion->mixture_homography_spectrum( + camera_motion->mixture_homography_spectrum_size() - 1); + const float rs_stability_threshold = + options_.stable_mixture_homography_bounds().min_inlier_coverage() * + 1.5f; + + if (IsStableMixtureHomography(rs_mixture, rs_stability_threshold, + mixture_inlier_coverage)) { + // Only use best matches (strict coverage) to determine by how much + // mixture models improve on homographies. + // TODO: Use sqrt when use_l0_norm is false. Need to verify that + // does not break face_compositor before modifying. + float homog_inlier_threshold = + options_.stable_homography_bounds().frac_inlier_threshold() * + hypot(frame_width_, frame_height_); + homog_inlier_threshold /= options_.strict_coverage_scale(); + + float mixture_coverage = GridCoverage( + *feature_list, 1.0f / homog_inlier_threshold, thread_storage); + + const float coverage_ratio = + mixture_coverage / + (camera_motion->homography_strict_inlier_coverage() + 0.01f); + + camera_motion->set_rolling_shutter_guess(coverage_ratio); + } else { + camera_motion->set_rolling_shutter_guess(-1.0f); + } + } + + camera_motion->set_mixture_row_sigma(options_.mixture_row_sigma()); + return true; +} + +void MotionEstimation::DetermineOverlayIndices( + bool irls_weights_preinitialized, std::vector* camera_motions, + std::vector* feature_lists) const { + CHECK(camera_motions != nullptr); + CHECK(feature_lists != nullptr); + // Two stage estimation: First translation only, followed by + // overlay analysis. + const int num_frames = feature_lists->size(); + CHECK_EQ(num_frames, camera_motions->size()); + + std::vector translation_motions(num_frames); + const int irls_per_round = options_.irls_rounds(); + + // Perform quick initialization of weights and backup original ones. 
+ if (!irls_weights_preinitialized) { + for (auto feature_list_ptr : *feature_lists) { + ResetRegionFlowFeatureIRLSWeights(1.0, feature_list_ptr); + } + } + + std::vector> original_irls_weights(num_frames); + for (int f = 0; f < num_frames; ++f) { + const RegionFlowFeatureList& feature_list = *(*feature_lists)[f]; + GetRegionFlowFeatureIRLSWeights(feature_list, &original_irls_weights[f]); + } + + ParallelFor(0, num_frames, 1, + EstimateMotionIRLSInvoker(MODEL_TRANSLATION, irls_per_round, + false, CameraMotion::VALID, + DefaultModelOptions(), this, + nullptr, // No prior weights. + nullptr, // No thread storage here. + feature_lists, &translation_motions)); + + // Restore weights. + for (int f = 0; f < num_frames; ++f) { + RegionFlowFeatureList& feature_list = *(*feature_lists)[f]; + SetRegionFlowFeatureIRLSWeights(original_irls_weights[f], &feature_list); + } + + const int chunk_size = options_.overlay_analysis_chunk_size(); + const int num_chunks = std::ceil(feature_lists->size() * (1.0f / chunk_size)); + + const int overlay_grid_size = + options_.overlay_detection_options().analysis_mask_size(); + for (int chunk = 0; chunk < num_chunks; ++chunk) { + std::vector translations; + std::vector chunk_features; + const int chunk_start = chunk * chunk_size; + const int chunk_end = std::min((chunk + 1) * chunk_size, num_frames); + for (int f = chunk_start; f < chunk_end; ++f) { + translations.push_back(translation_motions[f].translation()); + chunk_features.push_back((*feature_lists)[f]); + } + + std::vector overlay_indices; + OverlayAnalysis(translations, &chunk_features, &overlay_indices); + for (const auto& overlay_idx : overlay_indices) { + (*camera_motions)[chunk_start].add_overlay_indices(overlay_idx); + } + + // Negative marker to frame chunk_start. 
+ for (int f = chunk_start; f < chunk_end; ++f) { + if (f > chunk_start) { + (*camera_motions)[f].add_overlay_indices(chunk_start - f); + } + + (*camera_motions)[f].set_overlay_domain(overlay_grid_size); + } + } +} + +// Features are aggregated over a regular grid of the image domain with the +// purpose to determine if a grid bin is deemed part of an overlay or not. +// In particular, we distinguish between two types of overlay features, strict +// and loose ones, based on different thresholds regarding their motion. A grid +// bin is flagged as overlay if it contains a sufficient number of the strict +// features, in which case *all* overlay feature candidates (strict and loose +// ones) are flagged by setting their irls weight to zero. +float MotionEstimation::OverlayAnalysis( + const std::vector& translations, + std::vector* feature_lists, + std::vector* overlay_indices) const { + CHECK(feature_lists != nullptr); + CHECK(overlay_indices != nullptr); + CHECK_EQ(feature_lists->size(), translations.size()); + overlay_indices->clear(); + + const int grid_size = + options_.overlay_detection_options().analysis_mask_size(); + + const int mask_size = grid_size * grid_size; + const float scaled_width = 1.0f / normalized_domain_.x() * grid_size; + const float scaled_height = 1.0f / normalized_domain_.y() * grid_size; + + const float strict_zero_motion_threshold = + options_.overlay_detection_options().strict_near_zero_motion(); + const float strict_max_translation_ratio = + options_.overlay_detection_options().strict_max_translation_ratio(); + const float loose_zero_motion_threshold = + options_.overlay_detection_options().loose_near_zero_motion(); + + const float strict_min_texturedness = + options_.overlay_detection_options().strict_min_texturedness(); + + std::vector mask_counter(mask_size, 0); + std::vector overlay_counter(mask_size, 0); + std::vector> overlay_features(mask_size); + + for (int frame = 0; frame < feature_lists->size(); ++frame) { + const 
TranslationModel& translation = translations[frame]; + const float trans_magnitude = + std::hypot(translation.dx(), translation.dy()); + const float strict_trans_threshold = + strict_max_translation_ratio * trans_magnitude; + + RegionFlowFeatureList* feature_list = (*feature_lists)[frame]; + std::vector texturedness; + ComputeRegionFlowFeatureTexturedness(*feature_list, false, &texturedness); + + for (int feat_idx = 0, feat_size = feature_list->feature_size(); + feat_idx < feat_size; ++feat_idx) { + RegionFlowFeature* feature = feature_list->mutable_feature(feat_idx); + const int x = static_cast(feature->x() * scaled_width); + const int y = static_cast(feature->y() * scaled_height); + const int grid_bin = y * grid_size + x; + ++mask_counter[grid_bin]; + + // If translation is near zero, this test is impossible, so continue. + if (trans_magnitude < 1.0f) { // In pixels. + continue; + } + + const float feat_magnitude = + LinearSimilarityAdapter::TransformPoint( + irls_transform_, Vector2_f(feature->dx(), feature->dy())) + .Norm(); + if (feat_magnitude <= loose_zero_motion_threshold) { + overlay_features[grid_bin].push_back(feature); + if (feat_magnitude <= strict_trans_threshold && + feat_magnitude <= strict_zero_motion_threshold && + texturedness[feat_idx] >= strict_min_texturedness) { + ++overlay_counter[grid_bin]; + } + } + } + } + + // Determine potential outlier grids. + const float overlay_min_ratio = + options_.overlay_detection_options().overlay_min_ratio(); + + const float overlay_min_features = + options_.overlay_detection_options().overlay_min_features(); + + for (int i = 0; i < mask_size; ++i) { + // Ensure sufficient features were aggregated. + if (mask_counter[i] > overlay_min_features && + overlay_counter[i] > overlay_min_ratio * mask_counter[i]) { + // Consider all features in this bin outliers. 
+ for (auto& feature_ptr : overlay_features[i]) { + feature_ptr->set_irls_weight(0.0f); + } + overlay_indices->push_back(i); + } + } + + return overlay_indices->size() * (1.0f / mask_size); +} + +void MotionEstimation::PostIRLSSmoothing( + const std::vector& camera_motions, + std::vector* feature_lists) const { + CHECK(feature_lists != nullptr); + + std::vector> feature_grids; + std::vector> feature_taps_3; + std::vector> feature_taps_5; + + struct NonOverlayPredicate { + bool operator()(const RegionFlowFeature& feature) const { + return feature.irls_weight() != 0; + } + }; + + std::vector feature_views(feature_lists->size()); + for (int k = 0; k < feature_views.size(); ++k) { + SelectFeaturesFromList( + [](const RegionFlowFeature& feature) -> bool { + return feature.irls_weight() != 0; + }, + (*feature_lists)[k], &feature_views[k]); + } + + // In normalized domain. + BuildFeatureGrid(normalized_domain_.x(), normalized_domain_.y(), + options_.feature_grid_size(), // In normalized coords. + feature_views, FeatureLocation, &feature_taps_3, + &feature_taps_5, nullptr, &feature_grids); + + std::vector feature_frame_confidence(feature_lists->size(), 1.0f); + if (options_.frame_confidence_weighting()) { + float max_confidence = 0.0; + ; + for (int f = 0; f < feature_lists->size(); ++f) { + feature_frame_confidence[f] = + std::max(1e-3f, InlierCoverage(camera_motions[f], false)); + feature_frame_confidence[f] *= feature_frame_confidence[f]; + max_confidence = std::max(max_confidence, feature_frame_confidence[f]); + } + + const float cut_off_confidence = + options_.reset_confidence_threshold() * max_confidence; + for (int f = 0; f < feature_lists->size(); ++f) { + if (feature_frame_confidence[f] < cut_off_confidence) { + // If registration is bad, reset to identity and let adjacent + // frames do the fill in. 
+ for (auto& feature_ptr : feature_views[f]) {
+ feature_ptr->set_irls_weight(1.0f);
+ }
+ }
+ }
+ }
+
+ RunTemporalIRLSSmoothing(feature_grids, feature_taps_3, feature_taps_5,
+ feature_frame_confidence, &feature_views);
+}
+
+namespace {
+
+void ClearInternalIRLSStructure(RegionFlowFeatureView* feature_view) {
+ for (auto& feature_ptr : *feature_view) {
+ feature_ptr->clear_internal_irls();
+ }
+}
+
+} // namespace.
+
+namespace {
+
+// Note: Push / Pull averaging is performed as reciprocal (effectively we
+// average the per feature registration error and convert this back to the irls
+// weight using 1 / error).
+void TemporalIRLSPush(const FeatureGrid& curr_grid,
+ const FeatureGrid* prev_grid,
+ const std::vector>& feature_taps,
+ float space_scale, const std::vector& space_lut,
+ float feature_scale,
+ const std::vector& feature_lut,
+ float temporal_weight, float curr_frame_confidence,
+ float grid_scale, int grid_dim_x,
+ RegionFlowFeatureView* curr_view,
+ RegionFlowFeatureView* prev_view) {
+ CHECK(curr_view != nullptr);
+ // Spatial filtering of inverse irls weights and the temporally weighted
+ // pushed result from the next frame.
+ for (auto& feature : *curr_view) {
+ float weight_sum = feature->internal_irls().weight_sum() * temporal_weight;
+ float value_sum = feature->internal_irls().value_sum() * temporal_weight;
+
+ const int bin_x = feature->x() * grid_scale;
+ const int bin_y = feature->y() * grid_scale;
+ const int grid_loc = bin_y * grid_dim_x + bin_x;
+
+ for (const auto& bin : feature_taps[grid_loc]) {
+ for (const auto& test_feat : curr_grid[bin]) {
+ const float dist =
+ (FeatureLocation(*test_feat) - FeatureLocation(*feature)).Norm();
+ const float feature_dist = RegionFlowFeatureDistance(
+ feature->feature_descriptor(), test_feat->feature_descriptor());
+ const float weight =
+ space_lut[static_cast(dist * space_scale)] *
+ feature_lut[static_cast(feature_dist * feature_scale)] *
+ curr_frame_confidence;
+
+ weight_sum += weight;
+ value_sum += 1.0f / test_feat->irls_weight() * weight;
+ }
+ }
+
+ // Only zero if spatial AND feature sigma = 0.
+ DCHECK_GT(weight_sum, 0);
+ feature->mutable_internal_irls()->set_weight_sum(weight_sum);
+ feature->mutable_internal_irls()->set_value_sum(value_sum);
+ }
+
+ // Clear previous frame's internal irls.
+ if (prev_view) {
+ ClearInternalIRLSStructure(prev_view);
+ }
+
+ // Evaluate irls weight and push result to feature in the previous frame along
+ // the flow dimension (using spatial interpolation).
+ for (auto& feature : *curr_view) { + feature->set_irls_weight(1.0f / (feature->internal_irls().value_sum() / + feature->internal_irls().weight_sum())); + feature->clear_internal_irls(); + + if (prev_view == NULL) { + continue; + } + + const int bin_x = (feature->x() + feature->dx()) * grid_scale; + const int bin_y = (feature->y() + feature->dy()) * grid_scale; + const int grid_loc = bin_y * grid_dim_x + bin_x; + + for (const auto& bin : feature_taps[grid_loc]) { + for (auto& test_feat : (*prev_grid)[bin]) { + float dist = + (FeatureLocation(*test_feat) - FeatureMatchLocation(*feature)) + .Norm(); + const float feature_dist = + RegionFlowFeatureDistance(feature->feature_match_descriptor(), + test_feat->feature_descriptor()); + const float weight = + space_lut[static_cast(dist * space_scale)] * + feature_lut[static_cast(feature_dist * feature_scale)]; + TemporalIRLSSmoothing* temporal_irls = + test_feat->mutable_internal_irls(); + temporal_irls->set_value_sum(temporal_irls->value_sum() + + weight * 1.0f / feature->irls_weight()); + temporal_irls->set_weight_sum(temporal_irls->weight_sum() + weight); + } + } + } +} + +void TemporalIRLSPull(const FeatureGrid& curr_grid, + const FeatureGrid& prev_grid, + const std::vector>& feature_taps, + float space_scale, const std::vector& space_lut, + float feature_scale, + const std::vector& feature_lut, + float temporal_weight, float curr_frame_confidence, + float grid_scale, int grid_dim_x, + RegionFlowFeatureView* curr_view, + RegionFlowFeatureView* prev_view) { + // Pull irls weights of spatially neighboring features from previous frame. + // Neighborhood is displaced by flow. Pulled weights are weighted by + // temporal_weight. 
+ for (auto& feature : *curr_view) { + const int bin_x = (feature->x() + feature->dx()) * grid_scale; + const int bin_y = (feature->y() + feature->dy()) * grid_scale; + const int grid_loc = bin_y * grid_dim_x + bin_x; + + float weight_sum = 0; + float value_sum = 0; + for (const auto& bin : feature_taps[grid_loc]) { + for (const auto& test_feat : prev_grid[bin]) { + float dist = + (FeatureLocation(*test_feat) - FeatureMatchLocation(*feature)) + .Norm(); + const float feature_dist = + RegionFlowFeatureDistance(feature->feature_match_descriptor(), + test_feat->feature_descriptor()); + const float weight = + space_lut[static_cast(dist * space_scale)] * + feature_lut[static_cast(feature_dist * feature_scale)]; + weight_sum += weight; + value_sum += weight * 1.0f / test_feat->irls_weight(); + } + } + + TemporalIRLSSmoothing* temporal_irls = feature->mutable_internal_irls(); + temporal_irls->set_value_sum(value_sum * temporal_weight); + temporal_irls->set_weight_sum(weight_sum * temporal_weight); + } + + // Spatial filtering of neighboring inverse irls_weight and above + // pulled result from the previous frame. 
+ for (auto& feature : *curr_view) { + float weight_sum = feature->internal_irls().weight_sum(); + float value_sum = feature->internal_irls().value_sum(); + + const int bin_x = feature->x() * grid_scale; + const int bin_y = feature->y() * grid_scale; + const int grid_loc = bin_y * grid_dim_x + bin_x; + + for (const auto& bin : feature_taps[grid_loc]) { + for (const auto& test_feat : curr_grid[bin]) { + float dist = + (FeatureLocation(*test_feat) - FeatureLocation(*feature)).Norm(); + const float feature_dist = RegionFlowFeatureDistance( + feature->feature_descriptor(), test_feat->feature_descriptor()); + const float weight = + space_lut[static_cast(dist * space_scale)] * + feature_lut[static_cast(feature_dist * feature_scale)] * + curr_frame_confidence; + weight_sum += weight; + value_sum += 1.0f / test_feat->irls_weight() * weight; + } + } + + CHECK_GT(weight_sum, 0) << feature->irls_weight(); + feature->mutable_internal_irls()->set_weight_sum(weight_sum); + feature->mutable_internal_irls()->set_value_sum(value_sum); + } + + // Evaluate irls weight. + for (auto& feature : *curr_view) { + feature->set_irls_weight(1.0f / (feature->internal_irls().value_sum() / + feature->internal_irls().weight_sum())); + feature->clear_internal_irls(); + } +} + +} // namespace. + +void MotionEstimation::InitGaussLUT(float sigma, float max_range, + std::vector* lut, + float* scale) const { + CHECK(lut); + // Calculate number of bins if scale is non-zero, otherwise use one bin per + // integer in the domain [0, max_range]. + const int lut_bins = (scale != nullptr) ? (1 << 10) : std::ceil(max_range); + lut->resize(lut_bins); + + const float bin_size = max_range / lut_bins; + const float coeff = -0.5f / (sigma * sigma); + for (int i = 0; i < lut_bins; ++i) { + const float value = i * bin_size; + (*lut)[i] = std::exp(value * value * coeff); + } + + if (scale) { + *scale = 1.0f / bin_size; + } +} + +// Smooth IRLS weights across the volume. 
+void MotionEstimation::RunTemporalIRLSSmoothing( + const std::vector>& feature_grid, + const std::vector>& feature_taps_3, + const std::vector>& feature_taps_5, + const std::vector& frame_confidence, + std::vector* feature_views) const { + // Approximate goal for temporal window length with closest integer that + // achieves roughly homogeneous sized chunks. + const int temporal_length_goal = options_.temporal_irls_diameter(); + const int num_frames = feature_views->size(); + + if (num_frames == 0) { + return; + } + + // Clamp IRLS bounds before smoothing, otherwise outliers skew the result + // heavily. + for (auto& feature_view : *feature_views) { + ClampRegionFlowFeatureIRLSWeights(0.01, 100, &feature_view); + } + + const int num_chunks = std::min( + 1, std::ceil(static_cast(static_cast(num_frames) / + temporal_length_goal))); + const int temporal_length = std::ceil( + static_cast(static_cast(num_frames) / num_chunks)); + + const float grid_resolution = options_.feature_grid_size(); + const int grid_dim_x = + std::ceil(static_cast(normalized_domain_.x() / grid_resolution)); + const float grid_scale = 1.0f / grid_resolution; + + const float spatial_sigma = options_.spatial_sigma(); + + // Setup Gaussian LUT for smoothing in space, time and feature-space. + std::vector space_lut; + + // Using 3 tap smoothing, max distance is 2 bin diagonals, for 5 tap + // smoothing max distance in 3 bin diagonals. + // We use maximum of 3 * sqrt(2) * bin_radius plus 1% room incase maximum + // value is attained. + const float max_space_diff = sqrt(2.0) * 3.f * grid_resolution * 1.01f; + float space_scale; + + InitGaussLUT(spatial_sigma, max_space_diff, &space_lut, &space_scale); + + const float temporal_sigma = options_.temporal_sigma(); + std::vector temporal_lut; + InitGaussLUT(temporal_sigma, temporal_length, &temporal_lut, nullptr); + + const float feature_sigma = options_.feature_sigma(); + const float max_feature_diff = sqrt(3.0) * 255.0; // 3 channels. 
+
+  std::vector<float> feature_lut;
+  float feature_scale;
+
+  InitGaussLUT(feature_sigma, max_feature_diff, &feature_lut, &feature_scale);
+
+  // Smooth each chunk independently.
+  // Smoothing algorithm description:
+  // The volumetric smoothing operation is approximated by a push and pull
+  // phase similar in its nature to scattered data interpolation via push /
+  // pull albeit in time instead of scale space.
+  // (for classic push/pull see:
+  // www.vis.uni-stuttgart.de/~kraus/preprints/vmv06_strengert.pdf)
+  //
+  // Our push/pull algorithm pushes and pulls for each feature its irls weight
+  // with gaussian weights (based on proximity in space, time and feature
+  // space) to its neighbors in the previous or next frame. The result of a
+  // push or pull is aggregated in the features TemporalIRLSSmoothing
+  // structure.
+  //
+  // In general, we first push the weights through the whole volume (clip)
+  // towards the first frame performing spatial and temporal smoothing, then
+  // pull the resulting weights from the first frame through the whole clip
+  // using again spatial and temporal smoothing.
+  //
+  // Specifically, in the push phase a feature's irls weight is updated using
+  // a weighted average (gaussian weights) of its neighboring features and any
+  // pushed information from the next frame
+  // (via TemporalIRLSSmoothing structure).
+  // The updated weight is then pushed along the feature's flow to the
+  // previous frame and spread into the corresponding TemporalIRLSSmoothing
+  // fields of neighboring features in the previous frame.
+  // Similarly, the pull phase proceeds by updating a feature's weights using
+  // a weighted average of its neighboring features and any information from
+  // the previous frame, that is pulled along the feature's flow.
+  for (int chunk = 0; chunk < num_chunks; ++chunk) {
+    const int start_frame = chunk * temporal_length;
+    const int end_frame = std::min((chunk + 1) * temporal_length, num_frames);
+
+    ClearInternalIRLSStructure(&(*feature_views)[end_frame - 1]);
+
+    // Push pass.
+    for (int f = end_frame - 1; f >= start_frame; --f) {
+      RegionFlowFeatureView* curr_view = &(*feature_views)[f];
+      RegionFlowFeatureView* prev_view =
+          f > start_frame ? &(*feature_views)[f - 1] : nullptr;
+      const auto& curr_grid = feature_grid[f];
+      const auto* prev_grid = f > start_frame ? &feature_grid[f - 1] : nullptr;
+
+      // Evaluate temporal weight (pushed weights to this frame are weighted
+      // by temporal weight).
+      float temporal_weight = 0;
+      for (int e = 1; e < end_frame - f; ++e) {
+        temporal_weight += temporal_lut[e];
+      }
+      // Relative weighting to save multiplication of pushed information,
+      // i.e. weight 1.0 for current frame.
+      temporal_weight /= temporal_lut[0];
+
+      TemporalIRLSPush(
+          curr_grid, prev_grid,
+          options_.filter_5_taps() ? feature_taps_5 : feature_taps_3,
+          space_scale, space_lut, feature_scale, feature_lut, temporal_weight,
+          frame_confidence[f], grid_scale, grid_dim_x, curr_view, prev_view);
+    }
+
+    // Pull pass.
+    for (int f = start_frame + 1; f < end_frame; ++f) {
+      RegionFlowFeatureView* curr_view = &(*feature_views)[f];
+      RegionFlowFeatureView* prev_view = &(*feature_views)[f - 1];
+      const auto& curr_grid = feature_grid[f];
+      const auto& prev_grid = feature_grid[f - 1];
+
+      // Evaluate temporal weight.
+      float temporal_weight = 0;
+      for (int e = 1; e <= f - start_frame; ++e) {
+        temporal_weight += temporal_lut[e];
+      }
+      // Relative weighting to save multiplication of pushed information.
+      temporal_weight /= temporal_lut[0];
+
+      TemporalIRLSPull(
+          curr_grid, prev_grid,
+          options_.filter_5_taps() ?
feature_taps_5 : feature_taps_3, + space_scale, space_lut, feature_scale, feature_lut, temporal_weight, + frame_confidence[f], grid_scale, grid_dim_x, curr_view, prev_view); + } + } +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/motion_estimation.h b/mediapipe/util/tracking/motion_estimation.h new file mode 100644 index 000000000..4939a1917 --- /dev/null +++ b/mediapipe/util/tracking/motion_estimation.h @@ -0,0 +1,758 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Fits several linear motion models to the tracked features obtained +// from RegionFlowComputation. +// +// --- Multi-threaded usage (parallel motion estimation over frames) --- +// assume input: vector feature_lists +// // Can be obtained after getting RegionFlowFrame from RegionFlowComputation +// // and executing GetRegionFlowFeatureList (region_flow.h) +// MotionEstimation motion_estimation(MotionEstimationOptions(), +// frame_width, +// frame_height); +// vector camera_motions; +// motion_estimation.EstimateMotionsParallel(false, // no IRLS smoothing. +// &feature_lists, +// &camera_motions); +// // RegionFlowFeatureList can be discarded or passed to Cropper. +// +// +// --- DEPRECATED, per-frame usage --- +// assume input: RegionFlowFrame* flow_frame // from RegionFlowComputation. +// +// // Initialize with standard options. 
+// MotionEstimation motion_estimation(MotionEstimationOptions(), +// frame_width, +// frame_height); +// CameraMotion estimated_motion; +// motion_estimation.EstimateMotion(flow_frame, +// NULL, // deprecated param. +// NULL, // deprecated param. +// &estimation_motion); +// +// // If features are not needed anymore flow_frame can be be discarded now. +// +// // Pass motion models in estimated_motion onto MotionStabilization, +// // if stabilization is desired. + +#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_ +#define MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_ + +#include +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/motion_estimation.pb.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/region_flow.h" + +namespace mediapipe { + +class Homography; +class LinearSimilarityModel; +class MixtureHomography; +class MixtureRowWeights; +class RegionFlowFeature; +class RegionFlowFeatureList; +class RegionFlowFrame; + +class EstimateMotionIRLSInvoker; +class InlierMask; +class IrlsInitializationInvoker; +// Thread local storage for pre-allocated memory. +class MotionEstimationThreadStorage; +class TrackFilterInvoker; + +class MotionEstimation { + public: + MotionEstimation(const MotionEstimationOptions& options, int frame_width, + int frame_height); + virtual ~MotionEstimation(); + MotionEstimation(const MotionEstimation&) = delete; + MotionEstimation& operator=(const MotionEstimation&) = delete; + + // Can be used to re-initialize options between EstimateMotion / + // EstimateMotionsParallel calls. + void InitializeWithOptions(const MotionEstimationOptions& options); + + // Estimates motion models from RegionFlowFeatureLists based on + // MotionEstimationOptions, in a multithreaded manner (frame parallel). 
+  // The computed IRLS weights used on the last iteration of the highest
+  // degree of freedom model are *written* to the irls_weight member for each
+  // RegionFlowFeature in RegionFlowFeatureList which can be a useful
+  // feature for later processing.
+  // In addition the returned irls weights can be smoothed spatially
+  // and temporally before they are output.
+  // Note: The actual vector feature_lists is not modified.
+  virtual void EstimateMotionsParallel(
+      bool post_irls_weight_smoothing,
+      std::vector<RegionFlowFeatureList*>* feature_lists,
+      std::vector<CameraMotion>* camera_motions) const;
+
+  // DEPRECATED function, estimating Camera motion from a single
+  // RegionFlowFrame.
+  virtual void EstimateMotion(const RegionFlowFrame& region_flow_frame,
+                              const int*,  // deprecated, must be NULL.
+                              const int*,  // deprecated, must be NULL.
+                              CameraMotion* camera_motion) const;
+
+  // Public facing API to directly estimate motion models (as opposed to
+  // a cascade of increasing degree of freedom motion models with appropriate
+  // stability analysis via above EstimateMotionsParallel).
+  // Use this if all you need is just a specific motion
+  // model describing/summarizing the motion of the RegionFlowFeatureList.
+  // Returns false if model estimation failed (in this case an identity model
+  // is set in camera_motion).
+  // NOTE: All direct estimation functions assume normalized feature input,
+  // i.e. transformed via NormalizeRegionFlowFeatureList.
+  //
+  // NOTE 2: For easy direct use see Fit* functions below class.
+ bool EstimateTranslationModel(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion); + + bool EstimateLinearSimilarityModel(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion); + + bool EstimateAffineModel(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion); + + bool EstimateHomography(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion); + + bool EstimateMixtureHomography(RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion); + + // Static function which sets motion models (requested in options) to identity + // models. + static void ResetMotionModels(const MotionEstimationOptions& options, + CameraMotion* camera_motion); + + // The following functions ResetTo* functions reset all models that are + // present in camera_motion (tested via has_*) to identity or + // the passed model (which is embedded in higher degree of freedom models + // if applicable). CameraMotion::Type is set in accordance to function name. + static void ResetToIdentity(CameraMotion* camera_motion, + bool consider_valid = false); // Set to true + // for type VALID + + // Resets every specified model to embedded translation model. + // CameraMotion type is set to UNSTABLE. + static void ResetToTranslation(const TranslationModel& translation, + CameraMotion* camera_motion); + + // Resets every specified model with more or equal DOF than a similarity + // to the passed model. + // Camera Motion type is set to UNSTABLE_SIM. + static void ResetToSimilarity(const LinearSimilarityModel& model, + CameraMotion* camera_motion); + + // Resets every specified model with more or equal DOF than a homography + // to the passed model. If flag_as_unstable_model is set, camera motion type + // is set to UNSTABLE_HOMOG. 
+ static void ResetToHomography(const Homography& model, + bool flag_as_unstable_model, + CameraMotion* camera_motion); + + private: + // Simple enum indicating with motion model should be estimated, mapped from + // MotionEstimationOptions. + enum MotionType { + MODEL_AVERAGE_MAGNITUDE = 0, + MODEL_TRANSLATION = 1, + MODEL_LINEAR_SIMILARITY = 2, + MODEL_AFFINE = 3, + MODEL_HOMOGRAPHY = 4, + MODEL_MIXTURE_HOMOGRAPHY = 5, + // ... internal enum values used for mixture spectrum (up to 10 mixtures are + // supported). Do not use directly. + + // Change value if new motions are added. + MODEL_NUM_VALUES = 16, + }; + + // Determines shot boundaries from estimated motion models and input + // feature_lists by setting the corresponding flag in CameraMotion. + // Make sure, this function is called after motion estimation. + void DetermineShotBoundaries( + const std::vector& feature_lists, + std::vector* camera_motions) const; + + // Implementation function to estimate CameraMotion's from + // RegionFlowFeatureLists. + void EstimateMotionsParallelImpl( + bool irls_weights_preinitialized, + std::vector* feature_lists, + std::vector* camera_motions) const; + + struct SingleTrackClipData; + struct EstimateModelOptions; + + EstimateModelOptions DefaultModelOptions() const; + + // Implementation function to estimate all motions for a specific type + // across multiple single tracks. Motions are only estimated for those + // CameraMotions with type less or equal to max_unstable_type. Flag + // irls_weights_preinitialized enables some optimizations in case it is + // set to false as features are not pre-initialized. + // Optionally pass thread_storage. + // Returns true if requested type was attempt to be estimated + // (based on options) , false otherwise. + bool EstimateMotionModels( + const MotionType& max_type, const CameraMotion::Type& max_unstable_type, + const EstimateModelOptions& options, + const MotionEstimationThreadStorage* thread_storage, // optional. 
+ std::vector* clip_datas) const; + + // Multiplies input irls_weights by an upweight multiplier for each feature + // that is part of a sufficiently large track (contribution of each track + // length is by track_length_multiplier, mapping each track length + // to an importance weight in [0, 1]). + void LongFeatureInitialization( + const RegionFlowFeatureList& feature_list, + const LongFeatureInfo& feature_info, + const std::vector& track_length_importance, + std::vector* irls_weights) const; + + // Multiplies input irls_weights by normalization factor that downweights + // features is areas of high density. + void FeatureDensityNormalization(const RegionFlowFeatureList& feature_list, + std::vector* irls_weights) const; + + // Initializes irls weights (if + // MotionEstimationOptions::irls_initialization::activated is true), + // based on a multitude of options (RANSAC based pre-fitting of motion + // models, homography initialization, etc.). + // Processes one frame or all (if frame = -1) within a clip_data, + void IrlsInitialization(const MotionType& type, int max_unstable_type, + int frame, // Specify -1 for all frames. + const EstimateModelOptions& options, + SingleTrackClipData* clip_data) const; + + // Estimation functions for models, called via options by EstimateMotion and + // EstimateMotionsParallel. + // NOTE: All direct estimation functions assume normalized feature input, + // i.e. transformed via + // TransformRegionFlowFeatureList(normalization_transform, feature_list); + // where normalization_transform = + // LinearSimilarityAdapter::NormalizationTransform(frame_width, + // frame_height); + // + // Direct estimation functions perform estimation via iterated reweighted + // least squares (IRLS). In this case specify number of iterations (10 is a + // good default), and optionally the PriorFeatureWeights for each iteration. + // The alphas specify, how much weight should be given to the + // prior weight that the feature has before optimization. 
An alpha of zero + // indicates, no prior weighting, whereas as an alpha of one corresponds to + // full prior weighting. The actual prior is stored in priors. + // Each iteration is reweighted by numerator / error, where error is the L2 + // fitting error after estimation and + // numerator = (1.0 - alpha) * 1.0 + alpha * prior + struct PriorFeatureWeights { + explicit PriorFeatureWeights(int num_iterations) + : alphas(num_iterations, 0.0f) {} + PriorFeatureWeights(int num_iterations, int num_features) + : alphas(num_iterations, 0.0f), priors(num_features, 1.0f) {} + + // Tests for correct dimensions of PriorFeatureWeights. + bool HasCorrectDimension(int num_iterations, int num_features) const { + return alphas.size() == num_iterations && priors.size() == num_features; + } + + // Returns true if at least one alpha is non-zero. + bool HasNonZeroAlpha() const { + return !alphas.empty() && + *std::max_element(alphas.begin(), alphas.end()) > 0; + } + + // Returns true, if a prior was specified. + bool HasPrior() const { return !priors.empty(); } + + std::vector alphas; // Alpha for each IRLS round. + std::vector priors; // Prior weight for each feature. + + // If set, above alpha are not adjusted with iterations, but always set to + // 1.0, given full weight to the prior. + bool use_full_prior = false; + }; + + // In addition, each estimation function can compute its corresponding + // stability features and store it in CameraMotion. These features are needed + // to test via the IsStable* functions further below. + + // Estimates 2 DOF translation model. + // Note: feature_list is assumed to be normalized/transformed by + // LinearSimilarity::NormalizationTransform N. Returned irls weights and + // linear similarity are expressed in original frame, i.e. for estimated model + // M, M' = N^(-1) M N is returned. 
+ void EstimateTranslationModelIRLS( + int irls_rounds, bool compute_stability, + RegionFlowFeatureList* feature_list, + const PriorFeatureWeights* prior_weights, // optional. + CameraMotion* camera_motion) const; + + // Estimates linear similarity from feature_list using irls_rounds iterative + // reweighted least squares iterations. For L2 estimation, use irls_round = 1. + // The irls_weight member of each RegionFlowFeature in feature_list will be + // set to the inverse residual w.r.t. estimated LinearSimilarityModel. + // Note: feature_list is assumed to be normalized/transformed by + // LinearSimilarity::NormalizationTransform N. Returned irls weights and + // linear similarity are expressed in original frame, i.e. for estimated model + // M, M' = N^(-1) M N is returned. + // Returns true if estimation was successful, otherwise returns false and sets + // the CameraMotion::type to INVALID. + bool EstimateLinearSimilarityModelIRLS( + int irls_rounds, bool compute_stability, + RegionFlowFeatureList* feature_list, + const PriorFeatureWeights* prior_weights, // optional. + CameraMotion* camera_motion) const; + + // Same as above for affine motion. + // Note: feature_list is assumed to be normalized/transformed by + // LinearSimilarity::NormalizationTransform N. Returned irls weights and + // affine model are expressed in original frame, i.e. for estimated model + // M, M' = N^(-1) M N is returned. + bool EstimateAffineModelIRLS(int irls_rounds, + RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion) const; + + // Same as above for homography. + // Note: feature_list is assumed to be normalized/transformed by + // LinearSimilarity::NormalizationTransform N. Returned irls weights and + // homography are expressed in original frame, i.e. for estimated model + // M, M' = N^(-1) M N is returned. + // Returns true if estimation was successful, otherwise returns false and sets + // the CameraMotion::type to INVALID. 
+ bool EstimateHomographyIRLS( + int irls_rounds, bool compute_stability, + const PriorFeatureWeights* prior_weights, // optional. + MotionEstimationThreadStorage* thread_storage, // optional. + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const; + + // Same as above for mixture homography. + // Note: feature_list is assumed to be normalized/transformed by + // LinearSimilarity::NormalizationTransform N. Returned irls weights and + // mixture homography are expressed in original frame, i.e. for estimated + // model M, M' = N^(-1) M N is returned. + // Mixture model estimation customized by MotionEstimationOptions. + // Returns true if estimation was successful, otherwise returns false and sets + // the CameraMotion::type to INVALID. + // Supports computation for mixture spectrum, i.e. mixtures with different + // regularizers. For default regularizer pass + // MotionEstimationOptions::mixture_regularizer. Estimated motion will be + // stored in CameraMotion::mixture_homography_spectrum(spectrum_idx). + bool EstimateMixtureHomographyIRLS( + int irls_rounds, bool compute_stability, float regularizer, + int spectrum_idx, // 0 by default. + const PriorFeatureWeights* prior_weights, // optional. + MotionEstimationThreadStorage* thread_storage, // optional. + RegionFlowFeatureList* feature_list, CameraMotion* camera_motion) const; + + // Returns weighted variance for mean translation from feature_list (assumed + // to be in normalized coordinates). Returned variance is in unnormalized + // domain. + float TranslationVariance(const RegionFlowFeatureList& feature_list, + const Vector2_f& translation) const; + + // Replace each features irls weight by robust min-filtered irls weight + // across each track. + void MinFilterIrlsWeightByTrack(SingleTrackClipData* clip_data) const; + + // Performs filtering of irls weight across several tracking clip datas, + // to yield consistent irls weights. 
+ void EnforceTrackConsistency( + std::vector* clip_datas) const; + + // Initializes or modifies prior_weights for passed feature_list by + // biasing toward previous (filtered) IRLS weight for that feature. + // This enables temporal coherence. + void BiasLongFeatures(RegionFlowFeatureList* feature_list, MotionType type, + const EstimateModelOptions& model_options, + PriorFeatureWeights* prior_weights) const; + + // Called by above function to determine the bias each feature is multiplied + // with. + void BiasFromFeatures(const RegionFlowFeatureList& feature_list, + MotionType type, + const EstimateModelOptions& model_options, + std::vector* bias) const; + + // Maps track index to tuple of spatial bias and number of similar + // looking long tracks. + typedef std::unordered_map> SpatialBiasMap; + void ComputeSpatialBias(MotionType type, + const EstimateModelOptions& model_options, + RegionFlowFeatureList* feature_list, + SpatialBiasMap* spatial_bias) const; + + // Updates features weights in feature_list by temporally consistent bias. + void UpdateLongFeatureBias(MotionType type, + const EstimateModelOptions& model_options, + bool remove_terminated_tracks, + bool update_irls_observation, + RegionFlowFeatureList* feature_list) const; + + // Bilateral filtering of irls weights across the passed list. + void SmoothIRLSWeights(std::deque* irls) const; + + // Helper function. Returns number of irls iterations for passed MotionType + // derived from current MotionEstimationOptions. Returns zero, if no + // estimation should be attempted. + int IRLSRoundsFromSettings(const MotionType& type) const; + + // Partitions irls_rounds into several rounds with each having irls_per_round + // interations each based on MotionEstimationOptions::EstimationPolicy. + // Post-condition: total_rounds * irls_per_rounds == irls_rounds. 
+ void PolicyToIRLSRounds(int irls_rounds, int* total_rounds, + int* irls_per_round) const; + + // Check for specified MotionType is estimated model is stable. If not, resets + // feature's irls weights to reset_irls_weights (optional) and resets motion + // model in camera_motion to lower degree of freedom model. In this case, + // CameraMotion::Type is flagged as UNSTABLE_* where * denotes the lower + // degree of freedom model. + // Model is only checked those CameraMotions with type less than or equal to + // max_unstable_type. + void CheckModelStability( + const MotionType& type, const CameraMotion::Type& max_unstable_type, + const std::vector>* reset_irls_weights, + std::vector* feature_lists, + std::vector* camera_motions) const; + + // Implementation function called by above function, to check for a single + // model. + void CheckSingleModelStability(const MotionType& type, + const CameraMotion::Type& max_unstable_type, + const std::vector* reset_irls_weights, + RegionFlowFeatureList* feature_list, + CameraMotion* camera_motion) const; + + // Projects motion model specified by type to lower degree of freedom models. + void ProjectMotionsDown(const MotionType& type, + std::vector* camera_motions) const; + + // Filters passed feature_lists based on + // MotionEstimationOptions::irls_weight_filter. + void IRLSWeightFilter( + std::vector* feature_lists) const; + + // Inlier scale based on average motion magnitude and the fraction + // of the magnitude that is still considered an inlier. + // In general a residual of 1 pixel is assigned an IRLS weight of 1, + // this function returns a residual scale, such that a residual + // of distance_fraction * translation_magnitude equals an IRLS weight of 1 + // if multiplied by returned scale. 
+ float GetIRLSResidualScale(const float avg_motion_magnitude, + float distance_fraction) const; + + const LinearSimilarityModel& InverseNormalizationTransform() const { + return inv_normalization_transform_; + } + + const LinearSimilarityModel& NormalizationTransform() const { + return normalization_transform_; + } + + // Returns domain normalized features fall in. + Vector2_f NormalizedDomain() const { return normalized_domain_; } + + // Returns index within the inlier mask for each feature point. + // Also returns for each bin normalizer to account for different number of + // features per bin during weighting. + void ComputeFeatureMask(const RegionFlowFeatureList& feature_list, + std::vector* mask_indices, + std::vector* bin_normalizer) const; + + // Runs multiple rounds of RANSAC, resetting outlier IRLS weight to + // a low score. + // Optionally can perform temporally consistent selection if inlier_mask is + // specified. + // Returns best model across all iterations in best_model and true if + // estimated model was deemed stable. + bool GetTranslationIrlsInitialization( + RegionFlowFeatureList* feature_list, + const EstimateModelOptions& model_options, float avg_camera_motion, + InlierMask* inlier_mask, // optional. + TranslationModel* best_model) const; + + // Same as above for linear similarities. + bool GetSimilarityIrlsInitialization( + RegionFlowFeatureList* feature_list, + const EstimateModelOptions& model_options, float avg_camera_motion, + InlierMask* inlier_mask, // optional. + LinearSimilarityModel* best_model) const; + + // Computes number of inliers and strict inliers (satisfying much stricter + // threshold) for a given feature list after model fitting. + void ComputeSimilarityInliers(const RegionFlowFeatureList& feature_list, + int* num_inliers, + int* num_strict_inliers) const; + + // Initializes irls weights based on setting + // MotionEstimationOptions::homography_irls_weight_initialization. 
+ void GetHomographyIRLSCenterWeights(const RegionFlowFeatureList& feature_list, + std::vector* center_weights) const; + + // Checks for unreasonable large accelerationas between frames as specified by + // MotionEstimationOptions::StableTranslationBounds. + void CheckTranslationAcceleration( + std::vector* camera_motions) const; + + // Functions below, test passed model is deemed stable according to + // several heuristics set by Stable[MODEL]Bounds in MotionEstimationOptions. + bool IsStableTranslation(const TranslationModel& normalized_translation, + float translation_variance, + const RegionFlowFeatureList& features) const; + + // Tests if passed similarity is stable. Pass number of inliers from + // ComputeSimilarityInliers. + bool IsStableSimilarity(const LinearSimilarityModel& model, + const RegionFlowFeatureList& features, + int num_inliers) const; + + bool IsStableHomography(const Homography& homography, + float average_homography_error, + float inlier_coverage) const; + + bool IsStableMixtureHomography( + const MixtureHomography& homography, float min_block_inlier_coverage, + const std::vector& block_inlier_coverage) const; + + // Computes fraction (in [0, 1]) of inliers w.r.t. frame area using a grid of + // occupancy cells. A feature is consider an inlier if its irls_weight is + // larger or equal to min_inlier_score. + float GridCoverage(const RegionFlowFeatureList& feature_list, + float min_inlier_score, + MotionEstimationThreadStorage* thread_storage) const; + + // Estimates per scanline-block coverage of mixture. If + // assume_rolling_shutter_camera is set, low textured features are allowed to + // have higher error as registration errors would not be as visible here. 
+ void ComputeMixtureCoverage(const RegionFlowFeatureList& feature_list, + float min_inlier_score, + bool assume_rolling_shutter_camera, + MotionEstimationThreadStorage* thread_storage, + CameraMotion* camera_motion) const; + + // Returns average motion magnitude as mean of the translation magnitude from + // the 10th to 90th percentile. + void EstimateAverageMotionMagnitude(const RegionFlowFeatureList& feature_list, + CameraMotion* camera_motion) const; + + // Returns per iteration weight of the feature's irls weight initialization. + float IRLSPriorWeight(int iteration, int irls_rounds) const; + + // Implementation function for above function. Estimates mixture homography + // from features and returns true if estimation was non-degenerate. + bool MixtureHomographyFromFeature( + const TranslationModel& translation, int irls_rounds, float regularizer, + const PriorFeatureWeights* prior_weights, // optional. + RegionFlowFeatureList* feature_list, + MixtureHomography* mix_homography) const; + + // Determines overlay indices (spatial bin locations that are likely to be + // affected by overlays) and stores them in corresponding member in + // CameraMotion. Features that fall within these bins will be assigned a + // weight of zero. + void DetermineOverlayIndices( + bool irls_weights_preinitialized, + std::vector* camera_motions, + std::vector* feature_lists) const; + + // Determine features likely to be part of a static overlay, by setting their + // irls weight to zero. + // Returns fraction of the image domain that is considered to be occupied by + // overlays and specific overlay cell indices in overlay_indices. + float OverlayAnalysis(const std::vector& translations, + std::vector* feature_lists, + std::vector* overlay_indices) const; + + // Smooths feature's irls_weights spatio-temporally. + void PostIRLSSmoothing( + const std::vector& camera_motions, + std::vector* feature_lists) const; + + // Initializes LUT for gaussian weighting. 
By default discretizes the domain + // [0, max_range] into 4K bins, returning scale to map from a value in the + // domain to the corresponding bin. If scale is nullptr max_range bins are + // created instead (in this case scale would be 1.0, i.e. value equals bin + // index). + void InitGaussLUT(float sigma, float max_range, std::vector* lut, + float* scale) const; + + // Performs fast volumetric smoothing / filtering of irls weights. Weights are + // expected to be already binned using BuildFeatureGrid. + void RunTemporalIRLSSmoothing( + const std::vector>& feature_grid, + const std::vector>& feature_taps_3, + const std::vector>& feature_taps_5, + const std::vector& frame_confidence, + std::vector* feature_views) const; + + private: + MotionEstimationOptions options_; + int frame_width_; + int frame_height_; + + LinearSimilarityModel normalization_transform_; + LinearSimilarityModel inv_normalization_transform_; + + // Transform from normalized features to irls domain. + LinearSimilarityModel irls_transform_; + + // Frame dimensions transformed by normalization transform. + Vector2_f normalized_domain_; + std::unique_ptr row_weights_; + + // For initialization biased towards previous frame. + std::unique_ptr inlier_mask_; + + // Stores current bias for each track and the last K irls observations. + struct LongFeatureBias { + explicit LongFeatureBias(float initial_weight) : bias(initial_weight) { + irls_values.push_back(1.0f / initial_weight); + } + + LongFeatureBias() : LongFeatureBias(1.0f) {} + + float bias = 1.0f; // Current bias, stores pixel error, + // i.e. 1 / IRLS. + std::vector irls_values; // Recently observed IRLS values; + // Ring buffer. + int total_observations = 1; + }; + + // Maps track id to LongFeatureBias. + typedef std::unordered_map LongFeatureBiasMap; + + // Bias map indexed by MotionType. + mutable std::vector long_feature_bias_maps_ = + std::vector(static_cast(MODEL_NUM_VALUES)); + + // Lookup tables and scale for FeatureBias computation. 
+ struct FeatureBiasLUT { + // For ComputeSpatialBias weighting. + std::vector spatial_lut; + float spatial_scale; + std::vector color_lut; + float color_scale; + + // For BiasFromFeature computation. + std::vector bias_weight_lut; + float bias_weight_scale; + }; + + FeatureBiasLUT feature_bias_lut_; + + // Counts the number of consecutive duplicate frames for each motion model. + mutable std::vector num_duplicate_frames_ = + std::vector(static_cast(MODEL_NUM_VALUES)); + + friend class EstimateMotionIRLSInvoker; + friend class IrlsInitializationInvoker; + friend class TrackFilterInvoker; + friend class MotionEstimationThreadStorage; +}; + +// Meta-data set in the header of filter streams to communicate information used +// during camera motion estimation. +struct CameraMotionStreamHeader { + CameraMotionStreamHeader() : frame_width(0), frame_height(0) {} + int32 frame_width; + int32 frame_height; +}; + +// Direct fitting functions. +TranslationModel FitTranslationModel(const RegionFlowFeatureList& features); + +LinearSimilarityModel FitLinearSimilarityModel( + const RegionFlowFeatureList& features); + +AffineModel FitAffineModel(const RegionFlowFeatureList& features); + +Homography FitHomography(const RegionFlowFeatureList& features); + +MixtureHomography FitMixtureHomography(const RegionFlowFeatureList& features); + +// Templated fitting functions. 
+template +Model FitModel(const RegionFlowFeatureList& features); + +template <> +inline TranslationModel FitModel(const RegionFlowFeatureList& features) { + return FitTranslationModel(features); +} + +template <> +inline LinearSimilarityModel FitModel(const RegionFlowFeatureList& features) { + return FitLinearSimilarityModel(features); +} + +template <> +inline AffineModel FitModel(const RegionFlowFeatureList& features) { + return FitAffineModel(features); +} + +template <> +inline Homography FitModel(const RegionFlowFeatureList& features) { + return FitHomography(features); +} + +template <> +inline MixtureHomography FitModel(const RegionFlowFeatureList& features) { + return FitMixtureHomography(features); +} + +// Generic projection function that projects models in an arbitrary direction +// (that is from lower to higher or vice versa) via fast model fits, without +// any error bound checking. +// MixtureRowWeights are only necessary for ToModel == MixtureHomography. +template +ToModel ProjectViaFit(const FromModel& model, int frame_width, int frame_height, + MixtureRowWeights* row_weights = nullptr, + int grid_dim = 10) { + // Build a grid of features. + const float dx = frame_width * 1.0f / grid_dim; + const float dy = frame_height * 1.0f / grid_dim; + + // Create region flow from grid. + RegionFlowFeatureList grid_features; + grid_features.set_frame_width(frame_width); + grid_features.set_frame_height(frame_height); + grid_features.set_match_frame(-1); + + for (int k = 0; k <= grid_dim; ++k) { + for (int l = 0; l <= grid_dim; ++l) { + auto* feat = grid_features.add_feature(); + feat->set_x(l * dx); + feat->set_y(k * dy); + } + } + + RegionFlowFeatureListViaTransform(model, &grid_features, 1.0f, + 0.0f, // Replace flow. + false, // Don't change feature loc. 
+ row_weights); + return FitModel(grid_features); +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_ESTIMATION_H_ diff --git a/mediapipe/util/tracking/motion_estimation.proto b/mediapipe/util/tracking/motion_estimation.proto new file mode 100644 index 000000000..79d60c64f --- /dev/null +++ b/mediapipe/util/tracking/motion_estimation.proto @@ -0,0 +1,670 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Settings for MotionEstimation. +syntax = "proto2"; + +package mediapipe; + +// Note: In general for Estimation modes, the prefix are used as follows: +// L2: minimize squared norm of error +// IRLS: iterative reweighted least square, L2 minimization using multiple +// iterations, downweighting outliers. +// Next tag: 69 +message MotionEstimationOptions { + // Specifies which camera models should be estimated, translation is always + // estimated. + optional bool estimate_translation_irls = 1 [default = true]; + + enum LinearSimilarityEstimation { + ESTIMATION_LS_NONE = 0; + ESTIMATION_LS_L2 = 1; // L2 estimation + ESTIMATION_LS_IRLS = 4; // good performance, robust to outliers. + + // DEPRECATED modes. + ESTIMATION_LS_L2_RANSAC = 2; // DEPRECATED, use IRLS instead. 
+ ESTIMATION_LS_L1 = 3; // DEPRECATED, use IRLS instead, or static + // function MotionEstimation:: + // EstimateLinearSimilarityL1 + } + + optional LinearSimilarityEstimation linear_similarity_estimation = 3 + [default = ESTIMATION_LS_IRLS]; + + enum AffineEstimation { + ESTIMATION_AFFINE_NONE = 0; + ESTIMATION_AFFINE_L2 = 1; + ESTIMATION_AFFINE_IRLS = 2; + } + + optional AffineEstimation affine_estimation = 30 + [default = ESTIMATION_AFFINE_NONE]; + + enum HomographyEstimation { + ESTIMATION_HOMOG_NONE = 0; + ESTIMATION_HOMOG_L2 = 1; + ESTIMATION_HOMOG_IRLS = 2; + } + + optional HomographyEstimation homography_estimation = 5 + [default = ESTIMATION_HOMOG_IRLS]; + + // By default, homography estimation minimizes an objective that is not + // strictly the L2 distance between matched points. If the flag is set, each + // row of the linear system is scaled with the exact denominator which results + // in an objective that minimizes the L2 distance. + optional bool homography_exact_denominator_scaling = 53 [default = false]; + + // Per default, we use exact solver for over-determined system using + // well-conditioned QR decomposition. For better speed, set value to false to + // use estimation via normal equations. + optional bool use_exact_homography_estimation = 54 [default = true]; + + // If set uses double instead of float when computing normal equations. + optional bool use_highest_accuracy_for_normal_equations = 55 [default = true]; + + // Regularizer for perspective part of the homography. If zero, no + // regularization is performed. Should be >= 0. + optional float homography_perspective_regularizer = 61 [default = 0]; + + // Note: Mixture models have high DOF are much more affected by outliers + // than models above. It is recommended that if IRLS estimation is NOT used, + // that mixture_regularizer is increased by a factor >=3. 
+  enum MixtureHomographyEstimation {
+    ESTIMATION_HOMOG_MIX_NONE = 0;
+    ESTIMATION_HOMOG_MIX_L2 = 1;
+    ESTIMATION_HOMOG_MIX_IRLS = 2;  // robust to outliers.
+  }
+
+  optional MixtureHomographyEstimation mix_homography_estimation = 12
+      [default = ESTIMATION_HOMOG_MIX_NONE];
+
+  // If row-wise mixture models are estimated, determines number of them.
+  // Note, changing number of mixtures, interpolation sigma and regularizer
+  // is very likely to impact the stability analysis for mixtures and rolling
+  // shutter scoring. At least MixtureHomographyBounds would need to be adjusted
+  // to the new values.
+  optional int32 num_mixtures = 13 [default = 10];
+
+  // If row-wise mixture models are estimated, determines how much each point is
+  // influenced by its neighboring mixtures. Specified as relative sigma
+  // (standard deviation) w.r.t. frame_height.
+  optional float mixture_row_sigma = 14 [default = 0.1];
+
+  // Mixture estimation uses L2 regularizer to assure that adjacent mixture
+  // models are similar.
+  optional float mixture_regularizer = 15 [default = 1e-4];
+
+  // Mixtures are estimated across a spectrum of exponentially increasing
+  // regularizers. In particular the regularizer at level L is given as
+  // mixture_regularizer * mixture_regularizer_base^L.
+  // A maximum of 10 levels are supported (checked!).
+  // Note: When changing the number of levels you probably want to adapt the
+  // MotionStabilizationOptions::rolling_shutter_increment value as well,
+  // as the number of levels directly controls the highest threshold for
+  // the rolling shutter index analysis.
+  optional float mixture_regularizer_levels = 42 [default = 3];
+  optional float mixture_regularizer_base = 43 [default = 2.2];
+  optional int32 mixture_rs_analysis_level = 44 [default = 2];
+
+  // IRLS rounds to down-weight outliers (default across all models).
+  // Note: IRLS in combination with full mixture models (as opposed to the
+  // default reduced ones) is somewhat expensive.
+ optional int32 irls_rounds = 17 [default = 10]; + + // If set to > 0 (always needs be less than 1.0), influence of supplied prior + // irls weights is linearlly decreased from the specified prior scale (weight + // 1.0) to prior_scale. Effectively, biases the solution to the + // supplied prior features. + // Note: Without irls_weights_preinitialized set to true, this option is + // effectively a no op. + // TODO: Retire this option. + optional float irls_prior_scale = 50 [default = 0.2]; + + // Determine how to normalize irls weights w.r.t. average motion magnitude. + // In general a residual of 1 pixel is assigned an IRLS weight of 1. + // However as larger motions in general are affected by a larger error, we + // normalize irls weights, such that a residual + // of distance of irls_motion_magnitude_fraction times + // equals an IRLS weight of 1. + // Must be larger than zero. + optional float irls_motion_magnitude_fraction = 31 [default = 0.08]; + + // Scale that is applied for mixture (where error is expected to be bigger). + optional float irls_mixture_fraction_scale = 68 [default = 1.5]; + + // By default, irls weight of all features are set uniformly to one before + // estimating EACH model, refining them in subsequent irls iterations. + // If flag below is set, input irls weights are used instead for each motion + // model. + optional bool irls_weights_preinitialized = 39 [default = false]; + + // If weights are pre-initialized optionally min filter weights along track + // ids when long tracks are used. This can be used to consistently label + // outliers in time before estimation. + optional bool filter_initialized_irls_weights = 67 [default = false]; + + // If activated, irls weight of outlier features are reset. Outliers are + // defined as those features, for which the best model fit after #rounds + // iterations of RANSAC did NOT yield an error lower than cutoff. + // Only applies to translation and similarity estimation. 
+  message IrlsOutlierInitialization {
+    optional bool activated = 1 [default = false];
+
+    optional int32 rounds = 2 [default = 100];
+    optional float cutoff = 3 [default = 0.003];
+  }
+  optional IrlsOutlierInitialization irls_initialization = 56;
+
+  // Normalizes feature's irls weights prior to estimation such that
+  // features in high density areas are downweighted. Multiplicative in case
+  // irls_weights_preinitialized is set to true.
+  optional bool feature_density_normalization = 62 [default = false];
+
+  // A regular grid of size feature_mask_size x feature_mask_size
+  // is used to normalize features w.r.t. their density.
+  optional int32 feature_mask_size = 63 [default = 10];
+
+  // In addition to above outlier and density initialization, long features
+  // that are present for a specified ratio of the analysis interval can be
+  // upweighted. This greatly improves temporal consistency.
+  message LongFeatureInitialization {
+    optional bool activated = 1 [default = false];
+
+    // Tracks with a length greater than or equal to the specified percentile
+    // are upweighted by the specified upweight_multiplier.
+    optional float min_length_percentile = 2 [default = 0.95];
+
+    // Features passing above test have their irls weight increased by the
+    // specified multiplier prior to estimation.
+    optional float upweight_multiplier = 3 [default = 5];
+  }
+
+  optional LongFeatureInitialization long_feature_initialization = 66;
+
+  // Irls initialization can be performed in a temporally dependent manner,
+  // (if estimation_policy() == TEMPORALLY_DEPENDENT), where
+  // the previous frame's motion estimation biases the IrlsInitialization of the
+  // currently processed frame. In particular the location and magnitude of
+  // inliers is used during the RANSAC selection stage, to favor those features
+  // that agree with the prior, represented as confidence mask of inliers
+  // (using same dimension as above feature_mask_size).
+  // After estimation, the prior is updated.
+ message IrlsMaskOptions { + // Amount prior is decayed after each iteration. + optional float decay = 2 [default = 0.7]; + + // Score that each inlier adds to the current prior. Specified w.r.t. total + // number of features, i.e. each feature increases a bins score by + // inlier_score. + optional float inlier_score = 3 [default = 0.4]; + + // Each inlier scores at least this value regardless of the inlier mask + // (additive). + optional float base_score = 4 [default = 0.2]; + + // Motions are scored relative to previous motion. Threshold denotes + // absolute minimum of denominator. + optional float min_translation_norm = 5 [default = 2e-3]; + + // Translation is updated in every step by blending it with the previous + // estimated translation. (alpha is within 0 to 1, where 0 indicates to use + // only measured translation, i.e. no blending). + optional float translation_blend_alpha = 6 [default = 0.7]; + + // Every time translation is updated, prior (in [0, 1]) is increased by the + // specified amount. + optional float translation_prior_increase = 7 [default = 0.2]; + + // Deprecated fields. + extensions 1; + } + + optional IrlsMaskOptions irls_mask_options = 57; + + // Describes how long feature tracks are leveraged for joint estimation across + // many frames. + message JointTrackEstimationOptions { + // For each frame-pair motion model, describing the motion between frame + // I and I - 1, estimate in addition several additional motion + // models along long feature tracks describing the motion between frame + // I and I - k * motion_stride (additional models are not output, + // but help to filter irls weights). + // Specifies total number of estimated motion models per frame-pair. Must be + // greater than zero. + optional int32 num_motion_models = 1 [default = 3]; + + // Spacing in frames for additional motion models. 
+ optional int32 motion_stride = 2 [default = 15]; + + // If set, performs temporal smoothing across frames of the obtained irls + // weights. + optional bool temporal_smoothing = 3 [default = false]; + + // TODO: Specify which filter is used during temporal smoothing. + // TODO: Limit joint estimation to static scenes + // (dont violate rigidity of alignment in case of wide + // baselines). Adopt stride and num_motion_models + // accordingly. + } + + optional JointTrackEstimationOptions joint_track_estimation = 59; + + // Options being used to bias IRLS features if estimation mode + // TEMPORAL_LONG_FEATURE_BIAS is being used. + // Next Tag: 15 + message LongFeatureBiasOptions { + // Estimation is performed multiple times, alternating between model + // estimation and smooth temporal feature biasing for the specified number + // of rounds. + optional int32 total_rounds = 13 [default = 1]; + + // Controls how fast the bias for a track gets updated, in case feature is + // an inlier. Use higher values for less decay of background motion over + // time. + optional float inlier_bias = 1 [default = 0.98]; + + // Same as above for outliers (or features with low prior), i.e those that + // got recently seeded. + optional float outlier_bias = 2 [default = 0.7]; + + // Number of elements after which we deem estimation to be stable. + // Used to control weight of bias if fewer than the specified number have + // been observed. Also used as maximum ring buffer size (only most recent + // number of observations are kept). Must be > 0. + optional int32 num_irls_observations = 3 [default = 10]; + + // Change in irls weight magnitude (from outlier to inlier) above which we + // reset the current bias. + optional float max_irls_change_ratio = 4 [default = 10.0]; + + // Irls weight above which we consider it to be an inlier for bias update + // purposes (see above inlier and outlier bias). By default, outliers are + // allowed to update their bias faster than inliers. Must be > 0. 
+    optional float inlier_irls_weight = 5 [default = 0.2];
+
+    // Standard deviation used during feature initialization. Current bias of a
+    // track is used to pre-weight features via gaussian weighting with
+    // specified standard deviation.
+    optional float bias_stdev = 12 [default = 1.0];
+
+    // When seeding new tracks (on the first frame), we bilaterally pool
+    // neighboring feature biases as seed. Details are controlled by options
+    // below. If false, the feature's estimation error is used instead
+    // (faster, but less spatially smooth).
+    // If activated it is advised to use a patch descriptor radius of at least
+    // 20 pixels.
+    optional bool use_spatial_bias = 6 [default = true];
+
+    // Newly observed tracks' biases are seeded by similar looking features
+    // in close spatial proximity. For efficiency a grid is used to determine
+    // proximity.
+    // Grid size in normalized coordinates w.r.t. frame domain.
+    optional float grid_size = 7 [default = 0.04];
+
+    // Sigma's for combining feature biases.
+    optional float spatial_sigma = 8 [default = 0.02];
+    optional float color_sigma = 9 [default = 20.0];
+
+    // Defines what we consider to be a long track. Features spawned around
+    // locations of similar looking long tracks are considered to have
+    // high prior, e.g. their initialization is given more weight.
+    optional int32 long_track_threshold = 10 [default = 30];
+
+    // Determines which fraction of long tracks is considered to be sufficient
+    // for highly confident bias seed.
+    optional float long_track_confidence_fraction = 11 [default = 0.25];
+
+    // If activated, uses the irls weights from the estimation of the lower
+    // degree of freedom model to seed the bias of the higher degree of freedom
+    // model. This improves rigidity of the computed motion.
+ optional bool seed_priors_from_bias = 14 [default = false]; + } + + optional LongFeatureBiasOptions long_feature_bias_options = 64; + + // Controls how multiple models via EstimateMotionsParallel are estimated. + enum EstimationPolicy { + INDEPENDENT_PARALLEL = 1; // Models are estimated independently across + // frames in parallel. + TEMPORAL_IRLS_MASK = 2; // Previous frame's estimation biases + // current one, controlled via above + // IrlsMaskOptions. + TEMPORAL_LONG_FEATURE_BIAS = 4; // Frame's estimation is biased along + // long features, controlled via above + // LongFeatureBiasOptions. + JOINTLY_FROM_TRACKS = 3; // Estimation is performed jointly over + // chunks of frames, exercising a consistent + // irls weight PER track. Expects as input + // RegionFlowFeatureList's with long_tracks. + // Controlled via above + // JointTrackEstimationOptions. + } + optional EstimationPolicy estimation_policy = 58 + [default = INDEPENDENT_PARALLEL]; + + optional int32 coverage_grid_size = 51 [default = 10]; + + // Degree of freedom of estimated homography mixtures. If desired, specific + // parts of the homography can be held constant across the mixture. + // For fast draft TRANSLATION_MIXTURE is recommended, for high quality + // SKEW_ROTATION_MIXTURE. + enum MixtureModelMode { + FULL_MIXTURE = 0; // 8 dof * num_mixtures + TRANSLATION_MIXTURE = 1; // 6 dof + 2 dof * num_mixtures + SKEW_ROTATION_MIXTURE = 2; // 4 dof + 4 dof * num_mixtures + } + + optional MixtureModelMode mixture_model_mode = 23 + [default = SKEW_ROTATION_MIXTURE]; + + // If specified, only features that agree with the estimated linear similarity + // will be used to estimate the homography. + // If set, linear_similarity_estimation can not be ESTIMATION_NONE! (checked) + optional bool use_only_lin_sim_inliers_for_homography = 6 [default = true]; + + // Max. deviation to be considered an inlier w.r.t. estimated similarity for + // above flag. This value is set w.r.t. normalized frame diameter. 
+ // TODO: Should take GetIRLSResidualScale into account. + optional float lin_sim_inlier_threshold = 20 [default = 0.003]; + + // If any parameter of the input flow or estimated translation exceeds these + // thresholds we deem the motion INVALID. + message TranslationBounds { + // Absolute minimum of features present. + optional int32 min_features = 1 [default = 3]; + + // Max magnitude of the translation expressed w.r.t. frame diameter + optional float frac_max_motion_magnitude = 2 [default = 0.15]; + + // Motion magnitude is only tested for if standard deviation of estimated + // translation exceeds threshold. + optional float max_motion_stdev_threshold = 4 [default = 0.01]; + + // Max standard deviation of the estimated translation (normalized to frame + // diameter). + optional float max_motion_stdev = 3 [default = 0.065]; + + // Maximum acceleration between frames. Specified relative to minimum + // velocity across two adjacent frames (absolute minimum of 0.001 is + // enforced, ~1 pix for 480p). + // If exceeded for one frame, the whole batch passed to + // EstimateMotionsParallel is labeled unstable. + optional float max_acceleration = 5 [default = 20.0]; + } + + optional TranslationBounds stable_translation_bounds = 32; + + // If any test/bound is violated, the motion is deemed UNSTABLE. + message SimilarityBounds { + // Input frame has to be labeled stable, i.e. enough features and coverage + // present. + optional bool only_stable_input = 1 [default = true]; + + // Minimum number of inlier features (absolute and as fraction of total + // number of features). + // TODO: Dataset run setting this to 0.15 + optional float min_inlier_fraction = 2 [default = 0.2]; + optional float min_inliers = 3 [default = 30]; + + // Bounds on valid similarities. We use larger values compared to + // homographies. Note: Bounds are necessary, to guarantee invertability + // of the resulting similarity. 
+ optional float lower_scale = 4 [default = 0.8]; + optional float upper_scale = 5 [default = 1.25]; // 1 / 0.8. + optional float limit_rotation = 6 [default = 0.25]; // 15 degrees. + + // Thresholds for a feature to be considered inlier w.r.t similarity + // transform, expressed in terms of pixel residual error. Max of absolute + // and fractional thresholds is used. + // Ratio of inliers that pass regular and strict thresholds are storred in + // CameraMotion. + // + // TODO: Just use lin_sim_inlier_threshold directly, however that + // recomputes the error, and requires regression testing. Using an extra + // fractional inlier threshold for now. + // + // Absolute in pixels. + optional float inlier_threshold = 7 [default = 4.0]; + // Scaled by frame diameter. + optional float frac_inlier_threshold = 8 [default = 0]; + + // TODO: Revisit after frame selection change. + // Absolute in pixels. + optional float strict_inlier_threshold = 9 [default = 0.5]; + } + + optional SimilarityBounds stable_similarity_bounds = 33; + + // If any parameter of the estimated homography exceeds these bounds, + // we deem it UNSTABLE_SIM and use estimated similarity instead. + message HomographyBounds { + optional float lower_scale = 1 [default = 0.8]; + optional float upper_scale = 2 [default = 1.25]; // 1 / 0.8. + optional float limit_rotation = 3 [default = 0.25]; // 15 degrees. + optional float limit_perspective = 4 [default = 0.0004]; + + // Inlier coverage is only tested for if average homography error exceeds + // registration_thresholds. Max of the following two thresholds is used. + // + // Absolute in pixels. + optional float registration_threshold = 5 [default = 0.1]; + // Scaled by frame diameter. + optional float frac_registration_threshold = 8 [default = 0]; + + // Minimum fraction of inlier features w.r.t. frame area. + optional float min_inlier_coverage = 6 [default = 0.3]; + + // Grid coverage inlier threshold. 
Pixel errors below this + // threshold are considered inliers. Defined w.r.t. frame diameter, approx. + // 1.5 for 16:9 SD video (480p), i.e. threshold is multiplied by frame + // diameter. + optional float frac_inlier_threshold = 7 [default = 2.0e-3]; + } + + optional HomographyBounds stable_homography_bounds = 11; + + // If any parameter of the estimated homography mixture exceeds these bounds, + // we deem it UNSTABLE_HOMOG and use the estimated homography instead. + message MixtureHomographyBounds { + // Minimum fraction of inlier features w.r.t. block area. + optional float min_inlier_coverage = 1 [default = 0.4]; + + // Each block is tested to be stable, regarding the outliers. + // A frame is labeled unstable, if more or equal than the specified adjacent + // blocks are labeled outliers. + optional int32 max_adjacent_outlier_blocks = 2 [default = 5]; + + // Maximum number of adjacent empty blocks (no inliers). + optional int32 max_adjacent_empty_blocks = 3 [default = 3]; + + // Grid coverage threshold inlier threshold. See identical parameter in + // HomographyBounds. + optional float frac_inlier_threshold = 7 [default = 2.5e-3]; + } + + optional MixtureHomographyBounds stable_mixture_homography_bounds = 34; + + // Scale for stricter coverage evaluation. Used for rolling shutter guess + // computation, by only using high quality inliers. Larger values reflect + // stricter coverage. + // Specifically, when computing coverage via GridCoverage call, + // frac_inlier_threshold is reduced (divided) by specified scale below. + optional float strict_coverage_scale = 41 [default = 1.333]; + + // By default frames with zero trackable features (e.g. at the beginning, + // empty frame or shot boundary) are set identity model but still labeled as + // valid. If set to false, these frames are flagged as invalid, which can be + // useful to locate shot boundaries, etc. 
+  optional bool label_empty_frames_as_valid = 22 [default = true];
+
+  // Setting for temporal smoothing of irls weights in optional post-processing
+  // step.
+  // In normalized coordinates w.r.t. frame domain.
+  optional float feature_grid_size = 24 [default = 0.05];
+  optional float spatial_sigma = 25 [default = 0.01];
+
+  // Frame diameter across which smoothing is performed.
+  optional int32 temporal_irls_diameter = 26 [default = 20];
+  optional float temporal_sigma = 27 [default = 5];  // in frames.
+
+  // Bilateral weight (for un-normalized color domain [0, .. 255]).
+  optional float feature_sigma = 28 [default = 30.0];
+
+  // If set to false 3 taps are used.
+  optional bool filter_5_taps = 29 [default = false];
+
+  // If set, during temporal smoothing, each frame is weighted by its
+  // confidence, defined as the square coverage (or square mean mixture
+  // coverage). Therefore, low confidence fits do not erroneously propagate
+  // over time. In addition, if the confidence is below the specified
+  // confidence_threshold (relative to the maximum coverage observed in the
+  // test interval), irls weights are reset to 1, i.e. biased to
+  // agree with the (unknown) background motion.
+  optional bool frame_confidence_weighting = 48 [default = true];
+  optional float reset_confidence_threshold = 49 [default = 0.4];
+
+  // Filters irls weights before smoothing them according to specified
+  // operation.
+  enum IRLSWeightFilter {
+    IRLS_FILTER_NONE = 0;
+    IRLS_FILTER_TEXTURE = 1;
+    IRLS_FILTER_CORNER_RESPONSE = 2;
+  }
+
+  // Calls TextureFilteredRegionFlowFeatureIRLSWeights on computed irls weights
+  // before smoothing them.
+  optional IRLSWeightFilter irls_weight_filter = 35
+      [default = IRLS_FILTER_NONE];
+
+  // Attempts to detect overlays, i.e. static elements burned-into the video
+  // that potentially corrupt motion estimation.
+  optional bool overlay_detection = 36 [default = false];
+  // Overlay detection is performed over specified number of frames.
+  optional int32 overlay_analysis_chunk_size = 37 [default = 8];
+
+  message OverlayDetectionOptions {
+    // Potential overlay features are aggregated over a mask with cells
+    // mask_size x mask_size as specified below.
+    optional int32 analysis_mask_size = 1 [default = 10];
+
+    // There are two types of candidates of overlay features, strict and loose
+    // ones. Strict candidates are used to determine if a grid bin should be
+    // considered an overlay cell. If the grid is an overlay cell, *all*
+    // candidates, i.e. strict and loose ones will be flagged as overlay
+    // features by setting their corresponding irls weight to zero.
+
+    // A feature is a strict overlay feature if its motion is less than
+    // near_zero_motion AND less than max_translation_ratio times the
+    // estimated translation magnitude at that frame AND its texturedness is
+    // sufficiently high.
+    optional float strict_near_zero_motion = 2 [default = 0.2];
+    optional float strict_max_translation_ratio = 3 [default = 0.2];
+    // Minimum texturedness of a feature to be considered an overlay.
+    // Motivation: Overlays are mostly text or graphics, i.e. have visually
+    // distinguished features.
+    optional float strict_min_texturedness = 5 [default = 0.1];
+
+    // A feature is a loose overlay feature if its motion is less than
+    // loose_near_zero_motion.
+    optional float loose_near_zero_motion = 4 [default = 1.0];
+
+    // Minimum fraction of strict overlay features within a cell to be
+    // considered an overlay cell.
+    optional float overlay_min_ratio = 6 [default = 0.3];
+
+    // Absolute minimum number of strict overlay features within a cell to be
+    // considered an overlay cell.
+    optional float overlay_min_features = 7 [default = 10];
+  }
+
+  optional OverlayDetectionOptions overlay_detection_options = 38;
+
+  // Shot boundaries are introduced in 3 different scenarios:
+  // a) Frame has zero tracked features w.r.t. previous frame
+  // b) Estimated motion is deemed invalid (CameraMotion::INVALID).
+ // c) Visual consistency is above threshold of two adjacent frames. + message ShotBoundaryOptions { + // After cases a & b are determined from features/camera motion, they + // are verified by ensuring visual consistency is above specified threshold, + // if visual consistency has been computed. Only if this is case will the + // frame be labeled as shot boundary. Motivation is, that there should + // always be some (even small) measurable increase in the frame difference + // at a shot boundary. + // Verification is only performed if visual_consistency has been evaluated + // (value >= 0). + optional float motion_consistency_threshold = 1 [default = 0.02]; + + // Threshold for case c). Sometimes, motion estimation will miss shot + // boundaries. We define shot boundaries for which the visual consistency is + // higher than the specified threshold for at least two adjacent frames. + optional float appearance_consistency_threshold = 2 [default = 0.075]; + } + + optional ShotBoundaryOptions shot_boundary_options = 60; + + // By default, irls weights of each feature are overwritten with refined irls + // weights of the last iteration for the highest degree of freedom model that + // was estimated stable. If set to false, original irls weights are retained. + // Note: If overlay detection is activated, features to be deemed overlays + // have their irls weight set to zero, regardless of this setting. + // Similarily, an IRLSWeightFilter is applied if requested, regardless + // of this setting. + optional bool output_refined_irls_weights = 40 [default = true]; + + // Weight initialization for homography estimation. This is to bias homography + // estimation either to foreground or background. + enum HomographyIrlsWeightInitialization { + IRLS_WEIGHT_CONSTANT_ONE = 1; // Constant, treat all features equally. + IRLS_WEIGHT_CENTER_GAUSSIAN = 2; // Weight features in the center higher. + // Tends to lock onto foreground. 
+ IRLS_WEIGHT_PERIMETER_GAUSSIAN = 3; // Weight features around the + // perimeter higher, tends to lock onto + // background and improves rigidity of + // the perspective degree of freedom. + } + + // IRLS weights for homography estimation are initialized based on the + // specified options. If, options irls_weights_preinitialized is set, + // weights are multiplied instead of reset. + optional HomographyIrlsWeightInitialization + homography_irls_weight_initialization = 45 + [default = IRLS_WEIGHT_PERIMETER_GAUSSIAN]; + + // If set to false use L1 norm irls weights instead of L0 norm irls weights. + optional bool irls_use_l0_norm = 46 [default = true]; + + // IRLS weights are determined in a limited domain (in particular helpful + // for stabilization analysis on HD videos). + // TODO: Make this the default. + optional bool domain_limited_irls_scaling = 65 [default = false]; + + // For comparison and debugging purposes. Simply estimates requested models + // without checking their stability via the stable_*_bounds parameters. + // However, invertibility is still checked to avoid invalid data being passed + // to later stages of the stabilizer. + optional bool deactivate_stable_motion_estimation = 47 [default = false]; + + // Projects higher order motions if estimated correctly down to lower order + // motions, therefore replacing the previously estimated motions. + optional bool project_valid_motions_down = 52 [default = false]; + + // DEPRECATED functionality. Use static functions as indicated instead. + // + // Non-linear similarity, use MotionEstimation::EstimateSimilarityModelL2. + optional bool estimate_similarity = 2 [deprecated = true]; + + // Deprecated fields. + extensions 7, 8, 16; +} diff --git a/mediapipe/util/tracking/motion_models.cc b/mediapipe/util/tracking/motion_models.cc new file mode 100644 index 000000000..eb6a8b314 --- /dev/null +++ b/mediapipe/util/tracking/motion_models.cc @@ -0,0 +1,924 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/motion_models.h" + +#include + +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Dense" +#include "absl/strings/str_format.h" + +// Set to true to use catmull rom mixture weights instead of Gaussian weights +// for homography mixture estimation. +bool flags_catmull_rom_mixture_weights = false; + +namespace mediapipe { + +std::string ModelAdapter::ToString( + const TranslationModel& model) { + return absl::StrFormat("%7f", model.dx()) + " " + + absl::StrFormat("%7f", model.dy()); +} + +AffineModel ModelAdapter::ToAffine( + const TranslationModel& model) { + return AffineAdapter::FromArgs(model.dx(), model.dy(), 1, 0, 0, 1); +} + +TranslationModel ModelAdapter::FromAffine( + const AffineModel& model) { + DCHECK_EQ(model.a(), 1); + DCHECK_EQ(model.b(), 0); + DCHECK_EQ(model.c(), 0); + DCHECK_EQ(model.d(), 1); + + return TranslationAdapter::FromArgs(model.dx(), model.dy()); +} + +Homography ModelAdapter::ToHomography( + const TranslationModel& model) { + return HomographyAdapter::FromAffine(ToAffine(model)); +} + +TranslationModel ModelAdapter::FromHomography( + const Homography& model) { + return TranslationAdapter::FromAffine(HomographyAdapter::ToAffine(model)); +} + +void ModelAdapter::GetJacobianAtPoint(const Vector2_f& pt, + float* jacobian) { + DCHECK(jacobian); + jacobian[0] = 1; + jacobian[1] = 0; + jacobian[2] = 0; + jacobian[3] = 1; +} + 
+TranslationModel ModelAdapter::NormalizationTransform( + float frame_width, float frame_height) { + // Independent of frame size. + return TranslationModel(); +} + +TranslationModel ModelAdapter::Maximum( + const TranslationModel& lhs, const TranslationModel& rhs) { + return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy())); +} + +TranslationModel ModelAdapter::ProjectFrom( + const LinearSimilarityModel& model, float frame_width, float frame_height) { + return LinearSimilarityAdapter::ProjectToTranslation(model, frame_width, + frame_height); +} + +TranslationModel ModelAdapter::ProjectFrom( + const AffineModel& model, float frame_width, float frame_height) { + return ProjectFrom(AffineAdapter::ProjectToLinearSimilarity( + model, frame_width, frame_height), + frame_width, frame_height); +} + +TranslationModel ModelAdapter::ProjectFrom( + const Homography& model, float frame_width, float frame_height) { + return ProjectFrom( + HomographyAdapter::ProjectToAffine(model, frame_width, frame_height), + frame_width, frame_height); +} + +// Similarity model. +SimilarityModel ModelAdapter::FromArgs(float dx, float dy, + float scale, + float rotation) { + SimilarityModel model; + model.set_dx(dx); + model.set_dy(dy); + model.set_scale(scale); + model.set_rotation(rotation); + return model; +} + +SimilarityModel ModelAdapter::FromFloatPointer( + const float* args, bool identity_parametrization) { + DCHECK(args); + SimilarityModel model; + model.set_dx(args[0]); + model.set_dy(args[1]); + model.set_scale((identity_parametrization ? 1.f : 0.f) + args[2]); + model.set_rotation(args[3]); + return model; +} + +SimilarityModel ModelAdapter::FromDoublePointer( + const double* args, bool identity_parametrization) { + DCHECK(args); + SimilarityModel model; + model.set_dx(args[0]); + model.set_dy(args[1]); + model.set_scale((identity_parametrization ? 
1.f : 0.f) + args[2]); + model.set_rotation(args[3]); + return model; +} + +Vector2_f ModelAdapter::TransformPoint( + const SimilarityModel& model, const Vector2_f& pt) { + // Setup rotation part. + const float c_r = std::cos(model.rotation()); + const float c_s = std::sin(model.rotation()); + const Vector2_f pt_rot(c_r * pt.x() - c_s * pt.y(), + c_s * pt.x() + c_r * pt.y()); + + return pt_rot * model.scale() + Vector2_f(model.dx(), model.dy()); +} + +SimilarityModel ModelAdapter::Invert( + const SimilarityModel& model) { + bool success = true; + const SimilarityModel result = InvertChecked(model, &success); + if (!success) { + LOG(ERROR) << "Model not invertible. Returning identity."; + return SimilarityModel(); + } else { + return result; + } +} + +SimilarityModel ModelAdapter::InvertChecked( + const SimilarityModel& model, bool* success) { + SimilarityModel inv_model; + inv_model.set_rotation(-model.rotation()); + + if (fabs(model.scale()) > kDetInvertibleEps) { + inv_model.set_scale(1.0 / model.scale()); + *success = true; + } else { + *success = false; + VLOG(1) << "Model is not invertible."; + return SimilarityModel(); + } + + // Rotate and scale [dx, dy] by inv_model. + const float c_r = std::cos(inv_model.rotation()); + const float c_s = std::sin(inv_model.rotation()); + const Vector2_f pt_rot(c_r * model.dx() - c_s * model.dy(), + c_s * model.dx() + c_r * model.dy()); + + const Vector2_f inv_trans = -pt_rot * inv_model.scale(); + + inv_model.set_dx(inv_trans.x()); + inv_model.set_dy(inv_trans.y()); + + return inv_model; +} + +SimilarityModel ModelAdapter::Compose( + const SimilarityModel& lhs, const SimilarityModel& rhs) { + SimilarityModel result; + result.set_scale(lhs.scale() * rhs.scale()); + result.set_rotation(lhs.rotation() + rhs.rotation()); + + // Apply lhs scale and rot to rhs translation. 
+ const float c_r = std::cos(lhs.rotation()); + const float c_s = std::sin(lhs.rotation()); + const Vector2_f trans_rot(c_r * rhs.dx() - c_s * rhs.dy(), + c_s * rhs.dx() + c_r * rhs.dy()); + + const Vector2_f trans_concat = + trans_rot * lhs.scale() + Vector2_f(lhs.dx(), lhs.dy()); + + result.set_dx(trans_concat.x()); + result.set_dy(trans_concat.y()); + return result; +} + +float ModelAdapter::GetParameter(const SimilarityModel& model, + int id) { + switch (id) { + case 0: + return model.dx(); + case 1: + return model.dy(); + case 2: + return model.scale(); + case 3: + return model.rotation(); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } + + return 0; +} + +std::string ModelAdapter::ToString( + const SimilarityModel& model) { + return absl::StrFormat("%7f", model.dx()) + " " + + absl::StrFormat("%7f", model.dy()) + " " + + absl::StrFormat("%7f", model.scale()) + " " + + absl::StrFormat("%7f", model.rotation()); +} + +SimilarityModel ModelAdapter::NormalizationTransform( + float frame_width, float frame_height) { + const float scale = std::hypot(frame_width, frame_height); + DCHECK_NE(scale, 0); + return SimilarityAdapter::FromArgs(0, 0, 1.0 / scale, 0); +} + +TranslationModel ModelAdapter::ProjectToTranslation( + const SimilarityModel& model, float frame_width, float frame_height) { + return LinearSimilarityAdapter::ProjectToTranslation( + LinearSimilarityAdapter::FromSimilarity(model), frame_width, + frame_height); +} + +std::string ModelAdapter::ToString( + const LinearSimilarityModel& model) { + return absl::StrFormat("%7f", model.dx()) + " " + + absl::StrFormat("%7f", model.dy()) + " " + + absl::StrFormat("%7f", model.a()) + " " + + absl::StrFormat("%7f", model.b()); +} + +AffineModel ModelAdapter::ToAffine( + const LinearSimilarityModel& model) { + return AffineAdapter::FromArgs(model.dx(), model.dy(), model.a(), -model.b(), + model.b(), model.a()); +} + +LinearSimilarityModel ModelAdapter::FromAffine( + const AffineModel& model) { + 
DCHECK_EQ(model.a(), model.d()); + DCHECK_EQ(model.b(), -model.c()); + + return LinearSimilarityAdapter::FromArgs(model.dx(), model.dy(), model.a(), + -model.b()); +} + +Homography ModelAdapter::ToHomography( + const LinearSimilarityModel& model) { + return HomographyAdapter::FromAffine(ToAffine(model)); +} + +LinearSimilarityModel ModelAdapter::FromHomography( + const Homography& model) { + return LinearSimilarityAdapter::FromAffine( + HomographyAdapter::ToAffine(model)); +} + +SimilarityModel ModelAdapter::ToSimilarity( + const LinearSimilarityModel& model) { + const float scale = std::hypot(model.a(), model.b()); + return SimilarityAdapter::FromArgs(model.dx(), model.dy(), scale, + std::atan2(model.b(), model.a())); +} + +LinearSimilarityModel ModelAdapter::FromSimilarity( + const SimilarityModel& model) { + return LinearSimilarityAdapter::FromArgs( + model.dx(), model.dy(), model.scale() * std::cos(model.rotation()), + model.scale() * std::sin(model.rotation())); +} + +LinearSimilarityModel ModelAdapter::ScaleParameters( + const LinearSimilarityModel& model_in, float scale) { + LinearSimilarityModel model = model_in; + model.set_a(model.a() * scale); + model.set_b(model.b() * scale); + model.set_dx(model.dx() * scale); + model.set_dy(model.dy() * scale); + return model; +} + +LinearSimilarityModel ModelAdapter::AddIdentity( + const LinearSimilarityModel& model_in) { + LinearSimilarityModel model = model_in; + model.set_a(model.a() + 1); + return model; +} + +void ModelAdapter::GetJacobianAtPoint( + const Vector2_f& pt, float* jacobian) { + DCHECK(jacobian); + // First row. + jacobian[0] = 1; + jacobian[1] = 0; + jacobian[2] = pt.x(); + jacobian[3] = -pt.y(); + // Second row. 
+ jacobian[4] = 0; + jacobian[5] = 1; + jacobian[6] = pt.y(); + jacobian[7] = pt.x(); +} + +LinearSimilarityModel +ModelAdapter::NormalizationTransform( + float frame_width, float frame_height) { + const float scale = std::hypot(frame_width, frame_height); + DCHECK_NE(scale, 0); + return LinearSimilarityAdapter::FromArgs(0, 0, 1.0 / scale, 0); +} + +LinearSimilarityModel ModelAdapter::Maximum( + const LinearSimilarityModel& lhs, const LinearSimilarityModel& rhs) { + return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()), + std::max(lhs.a(), rhs.a()), std::max(lhs.b(), rhs.b())); +} + +TranslationModel ModelAdapter::ProjectToTranslation( + const LinearSimilarityModel& model, float frame_width, float frame_height) { + LinearSimilarityModel center_trans = LinearSimilarityAdapter::FromArgs( + frame_width * 0.5f, frame_height * 0.5f, 1, 0); + LinearSimilarityModel inv_center_trans = LinearSimilarityAdapter::FromArgs( + -frame_width * 0.5f, -frame_height * 0.5f, 1, 0); + + // Express model w.r.t. center. + LinearSimilarityModel center_model = + ModelCompose3(inv_center_trans, model, center_trans); + + // No need to shift back to top-left after decomposition, as translations + // are independent from coordinate origin. 
+ return TranslationAdapter::FromArgs(center_model.dx(), center_model.dy()); +} + +std::string ModelAdapter::ToString(const AffineModel& model) { + return absl::StrFormat("%7f", model.dx()) + " " + + absl::StrFormat("%7f", model.dy()) + " " + + absl::StrFormat("%7f", model.a()) + " " + + absl::StrFormat("%7f", model.b()) + " " + + absl::StrFormat("%7f", model.c()) + " " + + absl::StrFormat("%7f", model.d()); +} + +AffineModel ModelAdapter::NormalizationTransform( + float frame_width, float frame_height) { + const float scale = std::hypot(frame_width, frame_height); + DCHECK_NE(scale, 0); + return AffineAdapter::FromArgs(0, 0, 1.0f / scale, 0, 0, 1.0f / scale); +} + +Homography ModelAdapter::ToHomography(const AffineModel& model) { + float params[8] = {model.a(), model.b(), model.dx(), model.c(), + model.d(), model.dy(), 0, 0}; + return HomographyAdapter::FromFloatPointer(params, false); +} + +AffineModel ModelAdapter::FromHomography(const Homography& model) { + DCHECK_EQ(model.h_20(), 0); + DCHECK_EQ(model.h_21(), 0); + + float params[6] = {model.h_02(), model.h_12(), // dx, dy + model.h_00(), model.h_01(), // a, b + model.h_10(), model.h_11()}; // c, d + + return FromFloatPointer(params, false); +} + +AffineModel ModelAdapter::ScaleParameters( + const AffineModel& model_in, float scale) { + AffineModel model = model_in; + model.set_a(model.a() * scale); + model.set_b(model.b() * scale); + model.set_c(model.c() * scale); + model.set_d(model.d() * scale); + model.set_dx(model.dx() * scale); + model.set_dy(model.dy() * scale); + return model; +} + +AffineModel ModelAdapter::AddIdentity( + const AffineModel& model_in) { + AffineModel model = model_in; + model.set_a(model.a() + 1); + model.set_d(model.d() + 1); + return model; +} + +void ModelAdapter::GetJacobianAtPoint(const Vector2_f& pt, + float* jacobian) { + DCHECK(jacobian); + // First row. 
+ jacobian[0] = 1; + jacobian[1] = 0; + jacobian[2] = pt.x(); + jacobian[3] = pt.y(); + jacobian[4] = 0; + jacobian[5] = 0; + + // Second row. + jacobian[6] = 0; + jacobian[7] = 1; + jacobian[8] = 0; + jacobian[9] = 0; + jacobian[10] = pt.x(); + jacobian[11] = pt.y(); +} + +AffineModel ModelAdapter::Maximum(const AffineModel& lhs, + const AffineModel& rhs) { + return FromArgs(std::max(lhs.dx(), rhs.dx()), std::max(lhs.dy(), rhs.dy()), + std::max(lhs.a(), rhs.a()), std::max(lhs.b(), rhs.b()), + std::max(lhs.c(), rhs.c()), std::max(lhs.d(), rhs.d())); +} + +LinearSimilarityModel ModelAdapter::ProjectFrom( + const AffineModel& model, float frame_width, float frame_height) { + return AffineAdapter::ProjectToLinearSimilarity(model, frame_width, + frame_height); +} + +LinearSimilarityModel ModelAdapter::ProjectFrom( + const Homography& model, float frame_width, float frame_height) { + return ProjectFrom( + AffineAdapter::ProjectFrom(model, frame_width, frame_height), frame_width, + frame_height); +} + +LinearSimilarityModel ModelAdapter::ProjectToLinearSimilarity( + const AffineModel& model, float frame_width, float frame_height) { + AffineModel center_trans = AffineAdapter::FromArgs( + frame_width * 0.5f, frame_height * 0.5f, 1, 0, 0, 1); + AffineModel inv_center_trans = AffineAdapter::FromArgs( + -frame_width * 0.5f, -frame_height * 0.5f, 1, 0, 0, 1); + + // Express model w.r.t. center. + AffineModel center_model = + ModelCompose3(inv_center_trans, model, center_trans); + + // Determine average scale. + const float scale = std::sqrt(AffineAdapter::Determinant(center_model)); + + // Goal is approximate matrix: + // (a b (a' -b' + // c d) with b' a') + // + // := := + // v_1 v_2 + // After normalization by the scale, a' = cos(u) and b' = sin(u) + // therefore, the columns on the right hand side have norm 1 and are + // orthogonal. 
+ // + // ==> Orthonormalize v_1 and v_2 + + Vector2_f x_map(center_model.a(), center_model.c()); // == v_1 + Vector2_f y_map(center_model.b(), center_model.d()); // == v_2 + x_map.Normalize(); + y_map.Normalize(); + + // Two approaches here + // A) Perform Grahm Schmidt, i.e. QR decomp / polar decomposition, i.e.: + // y_map' = y_map - * x_map; + // ==> no error in x_map, error increasing with larger y + // B) Compute the middle vector between x_map and y_map and create + // orthogonal system from that + // (rotate by -45': [ 1 1 + // -1 1] * 1/sqrt(2) + // Error equally distributed between x and y. + // + // Comparing approach A and B: + // + // video 1 (gleicher4) video 2 (pool dance) + // Method B + // Max diff : dx: 1.6359973 4.600112 + // dy: 2.1076841 11.51579 + // a: 1.004498 1.01036 + // b: 0.0047194548 0.027450036 + // Method A + // Max diff : dx: 4.3549204 14.145205 + // dy: 2.4496114 7.7804289 + // a: 1.0136091 1.043335 + // b: 0.0024313219 0.0065769218 + + // Using method B: + Vector2_f middle = (x_map + y_map).Normalize(); + + Vector2_f a_b = Vector2_f(middle.x() + middle.y(), // see above matrix. + middle.y() - middle.x()) + .Normalize(); + AffineModel lin_approx = AffineAdapter::FromArgs( + center_model.dx(), center_model.dy(), scale * a_b.x(), -scale * a_b.y(), + scale * a_b.y(), scale * a_b.x()); + + // Map back to top-left origin. + return LinearSimilarityAdapter::FromAffine( + ModelCompose3(center_trans, lin_approx, inv_center_trans)); +} + +AffineModel ModelAdapter::ProjectFrom(const Homography& model, + float frame_width, + float frame_height) { + return HomographyAdapter::ProjectToAffine(model, frame_width, frame_height); +} + +Homography ModelAdapter::InvertChecked(const Homography& model, + bool* success) { + // Could do adjoint method and do it by hand. Use Eigen for now, not that + // crucial at this point. 
+ + Eigen::Matrix3d model_mat; + model_mat(0, 0) = model.h_00(); + model_mat(0, 1) = model.h_01(); + model_mat(0, 2) = model.h_02(); + model_mat(1, 0) = model.h_10(); + model_mat(1, 1) = model.h_11(); + model_mat(1, 2) = model.h_12(); + model_mat(2, 0) = model.h_20(); + model_mat(2, 1) = model.h_21(); + model_mat(2, 2) = 1.0f; + + if (model_mat.determinant() < kDetInvertibleEps) { + VLOG(1) << "Homography not invertible, det is zero."; + *success = false; + return Homography(); + } + + Eigen::Matrix3d inv_model_mat = model_mat.inverse(); + + if (inv_model_mat(2, 2) == 0) { + LOG(ERROR) << "Degenerate homography. See proto."; + *success = false; + return Homography(); + } + + *success = true; + Homography inv_model; + const float scale = 1.0f / inv_model_mat(2, 2); + inv_model.set_h_00(inv_model_mat(0, 0) * scale); + inv_model.set_h_01(inv_model_mat(0, 1) * scale); + inv_model.set_h_02(inv_model_mat(0, 2) * scale); + inv_model.set_h_10(inv_model_mat(1, 0) * scale); + inv_model.set_h_11(inv_model_mat(1, 1) * scale); + inv_model.set_h_12(inv_model_mat(1, 2) * scale); + inv_model.set_h_20(inv_model_mat(2, 0) * scale); + inv_model.set_h_21(inv_model_mat(2, 1) * scale); + + return inv_model; +} + +std::string ModelAdapter::ToString(const Homography& model) { + return absl::StrFormat("%7f", model.h_00()) + " " + + absl::StrFormat("%7f", model.h_01()) + " " + + absl::StrFormat("%7f", model.h_02()) + " " + + absl::StrFormat("%7f", model.h_10()) + " " + + absl::StrFormat("%7f", model.h_11()) + " " + + absl::StrFormat("%7f", model.h_12()) + " " + + absl::StrFormat("%7f", model.h_20()) + " " + + absl::StrFormat("%7f", model.h_21()); +} + +AffineModel ModelAdapter::ToAffine(const Homography& model) { + DCHECK_EQ(model.h_20(), 0); + DCHECK_EQ(model.h_21(), 0); + AffineModel affine_model; + affine_model.set_a(model.h_00()); + affine_model.set_b(model.h_01()); + affine_model.set_dx(model.h_02()); + affine_model.set_c(model.h_10()); + affine_model.set_d(model.h_11()); + 
affine_model.set_dy(model.h_12()); + return affine_model; +} + +Homography ModelAdapter::FromAffine(const AffineModel& model) { + return Embed(model); +} + +bool ModelAdapter::IsAffine(const Homography& model) { + return model.h_20() == 0 && model.h_21() == 0; +} + +void ModelAdapter::GetJacobianAtPoint(const Vector2_f& pt, + float* jacobian) { + DCHECK(jacobian); + // First row. + jacobian[0] = pt.x(); + jacobian[1] = pt.y(); + jacobian[2] = 1; + jacobian[3] = 0; + jacobian[4] = 0; + jacobian[5] = 0; + jacobian[6] = -pt.x() * pt.x(); + jacobian[7] = -pt.x() * pt.y(); + + // Second row. + jacobian[8] = 0; + jacobian[9] = 0; + jacobian[10] = 0; + jacobian[11] = pt.x(); + jacobian[12] = pt.y(); + jacobian[13] = 1; + jacobian[14] = -pt.x() * pt.y(); + jacobian[15] = -pt.y() * pt.y(); +} + +Homography ModelAdapter::NormalizationTransform( + float frame_width, float frame_height) { + const float scale = std::hypot(frame_width, frame_height); + DCHECK_NE(scale, 0); + return HomographyAdapter::FromArgs(1.0f / scale, 0, 0, 0, 1.0f / scale, 0, 0, + 0); +} + +AffineModel ModelAdapter::ProjectToAffine(const Homography& model, + float frame_width, + float frame_height) { + Homography center_trans; + center_trans.set_h_02(frame_width * 0.5f); + center_trans.set_h_12(frame_height * 0.5f); + + Homography inv_center_trans; + inv_center_trans.set_h_02(-frame_width * 0.5f); + inv_center_trans.set_h_12(-frame_height * 0.5f); + + // Express model w.r.t. center. + Homography center_model = + ModelCompose3(inv_center_trans, model, center_trans); + + // Zero out perspective. + center_model.set_h_20(0); + center_model.set_h_21(0); + + // Map back to top left and embed. + return ToAffine(ModelCompose3(center_trans, center_model, inv_center_trans)); +} + +namespace { + +// Returns true if non-empty intersection is present. 
In this case, start and +// end are clipped to rect, specifically after clipping the line +// [start, end] is inside the rectangle or incident to the boundary +// of the rectangle. If strict is set to true, [start, end] is strictly inside +// the rectangle, i.e. not incident to the boundary (minus start and end +// point which still can be incident). +// Implemented using Liang-Barsky algorithm. +// http://en.wikipedia.org/wiki/Liang%E2%80%93Barsky +inline bool ClipLine(const Vector2_f& rect, bool strict, Vector2_f* start, + Vector2_f* end) { + Vector2_f diff = *end - *start; + float p[4] = {-diff.x(), diff.x(), -diff.y(), diff.y()}; + + // Bounds are (x_min, y_min) = (0, 0) + // (x_max, y_max) = rect + float q[4] = {start->x() - 0, rect.x() - start->x(), start->y() - 0, + rect.y() - start->y()}; + + // Compute parametric intersection points. + float near = -1e10; + float far = 1e10; + for (int k = 0; k < 4; ++k) { + if (fabs(p[k]) < 1e-6f) { + // Line is parallel to one axis of rectangle. + if ((strict && q[k] <= 0.0f) || q[k] < 0.0f) { + // Line is outside rectangle. + return false; + } else { + // Possible intersection along other dimensions. + continue; + } + } else { + // Line is not parallel -> compute intersection. + float intersect = q[k] / p[k]; + // Sign of p determines if near or far parameter. + if (p[k] < 0.0f) { + near = std::max(near, intersect); + } else { + far = std::min(far, intersect); + } + } + } + + if (near > far) { + // Line is outside of rectangle. + return false; + } + + // Clip near and far to valid line segment interval [0, 1]. + far = std::min(far, 1.0f); + near = std::max(near, 0.0f); + + if (near <= far) { + // Non-empty intersection. Single points are considered valid intersection. + *end = *start + diff * far; + *start = *start + diff * near; + return true; + } else { + // Empty intersection. + return false; + } +} + +} // namespace. 
+ +template +float ModelMethods::NormalizedIntersectionArea(const Model& model_1, + const Model& model_2, + const Vector2_f& rect) { + const float rect_area = rect.x() * rect.y(); + if (rect_area <= 0) { + LOG(WARNING) << "Empty rectangle passed -> empty intersection."; + return 0.0f; + } + + std::vector> lines(4); + lines[0] = std::make_pair(Vector2_f(0, 0), Vector2_f(0, rect.y())); + lines[1] = + std::make_pair(Vector2_f(0, rect.y()), Vector2_f(rect.x(), rect.y())); + lines[2] = + std::make_pair(Vector2_f(rect.x(), rect.y()), Vector2_f(rect.x(), 0)); + lines[3] = std::make_pair(Vector2_f(rect.x(), 0), Vector2_f(0, 0)); + + float model_1_area = 0.0f; + float model_2_area = 0.0f; + for (int k = 0; k < 4; ++k) { + Vector2_f start_1 = Adapter::TransformPoint(model_1, lines[k].first); + Vector2_f end_1 = Adapter::TransformPoint(model_1, lines[k].second); + // Use trapezoidal rule for polygon area. + model_1_area += 0.5 * (end_1.y() + start_1.y()) * (end_1.x() - start_1.x()); + Vector2_f start_2 = Adapter::TransformPoint(model_2, lines[k].first); + Vector2_f end_2 = Adapter::TransformPoint(model_2, lines[k].second); + model_2_area += 0.5 * (end_2.y() + start_2.y()) * (end_2.x() - start_2.x()); + } + + const float average_area = 0.5f * (model_1_area + model_2_area); + if (average_area <= 0) { + LOG(WARNING) << "Degenerative models passed -> empty intersection."; + return 0.0f; + } + + // First, clip transformed rectangle against origin defined by model_1. + bool success = true; + Model diff = ModelDiffChecked(model_2, model_1, &success); + if (!success) { + LOG(WARNING) << "Model difference is singular -> empty intersection."; + return 0.0f; + } + + float area = 0.0f; + for (int k = 0; k < 4; ++k) { + Vector2_f start_1 = Adapter::TransformPoint(diff, lines[k].first); + Vector2_f end_1 = Adapter::TransformPoint(diff, lines[k].second); + if (ClipLine(rect, false, &start_1, &end_1)) { + // Non-empty intersection. + // Transform intersection back to world coordinate system. 
+ const Vector2_f start = Adapter::TransformPoint(model_1, start_1); + const Vector2_f end = Adapter::TransformPoint(model_1, end_1); + // Use trapezoidal rule for polygon area without explicit ordering of + // vertices. + area += 0.5 * (end.y() + start.y()) * (end.x() - start.x()); + } + } + + // Second, clip transformed rectangle against origin defined by model_2. + Model inv_diff = Adapter::InvertChecked(diff, &success); + if (!success) { + LOG(WARNING) << "Model difference is singular -> empty intersection."; + return 0.0f; + } + + for (int k = 0; k < 4; ++k) { + Vector2_f start_2 = Adapter::TransformPoint(inv_diff, lines[k].first); + Vector2_f end_2 = Adapter::TransformPoint(inv_diff, lines[k].second); + // Use strict comparison to address degenerate case of incident rectangles + // in which case intersection would be counted twice if non-strict + // comparison is employed. + if (ClipLine(rect, true, &start_2, &end_2)) { + // Transform start and end back to origin. + const Vector2_f start = Adapter::TransformPoint(model_2, start_2); + const Vector2_f end = Adapter::TransformPoint(model_2, end_2); + area += 0.5 * (end.y() + start.y()) * (end.x() - start.x()); + } + } + + // Normalize w.r.t. average rectangle area. + return area / average_area; +} + +MixtureRowWeights::MixtureRowWeights(int frame_height, int margin, float sigma, + float y_scale, int num_models) + : frame_height_(frame_height), + y_scale_(y_scale), + margin_(margin), + sigma_(sigma), + num_models_(num_models) { + mid_points_.resize(num_models_); + + // Compute mid_point (row idx) for each model. + if (flags_catmull_rom_mixture_weights) { + const float model_height = + static_cast(frame_height_) / (num_models - 1); + + // Use Catmull-rom spline. + // Compute weighting matrix. + weights_.resize(frame_height * num_models); + float spline_weights[4]; + + // No margin support for splines. + if (margin_ > 0) { + LOG(WARNING) << "No margin support when flag catmull_rom_mixture_weights " + << "is set. 
Margin is reset to zero, it is recommended " + << "that RowWeightsBoundChecked is used to prevent " + << "segfaults."; + margin_ = 0; + } + + for (int i = 0; i < frame_height; ++i) { + float* weight_ptr = &weights_[i * num_models]; + + float float_pos = static_cast(i) / model_height; + int int_pos = float_pos; + memset(weight_ptr, 0, sizeof(weight_ptr[0]) * num_models); + + float dy = float_pos - int_pos; + + // Weights sum to one, for all choices of dy. + // For definition see + // en.wikipedia.org/wiki/Cubic_Hermite_spline#Catmull.E2.80.93Rom_spline + spline_weights[0] = 0.5f * (dy * ((2.0f - dy) * dy - 1.0f)); + spline_weights[1] = 0.5f * (dy * dy * (3.0f * dy - 5.0f) + 2.0f); + spline_weights[2] = 0.5f * (dy * ((4.0f - 3.0f * dy) * dy + 1.0f)); + spline_weights[3] = 0.5f * (dy * dy * (dy - 1.0f)); + + weight_ptr[int_pos] += spline_weights[1]; + if (int_pos > 0) { + weight_ptr[int_pos - 1] += spline_weights[0]; + } else { + weight_ptr[int_pos] += spline_weights[0]; // Double knot. + } + + CHECK_LT(int_pos, num_models - 1); + weight_ptr[int_pos + 1] += spline_weights[2]; + if (int_pos + 1 < num_models - 1) { + weight_ptr[int_pos + 2] += spline_weights[3]; + } else { + weight_ptr[int_pos + 1] += spline_weights[3]; // Double knot. + } + } + } else { + // Gaussian weights. + const float model_height = static_cast(frame_height_) / num_models; + + for (int i = 0; i < num_models; ++i) { + mid_points_[i] = (i + 0.5f) * model_height; + } + + // Compute gaussian weights. + const int num_values = frame_height_ + 2 * margin_; + std::vector row_dist_weights(num_values); + const float common = -0.5f / (sigma * sigma); + for (int i = 0; i < num_values; ++i) { + row_dist_weights[i] = std::exp(common * i * i); + } + + // Compute weighting matrix. + weights_.resize(num_values * num_models); + for (int i = 0; i < num_values; ++i) { + float* weight_ptr = &weights_[i * num_models]; + float weight_sum = 0; + + // Gaussian weights via lookup. 
+ for (int j = 0; j < num_models; ++j) { + weight_ptr[j] = row_dist_weights[abs(i - margin_ - mid_points_[j])]; + weight_sum += weight_ptr[j]; + } + + // Normalize. + DCHECK_GT(weight_sum, 0); + const float inv_weight_sum = 1.0f / weight_sum; + for (int j = 0; j < num_models; ++j) { + weight_ptr[j] *= inv_weight_sum; + } + } + } +} + +float MixtureRowWeights::WeightThreshold(float frac_blocks) { + const float model_height = static_cast(frame_height_) / num_models_; + + const float y = model_height * frac_blocks + mid_points_[0]; + const float* row_weights = RowWeightsClamped(y / y_scale_); + return row_weights[0]; +} + +// Explicit instantiations of ModelMethods. +template class ModelMethods; +template class ModelMethods; +template class ModelMethods; +template class ModelMethods; +template class ModelMethods; + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/motion_models.h b/mediapipe/util/tracking/motion_models.h new file mode 100644 index 000000000..567831ad5 --- /dev/null +++ b/mediapipe/util/tracking/motion_models.h @@ -0,0 +1,1849 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_H_ +#define MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_H_ + +#include +#include +#include +#include + +#include "absl/container/node_hash_map.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/singleton.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/region_flow.pb.h" // NOLINT + +namespace mediapipe { + +// Abstract Camera Model, functionality that each model must support. +template +class ModelAdapter { + public: + // Initializes a model from vector to data. + // If identity_parametrization is set, it is assumed that for args = 0 -> + // Model = identity, else args = 0 -> zero transform. + static Model FromFloatPointer(const float* args, + bool identity_parametrization); + static Model FromDoublePointer(const double* args, + bool identity_parametrization); + + // Transforms points according to model * pt. + static Vector2_f TransformPoint(const Model& model, const Vector2_f& pt); + + // Returns model^(-1), outputs feasibility to variable success (can not be + // NULL). If model is not invertible, function should return identity model. + static Model InvertChecked(const Model& model, bool* success); + + // Returns model^(-1), returns identity model if inversion is not possible, + // and warns via LOG(ERROR). It is recommended that InvertChecked is used + // instead. + // Note: Default implementation, motion models only need to supply above + // function. + static Model Invert(const Model& model); + + // Concatenates two models. Returns lhs * rhs + static Model Compose(const Model& lhs, const Model& rhs); + + // Debugging function to create plots. Output parameters separated by space. + static std::string ToString(const Model& model); + + // Returns number of DOF. 
+ static constexpr int NumParameters(); + + // Access parameters in a model agnostic way. Order is the order of + // specification in the corresponding motion_models.proto file, i.e. + // id = proto_id - 1. + static float GetParameter(const Model& model, int id); + + // Sets parameter in a model agonstic way. Same interpretation of id as for + // GetParameter. + static void SetParameter(int id, float value, Model* model); + + // Returns normalization transform for specific frame size. Range after + // normalization is [0, 1]. + static Model NormalizationTransform(float frame_width, float frame_height); + + // Embeds a lower paramteric model into this model. + // Overload for specialization. + template + static Model Embed(const LowerModel& model); + + // Projects higher motion model H to lower one L such that for points x_i + // \sum_i || H * x - L * x|| == min + // For this we transform the models to the center of the specified frame + // domain at which degree's of freedom are usually independent. Details can be + // found in the individual implementation functions. + template + static Model ProjectFrom(const HigherModel& model, float frame_width, + float frame_height); +}; + +// Specialized implementations, with additional functionality if needed. 
+template <> +class ModelAdapter { + public: + static TranslationModel FromArgs(float dx, float dy); + static TranslationModel FromFloatPointer(const float* args, + bool identity_parametrization); + static TranslationModel FromDoublePointer(const double* args, + bool identity_parametrization); + + static Vector2_f TransformPoint(const TranslationModel& model, + const Vector2_f& pt); + static TranslationModel Invert(const TranslationModel& model); + static TranslationModel InvertChecked(const TranslationModel& model, + bool* success); + static TranslationModel Compose(const TranslationModel& lhs, + const TranslationModel& rhs); + static float GetParameter(const TranslationModel& model, int id); + static void SetParameter(int id, float value, TranslationModel* model); + + static std::string ToString(const TranslationModel& model); + + static constexpr int NumParameters() { return 2; } + + // Support to convert to and from affine. + static AffineModel ToAffine(const TranslationModel& model); + + // Fails with debug check, if affine model is not a translation. + static TranslationModel FromAffine(const AffineModel& model); + + static Homography ToHomography(const TranslationModel& model); + + // Fails with debug check if homography is not a translation. + static TranslationModel FromHomography(const Homography& model); + + // Evaluates Jacobian at specified point and parameters set to 0. + // Note: This is independent of whether identity parametrization was used + // or not via From*Pointer. + // Jacobian has to be of size 2 x NumParams(), and is returned in column + // major order. 
+ static void GetJacobianAtPoint(const Vector2_f& pt, float* jacobian); + + static TranslationModel NormalizationTransform(float frame_width, + float frame_height); + + static TranslationModel Embed(const TranslationModel& model) { return model; } + + static TranslationModel ProjectFrom(const TranslationModel& model, + float frame_width, float frame_height) { + return model; + } + + static TranslationModel ProjectFrom(const LinearSimilarityModel& model, + float frame_width, float frame_height); + + static TranslationModel ProjectFrom(const AffineModel& model, + float frame_width, float frame_height); + + static TranslationModel ProjectFrom(const Homography& model, + float frame_width, float frame_height); + + // Returns parameter wise maximum. + static TranslationModel Maximum(const TranslationModel& lhs, + const TranslationModel& rhs); + + // Return determinant of model. + static float Determinant(const TranslationModel& unused) { return 1; } +}; + +template <> +class ModelAdapter { + public: + static SimilarityModel FromArgs(float dx, float dy, float scale, + float rotation); + static SimilarityModel FromFloatPointer(const float* args, bool); + static SimilarityModel FromDoublePointer(const double* args, bool); + + static Vector2_f TransformPoint(const SimilarityModel& model, + const Vector2_f& pt); + static SimilarityModel Invert(const SimilarityModel& model); + static SimilarityModel InvertChecked(const SimilarityModel& model, + bool* success); + static SimilarityModel Compose(const SimilarityModel& lhs, + const SimilarityModel& rhs); + + static float GetParameter(const SimilarityModel& model, int id); + static void SetParameter(int id, float value, SimilarityModel* model); + + static std::string ToString(const SimilarityModel& model); + + static constexpr int NumParameters() { return 4; } + + static SimilarityModel NormalizationTransform(float frame_width, + float frame_height); + + static TranslationModel ProjectToTranslation(const SimilarityModel& model, + 
float frame_width, + float frame_height); +}; + +template <> +class ModelAdapter { + public: + static LinearSimilarityModel FromArgs(float dx, float dy, float a, float b); + static LinearSimilarityModel FromFloatPointer(const float* args, + bool identity_parametrization); + static LinearSimilarityModel FromDoublePointer(const double* args, + bool identity_parametrization); + + static Vector2_f TransformPoint(const LinearSimilarityModel& model, + const Vector2_f& pt); + static LinearSimilarityModel Invert(const LinearSimilarityModel& model); + static LinearSimilarityModel InvertChecked(const LinearSimilarityModel& model, + bool* success); + + static LinearSimilarityModel Compose(const LinearSimilarityModel& lhs, + const LinearSimilarityModel& rhs); + + static float GetParameter(const LinearSimilarityModel& model, int id); + static void SetParameter(int id, float value, LinearSimilarityModel* model); + + static std::string ToString(const LinearSimilarityModel& model); + + static constexpr int NumParameters() { return 4; } + + // Support to convert to and from affine. + static AffineModel ToAffine(const LinearSimilarityModel& model); + + static Homography ToHomography(const LinearSimilarityModel& model); + + // Fails with debug check, if homography is not a similarity. + static LinearSimilarityModel FromHomography(const Homography& model); + + // Fails with debug check, if affine model is not a similarity. + static LinearSimilarityModel FromAffine(const AffineModel& model); + + // Conversion from and to non-linear similarity. + static SimilarityModel ToSimilarity(const LinearSimilarityModel& model); + static LinearSimilarityModel FromSimilarity(const SimilarityModel& model); + + // Additional functionality: + + // Composes scaled identity transform with model (sI * model). + static LinearSimilarityModel ScaleParameters( + const LinearSimilarityModel& model, float scale); + + // Adds identity I to model (I + model). 
+ static LinearSimilarityModel AddIdentity(const LinearSimilarityModel& model); + + // Evaluates Jacobian at specified point and parameters set to 0. + // Note: This is independent of whether identity parametrization was used + // or not via From*Pointer. + // Jacobian has to be of size 2 x NumParams(), and is returned in column + // major order. + static void GetJacobianAtPoint(const Vector2_f& pt, float* jacobian); + + static LinearSimilarityModel NormalizationTransform(float frame_width, + float frame_height); + + static LinearSimilarityModel Embed(const TranslationModel& model) { + return FromArgs(model.dx(), model.dy(), 1, 0); + } + + static LinearSimilarityModel Embed(const LinearSimilarityModel& model) { + return model; + } + + static TranslationModel ProjectToTranslation( + const LinearSimilarityModel& model, float frame_width, + float frame_height); + + static LinearSimilarityModel ProjectFrom(const LinearSimilarityModel& model, + float frame_width, + float frame_height) { + return model; + } + + static LinearSimilarityModel ProjectFrom(const AffineModel& model, + float frame_width, + float frame_height); + + static LinearSimilarityModel ProjectFrom(const Homography& model, + float frame_width, + float frame_height); + // Returns parameter wise maximum. + static LinearSimilarityModel Maximum(const LinearSimilarityModel& lhs, + const LinearSimilarityModel& rhs); + + // Return determinant of model. 
+ static float Determinant(const LinearSimilarityModel& m) { + return m.a() * m.a() + m.b() * m.b(); + } +}; + +template <> +class ModelAdapter { + public: + static AffineModel FromArgs(float dx, float dy, float a, float b, float c, + float d); + static AffineModel FromFloatPointer(const float* args, + bool identity_parametrization); + static AffineModel FromDoublePointer(const double* args, + bool identity_parametrization); + + static Vector2_f TransformPoint(const AffineModel& model, + const Vector2_f& pt); + static AffineModel Invert(const AffineModel& model); + static AffineModel InvertChecked(const AffineModel& model, bool* success); + static AffineModel Compose(const AffineModel& lhs, const AffineModel& rhs); + + static float GetParameter(const AffineModel& model, int id); + static void SetParameter(int id, float value, AffineModel* model); + + static std::string ToString(const AffineModel& model); + + static constexpr int NumParameters() { return 6; } + + // Support to convert to and from affine. + static AffineModel ToAffine(const AffineModel& model) { return model; } + + static AffineModel FromAffine(const AffineModel& model) { return model; } + + static Homography ToHomography(const AffineModel& model); + + // Fails with debug check, if homography is not affine. + static AffineModel FromHomography(const Homography& model); + + // Additional functionality: + + // Composes scaled identity transform with model (sI * model). + static AffineModel ScaleParameters(const AffineModel& model, float scale); + + // Adds identity I to model (I + model). + static AffineModel AddIdentity(const AffineModel& model); + + // Evaluates Jacobian at specified point and parameters set to 0. + // Note: This is independent of whether identity parametrization was used + // or not via From*Pointer. + // Jacobian has to be of size 2 x NumParams(), and is returned in column + // major order. 
+ static void GetJacobianAtPoint(const Vector2_f& pt, float* jacobian); + + static AffineModel NormalizationTransform(float frame_width, + float frame_height); + + static AffineModel Embed(const TranslationModel& model) { + return FromArgs(model.dx(), model.dy(), 1, 0, 0, 1); + } + + static AffineModel Embed(const LinearSimilarityModel& model) { + return FromArgs(model.dx(), model.dy(), model.a(), -model.b(), model.b(), + model.a()); + } + + static AffineModel Embed(const AffineModel& model) { return model; } + + static AffineModel ProjectFrom(const AffineModel& model, float frame_width, + float frame_height) { + return model; + } + + static AffineModel ProjectFrom(const Homography& model, float frame_width, + float frame_height); + + static LinearSimilarityModel ProjectToLinearSimilarity( + const AffineModel& model, float frame_width, float frame_height); + + // Returns parameter wise maximum. + static AffineModel Maximum(const AffineModel& lhs, const AffineModel& rhs); + + static float Determinant(const AffineModel& m) { + return m.a() * m.d() - m.b() * m.c(); + } +}; + +template <> +class ModelAdapter { + public: + static Homography FromArgs(float h_00, float h_01, float h_02, float h_10, + float h_11, float h_12, float h_20, float h_21); + + static Homography FromFloatPointer(const float* args, + bool identity_parametrization); + static Homography FromDoublePointer(const double* args, + bool identity_parametrization); + + static Vector2_f TransformPoint(const Homography& model, const Vector2_f& pt); + + static Vector3_f TransformPoint3(const Homography& model, + const Vector3_f& pt); + + static Homography Invert(const Homography& model); + static Homography InvertChecked(const Homography& model, bool* success); + static Homography Compose(const Homography& lhs, const Homography& rhs); + + static float GetParameter(const Homography& model, int id); + static void SetParameter(int id, float value, Homography* model); + + static std::string ToString(const 
Homography& model); + + static constexpr int NumParameters() { return 8; } + + static bool IsAffine(const Homography& model); + + // Support to convert to and from affine. Debug check that model is actually + // an affine model. + static AffineModel ToAffine(const Homography& model); + static Homography FromAffine(const AffineModel& model); + + static Homography ToHomography(const Homography& model) { return model; } + static Homography FromHomography(const Homography& model) { return model; } + + // Additional functionality: + // Evaluates Jacobian at specified point and parameters set to 0. + // Note: This is independent of whether identity parametrization was used + // or not via From*Pointer. + // Jacobian has to be of size 2 x NumParams(), and is returned in column + // major order. + static void GetJacobianAtPoint(const Vector2_f& pt, float* jacobian); + + static Homography NormalizationTransform(float frame_width, + float frame_height); + + static Homography Embed(const Homography& model) { return model; } + + static Homography Embed(const AffineModel& model) { + return FromArgs(model.a(), model.b(), model.dx(), model.c(), model.d(), + model.dy(), 0, 0); + } + + static Homography Embed(const LinearSimilarityModel& model) { + return FromArgs(model.a(), -model.b(), model.dx(), model.b(), model.a(), + model.dy(), 0, 0); + } + + static Homography Embed(const TranslationModel& model) { + return FromArgs(1, 0, model.dx(), 0, 1, model.dy(), 0, 0); + } + + static float Determinant(const Homography& m) { + // Applying laplace formula to last column. + // h_00 h_01 h_02 + // h_10 h_11 h_12 + // h_20 h_21 1 <-- apply laplace. 
+ return m.h_20() * (m.h_01() * m.h_12() - m.h_11() * m.h_02()) + + -m.h_21() * (m.h_00() * m.h_12() - m.h_10() * m.h_02()) + + 1.0f * (m.h_00() * m.h_11() - m.h_10() * m.h_01()); + } + + static AffineModel ProjectToAffine(const Homography& model, float frame_width, + float frame_height); +}; + +// Common algorithms implemented using corresponding ModelAdapter. +// Implemented in cc file, explicitly instantiated below. +template +class ModelMethods { + typedef ModelAdapter Adapter; + + public: + // Returns _normalized_ intersection area of rectangle transformed by + // model_1 and model_2, respectively. + static float NormalizedIntersectionArea(const Model& model_1, + const Model& model_2, + const Vector2_f& rect); +}; + +// Traits to bind mixture and corresponding base model together. +template +struct MixtureTraits { + typedef BaseModel base_model; + typedef MixtureModel model; +}; + +typedef MixtureTraits + LinearSimilarityTraits; +typedef MixtureTraits AffineTraits; +typedef MixtureTraits HomographyTraits; + +template +class MixtureModelAdapterBase { + public: + typedef typename MixtureTraits::model MixtureModel; + typedef typename MixtureTraits::base_model BaseModel; + typedef ModelAdapter BaseModelAdapter; + + // Initializes a model from vector to data. All weights are set to one. + // If identity_parametrization is set, it is assumed that for args = 0 -> + // Model = identity, else args = 0 -> zero transform. + // Adjacent models are assumed to be separated by + // NumParameters() + skip elements. + static MixtureModel FromFloatPointer(const float* args, + bool identity_parametrization, int skip, + int num_models); + static MixtureModel FromDoublePointer(const double* args, + bool identity_parametrization, int skip, + int num_models); + + // Mixture models are not closed under composition and inversion. + // Instead, each point has to be transformed via above functions. 
+ // However, a mixture model can be composed with a BaseModel from either left + // or right, by component-wise composition. + // Returns mixture_model * base_model. + static MixtureModel ComposeRight(const MixtureModel& mixture_model, + const BaseModel& base_model); + + // Returns base_model * mixture_model. + static MixtureModel ComposeLeft(const MixtureModel& mixture_model, + const BaseModel& base_model); + + // Debugging function to create plots. Output parameters separated by delim. + static std::string ToString(const MixtureModel& model, std::string delim); + + // Returns total number of DOF (number of models * BaseModel DOF) + static int NumParameters(const MixtureModel& model) { + return model.model_size() * BaseModelAdapter::NumParameters(); + } + + // Access parameters in a model agnostic way. Order is the order of + // specification in the corresponding motion_models.proto file, i.e. + // id = proto_id - 1. + static float GetParameter(const MixtureModel& model, int model_id, + int param_id); + + static void SetParameter(int model_id, int param_id, float value, + MixtureModel* model); + + static MixtureModel IdentityModel(int num_mixtures); + + // Returns average model across mixture, i.e. mean of each parameter across + // the mixture. + static BaseModel MeanModel(const MixtureModel& model); + + // Fits a linear model to each parameter across mixture and returns mixture + // evaluated across line. 
+ static MixtureModel LinearModel(const MixtureModel& model); + + static MixtureModel Embed(const BaseModel& base_model, int num_mixtures); +}; + +class MixtureRowWeights; + +template +class MixtureModelAdapter : public MixtureModelAdapterBase { + public: + typedef typename MixtureModelAdapterBase::MixtureModel + MixtureModel; + typedef typename MixtureModelAdapterBase::BaseModel BaseModel; + typedef ModelAdapter BaseModelAdapter; + + // Returns convex combination of models from supplied mixture_model, + // specifically: + // \sum_i mixture_model.model(i) * weights[i] + // where: + // b) weights[i] need to be normalized to sum to one. + static BaseModel ToBaseModel(const MixtureModel& mixture_model, + const float* weights); + + // Transforms points according ToBaseModel(model, weights) * pt; + // Note: Weights need to sum to one (not checked). + static Vector2_f TransformPoint(const MixtureModel& model, + const float* weights, const Vector2_f& pt); + + static Vector2_f TransformPoint(const MixtureModel& model, + const MixtureRowWeights& weights, + const Vector2_f& pt); + + // Transforms / solves for points according to + // ToBaseModel(model, weights)^(-1) * pt + // Fails with CHECK if model is not invertible. + // Note: Weights need to sum to one (not checked). + static Vector2_f SolveForPoint(const MixtureModel& model, + const float* weights, const Vector2_f& pt); + + // Same as above, indicating if model is invertible in parameter + // success. If model is not invertible, passed parameter + // pt is returned unchanged. + static Vector2_f SolveForPointChecked(const MixtureModel& model, + const float* weights, + const Vector2_f& pt, bool* success); +}; + +// Re-implemented for speed benefits. 
+template <> +class MixtureModelAdapter + : public MixtureModelAdapterBase { + public: + inline static Homography ToBaseModel(const MixtureHomography& model, + const float* weights); + inline static Vector2_f TransformPoint(const MixtureHomography& model, + const float* weights, + const Vector2_f& pt); + // Overload. OK as only input format for weights changed. + inline static Vector2_f TransformPoint(const MixtureHomography& model, + const MixtureRowWeights& weights, + const Vector2_f& pt); + + inline static Vector2_f SolveForPoint(const MixtureHomography& model, + const float* weights, + const Vector2_f& pt); + + inline static Vector2_f SolveForPointChecked(const MixtureModel& model, + const float* weights, + const Vector2_f& pt, + bool* success); +}; + +// Compositing for multiple models in order of argument list. +template +Model ModelCompose2(const Model& a, const Model& b) { + typedef ModelAdapter Adapter; + return Adapter::Compose(a, b); +} + +template +Model ModelCompose3(const Model& a, const Model& b, const Model& c) { + typedef ModelAdapter Adapter; + return Adapter::Compose(a, Adapter::Compose(b, c)); +} + +template +Model ModelCompose4(const Model& a, const Model& b, const Model& c, + const Model& d) { + typedef ModelAdapter Adapter; + return Adapter::Compose(a, Adapter::Compose(b, Adapter::Compose(c, d))); +} + +template +Model ModelInvert(const Model& model) { + typedef ModelAdapter Adapter; + return Adapter::Invert(model); +} + +// Returns model according to b^(-1) * a +template +Model ModelDiff(const Model& a, const Model& b) { + typedef ModelAdapter Adapter; + return Adapter::Compose(Adapter::Invert(b), a); +} + +template +Model ModelDiffChecked(const Model& a, const Model& b, bool* success) { + typedef ModelAdapter Adapter; + Model b_inv = Adapter::InvertChecked(b, success); + return Adapter::Compose(b_inv, a); +} + +template +Vector2_f TransformPoint(const Model& m, const Vector2_f& v) { + return ModelAdapter::TransformPoint(m, v); +} + +// 
Epsilon threshold for determinant. Below this threshold we consider +// the linear model to be non-invertible. +const float kDetInvertibleEps = 1e-10; + +// Threshold for stability. Used to determine if a particular motion model +// is invertible AND likely to be stable after inversion (imposes higher +// threshold on determinant than just for invertibility). +const float kDetStableEps = 1e-2; + +template +bool IsInverseStable(const Model& model) { + return ModelAdapter::Determinant(model) > kDetStableEps; +} + +// Accumulates camera motions in accum: +// If motions for frames 1..N are: F1, F2, .. FN, where Fk is the motion that +// maps frame k to k-1 (backwards motion), then cumulative motion mapping frame +// N to 0 is: +// C = F1 * F2 * ... * FN. +// +// This function computes it recursively: C(k) = C(k-1) * Fk. +template +void AccumulateModel(const Model& model, Model* accum) { + *accum = ModelCompose2(*accum, model); +} + +// Accumulates inverse camera motions in accum_inverted: +// We want to compute the inverse motion that maps frame 0 to frame N: +// C^-1 = FN^-1 * .... * F2^-1 * F1^-1. +// (inverse of C defined above for AccumulateModel). +// +// This function computes it recursively: C(k)^-1 = Fk^-1 * C(k-1)^-1. +// +// Return value indicates accumulation was successful (it might fail if model +// is not invertible), otherwise accum_inverted is left unchanged. +template +bool AccumulateInvertedModel(const Model& model, Model* accum_inverted) { + bool success = true; + const Model inv_model = ModelAdapter::InvertChecked(model, &success); + if (success) { + *accum_inverted = ModelCompose2(inv_model, *accum_inverted); + } + return success; +} + +// Returns true if |predicted * ground_truth^(-1)| < bounds (element wise). +// Use UniformModel to initialize bounds. 
+template +bool ModelDiffWithinBounds(const Model& ground_truth, const Model& predicted, + const Model& bounds) { + Model diff = ModelAdapter::Compose( + predicted, ModelAdapter::Invert(ground_truth)); + Model identity; + for (int p = 0; p < ModelAdapter::NumParameters(); ++p) { + const float bound = ModelAdapter::GetParameter(bounds, p); + const float diff_p = fabs(ModelAdapter::GetParameter(diff, p) - + ModelAdapter::GetParameter(identity, p)); + + if (diff_p > bound) { + LOG(WARNING) << "Param diff " << p << " out of bounds: " << diff_p + << " > " << bound << " bound"; + return false; + } + } + return true; +} + +// Returns true if model is identity within floating point accuracy. +template +bool IsModelIdentity(const Model& model) { + Model identity; + for (int p = 0; p < ModelAdapter::NumParameters(); ++p) { + const float diff_p = fabs(ModelAdapter::GetParameter(model, p) - + ModelAdapter::GetParameter(identity, p)); + + if (diff_p > 1e-6f) { + return false; + } + } + return true; +} + +// Expresses input model M w.r.t. new domain given by LinearSimilarityTransform +// (scale 0 0 +// 0 scale 0) := S -> S M S^(-1) +template +Model CoordinateTransform(const Model& model, float scale) { + return CoordinateTransform( + model, ModelAdapter::FromArgs(0, 0, scale, 0)); +} + +// For model M and similarity S returns +// S * M * S^(-1). +template +Model CoordinateTransform(const Model& model, + const LinearSimilarityModel& similarity) { + return ModelCompose3(ModelAdapter::Embed(similarity), model, + ModelAdapter::Embed(ModelInvert(similarity))); +} + +// Returns a model with all parameters set to value. +template +Model UniformModelParameters(const float value) { + std::array::NumParameters()> params; + params.fill(value); + return ModelAdapter::FromFloatPointer(params.data(), false); +} + +// Returns a blended model: a * (1 - weight_b) + b * weight_b. +// Assumes 0 <= weight_b <= 1. 
+// Note that blending the homographies is a non-linear operation if the +// intention is to obtain a transform that blends the points transformed by +// a and b. However, this is a linear approximation, which ignores the +// perspective division, and simply blends the coefficients. +template +Model BlendModels(const Model& a, const Model& b, float weight_b) { + Model blended; + DCHECK_GE(weight_b, 0); + DCHECK_LE(weight_b, 1); + const float weight_a = 1 - weight_b; + for (int p = 0; p < ModelAdapter::NumParameters(); ++p) { + const float pa = ModelAdapter::GetParameter(a, p); + const float pb = ModelAdapter::GetParameter(b, p); + ModelAdapter::SetParameter(p, pa * weight_a + pb * weight_b, + &blended); + } + return blended; +} + +template +std::string ModelToString(const Model& model) { + return ModelAdapter::ToString(model); +} + +// Typedef's. +typedef ModelAdapter TranslationAdapter; +typedef ModelAdapter SimilarityAdapter; +typedef ModelAdapter LinearSimilarityAdapter; +typedef ModelAdapter AffineAdapter; +typedef ModelAdapter HomographyAdapter; + +typedef ModelMethods TranslationMethods; +typedef ModelMethods SimilarityMethods; +typedef ModelMethods LinearSimilarityMethods; +typedef ModelMethods AffineMethods; +typedef ModelMethods HomographyMethods; + +typedef MixtureModelAdapter + MixtureLinearSimilarityAdapter; +typedef MixtureModelAdapter MixtureAffineAdapter; +typedef MixtureModelAdapter MixtureHomographyAdapter; + +// Stores pre-computed normalized mixture weights. +// Weights are computed for each scanline, +// based on gaussian weighting of y-location to mid-points for each model +// (specified in scanlines!). +// We use even spacing between mid-points by default. +// By supplying a y_scale != 1.f, normalized coordinates can be used as input. +// Possible unnormalized y values for RowWeigts are +// [-margin, frame_height + margin). 
+class MixtureRowWeights { + public: + MixtureRowWeights(int frame_height, int margin, float sigma, float y_scale, + int num_models); + int NumModels() const { return num_models_; } + float YScale() const { return y_scale_; } + float Sigma() const { return sigma_; } + + // Test if MixtureRowWeights should be re-initialized (call constructor + // again), based on changed options. + bool NeedsInitialization(int num_models, float sigma, float y_scale) const { + return (num_models != num_models_ || fabs(sigma - sigma_) > 1e-6f || + fabs(y_scale - y_scale_) > 1e-6f); + } + + const float* RowWeights(float y) const { + int bin_y = y * y_scale_ + 0.5; + DCHECK_LT(bin_y, frame_height_ + margin_); + DCHECK_GE(bin_y, -margin_); + return &weights_[(bin_y + margin_) * num_models_]; + } + + // Same as above but clamps parameter y to be within interval + // (-margin, frame_height + margin). + const float* RowWeightsClamped(float y) const { + int bin_y = y * y_scale_ + 0.5; + bin_y = std::max(-margin_, std::min(frame_height_ - 1 + margin_, bin_y)); + return &weights_[(bin_y + margin_) * num_models_]; + } + + // Returns weight threshold for fractional block distance, e.g. + // parameter 1.5f returns row weight at 1.5f * block_height from block center. + float WeightThreshold(float frac_blocks); + + private: + int frame_height_; + float y_scale_; + int margin_; + float sigma_; + int num_models_; + + std::vector mid_points_; + std::vector weights_; +}; + +// Returns pointer (caller takes ownership) of initialized MixtureRowWeights. +inline MixtureRowWeights* MixtureRowWeightsFromCameraMotion( + const CameraMotion& camera_motion, int frame_height) { + return new MixtureRowWeights(frame_height, + 0, // no margin. + camera_motion.mixture_row_sigma(), 1.0, + camera_motion.mixture_homography().model_size()); +} + +// Performs element wise smoothing of input models with per parameter sigma's +// in time (and optionally bilateral). 
Parameters of optional +// model_sigma that are NOT 0 are interpreted as bilateral smoothing sigma. +// Use UniformModelParameters to set all value of sigma_time to the same sigma. +template +void SmoothModels(const Model& sigma_time_model, const Model* model_sigma, + std::vector* models) { + CHECK(models); + + const int num_models = models->size(); + + std::vector> smoothed_model_data(num_models); + + for (int param = 0; param < ModelAdapter::NumParameters(); ++param) { + const float sigma_time = + ModelAdapter::GetParameter(sigma_time_model, param); + + if (sigma_time == 0) { + // Don't perform any smoothing, just copy. + for (int i = 0; i < num_models; ++i) { + smoothed_model_data[i].push_back( + ModelAdapter::GetParameter((*models)[i], param)); + } + continue; + } + + // Create lookup table for frame weights. + const int frame_radius = + std::min(num_models - 1, std::ceil(sigma_time * 1.5f)); + const int frame_diameter = 2 * frame_radius + 1; + + // Create lookup table for weights. + std::vector frame_weights(frame_diameter); + const float frame_coeff = -0.5f / (sigma_time * sigma_time); + + int frame_idx = 0; + for (int i = -frame_radius; i <= frame_radius; ++i, ++frame_idx) { + frame_weights[frame_idx] = std::exp(frame_coeff * i * i); + } + + // Create local copy with border. + std::vector param_path(num_models + 2 * frame_radius); + + const float param_sigma = + model_sigma != nullptr + ? ModelAdapter::GetParameter(*model_sigma, param) + : 0; + const float param_sigma_denom = + param_sigma != 0 ? (-0.5f / (param_sigma * param_sigma)) : 0; + + for (int model_idx = 0; model_idx < num_models; ++model_idx) { + param_path[model_idx + frame_radius] = + ModelAdapter::GetParameter((*models)[model_idx], param); + } + + // Copy right. + std::copy(param_path.rbegin() + frame_radius, + param_path.rbegin() + 2 * frame_radius, + param_path.end() - frame_radius); + + // Copy left. 
+ std::copy(param_path.begin() + frame_radius, + param_path.begin() + 2 * frame_radius, + param_path.rend() - frame_radius); + + // Apply filter. + for (int i = 0; i < num_models; ++i) { + float value_sum = 0; + float weight_sum = 0; + const float curr_value = param_path[i + frame_radius]; + + for (int k = 0; k < frame_diameter; ++k) { + const float value = param_path[i + k]; + float weight = frame_weights[k]; + if (param_sigma != 0) { + // Bilateral filtering. + const float value_diff = curr_value - value; + weight *= std::exp(value_diff * value_diff * param_sigma_denom); + } + weight_sum += weight; + value_sum += value * weight; + } + + // Weight_sum is always > 0, as sigma is > 0. + smoothed_model_data[i].push_back(value_sum / weight_sum); + } + } + + for (int i = 0; i < num_models; ++i) { + (*models)[i].CopyFrom(ModelAdapter::FromFloatPointer( + &smoothed_model_data[i][0], false)); + } +} + +// Inline implementations. + +// Translation model. +inline TranslationModel ModelAdapter::FromArgs(float dx, + float dy) { + TranslationModel model; + model.set_dx(dx); + model.set_dy(dy); + return model; +} + +inline TranslationModel ModelAdapter::FromFloatPointer( + const float* args, bool) { + DCHECK(args); + TranslationModel model; + model.set_dx(args[0]); + model.set_dy(args[1]); + return model; +} + +inline TranslationModel ModelAdapter::FromDoublePointer( + const double* args, bool) { + DCHECK(args); + TranslationModel model; + model.set_dx(args[0]); + model.set_dy(args[1]); + return model; +} + +inline Vector2_f ModelAdapter::TransformPoint( + const TranslationModel& model, const Vector2_f& pt) { + return Vector2_f(pt.x() + model.dx(), pt.y() + model.dy()); +} + +inline TranslationModel ModelAdapter::Invert( + const TranslationModel& model) { + bool success = true; + TranslationModel result = InvertChecked(model, &success); + if (!success) { + LOG(ERROR) << "Model not invertible. 
Returning identity."; + return TranslationModel(); + } + + return result; +} + +inline TranslationModel ModelAdapter::InvertChecked( + const TranslationModel& model, bool* success) { + TranslationModel inv_model; + inv_model.set_dx(-model.dx()); + inv_model.set_dy(-model.dy()); + *success = true; + return inv_model; +} + +inline TranslationModel ModelAdapter::Compose( + const TranslationModel& lhs, const TranslationModel& rhs) { + TranslationModel result; + result.set_dx(lhs.dx() + rhs.dx()); + result.set_dy(lhs.dy() + rhs.dy()); + return result; +} + +inline float ModelAdapter::GetParameter( + const TranslationModel& model, int id) { + switch (id) { + case 0: + return model.dx(); + case 1: + return model.dy(); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } + return 0; +} + +inline void ModelAdapter::SetParameter( + int id, float value, TranslationModel* model) { + switch (id) { + case 0: + return model->set_dx(value); + case 1: + return model->set_dy(value); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } +} + +// Linear Similarity model. +inline LinearSimilarityModel ModelAdapter::FromArgs( + float dx, float dy, float a, float b) { + LinearSimilarityModel model; + model.set_dx(dx); + model.set_dy(dy); + model.set_a(a); + model.set_b(b); + return model; +} + +inline LinearSimilarityModel +ModelAdapter::FromFloatPointer( + const float* args, bool identity_parametrization) { + DCHECK(args); + LinearSimilarityModel model; + const float id_shift = identity_parametrization ? 1.f : 0.f; + model.set_dx(args[0]); + model.set_dy(args[1]); + model.set_a(id_shift + args[2]); + model.set_b(args[3]); + return model; +} + +inline LinearSimilarityModel +ModelAdapter::FromDoublePointer( + const double* args, bool identity_parametrization) { + DCHECK(args); + LinearSimilarityModel model; + const float id_shift = identity_parametrization ? 
1.f : 0.f; + model.set_dx(args[0]); + model.set_dy(args[1]); + model.set_a(id_shift + args[2]); + model.set_b(args[3]); + return model; +} + +inline Vector2_f ModelAdapter::TransformPoint( + const LinearSimilarityModel& model, const Vector2_f& pt) { + return Vector2_f(model.a() * pt.x() - model.b() * pt.y() + model.dx(), + model.b() * pt.x() + model.a() * pt.y() + model.dy()); +} + +inline LinearSimilarityModel ModelAdapter::Invert( + const LinearSimilarityModel& model) { + bool success = true; + LinearSimilarityModel result = InvertChecked(model, &success); + if (!success) { + LOG(ERROR) << "Model not invertible. Returning identity."; + return LinearSimilarityModel(); + } else { + return result; + } +} + +inline LinearSimilarityModel ModelAdapter::InvertChecked( + const LinearSimilarityModel& model, bool* success) { + LinearSimilarityModel inv_model; + + const float det = model.a() * model.a() + model.b() * model.b(); + if (fabs(det) < kDetInvertibleEps) { + *success = false; + VLOG(1) << "Model is not invertible, det is zero."; + return LinearSimilarityModel(); + } + + *success = true; + const float inv_det = 1.0 / det; + + inv_model.set_a(model.a() * inv_det); + inv_model.set_b(-model.b() * inv_det); + + // Inverse translation is -A^(-1) * [dx dy]. 
+ inv_model.set_dx(-(inv_model.a() * model.dx() - inv_model.b() * model.dy())); + inv_model.set_dy(-(inv_model.b() * model.dx() + inv_model.a() * model.dy())); + + return inv_model; +} + +inline LinearSimilarityModel ModelAdapter::Compose( + const LinearSimilarityModel& lhs, const LinearSimilarityModel& rhs) { + LinearSimilarityModel result; + result.set_a(lhs.a() * rhs.a() - lhs.b() * rhs.b()); + result.set_b(lhs.a() * rhs.b() + lhs.b() * rhs.a()); + + result.set_dx(lhs.a() * rhs.dx() - lhs.b() * rhs.dy() + lhs.dx()); + result.set_dy(lhs.b() * rhs.dx() + lhs.a() * rhs.dy() + lhs.dy()); + return result; +} + +inline float ModelAdapter::GetParameter( + const LinearSimilarityModel& model, int id) { + switch (id) { + case 0: + return model.dx(); + case 1: + return model.dy(); + case 2: + return model.a(); + case 3: + return model.b(); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } + + return 0; +} + +inline void ModelAdapter::SetParameter( + int id, float value, LinearSimilarityModel* model) { + switch (id) { + case 0: + return model->set_dx(value); + case 1: + return model->set_dy(value); + case 2: + return model->set_a(value); + case 3: + return model->set_b(value); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } +} + +// Affine model. +inline AffineModel ModelAdapter::FromArgs(float dx, float dy, + float a, float b, + float c, float d) { + AffineModel model; + model.set_dx(dx); + model.set_dy(dy); + model.set_a(a); + model.set_b(b); + model.set_c(c); + model.set_d(d); + return model; +} + +inline AffineModel ModelAdapter::FromFloatPointer( + const float* args, bool identity_parametrization) { + DCHECK(args); + AffineModel model; + const float id_shift = identity_parametrization ? 
1.f : 0.f; + model.set_dx(args[0]); + model.set_dy(args[1]); + model.set_a(id_shift + args[2]); + model.set_b(args[3]); + model.set_c(args[4]); + model.set_d(id_shift + args[5]); + return model; +} + +inline AffineModel ModelAdapter::FromDoublePointer( + const double* args, bool identity_parametrization) { + DCHECK(args); + AffineModel model; + const float id_shift = identity_parametrization ? 1.f : 0.f; + model.set_dx(args[0]); + model.set_dy(args[1]); + model.set_a(id_shift + args[2]); + model.set_b(args[3]); + model.set_c(args[4]); + model.set_d(id_shift + args[5]); + return model; +} + +inline Vector2_f ModelAdapter::TransformPoint( + const AffineModel& model, const Vector2_f& pt) { + return Vector2_f(model.a() * pt.x() + model.b() * pt.y() + model.dx(), + model.c() * pt.x() + model.d() * pt.y() + model.dy()); +} + +// Use of Invert is discouraged, always use InvertChecked. +inline AffineModel ModelAdapter::Invert(const AffineModel& model) { + bool success = true; + AffineModel result = InvertChecked(model, &success); + if (!success) { + LOG(ERROR) << "Model not invertible. Returning identity."; + return AffineModel(); + } else { + return result; + } +} + +inline AffineModel ModelAdapter::InvertChecked( + const AffineModel& model, bool* success) { + AffineModel inv_model; + const float det = model.a() * model.d() - model.b() * model.c(); + if (fabs(det) < kDetInvertibleEps) { + *success = false; + VLOG(1) << "Model is not invertible, det is zero."; + return AffineModel(); + } + + *success = true; + const float inv_det = 1.0 / det; + + inv_model.set_a(model.d() * inv_det); + inv_model.set_d(model.a() * inv_det); + inv_model.set_c(-model.c() * inv_det); + inv_model.set_b(-model.b() * inv_det); + + // Inverse translation is -A^(-1) * [dx dy]. 
+ inv_model.set_dx(-(inv_model.a() * model.dx() + inv_model.b() * model.dy())); + inv_model.set_dy(-(inv_model.c() * model.dx() + inv_model.d() * model.dy())); + + return inv_model; +} + +inline AffineModel ModelAdapter::Compose(const AffineModel& lhs, + const AffineModel& rhs) { + AffineModel result; + result.set_a(lhs.a() * rhs.a() + lhs.b() * rhs.c()); + result.set_b(lhs.a() * rhs.b() + lhs.b() * rhs.d()); + result.set_c(lhs.c() * rhs.a() + lhs.d() * rhs.c()); + result.set_d(lhs.c() * rhs.b() + lhs.d() * rhs.d()); + + result.set_dx(lhs.a() * rhs.dx() + lhs.b() * rhs.dy() + lhs.dx()); + result.set_dy(lhs.c() * rhs.dx() + lhs.d() * rhs.dy() + lhs.dy()); + return result; +} + +inline float ModelAdapter::GetParameter(const AffineModel& model, + int id) { + switch (id) { + case 0: + return model.dx(); + case 1: + return model.dy(); + case 2: + return model.a(); + case 3: + return model.b(); + case 4: + return model.c(); + case 5: + return model.d(); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } + + return 0; +} + +inline void ModelAdapter::SetParameter(int id, float value, + AffineModel* model) { + switch (id) { + case 0: + return model->set_dx(value); + case 1: + return model->set_dy(value); + case 2: + return model->set_a(value); + case 3: + return model->set_b(value); + case 4: + return model->set_c(value); + case 5: + return model->set_d(value); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } +} + +// Homography model. 
+inline Homography ModelAdapter::FromArgs(float h_00, float h_01, + float h_02, float h_10, + float h_11, float h_12, + float h_20, float h_21) { + Homography model; + model.set_h_00(h_00); + model.set_h_01(h_01); + model.set_h_02(h_02); + model.set_h_10(h_10); + model.set_h_11(h_11); + model.set_h_12(h_12); + model.set_h_20(h_20); + model.set_h_21(h_21); + return model; +} + +inline Homography ModelAdapter::FromFloatPointer( + const float* args, bool identity_parametrization) { + DCHECK(args); + Homography model; + const float id_shift = identity_parametrization ? 1.f : 0.f; + model.set_h_00(id_shift + args[0]); + model.set_h_01(args[1]); + model.set_h_02(args[2]); + model.set_h_10(args[3]); + model.set_h_11(id_shift + args[4]); + model.set_h_12(args[5]); + model.set_h_20(args[6]); + model.set_h_21(args[7]); + return model; +} + +inline Homography ModelAdapter::FromDoublePointer( + const double* args, bool identity_parametrization) { + DCHECK(args); + Homography model; + const float id_shift = identity_parametrization ? 1.f : 0.f; + model.set_h_00(id_shift + args[0]); + model.set_h_01(args[1]); + model.set_h_02(args[2]); + model.set_h_10(args[3]); + model.set_h_11(id_shift + args[4]); + model.set_h_12(args[5]); + model.set_h_20(args[6]); + model.set_h_21(args[7]); + return model; +} + +inline Vector2_f ModelAdapter::TransformPoint( + const Homography& model, const Vector2_f& pt) { + const float x = model.h_00() * pt.x() + model.h_01() * pt.y() + model.h_02(); + const float y = model.h_10() * pt.x() + model.h_11() * pt.y() + model.h_12(); + float z = model.h_20() * pt.x() + model.h_21() * pt.y() + 1.0f; + + if (z != 1.f) { + // Enforce z can not assume very small values. + constexpr float eps = 1e-12f; + if (fabs(z) < eps) { + LOG(ERROR) << "Point mapped to infinity. " + << "Degenerate homography. See proto."; + z = z >= 0 ? 
eps : -eps; + } + return Vector2_f(x / z, y / z); + } else { + return Vector2_f(x, y); + } +} + +inline Vector3_f ModelAdapter::TransformPoint3( + const Homography& model, const Vector3_f& pt) { + return Vector3_f( + model.h_00() * pt.x() + model.h_01() * pt.y() + model.h_02() * pt.z(), + model.h_10() * pt.x() + model.h_11() * pt.y() + model.h_12() * pt.z(), + model.h_20() * pt.x() + model.h_21() * pt.y() + pt.z()); +} + +inline Homography ModelAdapter::Invert(const Homography& model) { + bool success = true; + Homography result = InvertChecked(model, &success); + if (!success) { + LOG(ERROR) << "Model not invertible. Returning identity."; + return Homography(); + } else { + return result; + } +} + +inline Homography ModelAdapter::Compose(const Homography& lhs, + const Homography& rhs) { + Homography result; + const float z = + lhs.h_20() * rhs.h_02() + lhs.h_21() * rhs.h_12() + 1.0f * 1.0f; + CHECK_NE(z, 0) << "Degenerate homography. See proto."; + const float inv_z = 1.0 / z; + + result.set_h_00((lhs.h_00() * rhs.h_00() + lhs.h_01() * rhs.h_10() + + lhs.h_02() * rhs.h_20()) * + inv_z); + result.set_h_01((lhs.h_00() * rhs.h_01() + lhs.h_01() * rhs.h_11() + + lhs.h_02() * rhs.h_21()) * + inv_z); + result.set_h_02( + (lhs.h_00() * rhs.h_02() + lhs.h_01() * rhs.h_12() + lhs.h_02() * 1.0f) * + inv_z); + + result.set_h_10((lhs.h_10() * rhs.h_00() + lhs.h_11() * rhs.h_10() + + lhs.h_12() * rhs.h_20()) * + inv_z); + result.set_h_11((lhs.h_10() * rhs.h_01() + lhs.h_11() * rhs.h_11() + + lhs.h_12() * rhs.h_21()) * + inv_z); + result.set_h_12( + (lhs.h_10() * rhs.h_02() + lhs.h_11() * rhs.h_12() + lhs.h_12() * 1.0f) * + inv_z); + + result.set_h_20( + (lhs.h_20() * rhs.h_00() + lhs.h_21() * rhs.h_10() + 1.0f * rhs.h_20()) * + inv_z); + result.set_h_21( + (lhs.h_20() * rhs.h_01() + lhs.h_21() * rhs.h_11() + 1.f * rhs.h_21()) * + inv_z); + return result; +} + +inline float ModelAdapter::GetParameter(const Homography& model, + int id) { + switch (id) { + case 0: + return 
model.h_00(); + case 1: + return model.h_01(); + case 2: + return model.h_02(); + case 3: + return model.h_10(); + case 4: + return model.h_11(); + case 5: + return model.h_12(); + case 6: + return model.h_20(); + case 7: + return model.h_21(); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } + + return 0; +} + +inline void ModelAdapter::SetParameter(int id, float value, + Homography* model) { + switch (id) { + case 0: + return model->set_h_00(value); + case 1: + return model->set_h_01(value); + case 2: + return model->set_h_02(value); + case 3: + return model->set_h_10(value); + case 4: + return model->set_h_11(value); + case 5: + return model->set_h_12(value); + case 6: + return model->set_h_20(value); + case 7: + return model->set_h_21(value); + default: + LOG(FATAL) << "Parameter id is out of bounds"; + } +} + +// MixtureModelAdapterBase implementation. +template +typename MixtureTraits::model +MixtureModelAdapterBase::FromFloatPointer( + const float* args, bool identity_parametrization, int skip, + int num_models) { + MixtureModel model; + const float* arg_ptr = args; + for (int i = 0; i < num_models; + ++i, arg_ptr += BaseModelAdapter::NumParameters() + skip) { + BaseModel base = + BaseModelAdapter::FromFloatPointer(arg_ptr, identity_parametrization); + model.add_model()->CopyFrom(base); + } + + return model; +} + +template +typename MixtureTraits::model +MixtureModelAdapterBase::FromDoublePointer( + const double* args, bool identity_parametrization, int skip, + int num_models) { + MixtureModel model; + const double* arg_ptr = args; + for (int i = 0; i < num_models; + ++i, arg_ptr += BaseModelAdapter::NumParameters() + skip) { + BaseModel base = + BaseModelAdapter::FromDoublePointer(arg_ptr, identity_parametrization); + model.add_model()->CopyFrom(base); + } + + return model; +} + +template +typename MixtureTraits::model +MixtureModelAdapterBase::ComposeRight( + const MixtureModel& mixture_model, const BaseModel& base_model) { + const int 
num_models = mixture_model.model_size(); + MixtureModel result; + for (int m = 0; m < num_models; ++m) { + result.add_model()->CopyFrom( + BaseModelAdapter::Compose(mixture_model.model(m), base_model)); + } + return result; +} + +template +typename MixtureTraits::model +MixtureModelAdapterBase::ComposeLeft( + const MixtureModel& mixture_model, const BaseModel& base_model) { + const int num_models = mixture_model.model_size(); + MixtureModel result; + for (int m = 0; m < num_models; ++m) { + result.add_model()->CopyFrom( + BaseModelAdapter::Compose(base_model, mixture_model.model(m))); + } + return result; +} + +template +std::string MixtureModelAdapterBase::ToString( + const MixtureModel& model, std::string delim) { + std::string result = ""; + for (int m = 0, size = model.model_size(); m < size; ++m) { + result += + (m == 0 ? "" : delim) + BaseModelAdapter::ToString(model.model(m)); + } + return result; +} + +template +float MixtureModelAdapterBase::GetParameter( + const MixtureModel& model, int model_id, int param_id) { + return BaseModelAdapter::GetParameter(model.model(model_id), param_id); +} + +template +void MixtureModelAdapterBase::SetParameter(int model_id, + int param_id, + float value, + MixtureModel* model) { + BaseModelAdapter::SetParameter(param_id, value, + model->mutable_model(model_id)); +} + +template +typename MixtureTraits::model +MixtureModelAdapterBase::IdentityModel(int num_mixtures) { + MixtureModel model; + for (int i = 0; i < num_mixtures; ++i) { + model.add_model(); + } + return model; +} + +template +typename MixtureTraits::base_model +MixtureModelAdapterBase::MeanModel( + const MixtureModel& mixture_model) { + const int num_models = mixture_model.model_size(); + if (num_models == 0) { + return BaseModel(); + } + + float params[BaseModelAdapter::NumParameters()]; + memset(params, 0, sizeof(params[0]) * BaseModelAdapter::NumParameters()); + + // Average of models. 
+ const float denom = 1.0f / num_models; + for (int k = 0; k < BaseModelAdapter::NumParameters(); ++k) { + for (int m = 0; m < num_models; ++m) { + params[k] += BaseModelAdapter::GetParameter(mixture_model.model(m), k); + } + params[k] *= denom; + } + + return BaseModelAdapter::FromFloatPointer(params, false); +} + +template +typename MixtureTraits::model +MixtureModelAdapterBase::LinearModel( + const MixtureModel& mixture_model) { + // For each parameter: Fit line param_idx -> param value. + const int num_models = mixture_model.model_size(); + if (num_models <= 1) { + return mixture_model; + } + + std::vector result(num_models * BaseModelAdapter::NumParameters()); + const double inv_models = 1.0f / num_models; + for (int p = 0; p < BaseModelAdapter::NumParameters(); ++p) { + // Calculate sum, sq_sum and inner product. + double sum_x = 0.0; + double sum_y = 0.0; + double sum_xx = 0.0; + double sum_yy = 0.0; + double sum_xy = 0.0; + for (int m = 0; m < num_models; ++m) { + const float x = m * inv_models; + sum_x += x; + sum_xx += x * x; + const double y = GetParameter(mixture_model, m, p); + sum_y += y; + sum_yy += y * y; + sum_xy += x * y; + } + + const double denom = sum_xx - inv_models * sum_x * sum_x; + CHECK_NE(denom, 0); // As num_models > 1. + const double a = (sum_xy - inv_models * sum_x * sum_y) * denom; + const double b = inv_models * (sum_y - a * sum_x); + + for (int m = 0; m < num_models; ++m) { + const float x = m * inv_models; + result[m * BaseModelAdapter::NumParameters() + p] = a * x + b; + } + } + MixtureModel result_model = + FromFloatPointer(&result[0], false, 0, num_models); + + return result_model; +} + +template +typename MixtureTraits::model MixtureModelAdapterBase::Embed( + const BaseModel& base_model, int num_mixtures) { + MixtureModel model; + for (int i = 0; i < num_mixtures; ++i) { + model.add_model()->CopyFrom(base_model); + } + return model; +} + +// MixtureModelAdapter implementation. 
+template +typename MixtureModelAdapterBase::BaseModel +MixtureModelAdapter::ToBaseModel( + const MixtureModel& mixture_model, const float* weights) { + const int num_models = mixture_model.model_size(); + + float params[BaseModelAdapter::NumParameters()]; + memset(params, 0, sizeof(params[0]) * BaseModelAdapter::NumParameters()); + + // Weighted combination of mixture models. + for (int m = 0; m < num_models; ++m) { + for (int k = 0; k < BaseModelAdapter::NumParameters(); ++k) { + params[k] += BaseModelAdapter::GetParameter(mixture_model.model(m), k) * + weights[m]; + } + } + + return BaseModelAdapter::FromFloatPointer(params, false); +} + +template +Vector2_f MixtureModelAdapter::TransformPoint( + const MixtureModel& model, const float* weights, const Vector2_f& pt) { + const int num_models = model.model_size(); + const Vector3_f pt3(pt.x(), pt.y(), 1.0f); + Vector3_f result(0, 0, 0); + for (int i = 0; i < num_models; ++i) { + result += + BaseModelAdapter::TransformPoint3(model.model(i), pt3 * weights[i]); + } + + DCHECK_NE(result.z(), 0) << "Degenerate mapping."; + return Vector2_f(result.x() / result.z(), result.y() / result.z()); +} + +template +Vector2_f MixtureModelAdapter::TransformPoint( + const MixtureModel& model, const MixtureRowWeights& weights, + const Vector2_f& pt) { + return TransformPoint(model, weights.RowWeightsClamped(pt.y()), pt); +} + +template +Vector2_f MixtureModelAdapter::SolveForPoint( + const MixtureModel& model, const float* weights, const Vector2_f& pt) { + BaseModel base_model = ToBaseModel(model, weights); + return BaseModelAdapter::TransformPoint(BaseModelAdapter::Invert(base_model), + pt); +} + +template +Vector2_f MixtureModelAdapter::SolveForPointChecked( + const MixtureModel& model, const float* weights, const Vector2_f& pt, + bool* success) { + BaseModel base_model = ToBaseModel(model, weights); + BaseModel inv_base_model = + BaseModelAdapter::InvertChecked(base_model, success); + return 
BaseModelAdapter::TransformPoint(inv_base_model, pt); +} + +// MixtureModelAdapter implementation. +inline Homography MixtureModelAdapter::ToBaseModel( + const MixtureHomography& mixture_model, const float* weights) { + const int num_models = mixture_model.model_size(); + + float params[8] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; + const Homography& const_homog = mixture_model.model(0); + + // Weighted combination of mixture models. + switch (mixture_model.dof()) { + case MixtureHomography::ALL_DOF: + for (int m = 0; m < num_models; ++m) { + params[0] += mixture_model.model(m).h_00() * weights[m]; + params[1] += mixture_model.model(m).h_01() * weights[m]; + params[2] += mixture_model.model(m).h_02() * weights[m]; + params[3] += mixture_model.model(m).h_10() * weights[m]; + params[4] += mixture_model.model(m).h_11() * weights[m]; + params[5] += mixture_model.model(m).h_12() * weights[m]; + params[6] += mixture_model.model(m).h_20() * weights[m]; + params[7] += mixture_model.model(m).h_21() * weights[m]; + } + break; + case MixtureHomography::TRANSLATION_DOF: + params[0] = const_homog.h_00(); + params[1] = const_homog.h_01(); + params[3] = const_homog.h_10(); + params[4] = const_homog.h_11(); + params[6] = const_homog.h_20(); + params[7] = const_homog.h_21(); + + for (int m = 0; m < num_models; ++m) { + params[2] += mixture_model.model(m).h_02() * weights[m]; + params[5] += mixture_model.model(m).h_12() * weights[m]; + } + break; + case MixtureHomography::SKEW_ROTATION_DOF: + params[0] = const_homog.h_00(); + params[4] = const_homog.h_11(); + params[6] = const_homog.h_20(); + params[7] = const_homog.h_21(); + for (int m = 0; m < num_models; ++m) { + params[1] += mixture_model.model(m).h_01() * weights[m]; + params[2] += mixture_model.model(m).h_02() * weights[m]; + params[3] += mixture_model.model(m).h_10() * weights[m]; + params[5] += mixture_model.model(m).h_12() * weights[m]; + } + break; + case MixtureHomography::CONST_DOF: + return const_homog; + default: + 
LOG(FATAL) << "Unknown type."; + } + + return HomographyAdapter::FromFloatPointer(params, false); +} + +inline Vector2_f MixtureModelAdapter::TransformPoint( + const MixtureHomography& model, const float* weights, const Vector2_f& pt) { + const int num_models = model.model_size(); + const Homography& const_homog = model.model(0); + Vector3_f result(0, 0, 0); + const Vector3_f pt3(pt.x(), pt.y(), 1.0f); + float x; + float y; + switch (model.dof()) { + case MixtureHomography::ALL_DOF: + for (int i = 0; i < num_models; ++i) { + result += HomographyAdapter::TransformPoint3(model.model(i), + pt3 * weights[i]); + } + break; + case MixtureHomography::TRANSLATION_DOF: + x = const_homog.h_00() * pt.x() + const_homog.h_01() * pt.y(); + y = const_homog.h_10() * pt.x() + const_homog.h_11() * pt.y(); + for (int i = 0; i < num_models; ++i) { + x += model.model(i).h_02() * weights[i]; + y += model.model(i).h_12() * weights[i]; + } + result = Vector3_f( + x, y, + const_homog.h_20() * pt.x() + const_homog.h_21() * pt.y() + 1.0f); + break; + case MixtureHomography::SKEW_ROTATION_DOF: + x = const_homog.h_00() * pt.x(); + y = const_homog.h_11() * pt.y(); + for (int i = 0; i < num_models; ++i) { + x += (model.model(i).h_01() * pt.y() + model.model(i).h_02()) * + weights[i]; + y += (model.model(i).h_10() * pt.x() + model.model(i).h_12()) * + weights[i]; + } + result = Vector3_f( + x, y, + const_homog.h_20() * pt.x() + const_homog.h_21() * pt.y() + 1.0f); + break; + case MixtureHomography::CONST_DOF: + return HomographyAdapter::TransformPoint(model.model(0), pt); + default: + LOG(FATAL) << "Unknown type."; + } + + DCHECK_NE(result.z(), 0) << "Degenerate mapping."; + return Vector2_f(result.x() / result.z(), result.y() / result.z()); +} + +inline Vector2_f MixtureModelAdapter::TransformPoint( + const MixtureHomography& model, const MixtureRowWeights& weights, + const Vector2_f& pt) { + return TransformPoint(model, weights.RowWeightsClamped(pt.y()), pt); +} + +inline Vector2_f 
MixtureModelAdapter::SolveForPoint( + const MixtureHomography& model, const float* weights, const Vector2_f& pt) { + Homography base_model = ToBaseModel(model, weights); + return HomographyAdapter::TransformPoint( + HomographyAdapter::Invert(base_model), pt); +} + +inline Vector2_f MixtureModelAdapter::SolveForPointChecked( + const MixtureHomography& model, const float* weights, const Vector2_f& pt, + bool* success) { + Homography base_model = ToBaseModel(model, weights); + Homography inv_base_model = + HomographyAdapter::InvertChecked(base_model, success); + return HomographyAdapter::TransformPoint(inv_base_model, pt); +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_H_ diff --git a/mediapipe/util/tracking/motion_models.proto b/mediapipe/util/tracking/motion_models.proto new file mode 100644 index 000000000..889cc425a --- /dev/null +++ b/mediapipe/util/tracking/motion_models.proto @@ -0,0 +1,122 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Motion models, that are supported and estimated by MotionStabilization. +// Note, that transformations represent the motion of the +// feature points from the previous frame to the current one. +// Note that the actual camera movement, is the inverse of this +// transformation. +// Check carefully, which movement (feature or camera) is required. +// Note that for each model the default values always specify an identity +// transform. 
Follow this rule when adding new models. + +syntax = "proto2"; + +package mediapipe; + +// Here x represents a 2D point in the image plane, in the following +// coordinate system: +// *----> x +// | +// | +// v y + +// Simple translational model: +// I * x + [dx; dy] with I being 2x2 identity transform. +message TranslationModel { + optional float dx = 1 [default = 0]; + optional float dy = 2 [default = 0]; +} + +// Non-linear similarity model (w.r.t. to its parametrization). +// c_r := cos(rotation); +// s_r := sin(rotation); +// Transformation applied to x: +// [scale 0; * [c_r -s_r; * x + [dx; +// 0 scale] s_r c_r] dy] +message SimilarityModel { + optional float dx = 1 [default = 0]; + optional float dy = 2 [default = 0]; + optional float scale = 3 [default = 1]; + optional float rotation = 4 [default = 0]; // angle in [-pi, pi]. +} + +// Linear similarity model: +// [a -b; * x + [dx; +// b a] dy] +message LinearSimilarityModel { + optional float dx = 1 [default = 0]; + optional float dy = 2 [default = 0]; + optional float a = 3 [default = 1]; + optional float b = 4 [default = 0]; +} + +// Affine according to +// ( [a b * x + [dx; +// ( c d] dy] +message AffineModel { + optional float dx = 1 [default = 0]; + optional float dy = 2 [default = 0]; + optional float a = 3 [default = 1]; + optional float b = 4 [default = 0]; + optional float c = 5 [default = 0]; + optional float d = 6 [default = 1]; +} + +// Homography according to +// [h_00 h_01 h_02; +// h_10 h_11 h_12; +// h_20 h_21 1]; +// Note: The parametrization with h_22 = 1 does not always hold, e.g. +// if the origin (0, 0, 1) gets mapped to the line at infinity +// (0, 0, 1). However for video we expect small perspective +// changes between frames and this parametrization improves +// robustness greatly as it removes an additional DOF. +// Therefore, all methods in motion_stabilization should not be +// used for general wide-baseline matching of frames. 
+message Homography { + optional float h_00 = 1 [default = 1]; + optional float h_01 = 2 [default = 0]; + optional float h_02 = 3 [default = 0]; + optional float h_10 = 4 [default = 0]; + optional float h_11 = 5 [default = 1]; + optional float h_12 = 6 [default = 0]; + optional float h_20 = 7 [default = 0]; + optional float h_21 = 8 [default = 0]; +} + +// Mixture models with higher degrees of freedom, according to +// \sum_i model(i) * weight(i), where weights are passed during transform and +// are expected to sum to one. +message MixtureLinearSimilarity { + repeated LinearSimilarityModel model = 1; +} + +message MixtureAffine { + repeated AffineModel model = 1; +} + +message MixtureHomography { + repeated Homography model = 1; + // Specifies which degree of freedom vary across mixture. + // Can be used to implement several transformation functions quicker. + enum VariableDOF { + ALL_DOF = 0; // All dof are variable. + TRANSLATION_DOF = 1; // Only translation (h_02, h_12) varies. + SKEW_ROTATION_DOF = 2; // Translation (h_02, h_12), and skew-rotation + // (h_01, h_10) vary. + CONST_DOF = 3; // Mixture is constant. + } + optional VariableDOF dof = 2 [default = ALL_DOF]; +} diff --git a/mediapipe/util/tracking/motion_models_cv.cc b/mediapipe/util/tracking/motion_models_cv.cc new file mode 100644 index 000000000..b9b428adb --- /dev/null +++ b/mediapipe/util/tracking/motion_models_cv.cc @@ -0,0 +1,57 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/motion_models_cv.h" + +namespace mediapipe { + +void ModelCvConvert::ToCvMat(const TranslationModel& model, + cv::Mat* matrix) { + ModelCvConvert::ToCvMat( + ModelAdapter::ToAffine(model), matrix); +} + +void ModelCvConvert::ToCvMat( + const LinearSimilarityModel& model, cv::Mat* matrix) { + ModelCvConvert::ToCvMat( + ModelAdapter::ToAffine(model), matrix); +} + +void ModelCvConvert::ToCvMat(const AffineModel& model, + cv::Mat* matrix) { + matrix->create(2, 3, CV_32FC1); + matrix->at(0, 0) = model.a(); + matrix->at(0, 1) = model.b(); + matrix->at(0, 2) = model.dx(); + matrix->at(1, 0) = model.c(); + matrix->at(1, 1) = model.d(); + matrix->at(1, 2) = model.dy(); +} + +void ModelCvConvert::ToCvMat(const Homography& model, + cv::Mat* matrix) { + CHECK(matrix != nullptr); + matrix->create(3, 3, CV_32FC1); + matrix->at(0, 0) = model.h_00(); + matrix->at(0, 1) = model.h_01(); + matrix->at(0, 2) = model.h_02(); + matrix->at(1, 0) = model.h_10(); + matrix->at(1, 1) = model.h_11(); + matrix->at(1, 2) = model.h_12(); + matrix->at(2, 0) = model.h_20(); + matrix->at(2, 1) = model.h_21(); + matrix->at(2, 2) = 1.0; +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/motion_models_cv.h b/mediapipe/util/tracking/motion_models_cv.h new file mode 100644 index 000000000..3f8da051c --- /dev/null +++ b/mediapipe/util/tracking/motion_models_cv.h @@ -0,0 +1,63 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_ +#define MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_ + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/motion_models.pb.h" // NOLINT + +namespace mediapipe { + +template +class ModelCvConvert {}; + +// Specialized implementations, with additional functionality if needed. +template <> +class ModelCvConvert { + public: + // Returns 2x3 floating point cv::Mat with model parameters. + static void ToCvMat(const TranslationModel& model, cv::Mat* matrix); +}; + +template <> +class ModelCvConvert { + public: + // Returns 2x3 floating point cv::Mat with model parameters. + static void ToCvMat(const LinearSimilarityModel& model, cv::Mat* matrix); +}; + +template <> +class ModelCvConvert { + public: + // Returns 2x3 floating point cv::Mat with model parameters. + static void ToCvMat(const AffineModel& model, cv::Mat* matrix); +}; + +template <> +class ModelCvConvert { + public: + // Returns 3x3 floating point cv::Mat with model parameters. 
+ static void ToCvMat(const Homography& model, cv::Mat* matrix); +}; + +template +void ModelToCvMat(const Model& model, cv::Mat* matrix) { + ModelCvConvert::ToCvMat(model, matrix); +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_MODELS_CV_H_ diff --git a/mediapipe/util/tracking/motion_models_test.cc b/mediapipe/util/tracking/motion_models_test.cc new file mode 100644 index 000000000..194aedcb6 --- /dev/null +++ b/mediapipe/util/tracking/motion_models_test.cc @@ -0,0 +1,335 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/motion_models.h" + +#include "mediapipe/framework/deps/message_matchers.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/proto_ns.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/motion_estimation.h" + +namespace mediapipe { +namespace { + +static const float kArrayFloat[] = {1, 2, 3, 4, 5, 6, 7, 8}; +static const double kArrayDouble[] = {1, 2, 3, 4, 5, 6, 7, 8}; + +class MotionModelsTest : public ::testing::Test {}; + +// Test from/to array and parameter indexing functions. 
+// Parses the two text protos into Model instances and checks that the
+// array-based constructors and GetParameter agree with them.
+template <class Model>
+void CheckFromArrayAndGetParameter(const char* model_zero_string,
+                                   const char* model_identity_string) {
+  Model model_zero;
+  Model model_identity;
+
+  ASSERT_TRUE(
+      proto_ns::TextFormat::ParseFromString(model_zero_string, &model_zero));
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_identity_string,
+                                                    &model_identity));
+
+  typedef ModelAdapter<Model> Adapter;
+
+  EXPECT_THAT(Adapter::FromFloatPointer(kArrayFloat, false),
+              mediapipe::EqualsProto(model_zero));
+
+  EXPECT_THAT(Adapter::FromFloatPointer(kArrayFloat, true),
+              mediapipe::EqualsProto(model_identity));
+
+  EXPECT_THAT(Adapter::FromDoublePointer(kArrayDouble, false),
+              mediapipe::EqualsProto(model_zero));
+
+  EXPECT_THAT(Adapter::FromDoublePointer(kArrayDouble, true),
+              mediapipe::EqualsProto(model_identity));
+
+  ASSERT_LE(Adapter::NumParameters(), 8);
+  for (int i = 0; i < Adapter::NumParameters(); ++i) {
+    EXPECT_EQ(kArrayFloat[i], Adapter::GetParameter(model_zero, i));
+  }
+}
+
+TEST_F(MotionModelsTest, FromArrayAndGetParameter) {
+  CheckFromArrayAndGetParameter<TranslationModel>("dx: 1 dy: 2", "dx: 1 dy: 2");
+
+  CheckFromArrayAndGetParameter<SimilarityModel>(
+      "dx: 1 dy: 2 scale: 3 rotation: 4", "dx: 1 dy: 2 scale: 4 rotation: 4");
+
+  CheckFromArrayAndGetParameter<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: 4",
+                                                       "dx: 1 dy: 2 a: 4 b: 4");
+
+  CheckFromArrayAndGetParameter<AffineModel>("dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6",
+                                             "dx: 1 dy: 2 a: 4 b: 4 c: 5 d: 7");
+
+  CheckFromArrayAndGetParameter<Homography>(
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: 4 h_11: 5 h_12: 6 "
+      "h_20: 7 h_21: 8 ",
+      "h_00: 2 h_01: 2 h_02: 3 "
+      "h_10: 4 h_11: 6 h_12: 6 "
+      "h_20: 7 h_21: 8 ");
+}
+
+// Test point transformations.
+// Parses model_string into a Model, transforms (x_in, y_in) and checks the
+// result against (x_out, y_out) within 1e-5.
+template <class Model>
+void CheckTransformPoint(const char* model_string, float x_in, float y_in,
+                         float x_out, float y_out) {
+  Model model;
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
+
+  const Vector2_f output =
+      ModelAdapter<Model>::TransformPoint(model, Vector2_f(x_in, y_in));
+
+  EXPECT_NEAR(x_out, output.x(), 1e-5);
+  EXPECT_NEAR(y_out, output.y(), 1e-5);
+}
+
+TEST_F(MotionModelsTest, TransformPoint) {
+  CheckTransformPoint<TranslationModel>("dx: 0 dy: 0", 1, 1, 1, 1);
+  CheckTransformPoint<TranslationModel>("dx: 1 dy: -1", 1, 1, 2, 0);
+
+  CheckTransformPoint<SimilarityModel>(
+      "dx: 0 dy: 0 scale: 1 rotation: 1.57079633", 1, 2, -2, 1);
+  CheckTransformPoint<SimilarityModel>(
+      "dx: 1 dy: -1 scale: 1 rotation: 1.57079633", 1, 2, -1, 0);
+  CheckTransformPoint<SimilarityModel>(
+      "dx: 1 dy: -1 scale: 2 rotation: 1.57079633", 1, 2, -3, 1);
+
+  CheckTransformPoint<LinearSimilarityModel>("dx: 0 dy: 0 a: 1 b: -0.5", 1, 2,
+                                             2, 1.5);
+  CheckTransformPoint<LinearSimilarityModel>("dx: 0.5 dy: -0.5 a: 1 b: 0.5", 1,
+                                             2, 0.5, 2);
+  CheckTransformPoint<LinearSimilarityModel>("dx: 0.5 dy: -0.5 a: 0.5 b: 0.5",
+                                             1, 2, 0, 1);
+
+  CheckTransformPoint<AffineModel>("dx: 0 dy: 0 a: 1 b: 0.5 c: -0.5 d: 1", 1, 2,
+                                   2, 1.5);
+  CheckTransformPoint<AffineModel>("dx: 0.5 dy: -0.5 a: 2 b: -0.5 c: 0.5 d: 1",
+                                   1, 2, 1.5, 2);
+  CheckTransformPoint<AffineModel>("dx: 1 dy: -1 a: 2 b: -2 c: 1 d: -1", 1, 2,
+                                   -1, -2);
+
+  // Transformations by Homography are followed by division by the 3rd element.
+  // Test division by value != 1.
+  CheckTransformPoint<Homography>(
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: 4 h_11: 3 h_12: 6 "
+      "h_20: 7 h_21: 8 ",
+      1, 2, 8.0 / 24.0, 16.0 / 24.0);
+  // Test division by 1.
+  CheckTransformPoint<Homography>(
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: 4 h_11: 3 h_12: 6 "
+      "h_20: 2 h_21: -1 ",
+      1, 2, 8.0, 16.0);
+}
+
+// Test model inversions.
+// Inverts model_string's model and compares each parameter against the
+// expected inverse within 1e-5.
+template <class Model>
+void CheckInvert(const char* model_string, const char* inv_model_string) {
+  Model model;
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model));
+
+  Model inv_model_expected;
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(inv_model_string,
+                                                    &inv_model_expected));
+
+  typedef ModelAdapter<Model> Adapter;
+  const Model inv_model_actual = Adapter::Invert(model);
+
+  for (int i = 0; i < Adapter::NumParameters(); ++i) {
+    EXPECT_NEAR(Adapter::GetParameter(inv_model_expected, i),
+                Adapter::GetParameter(inv_model_actual, i), 1e-5)
+        << "Parameter index: " << i << " of total " << Adapter::NumParameters();
+  }
+}
+
+TEST_F(MotionModelsTest, Invert) {
+  CheckInvert<TranslationModel>("dx: 1 dy: -2", "dx: -1 dy: 2");
+
+  CheckInvert<SimilarityModel>("dx: 0 dy: 0 scale: 1 rotation: 1.57079633",
+                               "dx: 0 dy: 0 scale: 1 rotation: -1.57079633");
+  CheckInvert<SimilarityModel>("dx: 1 dy: -2 scale: 1 rotation: 1.57079633",
+                               "dx: 2 dy: 1 scale: 1 rotation: -1.57079633");
+  CheckInvert<SimilarityModel>("dx: 1 dy: -2 scale: 0.5 rotation: 1.57079633",
+                               "dx: 4 dy: 2 scale: 2 rotation: -1.57079633");
+
+  CheckInvert<LinearSimilarityModel>("dx: 1 dy: 2 a: 3 b: 4 ",
+                                     "dx: -0.44 dy: -0.08 a: 0.12 b: -0.16 ");
+
+  // Test division by value != 1.
+  CheckInvert<Homography>(
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: -3 h_11: -2 h_12: -1 "
+      "h_20: 8 h_21: -1 ",
+      "h_00: -0.75 h_01: -1.25 h_02: 1 "
+      "h_10: -1.25 h_11: -5.75 h_12: -2 "
+      "h_20: 4.75 h_21: 4.25 ");
+  // Test division by 1.
+  CheckInvert<Homography>(
+      "h_00: -0.75 h_01: -1.25 h_02: 1 "
+      "h_10: -1.25 h_11: -5.75 h_12: -2 "
+      "h_20: 4.75 h_21: 4.25 ",
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: -3 h_11: -2 h_12: -1 "
+      "h_20: 8 h_21: -1 ");
+}
+
+// Test model compositions.
+// Composes the two parsed models and compares each parameter of the result
+// against the expected composition within 1e-5.
+template <class Model>
+void CheckCompose(const char* model_string1, const char* model_string2,
+                  const char* composed_string) {
+  Model model1;
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string1, &model1));
+
+  Model model2;
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string2, &model2));
+
+  Model composed_expected;
+  ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(composed_string,
+                                                    &composed_expected));
+
+  typedef ModelAdapter<Model> Adapter;
+  const Model composed_actual = Adapter::Compose(model1, model2);
+
+  for (int i = 0; i < Adapter::NumParameters(); ++i) {
+    EXPECT_NEAR(Adapter::GetParameter(composed_expected, i),
+                Adapter::GetParameter(composed_actual, i), 1e-5)
+        << "Parameter index: " << i << " of total " << Adapter::NumParameters();
+  }
+}
+
+TEST_F(MotionModelsTest, Compose) {
+  CheckCompose<TranslationModel>("dx: 1 dy: -2", "dx: -3 dy: 4",
+                                 "dx: -2 dy: 2");
+
+  CheckCompose<SimilarityModel>(
+      "dx: 1 dy: 2 scale: 0.5 rotation: 1.57079633 ",
+      "dx: -2 dy: -1 scale: 2 rotation: -1.57079633 ",
+      "dx: 1.5 dy: 1 scale: 1 rotation: 0 ");
+
+  CheckCompose<LinearSimilarityModel>("dx: 1 dy: 2 a: 0.5 b: 0.5 ",
+                                      "dx: -2 dy: -1 a: 2 b: -0.5 ",
+                                      "dx: 0.5 dy: 0.5 a: 1.25 b: 0.75 ");
+
+  // Test division by value != 1.
+  CheckCompose<Homography>(
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: 4 h_11: 5 h_12: 6 "
+      "h_20: 1 h_21: -1 ",
+      "h_00: -3 h_01: -2 h_02: -1 "
+      "h_10: -4 h_11: -5 h_12: -2 "
+      "h_20: 7 h_21: 8 ",
+      "h_00: 5 h_01: 6 h_02: -1 "
+      "h_10: 5 h_11: 7.5 h_12: -4 "
+      "h_20: 4 h_21: 5.5 ");
+  // Test division by 1.
+  CheckCompose<Homography>(
+      "h_00: 1 h_01: 2 h_02: 3 "
+      "h_10: 4 h_11: 5 h_12: 6 "
+      "h_20: 2 h_21: -1 ",
+      "h_00: -3 h_01: -2 h_02: -1 "
+      "h_10: -4 h_11: -5 h_12: -2 "
+      "h_20: 7 h_21: 8 ",
+      "h_00: 10 h_01: 12 h_02: -2 "
+      "h_10: 10 h_11: 15 h_12: -8 "
+      "h_20: 5 h_21: 9 ");
+}
+
+// Test conversions between models and their affine representations, and
+// vice-versa.
+template +void CheckToFromAffine(const char* model_string, const char* affine_string) { + Model model; + ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(model_string, &model)); + + AffineModel affine; + ASSERT_TRUE(proto_ns::TextFormat::ParseFromString(affine_string, &affine)); + + typedef ModelAdapter Adapter; + EXPECT_THAT(Adapter::ToAffine(model), mediapipe::EqualsProto(affine)); + EXPECT_THAT(Adapter::FromAffine(affine), mediapipe::EqualsProto(model)); +} + +TEST_F(MotionModelsTest, ToFromAffine) { + CheckToFromAffine("dx: 1 dy: 2", + "dx: 1 dy: 2 a: 1 b: 0 c: 0 d: 1"); + + CheckToFromAffine("dx: 1 dy: 2 a: 3 b: -4", + "dx: 1 dy: 2 a: 3 b: 4 c: -4 d: 3"); + + CheckToFromAffine("dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6", + "dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6"); + + CheckToFromAffine( + "h_00: 3 h_01: 4 h_02: 1 " + "h_10: 5 h_11: 6 h_12: 2 " + "h_20: 0 h_21: 0 ", + "dx: 1 dy: 2 a: 3 b: 4 c: 5 d: 6"); + Homography homography; + ASSERT_TRUE( + proto_ns::TextFormat::ParseFromString("h_00: 3 h_01: 4 h_02: 1 " + "h_10: 5 h_11: 6 h_12: 2 " + "h_20: 0 h_21: 0 ", + &homography)); + + EXPECT_TRUE(HomographyAdapter::IsAffine(homography)); + homography.set_h_20(7); + homography.set_h_21(8); + EXPECT_FALSE(HomographyAdapter::IsAffine(homography)); +} + +TEST_F(MotionModelsTest, ProjectModels) { + // Express models w.r.t. center for easy testing. + LinearSimilarityModel center_trans = + LinearSimilarityAdapter::FromArgs(50, 50, 1, 0); + LinearSimilarityModel inv_center_trans = + LinearSimilarityAdapter::FromArgs(-50, -50, 1, 0); + + // 20 x 10 translation with scaling of factor 2 and rotation. 
+ LinearSimilarityModel lin_sim = + LinearSimilarityAdapter::FromArgs(20, 10, 2 * cos(0.2), 2 * sin(0.2)); + + LinearSimilarityModel lin_sim_center = + ModelCompose3(center_trans, lin_sim, inv_center_trans); + + TranslationModel translation = + TranslationAdapter::ProjectFrom(lin_sim_center, 100, 100); + EXPECT_NEAR(translation.dx(), 20, 1e-3); + EXPECT_NEAR(translation.dy(), 10, 1e-3); + + translation = ProjectViaFit(lin_sim_center, 100, 100); + EXPECT_NEAR(translation.dx(), 20, 1e-3); + EXPECT_NEAR(translation.dy(), 10, 1e-3); + + Homography homog = HomographyAdapter::FromArgs( + 1, 0, 10, 0, 1, 20, 5e-3, 1e-3); // Perspective transform: yaw + pitch. + + Homography homog_center = + ModelCompose3(HomographyAdapter::Embed(center_trans), homog, + HomographyAdapter::Embed(inv_center_trans)); + // Rendering: + // https://www.wolframalpha.com/input/?i=ListPlot%5B%7B+%7B7,-7%7D,+%7B108,16%7D,+%7B104,96%7D,+%7B12.5,+125%7D,+%7B7,-7%7D%5D + + translation = TranslationAdapter::ProjectFrom(homog_center, 100, 100); + EXPECT_NEAR(translation.dx(), 10, 1e-3); + EXPECT_NEAR(translation.dy(), 20, 1e-3); + + // TODO: Investigate how ProjectViaFit can yield similar result. +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/util/tracking/motion_saliency.cc b/mediapipe/util/tracking/motion_saliency.cc new file mode 100644 index 000000000..5adafca4c --- /dev/null +++ b/mediapipe/util/tracking/motion_saliency.cc @@ -0,0 +1,700 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/motion_saliency.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/util/tracking/camera_motion.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +MotionSaliency::MotionSaliency(const MotionSaliencyOptions& options, + int frame_width, int frame_height) + : options_(options), + frame_width_(frame_width), + frame_height_(frame_height) {} + +MotionSaliency::~MotionSaliency() {} + +void MotionSaliency::SaliencyFromFeatures( + const RegionFlowFeatureList& feature_list, + std::vector* irls_weights, // optional. + SalientPointFrame* salient_frame) { + CHECK(salient_frame); + CHECK_EQ(frame_width_, feature_list.frame_width()); + CHECK_EQ(frame_height_, feature_list.frame_height()); + + if (irls_weights) { + CHECK_EQ(feature_list.feature_size(), irls_weights->size()); + } + + if (feature_list.feature_size() < 1) { + return; + } + + float max_irls_weight = 0; + if (irls_weights) { + max_irls_weight = + *std::max_element(irls_weights->begin(), irls_weights->end()); + } else { + struct FeatureIRLSComparator { + bool operator()(const RegionFlowFeature& lhs, + const RegionFlowFeature& rhs) const { + return lhs.irls_weight() < rhs.irls_weight(); + } + }; + max_irls_weight = + std::max_element(feature_list.feature().begin(), + feature_list.feature().end(), FeatureIRLSComparator()) + ->irls_weight(); + } + + // Max weight is too small for meaningful mode finding, terminate. + if (max_irls_weight < 1e-2f) { + return; + } + + // Discard small weights that just slow clustering down. 
+ const float irls_cutoff = max_irls_weight * 1e-2f; + + int feat_idx = 0; + + // Create SalientLocation's from input feature_list. + std::vector features; + for (const auto& src_feature : feature_list.feature()) { + const float weight = + irls_weights ? (*irls_weights)[feat_idx] : src_feature.irls_weight(); + ++feat_idx; + + // Discard all features with small measure or zero weight from mode finding. + if (weight < irls_cutoff) { + continue; + } + + features.push_back(SalientLocation(FeatureLocation(src_feature), weight)); + } + + DetermineSalientFrame(features, salient_frame); +} + +void MotionSaliency::SaliencyFromPoints(const std::vector* points, + const std::vector* weights, + SalientPointFrame* salient_frame) { + // TODO: Handle vectors of size zero. + CHECK(salient_frame); + CHECK_EQ(points->size(), weights->size()); + + float max_weight = *std::max_element(weights->begin(), weights->end()); + + // Max weight is too small for meaningful mode finding, terminate. + if (max_weight < 1e-2f) { + return; + } + + // Discard small weights that just slow clustering down. + const float weight_cutoff = max_weight * 1e-2f; + + // Create SalientLocation's from input points. + std::vector features; + for (int point_idx = 0; point_idx < points->size(); ++point_idx) { + const float weight = (*weights)[point_idx]; + // Discard all features with small measure or zero weight from mode finding. + if (weight < weight_cutoff) { + continue; + } + + features.push_back(SalientLocation((*points)[point_idx], weight)); + } + + DetermineSalientFrame(features, salient_frame); +} + +// We only keep those salient points that have neighbors along the temporal +// dimension. +void MotionSaliency::SelectSaliencyInliers( + std::vector* motion_saliency, + bool rescale_to_median_saliency_weight) { + float scale = 1.0; + + if (rescale_to_median_saliency_weight) { + // Compute median saliency weight across all frames, to rescale saliency. 
+ std::vector saliency_weights; + for (int i = 0; i < motion_saliency->size(); ++i) { + for (const auto& salient_point : (*motion_saliency)[i]->point()) { + saliency_weights.push_back(salient_point.weight()); + } + } + + // Nothing to filter in the frame chunk. + if (saliency_weights.empty()) { + return; + } + + auto median_iter = saliency_weights.begin() + saliency_weights.size() / 2; + std::nth_element(saliency_weights.begin(), median_iter, + saliency_weights.end()); + + const float median_weight = *median_iter; + if (median_weight > 0) { + scale = options_.saliency_weight() / median_weight; + } + } + + SaliencyPointList inlier_saliency(motion_saliency->size()); + const float sq_support_distance = options_.selection_support_distance() * + options_.selection_support_distance(); + + // Test each point salient point for inlierness. + for (int i = 0; i < motion_saliency->size(); ++i) { + for (const auto& salient_point : (*motion_saliency)[i]->point()) { + int support = 0; + Vector2_f salient_location(salient_point.norm_point_x(), + salient_point.norm_point_y()); + + // Find supporting points (saliency points close enough to current one) + // in adjacent frames. Linear Complexity. + for (int j = std::max(0, i - options_.selection_frame_radius()), + end_j = std::min(i + options_.selection_frame_radius(), + motion_saliency->size() - 1); + + j <= end_j; ++j) { + if (i == j) { + continue; + } + + for (const auto& compare_point : (*motion_saliency)[j]->point()) { + Vector2_f compare_location(compare_point.norm_point_x(), + compare_point.norm_point_y()); + + if ((salient_location - compare_location).Norm2() <= + sq_support_distance) { + ++support; + } + } + } // end neighbor frames iteration. + + if (support >= options_.selection_minimum_support()) { + SalientPoint* scaled_point = inlier_saliency[i].add_point(); + scaled_point->CopyFrom(salient_point); + scaled_point->set_weight(scaled_point->weight() * scale); + } + } // end point traversal. + } // end frame traversal. 
+
+  for (int k = 0; k < motion_saliency->size(); ++k) {
+    (*motion_saliency)[k]->Swap(&inlier_saliency[k]);
+  }
+}
+
+void MotionSaliency::FilterMotionSaliency(
+    std::vector<SalientPointFrame*>* saliency_point_list) {
+  CHECK(saliency_point_list != nullptr);
+  const float sigma_time = options_.filtering_sigma_time();
+  const float sigma_space = options_.filtering_sigma_space();
+
+  const int time_radius = ceil(sigma_time * 1.5);
+  const int time_diameter = 2 * time_radius + 1;
+
+  // Create lookup table for weights.
+  std::vector<float> time_weights(time_diameter);
+  const float time_coeff = -0.5f / (sigma_time * sigma_time);
+
+  for (int i = -time_radius, time_idx = 0; i <= time_radius; ++i, ++time_idx) {
+    time_weights[time_idx] = std::exp(time_coeff * i * i);
+  }
+
+  // Ignore points further than 1.65 sigmas away (includes 90% of distribution).
+  const float space_cutoff = 1.65 * sigma_space;
+  const float space_exp_scale = -0.5f / (sigma_space * sigma_space);
+
+  // Copy saliency points, padded by time_radius frames on each side so the
+  // temporal filter below has full support at the chunk boundaries.
+  const int num_frames = saliency_point_list->size();
+  std::vector<SalientPointFrame> points(num_frames + 2 * time_radius);
+
+  for (int k = 0; k < saliency_point_list->size(); ++k) {
+    points[time_radius + k].CopyFrom(*(*saliency_point_list)[k]);
+  }
+
+  // Copy border (mirror frames into the padded regions).
+  std::copy(points.rbegin() + time_radius, points.rbegin() + 2 * time_radius,
+            points.end() - time_radius);
+  std::copy(points.begin() + time_radius, points.begin() + 2 * time_radius,
+            points.rend() - time_radius);
+
+  // Apply filter.
+  for (int i = time_radius; i < num_frames + time_radius; ++i) {
+    const int frame_idx = i - time_radius;
+    for (auto& sample_point :
+         *(*saliency_point_list)[frame_idx]->mutable_point()) {
+      Vector2_f point_sum(0, 0);
+      // Sum for left, bottom, right, top tuple.
+      Vector4_f bound_sum;
+      Vector3_f ellipse_sum(0, 0, 0);  // Captures major, minor and angle.
+      float weight_sum = 0;
+      float filter_sum = 0;
+
+      const float sample_angle = sample_point.angle();
+      for (int k = i - time_radius, time_idx = 0; k <= i + time_radius;
+           ++k, ++time_idx) {
+        for (const auto& test_point : points[k].point()) {
+          const float diff = std::hypot(
+              test_point.norm_point_y() - sample_point.norm_point_y(),
+              test_point.norm_point_x() - sample_point.norm_point_x());
+          if (diff > space_cutoff) {
+            continue;
+          }
+
+          // Bilateral-style weight: temporal Gaussian x point weight x
+          // spatial Gaussian on the normalized distance.
+          const float weight = time_weights[time_idx] * test_point.weight() *
+                               std::exp(diff * diff * space_exp_scale);
+
+          filter_sum += weight;
+          point_sum +=
+              Vector2_f(test_point.norm_point_x(), test_point.norm_point_y()) *
+              weight;
+          // NOTE(review): bound_sum is accumulated without explicit init;
+          // assumes Vector4_f default-constructs to zero — confirm.
+          bound_sum += Vector4_f(test_point.left(), test_point.bottom(),
+                                 test_point.right(), test_point.top()) *
+                       weight;
+          weight_sum += test_point.weight() * weight;
+
+          // Ensure test_point and sample are less than pi / 2 apart.
+          float test_angle = test_point.angle();
+          if (fabs(test_angle - sample_angle) > M_PI / 2) {
+            if (sample_angle > M_PI / 2) {
+              test_angle += M_PI;
+            } else {
+              test_angle -= M_PI;
+            }
+          }
+
+          ellipse_sum += Vector3_f(test_point.norm_major(),
+                                   test_point.norm_minor(), test_angle) *
+                         weight;
+        }
+      }
+
+      // Normalize all accumulated sums by the total filter weight.
+      if (filter_sum > 0) {
+        const float inv_filter_sum = 1.0f / filter_sum;
+        point_sum *= inv_filter_sum;
+        bound_sum *= inv_filter_sum;
+        weight_sum *= inv_filter_sum;
+        ellipse_sum *= inv_filter_sum;
+      }
+
+      sample_point.set_norm_point_x(point_sum.x());
+      sample_point.set_norm_point_y(point_sum.y());
+      sample_point.set_left(bound_sum.x());
+      sample_point.set_bottom(bound_sum.y());
+      sample_point.set_right(bound_sum.z());
+      sample_point.set_top(bound_sum.w());
+
+      sample_point.set_weight(weight_sum);
+      sample_point.set_norm_major(ellipse_sum.x());
+      sample_point.set_norm_minor(ellipse_sum.y());
+      sample_point.set_angle(ellipse_sum.z());
+
+      // Wrap the filtered angle back into [0, M_PI].
+      if (sample_point.angle() > M_PI) {
+        sample_point.set_angle(sample_point.angle() - M_PI);
+      }
+      if (sample_point.angle() < 0) {
+        sample_point.set_angle(sample_point.angle() + M_PI);
+      }
+    }
+  }
+}
+
+void MotionSaliency::CollapseMotionSaliency(
+    const SaliencyPointList& input_saliency, const Vector4_f& bounds,
+    SaliencyPointList* output_saliency) {
+  CHECK(output_saliency);
+  output_saliency->clear();
+  output_saliency->resize(input_saliency.size());
+
+  for (int f = 0; f < input_saliency.size(); ++f) {  // traverse frames.
+    Vector2_f mean_saliency(0, 0);
+    float weight_sum = 0;
+    for (const auto& salient_point : input_saliency[f].point()) {
+      mean_saliency +=
+          Vector2_f(salient_point.norm_point_x(), salient_point.norm_point_y());
+      weight_sum += 1;
+    }
+
+    // Emit a single unweighted average point per frame (if any points exist).
+    if (weight_sum > 0) {
+      SalientPoint* collapsed = (*output_saliency)[f].add_point();
+      collapsed->set_norm_point_x(mean_saliency.x() / weight_sum);
+      collapsed->set_norm_point_y(mean_saliency.y() / weight_sum);
+      collapsed->set_left(bounds.x());
+      collapsed->set_bottom(bounds.y());
+      collapsed->set_right(bounds.z());
+      collapsed->set_top(bounds.w());
+      collapsed->set_weight(1.0f);
+    }
+  }
+}
+
+namespace {
+
+// Describes feature mode for a feature in a RegionFlowFeatureList stored
+// at index feature_idx.
+struct FeatureMode {
+  Vector2_f location;
+  float irls_weight;
+  int feature_idx;
+  int mode_bin;
+};
+
+// Determines mode for each feature in feature_view.
+// Returns modes as list of pointers in mode_ptrs. Actual modes are stored
+// binned into a grid of equal size as the passed FeatureGrid
+// (bins of size grid_resolution x grid_resolution).
+void DetermineFeatureModes( + const FeatureFrame& features, + float grid_resolution, const Vector2_i& grid_dims, float band_width, + const FeatureGrid& feature_grid, + const std::vector>& feature_taps, + const std::vector& space_lut, float space_scale, + std::vector>* mode_grid, + std::vector* mode_ptrs) { + CHECK(mode_grid); + CHECK(mode_ptrs); + const int num_features = features.size(); + mode_ptrs->reserve(num_features); + + const float grid_scale = 1.0f / grid_resolution; + int feature_idx = 0; + const int kMaxIter = 100; + // Set convergence radius to 0.1% of bandwidth. + const float sq_conv_radius = band_width * band_width * 1e-6f; + for (const auto& feature_ptr : features) { + Vector2_f center = feature_ptr->pt; + int iter = 0; + for (; iter < kMaxIter; ++iter) { + const int bin_x = center.x() * grid_scale; + const int bin_y = center.y() * grid_scale; + const int grid_loc = bin_y * grid_dims.x() + bin_x; + + float sum_weight = 0; + Vector2_f new_center; + for (const auto& bin : feature_taps[grid_loc]) { + for (const auto& test_feat_ptr : feature_grid[bin]) { + const float dist = (test_feat_ptr->pt - center).Norm(); + const float weight = space_lut[static_cast(dist * space_scale)] * + test_feat_ptr->weight; + sum_weight += weight; + new_center += weight * test_feat_ptr->pt; + } + } + + if (sum_weight > 0) { + new_center *= (1.0f / sum_weight); + if ((center - new_center).Norm2() < sq_conv_radius) { + center = new_center; + break; + } else { + center = new_center; + } + } else { + LOG(WARNING) << "No features found in band_width radius, " + << "should not happen. 
"; + break; + } + } + + const int mode_bin_x = center.x() * grid_scale; + const int mode_bin_y = center.y() * grid_scale; + const int mode_grid_loc = mode_bin_y * grid_dims.x() + mode_bin_x; + FeatureMode mode{center, feature_ptr->weight, feature_idx, mode_grid_loc}; + + (*mode_grid)[mode_grid_loc].push_back(mode); + FeatureMode* added_mode = &(*mode_grid)[mode_grid_loc].back(); + (*mode_ptrs).push_back(added_mode); + ++feature_idx; + } +} + +} // namespace. + +void MotionSaliency::SalientModeFinding(std::vector* locations, + std::vector* modes) { + CHECK(modes); + CHECK(locations); + if (locations->empty()) { + return; + } + + // Scale band_width to image domain. + const float band_width = + hypot(frame_width_, frame_height_) * options_.mode_band_width(); + + // Select all salient locations with non-zero weight. + FeatureFrame salient_features; + salient_features.reserve(locations->size()); + for (auto& loc : *locations) { + if (loc.weight > 1e-6) { + salient_features.push_back(&loc); + } + } + + const int num_features = salient_features.size(); + if (num_features == 0) { + return; + } + + // Build feature grid according to bandwith. + std::vector> feature_grids; + std::vector> feature_taps; + + // Guarantee at least 1.5 sigmas in each direction are captured with + // tap 3 filtering (86 % of the data). + const float grid_resolution = 1.5f * band_width; + Vector2_i grid_dims; + BuildFeatureGrid( + frame_width_, frame_height_, grid_resolution, {salient_features}, + [](const SalientLocation& l) -> Vector2_f { return l.pt; }, &feature_taps, + nullptr, &grid_dims, &feature_grids); + + // Just one frame input, expect one grid as output. + CHECK_EQ(1, feature_grids.size()); + const auto& feature_grid = feature_grids[0]; + + // Setup Gaussian LUT for smoothing in space, using 2^10 discretization bins. + const int lut_bins = 1 << 10; + std::vector space_lut(lut_bins); + + // Using 3 tap smoothing, max distance is 2 bin diagonals. 
+ // We use maximum of 2 * sqrt(2) * bin_radius plus 1% room in case maximum + // value is attained. + const float max_space_diff = sqrt(2.0) * 2.f * grid_resolution * 1.01f; + + const float space_bin_size = max_space_diff / lut_bins; + const float space_scale = 1.0f / space_bin_size; + const float space_coeff = -0.5f / (band_width * band_width); + for (int i = 0; i < lut_bins; ++i) { + const float value = i * space_bin_size; + space_lut[i] = std::exp(value * value * space_coeff); + } + + // Store modes for each grid bin (to be averaged later). + std::vector> mode_grid(grid_dims.x() * grid_dims.y()); + std::vector mode_ptrs; + + DetermineFeatureModes(salient_features, grid_resolution, grid_dims, + band_width, feature_grid, feature_taps, space_lut, + space_scale, &mode_grid, &mode_ptrs); + + // Read out modes, ordered by decreasing weight. + struct FeatureModeComparator { + bool operator()(const FeatureMode* mode_lhs, + const FeatureMode* mode_rhs) const { + return mode_lhs->irls_weight > mode_rhs->irls_weight; + } + }; + + // Sort pointers, to keep order immutable during flagging operations. + std::sort(mode_ptrs.begin(), mode_ptrs.end(), FeatureModeComparator()); + + for (int m = 0; m < mode_ptrs.size(); ++m) { + // We mark a mode as processed by assigning -1 to its index. + if (mode_ptrs[m]->feature_idx < 0) { + continue; + } + + FeatureMode* mode = mode_ptrs[m]; + + // Average modes within band_width based on irls_weight * spatial weight. 
+    double sum_weight = mode->irls_weight;
+    double mode_x = sum_weight * mode->location.x();
+    double mode_y = sum_weight * mode->location.y();
+
+    // Accumulate first and second order moments of the supporting feature
+    // locations; used below to derive the covariance of each mode.
+    const Vector2_f& feat_loc = salient_features[mode->feature_idx]->pt;
+    double feat_x = sum_weight * feat_loc.x();
+    double feat_y = sum_weight * feat_loc.y();
+    double feat_xx = sum_weight * feat_loc.x() * feat_loc.x();
+    double feat_xy = sum_weight * feat_loc.x() * feat_loc.y();
+    double feat_yy = sum_weight * feat_loc.y() * feat_loc.y();
+
+    mode->feature_idx = -1;  // Flag as processed, does not change order
+                             // of traversal.
+
+    for (const auto& bin : feature_taps[mode->mode_bin]) {
+      for (auto& test_mode : mode_grid[bin]) {
+        if (test_mode.feature_idx >= 0) {
+          const float dist = (test_mode.location - mode->location).Norm();
+          if (dist <= band_width) {
+            const Vector2_f test_loc =
+                salient_features[test_mode.feature_idx]->pt;
+            const float weight =
+                space_lut[static_cast<int>(dist * space_scale)] *
+                test_mode.irls_weight;
+
+            sum_weight += weight;
+            mode_x += weight * test_mode.location.x();
+            mode_y += weight * test_mode.location.y();
+
+            const float test_loc_x_w = weight * test_loc.x();
+            const float test_loc_y_w = weight * test_loc.y();
+            feat_x += test_loc_x_w;
+            feat_y += test_loc_y_w;
+
+            feat_xx += test_loc_x_w * test_loc.x();
+            feat_xy += test_loc_x_w * test_loc.y();
+            feat_yy += test_loc_y_w * test_loc.y();
+
+            // Flag as processed, does not change order of traversal.
+            test_mode.feature_idx = -1;
+          }
+        }
+      }
+    }
+
+    if (sum_weight >= options_.min_irls_mode_weight()) {
+      double inv_sum_weight = 1.0f / sum_weight;
+      mode_x *= inv_sum_weight;
+      mode_y *= inv_sum_weight;
+      feat_x *= inv_sum_weight;
+      feat_y *= inv_sum_weight;
+      feat_xx *= inv_sum_weight;
+      feat_xy *= inv_sum_weight;
+      feat_yy *= inv_sum_weight;
+
+      // Covariance matrix.
+ const float a = feat_xx - 2.0 * feat_x * mode_x + mode_x * mode_x; + const float bc = + feat_xy - feat_x * mode_y - feat_y * mode_x + mode_x * mode_y; + const float d = feat_yy - 2.0 * feat_y * mode_y + mode_y * mode_y; + + Vector2_f axis_magnitude; + float angle; + if (!EllipseFromCovariance(a, bc, d, &axis_magnitude, &angle)) { + angle = 0; + axis_magnitude = Vector2_f(1, 1); + } else { + if (angle < 0) { + angle += M_PI; + } + CHECK_GE(angle, 0); + CHECK_LE(angle, M_PI + 1e-3); + } + + SalientMode irls_mode; + irls_mode.location = Vector2_f(mode_x, mode_y); + irls_mode.assignment_weight = sum_weight; + irls_mode.axis_magnitude = axis_magnitude; + irls_mode.angle = angle; + modes->push_back(irls_mode); + } + } + + // Sort modes by descreasing weight. + struct ModeWeightCompare { + bool operator()(const SalientMode& lhs, const SalientMode& rhs) const { + return lhs.assignment_weight > rhs.assignment_weight; + } + }; + + std::sort(modes->begin(), modes->end(), ModeWeightCompare()); +} + +// Determines the salient frame for a list of SalientLocations by performing +// mode finding and scales each point based on frame size. +void MotionSaliency::DetermineSalientFrame( + std::vector locations, SalientPointFrame* salient_frame) { + CHECK(salient_frame); + + std::vector modes; + { + MEASURE_TIME << "Mode finding"; + SalientModeFinding(&locations, &modes); + } + + const float denom_x = 1.0f / frame_width_; + const float denom_y = 1.0f / frame_height_; + + // Convert to salient points. 
+ for (int mode_idx = 0, + mode_sz = std::min(modes.size(), options_.num_top_irls_modes()); + mode_idx < mode_sz; ++mode_idx) { + SalientPoint* pt = salient_frame->add_point(); + pt->set_norm_point_x(modes[mode_idx].location.x()); + pt->set_norm_point_y(modes[mode_idx].location.y()); + pt->set_left(options_.bound_left()); + pt->set_bottom(options_.bound_bottom()); + pt->set_right(options_.bound_right()); + pt->set_top(options_.bound_top()); + + pt->set_norm_major(modes[mode_idx].axis_magnitude.x()); + pt->set_norm_minor(modes[mode_idx].axis_magnitude.y()); + pt->set_angle(modes[mode_idx].angle); + pt->set_weight(modes[mode_idx].assignment_weight * + options_.saliency_weight()); + + ScaleSalientPoint(denom_x, denom_y, pt); + } +} + +void ForegroundWeightsFromFeatures(const RegionFlowFeatureList& feature_list, + float foreground_threshold, + float foreground_gamma, + const CameraMotion* camera_motion, + std::vector* weights) { + CHECK(weights != nullptr); + weights->clear(); + + constexpr float kEpsilon = 1e-4f; + + CHECK_GT(foreground_threshold, 0.0f); + if (camera_motion) { + foreground_threshold *= + std::max(kEpsilon, InlierCoverage(*camera_motion, false)); + } + + const float weight_denom = 1.0f / foreground_threshold; + + // Map weights to foreground measure and determine minimum irls weight. + for (const auto& feature : feature_list.feature()) { + // Skip marked outliers. + if (feature.irls_weight() == 0) { + weights->push_back(0.0f); + continue; + } + + // Maps an irls_weight of magnitude weight_denom (from above) to zero, + // with values below weight_denom assigned linearly mapped (zero is mapped + // ot 1). Avoid mapping to zero as it used to mark outliers. 
+ const float foreground_measure = + std::max(0.0f, 1.0f - feature.irls_weight() * weight_denom); + + if (std::abs(foreground_gamma - 1.0f) < 1e-3f) { + weights->push_back(std::max(kEpsilon, foreground_measure)); + } else { + weights->push_back( + std::max(kEpsilon, std::pow(foreground_measure, foreground_gamma))); + } + } + CHECK_EQ(feature_list.feature_size(), weights->size()); +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/motion_saliency.h b/mediapipe/util/tracking/motion_saliency.h new file mode 100644 index 000000000..1dcd8e857 --- /dev/null +++ b/mediapipe/util/tracking/motion_saliency.h @@ -0,0 +1,150 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Computes MotionSaliency points that can be used for stabilization and +// retargeting. 
+ +#ifndef MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_ +#define MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_ + +#include +#include + +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/motion_saliency.pb.h" +#include "mediapipe/util/tracking/region_flow.h" + +namespace mediapipe { +class RegionFlowFeatureList; +class RegionFlowFrame; +class SalientPointFrame; +} // namespace mediapipe + +namespace mediapipe { + +class MotionSaliency { + public: + MotionSaliency(const MotionSaliencyOptions& options, int frame_width, + int frame_height); + ~MotionSaliency(); + + // Finds modes in the RegionFlowFeatureList (clusters for high IRLS weight, + // per default features agreeing with the background motion). + // Optionally, per feature irls weights can be supplied instead of using the + // features weight to adapt modes that will be found, e.g. see + // ForegroundWeightsFromFeatures below. + void SaliencyFromFeatures(const RegionFlowFeatureList& feature_list, + std::vector* irls_weights, // optional. + SalientPointFrame* salient_frame); + + // Finds saliency points (modes) from a list of points and their respective + // weights, outputs a SalientPointFrame. + void SaliencyFromPoints(const std::vector* points, + const std::vector* weights, + SalientPointFrame* salient_frame); + + // Selects saliency inliers, by searching for close-by salient points + // (within fractional MotionSaliencyOptions::filtering_support_distance) + // across adjacent frames (considered are + // #MotionSaliencyOptions::filtering_frame_radius before and after the + // current frame). + // If at least #MotionSaliencyOptions::filtering_minimum_support + // supporting points are found the tested salient point is kept, otherwise + // discarded. + // If desired performs rescaling, such that the median salient point weight + // equals MotionSaliencyOptions::saliency_weight(). 
+ void SelectSaliencyInliers(std::vector* motion_saliency, + bool rescale_to_median_saliency_weight); + + // Averages all salient points (unweighted average) per frame. The resulting + // mean salient point is assigned weight one, and the specified normalized + // bounds (as tuple left, bottom, right, top). + void CollapseMotionSaliency(const SaliencyPointList& input_saliency, + const Vector4_f& bounds, + SaliencyPointList* output_saliency); + + // Smooths saliency in space and time. + void FilterMotionSaliency( + std::vector* saliency_point_list); + + // Aggregates location in image domain and salient weight. + struct SalientLocation { + SalientLocation() {} + SalientLocation(const Vector2_f& _pt, float _weight) + : pt(_pt), weight(_weight) {} + Vector2_f pt; + float weight = 0; + }; + + private: + // Locates modes in a set of SalientLocation's. + // (using mean shift with bilateral weights, i.e. weight * spatial + // gaussian weighting). + // Only modes with for which the sum of total saliency weight is + // above min_irls_mode_sum are returned. + // Returns modes in the image domain as 2D points, sum of their + // assignment weights and spatial extend along major and minor axis. + // Modes are sorted w.r.t. their assignment irls weights (from highest to + // lowest). + struct SalientMode { + Vector2_f location; + // Total sum of irls weights assigned to this mode. + float assignment_weight = 0; + // Magnitude of major and minor axis storred in x and y, respectively. + Vector2_f axis_magnitude; + // Angle in radians w.r.t. x-axis. + float angle = 0; + }; + + // Note: input vector locations is not mutated by function. + void SalientModeFinding(std::vector* locations, + std::vector* modes); + + // Determines the salient frame for a list of SalientLocations by performing + // mode finding and scaling each point based on frame size. 
+ void DetermineSalientFrame(std::vector locations, + SalientPointFrame* salient_frame); + + MotionSaliencyOptions options_; + int frame_width_; + int frame_height_; +}; + +// Returns foregroundness weights in [0, 1] for each feature, by mapping irls +// weight to foreground score in [0, 1]. +// In particular, the foreground threshold indicates the *inverse* registration +// error (i.e. the irls weight) that is deemed a complete inlier. +// Weights in the interval [0, foreground_threshold] (corresponding to +// pixel errors in the interval [1 / foreground_threshold, inf]) +// are mapped to 1 - [0, 1], i.e. foreground threshold is mapped to zero +// with weights below the threshold being assigned values > 0. +// Therefore, larger values will increase amount of detected foreground +// as well as noise. +// In addition, foreground_gamma's < 1 can be used to increase the resolution +// of small foreground motions (irls weight close to the foreground_threshold) +// at the expense of larger foreground motions (irls weight close to zero). +// If optional parameter camera_motion is specified, the passed foreground +// threshold is scaled by the InlierCoverage of the camera_motion +// (which is in 0, 1). That is for unstable frames with small coverage, +// the threshold is tighter and fewer features are considered foreground. +void ForegroundWeightsFromFeatures( + const RegionFlowFeatureList& feature_list, + float foreground_threshold, // 0.5 is a good default value. + float foreground_gamma, // use 1.0 for default + const CameraMotion* camera_motion, // optional, can be nullptr. + std::vector* weights); + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_MOTION_SALIENCY_H_ diff --git a/mediapipe/util/tracking/motion_saliency.proto b/mediapipe/util/tracking/motion_saliency.proto new file mode 100644 index 000000000..b7702dae0 --- /dev/null +++ b/mediapipe/util/tracking/motion_saliency.proto @@ -0,0 +1,71 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +// Next tag: 17 +message MotionSaliencyOptions { + // Standard normalized bounds and weights used to initialize salient points. + // See region_flow.proto for details. + optional float bound_left = 1 [default = 0.3]; + optional float bound_bottom = 2 [default = 0.3]; + optional float bound_right = 15 [default = 0.3]; + optional float bound_top = 16 [default = 0.3]; + + optional float saliency_weight = 3 [default = 20]; + + // If set, scales saliency_weight by flow magnitude. + optional bool scale_weight_by_flow_magnitude = 8 [default = false]; + + // Minimum number of features within a region to be considered salient. + // Only applicable for functions accepting RegionFlowFrames. + optional int32 min_features = 4 [default = 5]; + + // If set, only considers regions flagged as forground. + optional bool use_only_foreground_regions = 9 [default = false]; + + // Specifies roughly number of foreground features mapped to one mode, + // for mode to be considered salient. + optional float min_irls_mode_weight = 10 [default = 10]; + + // Only returns the top N irls modes. + optional int32 num_top_irls_modes = 11 [default = 3]; + + // Mode finding is performed with a fraction radius of 10% of frame + // diameter by default. 
+ optional float mode_band_width = 12 [default = 0.10]; + + // We filter salient points along the temporal dimension only, keeping those + // that have sufficient support (in form of neighboring salient points). For + // every salient point in frame n, all points in frames + // [n - filtering_frame_radius, n + filtering_frame_radius] are tested, + // whether they support the current test point. + optional int32 selection_frame_radius = 5 [default = 5]; + + // Fractional distance to be considered a supporting salient point for a test + // point. + optional float selection_support_distance = 6 [default = 0.2]; + + // Minimum number of supporting salient points that need to be present in + // order for a point to be considered an inlier. + optional int32 selection_minimum_support = 7 [default = 4]; + + // Sigma in space (normalized domain). + optional float filtering_sigma_space = 13 [default = 0.05]; + + // Sigma in time (in frames). + optional float filtering_sigma_time = 14 [default = 5]; +} diff --git a/mediapipe/util/tracking/parallel_invoker.cc b/mediapipe/util/tracking/parallel_invoker.cc new file mode 100644 index 000000000..39228ba97 --- /dev/null +++ b/mediapipe/util/tracking/parallel_invoker.cc @@ -0,0 +1,36 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/parallel_invoker.h" + +// Choose between ThreadPool, OpenMP and serial execution. 
// Note: only one parallel invoker mode (flags_parallel_invoker_mode) can be
+// active at a time.
+ +// Execute a for loop in parallel from 0 to N via: +// ParallelFor(0, // start_index +// num_frames, // end_index, exclusive +// 1 // number of elements processed per iteration +// [](const BlockedRange& range) { +// // Process per-thread sub-range +// for (int i = range.begin(); i < range.end(); ++i) { +// // Process i'th item. +// } +// } + +// Specific implementation to copy a vector of images in parallel. +// class CopyInvoker { +// public: +// CopyInvoker(const vector& inputs, +// vector* outputs) +// : inputs_(inputs), outputs_(outputs) { +// } +// CopyInvoker(const CopyInvoker& rhs) +// : inputs_(rhs.inputs_), outputs_(rhs.outputs) { +// } +// void operator()(const BlockedRange& range) { +// for (int frame = range.begin(); frame < range.end(); ++frame) { +// inputs_[frame].copyTo(*(*outputs_)[frame]); +// } +// } +// private: +// const vector& inputs_; +// vector* outputs_; +// } + +// vector inputs; +// vector outputs; +// ParallelFor(0, num_frames, 1, CopyInvoker(inputs, &outputs)); +// +// OR (with lambdas): +// ParallelFor(0, num_frames, 1, +// [&inputs, &outputs](const BlockedRange& range) { +// for (int frame = range.begin(); frame < range.end(); ++frame) { +// inputs[frame].copyTo(*(outputs)[frame]); +// } +// } + +#ifndef MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_ +#define MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_ + +#include + +#include + +#include "absl/synchronization/mutex.h" +#include "mediapipe/framework/port/logging.h" + +#ifdef PARALLEL_INVOKER_ACTIVE +#include "mediapipe/framework/port/threadpool.h" + +#ifdef __APPLE__ +#include +#include +#endif + +#endif // PARALLEL_INVOKER_ACTIVE + +// Specifies parallelization implementation to use. 
+enum PARALLEL_INVOKER_MODE { + PARALLEL_INVOKER_NONE = 0, // Uses single threaded execution + PARALLEL_INVOKER_THREAD_POOL = 1, // Uses //thread/threadpool + PARALLEL_INVOKER_OPENMP = 2, // Uses OpenMP (requires compiler support) + PARALLEL_INVOKER_GCD = 3, // Uses GCD (Apple) + PARALLEL_INVOKER_MAX_VALUE = 4, // Increase when adding more modes +}; + +extern int flags_parallel_invoker_mode; +extern int flags_parallel_invoker_max_threads; + +// Note flag: Parallel processing only activated if +// PARALLEL_INVOKER_ACTIVE is defined. + +namespace mediapipe { + +// Partitions the range [begin, end) into equal blocks of size grain_size each +// (except last one, might be less than grain_size). +class BlockedRange { + public: + BlockedRange(int begin, int end, int grain_size) + : begin_(begin), end_(end), grain_size_(grain_size) {} + + int begin() const { return begin_; } + int end() const { return end_; } + int grain_size() const { return grain_size_; } + + private: + int begin_; + int end_; + int grain_size_; +}; + +// Partitions the range row_range x col_range into equal +// blocks of size row_range.grain_size() x col_range.grain_size() each +// (except last column and row might be of size less than grain_size in one +// or both of their dimensions). +class BlockedRange2D { + public: + BlockedRange2D(const BlockedRange& rows, const BlockedRange& cols) + : rows_(rows), cols_(cols) {} + + const BlockedRange& rows() const { return rows_; } + const BlockedRange& cols() const { return cols_; } + + private: + BlockedRange rows_; + BlockedRange cols_; +}; + +#ifdef PARALLEL_INVOKER_ACTIVE + +// Singleton ThreadPool for parallel invoker. +ThreadPool* ParallelInvokerThreadPool(); + +#ifdef __APPLE__ +// Enable to allow GCD as an option beside ThreadPool. 
+#define USE_PARALLEL_INVOKER_GCD 1 +#define CHECK_GCD_PARALLEL_WORK_COUNT DEBUG + +template +class ParallelInvokerGCDContext { + public: + ParallelInvokerGCDContext(const Invoker& invoker, const BlockedRange& rows) + : local_invoker_(invoker), rows_(rows) { +#if CHECK_GCD_PARALLEL_WORK_COUNT + count_ = 0; +#endif + } + + const Invoker& invoker() { +#if CHECK_GCD_PARALLEL_WORK_COUNT + // Implicitly tracking the # of launched tasks at invoker retrieval. + atomic_fetch_add(&count_, 1); +#endif + return local_invoker_; + } + const BlockedRange& rows() const { return rows_; } +#if CHECK_GCD_PARALLEL_WORK_COUNT + const int count() { return atomic_load(&count_); } +#endif + + private: + Invoker local_invoker_; + const BlockedRange& rows_; +#if CHECK_GCD_PARALLEL_WORK_COUNT + _Atomic(int32_t) count_; +#endif +}; + +template +class ParallelInvokerGCDContext2D : public ParallelInvokerGCDContext { + public: + ParallelInvokerGCDContext2D(const Invoker& invoker, const BlockedRange& rows, + const BlockedRange& cols) + : ParallelInvokerGCDContext(invoker, rows), cols_(cols) {} + + const BlockedRange& cols() const { return cols_; } + + private: + BlockedRange cols_; +}; + +template +static void ParallelForGCDTask(void* context, size_t index) { + ParallelInvokerGCDContext* invoker_context = + static_cast*>(context); + const BlockedRange& all_tasks = invoker_context->rows(); + int start = all_tasks.begin() + index * all_tasks.grain_size(); + int end = std::min(all_tasks.end(), start + all_tasks.grain_size()); + BlockedRange this_task(start, end, all_tasks.grain_size()); + + const Invoker& invoker = invoker_context->invoker(); + invoker(this_task); +} + +template +static void ParallelForGCDTask2D(void* context, size_t index) { + ParallelInvokerGCDContext2D* invoker_context = + static_cast*>(context); + // Partitioning across rows. 
+ const BlockedRange& all_tasks = invoker_context->rows(); + int start = all_tasks.begin() + index * all_tasks.grain_size(); + int end = std::min(all_tasks.end(), start + all_tasks.grain_size()); + BlockedRange this_task(start, end, all_tasks.grain_size()); + + const Invoker& invoker = invoker_context->invoker(); + invoker(BlockedRange2D(this_task, invoker_context->cols())); +} +#endif // __APPLE__ + +#endif // PARALLEL_INVOKER_ACTIVE +// Simple wrapper for compatibility with below ParallelFor function. +template +void SerialFor(size_t start, size_t end, size_t grain_size, + const Invoker& invoker) { + invoker(BlockedRange(start, end, 1)); +} + +inline void CheckAndSetInvokerOptions() { +#if defined(PARALLEL_INVOKER_ACTIVE) +#if defined(__ANDROID__) + // If unsupported option is selected, force usage of OpenMP if detected, and + // ThreadPool otherwise. + if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE && + flags_parallel_invoker_mode != PARALLEL_INVOKER_THREAD_POOL && + flags_parallel_invoker_mode != PARALLEL_INVOKER_OPENMP) { +#if defined(_OPENMP) + LOG(WARNING) << "Unsupported invoker mode selected on Android. " + << "OpenMP linkage detected, so falling back to OpenMP"; + flags_parallel_invoker_mode = PARALLEL_INVOKER_OPENMP; +#else // _OPENMP + // Fallback mode for active parallel invoker without OpenMP is ThreadPool. + LOG(WARNING) << "Unsupported invoker mode selected on Android. " + << "Falling back to ThreadPool"; + flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL; +#endif // _OPENMP + } +#endif // __ANDROID__ + +#if defined(__APPLE__) || defined(__EMSCRIPTEN__) + // Force usage of ThreadPool if unsupported option is selected. + // (OpenMP is not supported on iOS, due to missing clang support). 
+ if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE && +#if defined(USE_PARALLEL_INVOKER_GCD) + flags_parallel_invoker_mode != PARALLEL_INVOKER_GCD && +#endif // USE_PARALLEL_INVOKER_GCD + flags_parallel_invoker_mode != PARALLEL_INVOKER_THREAD_POOL) { + LOG(WARNING) << "Unsupported invoker mode selected on iOS. " + << "Falling back to ThreadPool mode"; + flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL; + } +#endif // __APPLE__ || __EMSCRIPTEN__ + +#if !defined(__APPLE__) && !defined(__EMSCRIPTEN__) && !defined(__ANDROID__) + flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL; +#endif // !__APPLE__ && !__EMSCRIPTEN__ && !__ANDROID__ + + // If OpenMP is requested, make sure we can actually use it, and fall back + // to ThreadPool if not. + if (flags_parallel_invoker_mode == PARALLEL_INVOKER_OPENMP) { +#if !defined(_OPENMP) + LOG(ERROR) << "OpenMP invoker mode selected but not compiling with OpenMP " + << "enabled. Falling back to ThreadPool"; + flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL; +#endif // _OPENMP + } + +#else // PARALLEL_INVOKER_ACTIVE + if (flags_parallel_invoker_mode != PARALLEL_INVOKER_NONE) { + LOG(ERROR) << "Parallel execution requested but PARALLEL_INVOKER_ACTIVE " + << "compile flag is not set. Falling back to single threaded " + << "execution."; + flags_parallel_invoker_mode = PARALLEL_INVOKER_NONE; + } +#endif // PARALLEL_INVOKER_ACTIVE + + CHECK_LT(flags_parallel_invoker_mode, PARALLEL_INVOKER_MAX_VALUE) + << "Invalid invoker mode specified."; + CHECK_GE(flags_parallel_invoker_mode, 0) << "Invalid invoker mode specified."; +} + +// Performs parallel iteration from [start to end), scheduling grain_size +// iterations per thread. For each iteration +// invoker(BlockedRange(thread_local_start, thread_local_end)) +// is called. Each thread is given its local copy of invoker, i.e. +// invoker needs to have copy constructor defined. 
+template +void ParallelFor(size_t start, size_t end, size_t grain_size, + const Invoker& invoker) { +#ifdef PARALLEL_INVOKER_ACTIVE + CheckAndSetInvokerOptions(); + switch (flags_parallel_invoker_mode) { +#if defined(__APPLE__) + case PARALLEL_INVOKER_GCD: { + int iterations_remain = (end - start + grain_size - 1) / grain_size; + CHECK_GT(iterations_remain, 0); + if (iterations_remain == 1) { + // Execute invoker serially. + invoker(BlockedRange(start, std::min(end, start + grain_size), 1)); + } else { + BlockedRange all_tasks(start, end, grain_size); + ParallelInvokerGCDContext context(invoker, all_tasks); + dispatch_queue_t concurrent_queue = + dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); + dispatch_apply_f(iterations_remain, concurrent_queue, &context, + ParallelForGCDTask); +#if CHECK_GCD_PARALLEL_WORK_COUNT + CHECK_EQ(iterations_remain, context.count()); +#endif + } + break; + } +#endif // __APPLE__ + + case PARALLEL_INVOKER_THREAD_POOL: { + int iterations_remain = (end - start + grain_size - 1) / grain_size; + CHECK_GT(iterations_remain, 0); + if (iterations_remain == 1) { + // Execute invoker serially. + invoker(BlockedRange(start, std::min(end, start + grain_size), 1)); + break; + } + + struct { + absl::Mutex mutex; + absl::CondVar completed; + int iterations_remain GUARDED_BY(mutex); + } loop; + { + absl::MutexLock lock(&loop.mutex); + loop.iterations_remain = iterations_remain; + } + + for (int x = start; x < end; x += grain_size) { + auto loop_func = [x, end, grain_size, &loop, invoker]() { + // Execute invoker. + invoker(BlockedRange(x, std::min(end, x + grain_size), 1)); + + // Decrement counter. + absl::MutexLock lock(&loop.mutex); + --loop.iterations_remain; + if (loop.iterations_remain == 0) { + loop.completed.SignalAll(); + } + }; + + // Attempt to run in parallel, if busy run serial to avoid deadlocking. 
+ // This can happen during nested invocation of ParallelFor, as if the + // loop iteration itself is calling ParallelFor we might deadlock if + // we can not guarantee for the iteration to be scheduled. + ParallelInvokerThreadPool()->Schedule(loop_func); + } + + // Wait on termination of all iterations. + loop.mutex.Lock(); + while (loop.iterations_remain > 0) { + loop.completed.Wait(&loop.mutex); + } + loop.mutex.Unlock(); + break; + } + + case PARALLEL_INVOKER_OPENMP: { + // Use thread-local copy of invoker. + Invoker local_invoker(invoker); +#pragma omp parallel for firstprivate(local_invoker) \ + num_threads(flags_parallel_invoker_max_threads) + for (int x = start; x < end; ++x) { + local_invoker(BlockedRange(x, x + 1, 1)); + } + break; + } + + case PARALLEL_INVOKER_NONE: { + SerialFor(start, end, grain_size, invoker); + break; + } + + case PARALLEL_INVOKER_MAX_VALUE: { + LOG(FATAL) << "Impossible."; + break; + } + } +#else + SerialFor(start, end, grain_size, invoker); +#endif // PARALLEL_INVOKER_ACTIVE +} + +// Simple wrapper for compatibility with below ParallelFor2D function. +template +void SerialFor2D(size_t start_row, size_t end_row, size_t start_col, + size_t end_col, size_t grain_size, const Invoker& invoker) { + invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1), + BlockedRange(start_col, end_col, 1))); +} + +// Same as above ParallelFor for 2D iteration. +template +void ParallelFor2D(size_t start_row, size_t end_row, size_t start_col, + size_t end_col, size_t grain_size, const Invoker& invoker) { +#ifdef PARALLEL_INVOKER_ACTIVE + CheckAndSetInvokerOptions(); + switch (flags_parallel_invoker_mode) { +#if defined(__APPLE__) + case PARALLEL_INVOKER_GCD: { + const int iterations_remain = + (end_row - start_row + grain_size - 1) / grain_size; + CHECK_GT(iterations_remain, 0); + if (iterations_remain == 1) { + // Execute invoker serially. 
+ invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1), + BlockedRange(start_col, end_col, 1))); + } else { + BlockedRange all_tasks(start_row, end_row, grain_size); + ParallelInvokerGCDContext2D context( + invoker, all_tasks, BlockedRange(start_col, end_col, grain_size)); + dispatch_queue_t concurrent_queue = + dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); + dispatch_apply_f(iterations_remain, concurrent_queue, &context, + ParallelForGCDTask2D); +#if CHECK_GCD_PARALLEL_WORK_COUNT + CHECK_EQ(iterations_remain, context.count()); +#endif + } + break; + } +#endif // __APPLE__ + + case PARALLEL_INVOKER_THREAD_POOL: { + int iterations_remain = end_row - start_row; // Guarded by loop_mutex + CHECK_GT(iterations_remain, 0); + if (iterations_remain == 1) { + // Execute invoker serially. + invoker(BlockedRange2D(BlockedRange(start_row, end_row, 1), + BlockedRange(start_col, end_col, 1))); + break; + } + + absl::Mutex loop_mutex; + absl::CondVar loop_completed; + + for (int y = start_row; y < end_row; ++y) { + auto loop_func = [y, start_col, end_col, &loop_mutex, &loop_completed, + &iterations_remain, invoker]() { + // Execute invoker. + invoker(BlockedRange2D(BlockedRange(y, y + 1, 1), + BlockedRange(start_col, end_col, 1))); + + // Decrement counter. + absl::MutexLock lock(&loop_mutex); + --iterations_remain; + if (iterations_remain == 0) { + loop_completed.Signal(); + } + }; + + // Attempt to run in parallel, if busy run serial to avoid deadlocking. + ParallelInvokerThreadPool()->Schedule(loop_func); + } + + // Wait on termination of all iterations. + loop_mutex.Lock(); + while (iterations_remain > 0) { + loop_completed.Wait(&loop_mutex); + } + loop_mutex.Unlock(); + break; + } + + case PARALLEL_INVOKER_OPENMP: { + // Use thread-local copy of invoker. 
+ Invoker local_invoker(invoker); +#pragma omp parallel for firstprivate(local_invoker) \ + num_threads(flags_parallel_invoker_max_threads) + for (int y = start_row; y < end_row; ++y) { + local_invoker(BlockedRange2D(BlockedRange(y, y + 1, 1), + BlockedRange(start_col, end_col, 1))); + } + break; + } + + case PARALLEL_INVOKER_NONE: { + SerialFor2D(start_row, end_row, start_col, end_col, grain_size, invoker); + break; + } + + case PARALLEL_INVOKER_MAX_VALUE: { + LOG(FATAL) << "Impossible."; + break; + } + } +#else + SerialFor2D(start_row, end_row, start_col, end_col, grain_size, invoker); +#endif // PARALLEL_INVOKER_ACTIVE +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_PARALLEL_INVOKER_H_ diff --git a/mediapipe/util/tracking/parallel_invoker_forbid_mixed.cc b/mediapipe/util/tracking/parallel_invoker_forbid_mixed.cc new file mode 100644 index 000000000..383ad53a3 --- /dev/null +++ b/mediapipe/util/tracking/parallel_invoker_forbid_mixed.cc @@ -0,0 +1,28 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Guard to ensure clients do not link against both, +// single and parallel version. 
+#ifdef PARALLEL_INVOKER_ACTIVE +int LinkageAgainstBothSingleAndParallelStabilizationVersionsDetected() { + return 0; +} + +#endif // PARALLEL_INVOKER_ACTIVE + +#ifdef PARALLEL_INVOKER_INACTIVE +int LinkageAgainstBothSingleAndParallelStabilizationVersionsDetected() { + return 1; +} +#endif // PARALLEL_INVOKER_INACTIVE diff --git a/mediapipe/util/tracking/parallel_invoker_test.cc b/mediapipe/util/tracking/parallel_invoker_test.cc new file mode 100644 index 000000000..8f1c86cb9 --- /dev/null +++ b/mediapipe/util/tracking/parallel_invoker_test.cc @@ -0,0 +1,60 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/parallel_invoker.h" + +#include +#include + +#include "absl/synchronization/mutex.h" +#include "mediapipe/framework/port/gtest.h" + +namespace mediapipe { +namespace { + +void RunParallelTest() { + absl::Mutex numbers_mutex; + std::vector numbers; + const int kArraySize = 5000; + + // Fill number array in parallel. 
+ ParallelFor(0, kArraySize, 1, + [&numbers_mutex, &numbers](const BlockedRange& b) { + for (int k = b.begin(); k != b.end(); ++k) { + absl::MutexLock lock(&numbers_mutex); + numbers.push_back(k); + } + }); + + std::vector expected(kArraySize); + std::iota(expected.begin(), expected.end(), 0); + EXPECT_TRUE( + std::is_permutation(expected.begin(), expected.end(), numbers.begin())); +} + +TEST(ParallelInvokerTest, PhotosTest) { + flags_parallel_invoker_mode = PARALLEL_INVOKER_OPENMP; + + RunParallelTest(); +} + +TEST(ParallelInvokerTest, ThreadPoolTest) { + flags_parallel_invoker_mode = PARALLEL_INVOKER_THREAD_POOL; + + // Needs to be run in opt mode to pass. + RunParallelTest(); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/util/tracking/push_pull_filtering.h b/mediapipe/util/tracking/push_pull_filtering.h new file mode 100644 index 000000000..f9b2c6c3c --- /dev/null +++ b/mediapipe/util/tracking/push_pull_filtering.h @@ -0,0 +1,1296 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Push pull filtering parametrized by number of channels. +// Performs sparse vector data interpolation across a specified domain. +// Optionally interpolation can be guided to be discontinuous across image +// boundaries and customized with various multipliers as described below. 
+// +// Note: As the file contains templated implementations it is recommended to be +// included in cc files instead of headers to speed up compilation. + +#ifndef MEDIAPIPE_UTIL_TRACKING_PUSH_PULL_FILTERING_H_ +#define MEDIAPIPE_UTIL_TRACKING_PUSH_PULL_FILTERING_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/image_util.h" +#include "mediapipe/util/tracking/push_pull_filtering.pb.h" + +namespace mediapipe { + +const float kBilateralEps = 1e-6f; + +// Push Pull algorithm can be decorated with mip-map visualizers, +// per-level weight adjusters and per-filter element weight multipliers. +// Implemented by default as no-ops below. +// +// Mip map's are made of C + 1 channel matrices, where data is stored at +// channels [0 .. C - 1], and push pull weights at index C. Commong flag +// pull_down_sampling for classes below is true for first pull stage +// (fine -> coarse) and false for second push stage (coarse -> fine). + +// Is called with the interpolated data at every level of the hierarchy. Enables +// to adjust weights or perform other kinds of modification *globally* for each +// mip-map level. +class PushPullWeightAdjuster { + public: + virtual ~PushPullWeightAdjuster() {} + + // In case of bilateral weighting, input_frame (resized to corresponding mip + // map level) is passed as well, otherwise parameter is NULL. + virtual void AdjustWeights(int mip_map_level, bool pull_down_sampling, + cv::Mat* input_frame, // Bilateral case. + cv::Mat* data_with_weights) = 0; +}; + +// Allows mip map to be visualized after first stage (pull_down_sampling == +// true) and second stage (push up sampling, i.e. pull_down_sampling == false). +// Note: For visualizers, data values in mip map are pre-multiplied by +// confidence weights in channel C if corresponding flag is_premultiplied +// is set to true). 
In this case normalization (division by confidence) +// has to be performed before visualization. +// Passed mip maps are borderless, i.e. views into the actual mip map with +// border being removed. +class PushPullMipMapVisualizer { + public: + virtual ~PushPullMipMapVisualizer() {} + + virtual void Visualize(const std::vector& mip_map, + bool pull_down_sampling, + const std::vector& is_premultiplied) = 0; +}; + +// FilterWeightMultiplier is a template argument to PushPullFiltering to +// adjust the filter weight at every up- and downsampling stage. +// Specifically every point (x, y) with data pointer anchor_ptr into +// the current mip maplevel (C + 1 channels, first C +// contain data, index C contains push-pull importance weight) is filtered in a +// neighborhood with several neighboring samples (pointed to by filter_ptr). +// In case of bilateral filtering img_ptr is pointing to 3 channel image pixel +// of the anchor. +// Default version of multiplier must be constructable with no arguments, and +// the following two interfaces have to be implemented: +// +// // Signals change in level, can be used for mutable initialization functions. +// void SetLevel(int mip_map_level, bool pull_down_sampling); +// +// // Function is called once for every neighbor (filter_ptr) of a pixel +// // (anchor_ptr). Location (x,y) of the pixel pointed to by anchor pointer is +// // also passed if needed for more complex operations. +// float WeightMultiplier(const float* anchor_ptr, // Points to anchor. +// const float* filter_ptr, // Offset element. +// const uint_8t* img_ptr, // NULL if not bilateral. +// int x, +// int y) const; +// +// Here is an example (used by default). + +// Default no-op. 
+class FilterWeightMultiplierOne { + public: + void SetLevel(int mip_map_level, bool pull_down_sampling) {} + + float GetWeight(const float* anchor_ptr, const float* filter_ptr, + const uint8* img_ptr, int x, int y) const { + return 1.0f; + } +}; + +class PushPullFilteringTest; + +// Templated by number of channels and FilterWeightMultiplier. +template +class PushPullFiltering { + public: + enum FilterType { + BINOMIAL_3X3 = 0, + BINOMIAL_5X5 = 1, + GAUSSIAN_3X3 = 2, // sigma = 1. + GAUSSIAN_5X5 = 3, // sigma = 1.6. + }; + + // Initializes push pull filter for specified domain size. + // Optionally, weight_multiplier, mip-map visualizer and + // weight adjuster can be passed as argument. + PushPullFiltering(const cv::Size& domain_size, FilterType filter_type, + bool use_bilateral, + FilterWeightMultiplier* weight_multiplier, // Optional. + PushPullMipMapVisualizer* mip_map_visualizer, // Optional. + PushPullWeightAdjuster* weight_adjuster); // Optional. + PushPullFiltering() = delete; + PushPullFiltering(const PushPullFiltering&) = delete; + PushPullFiltering& operator=(const PushPullFiltering&) = delete; + + // Returns number of pyramid levels allocated for given domain size. + int PyramidLevels() const { return downsample_pyramid_.size(); } + + // Returns domain size of n-th pyramid level (including border depending on + // filter_type). + cv::Size NthPyramidDomain(int level) { + CHECK_LT(level, PyramidLevels()); + return downsample_pyramid_[level].size(); + } + + void SetOptions(const PushPullOptions& options); + + // Push-Pull filter for C + 1 channel float displacement image + // (expected to be of size domain_size plus 1 (if filter == *_3x3) or + // 2 (if filter == *_5x5) pixel border around it, use + // BorderFromFilterType function for lookup). + // First C dimensions contain interpolated data, last dimension contains + // associated importance weight. 
+ // Places data_values on integer location data_locations + origin with + // uniform weight (== push_pull_weight) and employs iterative weighted + // down and up-sampling. If optional data_weight is specified uses per datum + // feature weight instead (weights are expected to be within [0, 1]). + // If input_frame is specified, spatial filter type is combined with + // intensity based filtering yielding bilateral weighting. + // Results are returned in parameter results. + // Filter is performed in 2 stages: + // i) PullDownSampling: Densifies the data by successive downsampling stages, + // averaging confidence and values across the domain from sparse data + // locations to unset values. + // ii) PushUpSampling: Pushes densified data back through the pyramid by + // successive upsampling stages, over-writing unset values with filled in + // data from downsampled version. + void PerformPushPull(const std::vector& data_locations, + const std::vector>& data_values, + float push_pull_weight, cv::Point2i origin, + int readout_level, // Default: 0. + const std::vector* data_weights, // Optional. + const cv::Mat* input_frame, // Optional. + cv::Mat* results); + + // This is the same as PerformPushPull above except that it + // assumes that the data (the mip_map at level 0) is given as a cv::Mat. + // The Mat should be C + 1 channels in total. + // The first C channels (channels 0 to C-1) of mip_map_level_0 should contain + // data_values * data_weights (or push_pull_weight) at appropriate locations, + // offset by the border. + // The corresponding locations in channel C are set to the data_weights. + // Locations without data should be set to 0 in all channels. + void PerformPushPullMat(const cv::Mat& mip_map_level_0, + int readout_level, // Default: 0. + const cv::Mat* input_frame, // Optional. 
+ cv::Mat* results); + + static constexpr int BorderFromFilterType(FilterType filter_type); + + FilterType filter_type() const { return filter_type_; } + + private: + // Implementation functions for PushPullFiltering. + void SetupFilters(); + + void SetupBilateralLUT(); + + // If allocate_base_level is set, allocates a frame for level zero of + // size domain_size + 2 * border, otherwise only levels 1 to end are + // allocated. + void AllocatePyramid(const cv::Size& domain_size, int border, int type, + bool allocate_base_level, std::vector* pyramid); + + // Downsampling operation for input_frame along pre-allocated pyramid. + void InitializeImagePyramid(const cv::Mat& input_frame, + std::vector* pyramid); + + void PerformPushPullImpl(const int readout_level, const cv::Mat* input_frame, + std::vector* mip_map_ptr); + + void PullDownSampling(int num_filter_elems, const float* filter_weights, + std::vector* mip_map_ptr); + + void PushUpSampling(int num_filter_elems, const float* filter_weights, + int readout_level, std::vector* mip_map_ptr); + + // Convenience function selecting appropiate border size based on filter_type. + template + void CopyNecessaryBorder(cv::Mat* mat); + + // Implementation function for fast upsampling. See comments before definition + // for details. + void GetUpsampleTaps3(const float* filter_weights, + const std::vector* space_offsets, // optional. + int inc_x, int inc_y, std::vector tap_weights[4], + std::vector tap_offsets[4], + std::vector tap_space_offsets[4]); + + void GetUpsampleTaps5(const float* filter_weights, + const std::vector* space_offsets, // optional. 
+ int inc_x, int inc_y, std::vector tap_weights[4], + std::vector tap_offsets[4], + std::vector tap_space_offsets[4]); + + inline int ColorDiffL1(const uint8* lhs_ptr, const uint8* rhs_ptr) { + return abs(static_cast(lhs_ptr[0]) - static_cast(rhs_ptr[0])) + + abs(static_cast(lhs_ptr[1]) - static_cast(rhs_ptr[1])) + + abs(static_cast(lhs_ptr[2]) - static_cast(rhs_ptr[2])); + } + + const cv::Size domain_size_; + FilterType filter_type_; + int border_; + + std::array binomial5_weights_; + std::array binomial3_weights_; + std::array gaussian5_weights_; + std::array gaussian3_weights_; + + // Pyramids used by PushPull implementation. + std::vector downsample_pyramid_; + std::vector input_frame_pyramid_; + + // Pre-computed spacial offets based on selected filter for each level of the + // pyramid. + std::vector> pyramid_space_offsets_; + + bool use_bilateral_ = false; + + FilterWeightMultiplier* weight_multiplier_; + // Used to create default multiplier is none was passed. + std::unique_ptr default_weight_multiplier_; + + PushPullMipMapVisualizer* mip_map_visualizer_; + PushPullWeightAdjuster* weight_adjuster_; + PushPullOptions options_; + + std::vector bilateral_lut_; + + friend class PushPullFilteringTest; +}; + +// Typedef's for explicit instantiation, add more if needed. +typedef PushPullFiltering<1> PushPullFilteringC1; +typedef PushPullFiltering<2> PushPullFilteringC2; +typedef PushPullFiltering<3> PushPullFilteringC3; +typedef PushPullFiltering<4> PushPullFilteringC4; + +// For compatible forward declarations add this to your header: +// template class PushPullFiltering; +// class FilterWeightMultiplierOne; +// typedef PushPullFiltering<1, FilterWeightMultiplierOne> PushPullFlowC1; + +// Several concrete helper classes below. +/////////////////////////////////////////////////////////////////////////////// +// Template implementation functions below. 
+// +template +PushPullFiltering::PushPullFiltering( + const cv::Size& domain_size, FilterType filter_type, bool use_bilateral, + FilterWeightMultiplier* weight_multiplier, + PushPullMipMapVisualizer* mip_map_visualizer, + PushPullWeightAdjuster* weight_adjuster) + : domain_size_(domain_size), + filter_type_(filter_type), + use_bilateral_(use_bilateral), + weight_multiplier_(weight_multiplier), + mip_map_visualizer_(mip_map_visualizer), + weight_adjuster_(weight_adjuster) { + border_ = BorderFromFilterType(filter_type); + if (border_ < 0) { + LOG(FATAL) << "Unknown filter requested."; + } + + SetupFilters(); + AllocatePyramid(domain_size_, border_, CV_32FC(C + 1), true, + &downsample_pyramid_); + + if (use_bilateral_) { + SetupBilateralLUT(); + AllocatePyramid(domain_size_, border_, CV_8UC3, true, + &input_frame_pyramid_); + + // Setup space offsets. + pyramid_space_offsets_.resize(input_frame_pyramid_.size()); + for (int l = 0, num_levels = input_frame_pyramid_.size(); l < num_levels; + ++l) { + std::vector& space_offsets = pyramid_space_offsets_[l]; + const cv::Mat& pyramid_frame = input_frame_pyramid_[l]; + for (int row = -border_; row <= border_; ++row) { + for (int col = -border_; col <= border_; ++col) { + space_offsets.push_back(row * pyramid_frame.step[0] + + col * pyramid_frame.elemSize()); + } + } + } + } + + // Create default weight multiplier if none was passed. + if (weight_multiplier_ == NULL) { + default_weight_multiplier_.reset(new FilterWeightMultiplier()); + weight_multiplier_ = default_weight_multiplier_.get(); + } +} + +template +constexpr int +PushPullFiltering::BorderFromFilterType( + FilterType filter_type) { + // -1 indicates error (unknown filter_type). + return (filter_type == BINOMIAL_3X3 || filter_type == GAUSSIAN_3X3) + ? 1 + : ((filter_type == BINOMIAL_5X5 || filter_type == GAUSSIAN_5X5) + ? 
2 + : -1); +} + +template +void PushPullFiltering::SetOptions( + const PushPullOptions& options) { + options_ = options; + SetupBilateralLUT(); +} + +template +void PushPullFiltering::SetupFilters() { + // Setup binomial weights. + std::array bin5_weights{{1, 4, 6, 4, 1, 4, 16, 24, 16, + 4, 6, 24, 36, 24, 6, 4, 16, 24, + 16, 4, 1, 4, 6, 4, 1}}; + + std::array bin3_weights{{1, 2, 1, 2, 4, 2, 1, 2, 1}}; + + // Normalize maximum of binomial filters to 1. + const float bin5_scale = + (1.f / std::accumulate(bin5_weights.begin(), bin5_weights.end(), 0.0f)); + + for (int i = 0; i < 25; ++i) { + binomial5_weights_[i] = bin5_weights[i] * bin5_scale; + } + + const float bin3_scale = + (1.f / std::accumulate(bin3_weights.begin(), bin3_weights.end(), 0.0f)); + + for (int i = 0; i < 9; ++i) { + binomial3_weights_[i] = bin3_weights[i] * bin3_scale; + } + + // Setup gaussian weights. + const float space_coeff_5 = -0.5f / (1.6f * 1.6f); + for (int j = 0; j < 5; ++j) { + for (int i = 0; i < 5; ++i) { + const float radius = (j - 2) * (j - 2) + (i - 2) * (i - 2); + gaussian5_weights_[j * 5 + i] = + std::exp(static_cast(radius * space_coeff_5)); + } + } + + const float space_coeff_3 = -0.5f / (1.0f * 1.0f); + for (int j = 0; j < 3; ++j) { + for (int i = 0; i < 3; ++i) { + const float radius = (j - 1) * (j - 1) + (i - 1) * (i - 1); + gaussian3_weights_[j * 3 + i] = + std::exp(static_cast(radius * space_coeff_3)); + } + } + + // Normalize maximum of gaussian weights to 1 (center value has largest + // magnitude). 
+ const float gauss5_scale = + 1.0f / std::accumulate(gaussian5_weights_.begin(), + gaussian5_weights_.end(), 0.0f); + const float gauss3_scale = + 1.0f / std::accumulate(gaussian3_weights_.begin(), + gaussian3_weights_.end(), 0.0f); + + for (int i = 0; i < 25; ++i) { + gaussian5_weights_[i] *= gauss5_scale; + } + + for (int i = 0; i < 9; ++i) { + gaussian3_weights_[i] *= gauss3_scale; + } +} + +template +void PushPullFiltering::SetupBilateralLUT() { + // We use L1 color distance here, maximum is 3 (channels) * 256 (max + // intensity). + const int max_bins = 3 * 256; + bilateral_lut_.resize(max_bins, 0.0f); + + const float sigma_color = options_.bilateral_sigma(); + const float gauss_color_coeff = -0.5f / (sigma_color * sigma_color); + + // Normalized such that first bin equals one. + // Avoid non-zero weights for large intensity differences. + for (int i = 0; i < max_bins; ++i) { + bilateral_lut_[i] = std::max( + kBilateralEps, + std::exp(static_cast(i * i * gauss_color_coeff))); + } +} + +template +void PushPullFiltering::AllocatePyramid( + const cv::Size& domain_size, int border, int type, bool allocate_base_level, + std::vector* pyramid) { + CHECK(pyramid != nullptr); + pyramid->clear(); + pyramid->reserve(16); // Do not anticipate videos with dimensions + // larger than 2^16. 
+ + int width = domain_size.width; + int height = domain_size.height; + + if (allocate_base_level) { + pyramid->push_back(cv::Mat(height + 2 * border, width + 2 * border, type)); + } + + while (width > 1 && height > 1) { + width = (width + 1) / 2; + height = (height + 1) / 2; + pyramid->push_back(cv::Mat(height + 2 * border, width + 2 * border, type)); + } +} + +template +void PushPullFiltering::InitializeImagePyramid( + const cv::Mat& input_frame, std::vector* pyramid) { + CHECK(pyramid != nullptr); + CHECK_GT(pyramid->size(), 0); + + cv::Mat base_level((*pyramid)[0], + cv::Range(border_, (*pyramid)[0].rows - border_), + cv::Range(border_, (*pyramid)[0].cols - border_)); + CHECK_EQ(base_level.rows, input_frame.rows); + CHECK_EQ(base_level.cols, input_frame.cols); + CHECK_EQ(base_level.type(), input_frame.type()); + + input_frame.copyTo(base_level); + CopyNecessaryBorder(&(*pyramid)[0]); + + for (int l = 0; l < pyramid->size() - 1; ++l) { + cv::Mat source((*pyramid)[l], + cv::Range(border_, (*pyramid)[l].rows - border_), + cv::Range(border_, (*pyramid)[l].cols - border_)); + cv::Mat destination((*pyramid)[l + 1], + cv::Range(border_, (*pyramid)[l + 1].rows - border_), + cv::Range(border_, (*pyramid)[l + 1].cols - border_)); + cv::pyrDown(source, destination, destination.size()); + CopyNecessaryBorder(&(*pyramid)[l + 1]); + } +} + +template +template +void PushPullFiltering::CopyNecessaryBorder( + cv::Mat* mat) { + switch (filter_type_) { + case BINOMIAL_3X3: + case GAUSSIAN_3X3: + CopyMatBorder(mat); + break; + case BINOMIAL_5X5: + case GAUSSIAN_5X5: + CopyMatBorder(mat); + break; + default: + LOG(FATAL) << "Unknown filter"; + } +} + +// In case of upsampling there are 4 possible anchor / image configurations +// that can occur. First the general layout of upsampling: x corresponds to +// positions with defined values, 0 to the space in-between. 
+// x 0 x 0 x 0 x 0 +// 0 0 0 0 0 0 0 0 +// x 0 x 0 x 0 x 0 +// 0 0 0 0 0 0 0 0 +// x 0 x 0 x 0 x 0 + +// The 4 cases for 3x3 filter are: +// Case 0: +// Filter incident with x: 1x1 filter +// Case 1: +// Filter incident to 0 at x0x: 1x2 filter +// Case 2: +// Filter incident to 0 at x : 2x1 filter +// 0 +// x +// Case 3: +// Filter incident to center 0 at x 0 x : 2x2 filter +// 0 0 0 +// x 0 x +// When traversing an to be upsampled image, for even rows we have to alternate +// between cases 0 and 1, for odd rows we alternate between 2 and 3. +// Computes the weights and tap offsets for above described cases, using +// sample increment in x direction of inc_x (e.g. color channels) and +// sample increment in y of inc_y (e.g. bytes in row). +// Optionally also selects space_offsets for bilateral filtering for each +// upsampling case, if space_offsets is specified. Reasoning here is, that +// tap offsets are defined in the image domain of the low-res frame one level +// above the currently processed one, while space_offsets are used to compute +// the corresponding joint bilateral weights in the high-res frame. +template +void PushPullFiltering::GetUpsampleTaps3( + const float* filter_weights, + const std::vector* space_offsets, // optional. 
+ int inc_x, int inc_y, std::vector tap_weights[4], + std::vector tap_offsets[4], std::vector tap_space_offsets[4]) { + // Taps for filter 0 1 2 + // 3 4 5 + // 6 7 8 + // Case 0: + tap_weights[0].push_back(filter_weights[4]); + tap_offsets[0].push_back(0); + if (space_offsets) { + tap_space_offsets[0].push_back((*space_offsets)[4]); + } + + // Case 1: + tap_weights[1].push_back(filter_weights[3]); + tap_offsets[1].push_back(0); + tap_weights[1].push_back(filter_weights[5]); + tap_offsets[1].push_back(inc_x); + if (space_offsets) { + tap_space_offsets[1].push_back((*space_offsets)[3]); + tap_space_offsets[1].push_back((*space_offsets)[5]); + } + + // Case 2: + tap_weights[2].push_back(filter_weights[1]); + tap_offsets[2].push_back(0); + tap_weights[2].push_back(filter_weights[7]); + tap_offsets[2].push_back(inc_y); + if (space_offsets) { + tap_space_offsets[2].push_back((*space_offsets)[1]); + tap_space_offsets[2].push_back((*space_offsets)[7]); + } + + // Case 3: + tap_weights[3].push_back(filter_weights[0]); + tap_offsets[3].push_back(0); + tap_weights[3].push_back(filter_weights[2]); + tap_offsets[3].push_back(inc_x); + tap_weights[3].push_back(filter_weights[6]); + tap_offsets[3].push_back(inc_y); + tap_weights[3].push_back(filter_weights[8]); + tap_offsets[3].push_back(inc_y + inc_x); + if (space_offsets) { + tap_space_offsets[3].push_back((*space_offsets)[0]); + tap_space_offsets[3].push_back((*space_offsets)[2]); + tap_space_offsets[3].push_back((*space_offsets)[6]); + tap_space_offsets[3].push_back((*space_offsets)[8]); + } +} + +// For 5x5 filter. 
+// Case 0: +// Filter incident to center x at x 0 x 0 x: 3x3 filter +// 0 0 0 0 0 +// x 0 x 0 x +// 0 0 0 0 0 +// x 0 x 0 x +// Case 1: +// Filter incident to center 0 at x 0 x: 3x2 filter +// 0 0 0 +// x 0 x +// 0 0 0 +// x 0 x +// Case 2: +// Filter incident to center 0 at x 0 x 0 x: 2x3 filter +// 0 0 0 0 0 +// x 0 x 0 x +// Case 3: +// Filter incident to center 0 at x 0 x : 2x2 filter +// 0 0 0 +// x 0 x +// Same as above for 5x5 filter. +template +void PushPullFiltering::GetUpsampleTaps5( + const float* filter_weights, const std::vector* space_offsets, + int inc_x, int inc_y, std::vector tap_weights[4], + std::vector tap_offsets[4], std::vector tap_space_offsets[4]) { + // Taps for filter 0 1 2 3 4 + // 5 6 7 8 9 + // 10 11 12 13 14 + // 15 16 17 18 19 + // 20 21 22 23 24 + // Case 0: + tap_weights[0].push_back(filter_weights[0]); + tap_weights[0].push_back(filter_weights[2]); + tap_weights[0].push_back(filter_weights[4]); + tap_weights[0].push_back(filter_weights[10]); + tap_weights[0].push_back(filter_weights[12]); + tap_weights[0].push_back(filter_weights[14]); + tap_weights[0].push_back(filter_weights[20]); + tap_weights[0].push_back(filter_weights[22]); + tap_weights[0].push_back(filter_weights[24]); + tap_offsets[0].push_back(-inc_y - inc_x); + tap_offsets[0].push_back(-inc_y); + tap_offsets[0].push_back(-inc_y + inc_x); + tap_offsets[0].push_back(-inc_x); + tap_offsets[0].push_back(0); + tap_offsets[0].push_back(inc_x); + tap_offsets[0].push_back(inc_y - inc_x); + tap_offsets[0].push_back(inc_y); + tap_offsets[0].push_back(inc_y + inc_x); + + if (space_offsets) { + tap_space_offsets[0].push_back((*space_offsets)[0]); + tap_space_offsets[0].push_back((*space_offsets)[2]); + tap_space_offsets[0].push_back((*space_offsets)[4]); + tap_space_offsets[0].push_back((*space_offsets)[10]); + tap_space_offsets[0].push_back((*space_offsets)[12]); + tap_space_offsets[0].push_back((*space_offsets)[14]); + tap_space_offsets[0].push_back((*space_offsets)[20]); + 
tap_space_offsets[0].push_back((*space_offsets)[22]); + tap_space_offsets[0].push_back((*space_offsets)[24]); + } + + // Case 1: + tap_weights[1].push_back(filter_weights[1]); + tap_weights[1].push_back(filter_weights[3]); + tap_weights[1].push_back(filter_weights[11]); + tap_weights[1].push_back(filter_weights[13]); + tap_weights[1].push_back(filter_weights[21]); + tap_weights[1].push_back(filter_weights[23]); + tap_offsets[1].push_back(-inc_y); + tap_offsets[1].push_back(-inc_y + inc_x); + tap_offsets[1].push_back(0); + tap_offsets[1].push_back(inc_x); + tap_offsets[1].push_back(inc_y); + tap_offsets[1].push_back(inc_y + inc_x); + + if (space_offsets) { + tap_space_offsets[1].push_back((*space_offsets)[1]); + tap_space_offsets[1].push_back((*space_offsets)[3]); + tap_space_offsets[1].push_back((*space_offsets)[11]); + tap_space_offsets[1].push_back((*space_offsets)[13]); + tap_space_offsets[1].push_back((*space_offsets)[21]); + tap_space_offsets[1].push_back((*space_offsets)[23]); + } + + // Repeating for readability. 
+ // Taps for filter 0 1 2 3 4 + // 5 6 7 8 9 + // 10 11 12 13 14 + // 15 16 17 18 19 + // 20 21 22 23 24 + // Case 2: + tap_weights[2].push_back(filter_weights[5]); + tap_weights[2].push_back(filter_weights[7]); + tap_weights[2].push_back(filter_weights[9]); + tap_weights[2].push_back(filter_weights[15]); + tap_weights[2].push_back(filter_weights[17]); + tap_weights[2].push_back(filter_weights[19]); + tap_offsets[2].push_back(-inc_x); + tap_offsets[2].push_back(0); + tap_offsets[2].push_back(inc_x); + tap_offsets[2].push_back(inc_y - inc_x); + tap_offsets[2].push_back(inc_y); + tap_offsets[2].push_back(inc_y + inc_x); + + if (space_offsets) { + tap_space_offsets[2].push_back((*space_offsets)[5]); + tap_space_offsets[2].push_back((*space_offsets)[7]); + tap_space_offsets[2].push_back((*space_offsets)[9]); + tap_space_offsets[2].push_back((*space_offsets)[15]); + tap_space_offsets[2].push_back((*space_offsets)[17]); + tap_space_offsets[2].push_back((*space_offsets)[19]); + } + + // Case 3: + tap_weights[3].push_back(filter_weights[6]); + tap_weights[3].push_back(filter_weights[8]); + tap_weights[3].push_back(filter_weights[16]); + tap_weights[3].push_back(filter_weights[18]); + tap_offsets[3].push_back(0); + tap_offsets[3].push_back(inc_x); + tap_offsets[3].push_back(inc_y); + tap_offsets[3].push_back(inc_y + inc_x); + + if (space_offsets) { + tap_space_offsets[3].push_back((*space_offsets)[6]); + tap_space_offsets[3].push_back((*space_offsets)[8]); + tap_space_offsets[3].push_back((*space_offsets)[16]); + tap_space_offsets[3].push_back((*space_offsets)[18]); + } +} + +// Scattered data interpolation via PushPull algorithm. +// Interpolation is performed over a size of domain_size, where +// features are placed into the corresponding bin at location +// feature.[x()|y()] + origin. +// The output displacement is assumed be a 3 channel float image of +// size == domain_sz + 2 to account for a one pixel border. 
+template +void PushPullFiltering::PerformPushPull( + const std::vector& data_locations, + const std::vector>& data_values, float push_pull_weight, + cv::Point2i origin, int readout_level, + const std::vector* data_weights, const cv::Mat* input_frame, + cv::Mat* results) { + CHECK_EQ(data_locations.size(), data_values.size()); + CHECK(results != nullptr); + + if (data_weights) { + CHECK_EQ(data_weights->size(), data_locations.size()); + } + + origin.x += border_; + origin.y += border_; + + // Create mip-map view from downsample pyramid. + std::vector mip_map(PyramidLevels()); + + for (int i = 0; i < mip_map.size(); ++i) { + mip_map[i] = &downsample_pyramid_[i]; + } + + CHECK_GE(readout_level, 0); + CHECK_LT(readout_level, PyramidLevels()); + + // CHECK if passed results matrix is compatible w.r.t. type and domain. + CHECK_EQ(downsample_pyramid_[readout_level].cols, results->cols); + CHECK_EQ(downsample_pyramid_[readout_level].rows, results->rows); + CHECK_EQ(downsample_pyramid_[readout_level].type(), results->type()); + + // Use caller-allocated results Mat. + mip_map[readout_level] = results; + + // Place data_values into their final positions in mip map @ level 0. + mip_map[0]->setTo(0); + for (int idx = 0; idx < data_locations.size(); ++idx) { + const Vector2_f& location = data_locations[idx]; + const cv::Vec& value = data_values[idx]; + + float* ptr = mip_map[0]->ptr(static_cast(location.y() + 0.5f) + + origin.y) + + (C + 1) * (static_cast(location.x() + 0.5f) + origin.x); + + const float data_weight = + data_weights ? (*data_weights)[idx] : push_pull_weight; + + // Pre-multiply with data_weight. + for (int c = 0; c < C; ++c) { + ptr[c] = value[c] * data_weight; + } + + // A weight of 1 would assume zero noise in the displacements. Smaller + // values lead to a smoother interpolation that approximates the + // initial values. 
+ ptr[C] = data_weight; + } + + PerformPushPullImpl(readout_level, input_frame, &mip_map); +} + +// This is the same as PerformPushPull above except that it +// assumes that the data (the mip_map at level 0) is given as a cv::Mat. +template +void PushPullFiltering::PerformPushPullMat( + const cv::Mat& mip_map_level_0, + int readout_level, // Default: 0. + const cv::Mat* input_frame, // Optional. + cv::Mat* results) { + CHECK(results != nullptr); + + // Create mip-map view (concat displacements with downsample_pyramid). + std::vector mip_map(PyramidLevels()); + + for (int i = 0; i < mip_map.size(); ++i) { + mip_map[i] = &downsample_pyramid_[i]; + } + + CHECK_GE(readout_level, 0); + CHECK_LT(readout_level, PyramidLevels()); + + // CHECK if passed mip_map at level[0] is compatible w.r.t. type and domain. + CHECK_EQ(mip_map_level_0.cols, results->cols); + CHECK_EQ(mip_map_level_0.rows, results->rows); + CHECK_EQ(mip_map_level_0.type(), results->type()); + + // CHECK if passed results matrix is compatible w.r.t. type and domain. + CHECK_EQ(downsample_pyramid_[readout_level].cols, results->cols); + CHECK_EQ(downsample_pyramid_[readout_level].rows, results->rows); + CHECK_EQ(downsample_pyramid_[readout_level].type(), results->type()); + + // Use caller-allocated results Mat. + mip_map[readout_level] = results; + + // Place data_values into their final positions in mip map at level 0. + mip_map_level_0.copyTo(*mip_map[0]); + + PerformPushPullImpl(readout_level, input_frame, &mip_map); +} + +// Perform sparse data interpolation. +// Generate filter weights and then perform pull-down sampling +// followed by push-up sampling. +// Assumes that the mip_map has already been allocated and +// the data inserted at level 0. +// Results are placed in (*mip_map_ptr)[readout_level]. 
+template +void PushPullFiltering::PerformPushPullImpl( + const int readout_level, const cv::Mat* input_frame, + std::vector* mip_map_ptr) { + const float* filter_weights; + int num_filter_elems; + switch (filter_type_) { + case BINOMIAL_3X3: + num_filter_elems = 9; + filter_weights = binomial3_weights_.data(); + break; + case BINOMIAL_5X5: + num_filter_elems = 25; + filter_weights = binomial5_weights_.data(); + break; + case GAUSSIAN_3X3: + num_filter_elems = 9; + filter_weights = gaussian3_weights_.data(); + break; + case GAUSSIAN_5X5: + num_filter_elems = 25; + filter_weights = gaussian5_weights_.data(); + break; + default: + LOG(FATAL) << "Unknown filter requested."; + } + + const std::vector& mip_map = *mip_map_ptr; + + // Borderless views into mip maps. + std::vector mip_map_views(mip_map.size()); + std::vector mip_map_view_ptrs(mip_map.size()); + for (int l = 0; l < mip_map.size(); ++l) { + mip_map_views[l] = + cv::Mat(*mip_map[l], cv::Range(border_, mip_map[l]->rows - border_), + cv::Range(border_, mip_map[l]->cols - border_)); + + mip_map_view_ptrs[l] = &mip_map_views[l]; + } + + if (use_bilateral_) { + CHECK(input_frame != nullptr); + InitializeImagePyramid(*input_frame, &input_frame_pyramid_); + } + + PullDownSampling(num_filter_elems, filter_weights, mip_map_ptr); + + if (mip_map_visualizer_) { + std::vector is_premultiplied(mip_map_view_ptrs.size(), true); + mip_map_visualizer_->Visualize(mip_map_view_ptrs, true, is_premultiplied); + } + + PushUpSampling(num_filter_elems, filter_weights, readout_level, mip_map_ptr); + + if (mip_map_visualizer_) { + std::vector is_premultiplied(mip_map_view_ptrs.size(), true); + is_premultiplied[readout_level] = false; + mip_map_visualizer_->Visualize(mip_map_view_ptrs, false, is_premultiplied); + } +} + +template +inline const T* PtrOffset(const T* ptr, int offset) { + return reinterpret_cast(reinterpret_cast(ptr) + + offset); +} + +inline void GetFilterOffsets(const cv::Mat& mat, int border, int channels, + 
std::vector* filter_offsets) { + for (int i = -border; i <= border; ++i) { + for (int j = -border; j <= border; ++j) { + filter_offsets->push_back(i * mat.step[0] + sizeof(float) * channels * j); + } + } +} + +// Perform filter operation at locations in zero_pos. +template +inline void FillInZeros(const std::vector& zero_pos, + int num_filter_elems, const float* filter_weights, + int border, cv::Mat* mat) { + std::vector filter_offsets; + GetFilterOffsets(*mat, border, C + 1, &filter_offsets); + for (float* zero_ptr : zero_pos) { + float weight_sum = 0; + float val_sum[C]; + memset(val_sum, 0, C * sizeof(val_sum[0])); + + for (int k = 0; k < num_filter_elems; ++k) { + const float* cur_ptr = PtrOffset(zero_ptr, filter_offsets[k]); + const float w = filter_weights[k] * cur_ptr[C]; + + for (int c = 0; c < C; ++c) { + val_sum[c] += cur_ptr[c] * w; + } + + weight_sum += w; + } + + if (weight_sum > 0) { + const float inv_weight_sum = 1.f / weight_sum; + for (int c = 0; c < C; ++c) { + val_sum[c] *= inv_weight_sum; + zero_ptr[c] = val_sum[c]; + } + } + } +} + +template +void PushPullFiltering::PullDownSampling( + int num_filter_elems, const float* filter_weights, + std::vector* mip_map_ptr) { + const std::vector& mip_map = *mip_map_ptr; + + // Filter pyramid via push-pull. + // We always filter from [border, border] to + // [width - 1 - border, height - 1 - border]. + + for (int l = 1; l < mip_map.size(); ++l) { + CopyNecessaryBorder(mip_map[l - 1]); + mip_map[l]->setTo(0); + + // Local copy for faster access. + const int border = border_; + const int channels = C + 1; + + // Signal level to weight_multiplier. + weight_multiplier_->SetLevel(l - 1, true); + + std::vector filter_offsets; + GetFilterOffsets(*mip_map[l - 1], border, channels, &filter_offsets); + + const std::vector* space_offsets = + use_bilateral_ ? 
&pyramid_space_offsets_[l - 1] : NULL; + + const int height = mip_map[l]->rows - 2 * border; + const int width = mip_map[l]->cols - 2 * border; + + // Downweight bilateral influence as level progress as due to iterative + // downsampling image becomes less and less reliable. + const float bilateral_scale = + std::pow(options_.pull_bilateral_scale(), l - 1); + + // Filter odd pixels (downsample). + for (int i = 0; i < height; ++i) { + float* dst_ptr = mip_map[l]->ptr(i + border) + border * channels; + const float* src_ptr = + mip_map[l - 1]->ptr(2 * i + border) + border * channels; + const uint8* img_ptr = + use_bilateral_ ? (input_frame_pyramid_[l - 1].template ptr( + 2 * i + border) + + border * 3) + : NULL; + + for (int j = 0; j < width; ++j, dst_ptr += channels, + src_ptr += 2 * channels, img_ptr += 2 * 3) { + float weight_sum = 0; + float val_sum[C]; + memset(val_sum, 0, C * sizeof(val_sum[0])); + + const int i2 = i * 2; + const int j2 = j * 2; + if (use_bilateral_) { + for (int k = 0; k < num_filter_elems; ++k) { + const float* cur_ptr = PtrOffset(src_ptr, filter_offsets[k]); + + // If neighbor is not important, skip further evaluation. + if (cur_ptr[C] < kBilateralEps * kBilateralEps) { + continue; + } + + const uint8* match_ptr = PtrOffset(img_ptr, (*space_offsets)[k]); + + float bilateral_w = bilateral_lut_[ColorDiffL1(img_ptr, match_ptr) * + bilateral_scale]; + + const float multiplier = weight_multiplier_->GetWeight( + src_ptr, cur_ptr, img_ptr, j2, i2); + + const float w = filter_weights[k] * bilateral_w * multiplier; + + // cur_ptr is already pre-multiplied with importance + // weight cur_ptr[C]. 
+ for (int c = 0; c < C; ++c) { + val_sum[c] += cur_ptr[c] * w; + } + weight_sum += w * cur_ptr[C]; + } + } else { + for (int k = 0; k < num_filter_elems; ++k) { + const float* cur_ptr = PtrOffset(src_ptr, filter_offsets[k]); + const float multiplier = + weight_multiplier_->GetWeight(src_ptr, cur_ptr, NULL, j2, i2); + const float w = filter_weights[k] * multiplier; + + // cur_ptr is already pre-multiplied with importance + // weight cur_ptr[C]. + for (int c = 0; c < C; ++c) { + val_sum[c] += cur_ptr[c] * w; + } + + weight_sum += w * cur_ptr[C]; + } + } + + DCHECK_GE(weight_sum, 0); + + if (weight_sum >= kBilateralEps * kBilateralEps) { + const float inv_weight_sum = 1.f / weight_sum; + for (int c = 0; c < C; ++c) { + dst_ptr[c] = val_sum[c] * inv_weight_sum; + } + } else { + for (int c = 0; c <= C; ++c) { + dst_ptr[c] = 0; + } + } + + const float prop_scale = options_.pull_propagation_scale(); + weight_sum *= prop_scale; + dst_ptr[C] = std::min(1.0f, weight_sum); + } + } + + if (weight_adjuster_) { + CopyNecessaryBorder(mip_map[l]); + cv::Mat mip_map_view(*mip_map[l], + cv::Range(border_, mip_map[l]->rows - border_), + cv::Range(border_, mip_map[l]->cols - border_)); + cv::Mat image_view; + if (use_bilateral_) { + image_view = + cv::Mat(input_frame_pyramid_[l], + cv::Range(border_, input_frame_pyramid_[l].rows - border_), + cv::Range(border_, input_frame_pyramid_[l].cols - border_)); + } + weight_adjuster_->AdjustWeights( + l, true, use_bilateral_ ? &image_view : NULL, &mip_map_view); + } + + // Pre-multiply weight for next level. + for (int i = 0; i < height; ++i) { + float* data_ptr = mip_map[l]->ptr(i + border_) + border * channels; + for (int j = 0; j < width; ++j, data_ptr += channels) { + for (int c = 0; c < C; ++c) { + data_ptr[c] *= data_ptr[C]; + } + } + } + } // end level processing. 
+} + +template +void PushPullFiltering::PushUpSampling( + int num_filter_elems, const float* filter_weights, int readout_level, + std::vector* mip_map_ptr) { + const std::vector& mip_map = *mip_map_ptr; + + for (int l = mip_map.size() - 2; l >= readout_level; --l) { + CopyNecessaryBorder(mip_map[l + 1]); + + // Signal mip map level to weight_multiplier. + weight_multiplier_->SetLevel(l, false); + + // Instead of upsampling we use 4 special tap filters. See comment at above + // function. + std::vector tap_weights[4]; + std::vector tap_offsets[4]; + std::vector tap_space_offsets[4]; + const int channels = C + 1; + + switch (filter_type_) { + case BINOMIAL_3X3: + case GAUSSIAN_3X3: + GetUpsampleTaps3(filter_weights, + use_bilateral_ ? &pyramid_space_offsets_[l] : NULL, + channels * sizeof(float), mip_map[l + 1]->step[0], + tap_weights, tap_offsets, tap_space_offsets); + break; + case BINOMIAL_5X5: + case GAUSSIAN_5X5: + GetUpsampleTaps5(filter_weights, + use_bilateral_ ? &pyramid_space_offsets_[l] : NULL, + channels * sizeof(float), mip_map[l + 1]->step[0], + tap_weights, tap_offsets, tap_space_offsets); + break; + default: + LOG(FATAL) << "Filter unknown"; + } + + // Local copy for faster access. + const int border = border_; + const int height = mip_map[l]->rows - 2 * border; + const int width = mip_map[l]->cols - 2 * border; + + const float bilateral_scale = + std::pow(options_.push_bilateral_scale(), l + 1); + + // Apply filter. + // List of zero positions that need to be smoothed. + std::vector zero_pos; + + for (int i = 0; i < height; ++i) { + float* dst_ptr = mip_map[l]->ptr(i + border) + border * channels; + const float* src_ptr = + mip_map[l + 1]->ptr(i / 2 + border) + border * channels; + const uint8* img_ptr = + use_bilateral_ + ? (input_frame_pyramid_[l].template ptr(i + border) + + border * 3) + : NULL; + + // Select tap offset. + const int tap_kind_row = 2 * (i % 2); // odd row, case 2 & 3. 
+ + for (int j = 0; j < width; + // Increase src_ptr only for even rows (i.e. previous one was odd). + src_ptr += channels * (j % 2), + ++j, dst_ptr += channels, img_ptr += 3) { + if (dst_ptr[C] >= 1) { // Skip if already saturated. + continue; + } + + const int tap_kind = tap_kind_row + j % 2; + const std::vector& tap_weight = tap_weights[tap_kind]; + const std::vector& tap_offset = tap_offsets[tap_kind]; + const int tap_size = tap_weight.size(); + + float weight_sum = 0; + float val_sum[C]; + memset(val_sum, 0, C * sizeof(val_sum[0])); + + if (use_bilateral_) { + const std::vector& tap_space_offset = + tap_space_offsets[tap_kind]; + for (int k = 0; k < tap_size; ++k) { + const float* cur_ptr = PtrOffset(src_ptr, tap_offset[k]); + + // If neighbor is not important, skip further evaluation. + if (cur_ptr[C] < kBilateralEps * kBilateralEps) { + continue; + } + + const uint8* match_ptr = PtrOffset(img_ptr, tap_space_offset[k]); + float bilateral_w = bilateral_lut_[ColorDiffL1(img_ptr, match_ptr) * + bilateral_scale]; + + const float multiplier = + weight_multiplier_->GetWeight(src_ptr, cur_ptr, img_ptr, j, i); + + const float w = tap_weight[k] * bilateral_w * multiplier; + + // Values in above mip map level are pre-multiplied by + // importance weight cur_ptr[C]. + for (int c = 0; c < C; ++c) { + val_sum[c] += cur_ptr[c] * w; + } + weight_sum += w * cur_ptr[C]; + } + } else { + for (int k = 0; k < tap_size; ++k) { + const float* cur_ptr = PtrOffset(src_ptr, tap_offset[k]); + const float multiplier = + weight_multiplier_->GetWeight(src_ptr, cur_ptr, NULL, j, i); + + const float w = tap_weight[k] * multiplier; + + // Values in above mip map level are pre-multiplied by weight + // cur_ptr[C]. 
+ for (int c = 0; c < C; ++c) { + val_sum[c] += cur_ptr[c] * w; + } + + weight_sum += w * cur_ptr[C]; + } + } + + if (weight_sum >= kBilateralEps * kBilateralEps) { + const float inv_weight_sum = 1.f / weight_sum; + for (int c = 0; c < C; ++c) { + val_sum[c] *= inv_weight_sum; + } + } else { + weight_sum = 0; + for (int c = 0; c < C; ++c) { + val_sum[c] = 0; + } + + zero_pos.push_back(dst_ptr); + } + + const float prop_scale = options_.push_propagation_scale(); + weight_sum *= prop_scale; + + // Maximum influence of pushed result on current pixel. + const float alpha_inv = std::min(1.0f - dst_ptr[C], weight_sum); + const float denom = + 1.0f / (dst_ptr[C] + alpha_inv + kBilateralEps * kBilateralEps); + + // Blend (dst_ptr is premultiplied with weight dst_ptr[C], + // val_sum is normalized). + for (int c = 0; c < C; ++c) { + dst_ptr[c] = (dst_ptr[c] + val_sum[c] * alpha_inv) * denom; + } + + // Increase current confidence by above sample. + dst_ptr[C] = + std::min(1.0f, dst_ptr[C] + std::min(weight_sum, alpha_inv)); + } + } + + if (weight_adjuster_) { + CopyNecessaryBorder(mip_map[l]); + cv::Mat mip_map_view(*mip_map[l], + cv::Range(border_, mip_map[l]->rows - border_), + cv::Range(border_, mip_map[l]->cols - border_)); + cv::Mat image_view; + if (use_bilateral_) { + image_view = + cv::Mat(input_frame_pyramid_[l], + cv::Range(border, input_frame_pyramid_[l].rows - border), + cv::Range(border, input_frame_pyramid_[l].cols - border)); + } + weight_adjuster_->AdjustWeights( + l, false, use_bilateral_ ? &image_view : NULL, &mip_map_view); + } + + // Pre-multiply with weight for next level if haven't reached base level + // yet. (Base level is not pre-multiplied so result can be used directly). 
+ if (l != readout_level) { + for (int i = 0; i < height; ++i) { + float* data_ptr = + mip_map[l]->ptr(i + border_) + border * channels; + for (int j = 0; j < width; ++j, data_ptr += channels) { + for (int c = 0; c < C; ++c) { + data_ptr[c] *= data_ptr[C]; + } + } + } + } else { + CopyNecessaryBorder(mip_map[l]); + FillInZeros(zero_pos, num_filter_elems, filter_weights, border_, + mip_map[l]); + } + } // end mip map levels. +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_PUSH_PULL_FILTERING_H_ diff --git a/mediapipe/util/tracking/push_pull_filtering.proto b/mediapipe/util/tracking/push_pull_filtering.proto new file mode 100644 index 000000000..fa27e872f --- /dev/null +++ b/mediapipe/util/tracking/push_pull_filtering.proto @@ -0,0 +1,39 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +message PushPullOptions { + // Sigma for color difference. + optional float bilateral_sigma = 1 [default = 20.0]; + + // Determines how fast confident values can propagate. Filters are normalized, + // such that confidence dissipates quickly instead of propagating. + // To ensure confidence propagates the importance weight is scaled by the + // scalars specified below. Larger values yield quicker propagation. 
+  optional float pull_propagation_scale = 3 [default = 8.0];
+  optional float push_propagation_scale = 4 [default = 8.0];
+
+  // Above bilateral sigma is scaled at each level by the specified scale
+  // (for push and pull phase). This is because iterative downsampling of the
+  // guidance image introduces errors, making bilateral weighting increasingly
+  // erroneous.
+  optional float pull_bilateral_scale = 5 [default = 0.7];
+  optional float push_bilateral_scale = 6 [default = 0.9];
+
+  // Deprecated fields.
+  extensions 2;
+}
diff --git a/mediapipe/util/tracking/region_flow.cc b/mediapipe/util/tracking/region_flow.cc
new file mode 100644
index 000000000..2d91cdb67
--- /dev/null
+++ b/mediapipe/util/tracking/region_flow.cc
@@ -0,0 +1,799 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/util/tracking/region_flow.h"
+
+#include <cmath>
+
+#include <algorithm>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "absl/strings/str_cat.h"
+#include "mediapipe/framework/port/integral_types.h"
+#include "mediapipe/util/tracking/measure_time.h"
+#include "mediapipe/util/tracking/parallel_invoker.h"
+
+namespace mediapipe {
+
+namespace {
+
+bool IsPointWithinBounds(const Vector2_f& pt, float bounds, int frame_width,
+                         int frame_height) {
+  // Ensure stability under float -> rounding operations. 
+ if (pt.x() - 0.5f >= bounds && pt.x() + 0.5f <= frame_width - 1 - bounds && + pt.y() - 0.5f >= bounds && pt.y() + 0.5f <= frame_height - 1 - bounds) { + return true; + } else { + return false; + } +} + +} // namespace. + +void GetRegionFlowFeatureList(const RegionFlowFrame& region_flow_frame, + int distance_from_border, + RegionFlowFeatureList* flow_feature_list) { + CHECK(flow_feature_list); + flow_feature_list->clear_feature(); + const int frame_width = region_flow_frame.frame_width(); + const int frame_height = region_flow_frame.frame_height(); + flow_feature_list->set_frame_width(frame_width); + flow_feature_list->set_frame_height(frame_height); + flow_feature_list->set_unstable(region_flow_frame.unstable_frame()); + flow_feature_list->set_distance_from_border(distance_from_border); + flow_feature_list->set_blur_score(region_flow_frame.blur_score()); + + for (const auto& region_flow : region_flow_frame.region_flow()) { + for (const auto& feature : region_flow.feature()) { + if (distance_from_border > 0) { + if (!IsPointWithinBounds(FeatureLocation(feature), distance_from_border, + frame_width, frame_height) || + !IsPointWithinBounds(FeatureMatchLocation(feature), + distance_from_border, frame_width, + frame_height)) { + continue; + } + } + + flow_feature_list->add_feature()->CopyFrom(feature); + } + } +} + +float RegionFlowFeatureDistance(const PatchDescriptor& patch_desc_1, + const PatchDescriptor& patch_desc_2) { + DCHECK_EQ(patch_desc_1.data_size(), patch_desc_2.data_size()); + DCHECK_GE(patch_desc_1.data_size(), 3); + + constexpr int kNumMeans = 3; + float sq_distance_sum = 0; + for (int n = 0; n < kNumMeans; ++n) { + const float distance = patch_desc_1.data(n) - patch_desc_2.data(n); + sq_distance_sum += distance * distance; + } + + return std::sqrt(sq_distance_sum); +} + +void ResetRegionFlowFeatureIRLSWeights( + float value, RegionFlowFeatureList* flow_feature_list) { + for (auto& feature : *flow_feature_list->mutable_feature()) { + 
feature.set_irls_weight(value); + } +} + +double RegionFlowFeatureIRLSSum(const RegionFlowFeatureList& feature_list) { + double sum = 0.0; + for (const auto& feature : feature_list.feature()) { + sum += feature.irls_weight(); + } + return sum; +} + +void ClampRegionFlowFeatureIRLSWeights(float lower, float upper, + RegionFlowFeatureView* feature_view) { + for (auto& feature_ptr : *feature_view) { + if (feature_ptr->irls_weight() < lower) { + feature_ptr->set_irls_weight(lower); + } else if (feature_ptr->irls_weight() > upper) { + feature_ptr->set_irls_weight(upper); + } + } +} + +void ComputeRegionFlowFeatureTexturedness( + const RegionFlowFeatureList& flow_feature_list, bool use_15percent_as_max, + std::vector* texturedness) { + CHECK(texturedness != nullptr); + *texturedness = std::vector(flow_feature_list.feature_size(), 1.0f); + + int texture_idx = 0; + for (auto feature = flow_feature_list.feature().begin(); + feature != flow_feature_list.feature().end(); ++feature, ++texture_idx) { + const float feature_stdev_l1 = + PatchDescriptorColorStdevL1(feature->feature_descriptor()); + + if (feature_stdev_l1 < 0.0f) { + LOG_IF(WARNING, + []() { + static int k = 0; + return k++ < 2; + }()) + << "Feature descriptor does not contain variance information. Was " + << "ComputeRegionFlowFeatureDescriptors called?"; + continue; + } + + // feature_stdev_l1 is within [0, 3 * 128 = 384] + float alpha = feature_stdev_l1 / 384.f; + + // In [0, 1], 0 = low texture, 1 = high texture. 
Scale such that around + // 15% of per channel maximum stdev is considered totally textured + // (1 / 0.15 * 3 ~ 18); + if (use_15percent_as_max) { + alpha = std::min(1.0f, alpha * 18.f); + } + + (*texturedness)[texture_idx] = alpha; + } +} + +void TextureFilteredRegionFlowFeatureIRLSWeights( + float low_texture_threshold, float low_texture_outlier_clamp, + RegionFlowFeatureList* flow_feature_list) { + std::vector texturedness; + ComputeRegionFlowFeatureTexturedness(*flow_feature_list, true, &texturedness); + + int texture_idx = 0; + for (auto feature = flow_feature_list->mutable_feature()->begin(); + feature != flow_feature_list->mutable_feature()->end(); + ++feature, ++texture_idx) { + if (feature->irls_weight() == 0.0f) { + continue; + } + + if (texturedness[texture_idx] < low_texture_threshold && + feature->irls_weight() < low_texture_outlier_clamp) { + feature->set_irls_weight(low_texture_outlier_clamp); + } else { + feature->set_irls_weight(feature->irls_weight() / + (texturedness[texture_idx] + 1.e-6f)); + } + } +} + +void CornerFilteredRegionFlowFeatureIRLSWeights( + float low_corner_threshold, float low_corner_outlier_clamp, + RegionFlowFeatureList* flow_feature_list) { + for (auto feature = flow_feature_list->mutable_feature()->begin(); + feature != flow_feature_list->mutable_feature()->end(); ++feature) { + if (feature->irls_weight() == 0.0f) { + continue; + } + + const float corner_response = feature->corner_response(); + + if (corner_response < low_corner_threshold && + feature->irls_weight() < low_corner_threshold) { + feature->set_irls_weight(low_corner_outlier_clamp); + } else { + feature->set_irls_weight(feature->irls_weight() / + (corner_response + 1.e-6f)); + } + } +} + +void GetRegionFlowFeatureIRLSWeights( + const RegionFlowFeatureList& flow_feature_list, + std::vector* irls_weights) { + CHECK(irls_weights != nullptr); + irls_weights->clear(); + irls_weights->reserve(flow_feature_list.feature_size()); + for (auto feature = 
flow_feature_list.feature().begin(); + feature != flow_feature_list.feature().end(); ++feature) { + irls_weights->push_back(feature->irls_weight()); + } +} + +void SetRegionFlowFeatureIRLSWeights(const std::vector& irls_weights, + RegionFlowFeatureList* flow_feature_list) { + CHECK(flow_feature_list != nullptr); + CHECK_EQ(irls_weights.size(), flow_feature_list->feature_size()); + int idx = 0; + for (auto feature = flow_feature_list->mutable_feature()->begin(); + feature != flow_feature_list->mutable_feature()->end(); + ++feature, ++idx) { + feature->set_irls_weight(irls_weights[idx]); + } +} + +int CountIgnoredRegionFlowFeatures( + const RegionFlowFeatureList& flow_feature_list, float threshold) { + int count = 0; + for (auto feature = flow_feature_list.feature().begin(); + feature != flow_feature_list.feature().end(); ++feature) { + if (feature->irls_weight() <= threshold) { + ++count; + } + } + return count; +} + +namespace { + +struct RegionFlowLocator { + bool operator()(const RegionFlow& lhs, const RegionFlow& rhs) const { + return lhs.region_id() < rhs.region_id(); + } +}; + +} // namespace. 
+ +const RegionFlow* GetRegionFlowById(int region_id, + const RegionFlowFrame& flow_frame) { + RegionFlow region_flow; + region_flow.set_region_id(region_id); + + const auto& region_pos = std::lower_bound(flow_frame.region_flow().begin(), + flow_frame.region_flow().end(), + region_flow, RegionFlowLocator()); + + if (region_pos == flow_frame.region_flow().end() || + region_pos->region_id() != region_id) { + return NULL; + } else { + return &(*region_pos); + } +} + +RegionFlow* GetMutableRegionFlowById(int region_id, + RegionFlowFrame* flow_frame) { + RegionFlow region_flow; + region_flow.set_region_id(region_id); + + auto region_pos = std::lower_bound(flow_frame->mutable_region_flow()->begin(), + flow_frame->mutable_region_flow()->end(), + region_flow, RegionFlowLocator()); + + if (region_pos == flow_frame->mutable_region_flow()->end() || + region_pos->region_id() != region_id) { + return NULL; + } else { + return &(*region_pos); + } +} + +void SortRegionFlowById(RegionFlowFrame* flow_frame) { + std::sort(flow_frame->mutable_region_flow()->begin(), + flow_frame->mutable_region_flow()->end(), RegionFlowLocator()); +} + +void InvertRegionFlow(const RegionFlowFrame& region_flow_frame, + RegionFlowFrame* inverted_flow_frame) { + CHECK(inverted_flow_frame); + inverted_flow_frame->CopyFrom(region_flow_frame); + for (auto& region_flow : *inverted_flow_frame->mutable_region_flow()) { + region_flow.set_centroid_x(region_flow.centroid_x() + region_flow.flow_x()); + region_flow.set_centroid_y(region_flow.centroid_y() + region_flow.flow_y()); + region_flow.set_flow_x(-region_flow.flow_x()); + region_flow.set_flow_y(-region_flow.flow_y()); + + for (auto& feature : *region_flow.mutable_feature()) { + feature.set_x(feature.x() + feature.dx()); + feature.set_y(feature.y() + feature.dy()); + feature.set_dx(-feature.dx()); + feature.set_dy(-feature.dy()); + } + } +} + +void InvertRegionFlowFeatureList(const RegionFlowFeatureList& feature_list, + RegionFlowFeatureList* 
inverted_feature_list) { + CHECK(inverted_feature_list); + *inverted_feature_list = feature_list; + for (auto& feature : *inverted_feature_list->mutable_feature()) { + InvertRegionFlowFeature(&feature); + } +} + +void InvertRegionFlowFeature(RegionFlowFeature* feature) { + Vector2_f pt_match = FeatureMatchLocation(*feature); + feature->set_x(pt_match.x()); + feature->set_y(pt_match.y()); + Vector2_f flow = FeatureFlow(*feature); + feature->set_dx(-flow.x()); + feature->set_dy(-flow.y()); +} + +void LimitFeaturesToBounds(int frame_width, int frame_height, float bounds, + RegionFlowFeatureList* feature_list) { + RegionFlowFeatureList limited; + for (const auto& feature : feature_list->feature()) { + if (!IsPointWithinBounds(FeatureLocation(feature), bounds, frame_width, + frame_height)) { + continue; + } else { + limited.add_feature()->CopyFrom(feature); + } + } + feature_list->Swap(&limited); +} + +void NormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list) { + const LinearSimilarityModel norm_model = + LinearSimilarityAdapter::NormalizationTransform( + feature_list->frame_width(), feature_list->frame_height()); + TransformRegionFlowFeatureList(norm_model, feature_list); +} + +void DeNormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list) { + const LinearSimilarityModel norm_model = + LinearSimilarityAdapter::NormalizationTransform( + feature_list->frame_width(), feature_list->frame_height()); + TransformRegionFlowFeatureList(ModelInvert(norm_model), feature_list); +} + +void ScaleSalientPoint(float scale_x, float scale_y, SalientPoint* sp) { + sp->set_norm_point_x(sp->norm_point_x() * scale_x); + sp->set_norm_point_y(sp->norm_point_y() * scale_y); + + const float cos_angle = std::cos(sp->angle()); + const float sin_angle = std::sin(sp->angle()); + Vector2_f major_axis = Vector2_f(cos_angle, sin_angle) * sp->norm_major(); + Vector2_f minor_axis = Vector2_f(-sin_angle, cos_angle) * sp->norm_minor(); + + major_axis[0] *= scale_x; + 
major_axis[1] *= scale_y; + minor_axis[0] *= scale_x; + minor_axis[1] *= scale_y; + + sp->set_norm_major(major_axis.Norm()); + sp->set_norm_minor(minor_axis.Norm()); + sp->set_angle(std::atan2(major_axis.y(), major_axis.x())); + if (sp->angle() < 0) { + sp->set_angle(sp->angle() + M_PI); + } +} + +void ScaleSaliencyList(float scale, bool normalize_to_scale, + SaliencyPointList* saliency_list) { + CHECK(saliency_list != nullptr); + for (auto& point_frame : *saliency_list) { + ScaleSalientPointFrame(scale, normalize_to_scale, &point_frame); + } +} + +void ScaleSalientPointFrame(float scale, bool normalize_to_scale, + SalientPointFrame* saliency) { + CHECK(saliency != nullptr); + float saliency_scale = scale; + if (normalize_to_scale) { + float weight_sum = 0.0f; + for (const auto& salient_point : saliency->point()) { + weight_sum += salient_point.weight(); + } + + if (weight_sum > 1e-6f) { + saliency_scale /= weight_sum; + } + } + + for (auto& salient_point : *saliency->mutable_point()) { + salient_point.set_weight(salient_point.weight() * saliency_scale); + } +} + +void ResetSaliencyBounds(float left, float bottom, float right, float top, + SaliencyPointList* saliency_list) { + CHECK(saliency_list != nullptr); + for (auto& point_frame : *saliency_list) { + for (auto& salient_point : *point_frame.mutable_point()) { + salient_point.set_left(left); + salient_point.set_bottom(bottom); + salient_point.set_right(right); + salient_point.set_top(top); + } + } +} + +bool EllipseFromCovariance(float a, float bc, float d, + Vector2_f* axis_magnitude, float* angle) { + CHECK(axis_magnitude != nullptr); + CHECK(angle != nullptr); + + // Get trace and determinant + const float trace = a + d; + const float det = a * d - bc * bc; + + // If area is very small (small axis in at least one direction) + // axis are unreliable -> return false. + if (det < 4) { // Measured in sq. pixels. 
+ *axis_magnitude = Vector2_f(1, 1); + *angle = 0; + return false; + } + + const float discriminant = std::max(0.f, trace * trace * 0.25f - det); + const float sqrt_discrm = std::sqrt(discriminant); + + // Get eigenvalues. + float eig_1 = trace * 0.5f + sqrt_discrm; + float eig_2 = trace * 0.5f - sqrt_discrm; + + // Compute eigenvectors. + Vector2_f vec_1, vec_2; + + if (fabs(bc) < 1e-6) { + // Right-most case, we already have diagonal matrix. + vec_1.Set(1, 0); + vec_2.Set(0, 1); + } else { + vec_1.Set(eig_1 - d, bc); + vec_2.Set(eig_2 - d, bc); + + // Normalize. Norm is always > 0, as bc is > 0 via above if. + vec_1 /= vec_1.Norm(); + vec_2 /= vec_2.Norm(); + } + + // Select positive eigenvector. + if (eig_1 < 0) { + eig_1 *= -1.f; + } + + if (eig_2 < 0) { + eig_2 *= -1.f; + } + + // Major first. + if (eig_1 < eig_2) { + using std::swap; + swap(vec_1, vec_2); + swap(eig_1, eig_2); + } + + *axis_magnitude = Vector2_f(std::sqrt(eig_1), std::sqrt(eig_2)); + *angle = std::atan2(vec_1.y(), vec_1.x()); + + return eig_2 >= 1.5f; // Measurement in pixels. +} + +void BoundingBoxFromEllipse(const Vector2_f& center, float norm_major_axis, + float norm_minor_axis, float angle, + std::vector* bounding_box) { + CHECK(bounding_box != nullptr); + float dim_x; + float dim_y; + if (angle < M_PI * 0.25 || angle > M_PI * 0.75) { + dim_x = norm_major_axis; + dim_y = norm_minor_axis; + } else { + dim_y = norm_major_axis; + dim_x = norm_minor_axis; + } + + // Construct bounding for for axes aligned ellipse. + *bounding_box = { + Vector2_f(-dim_x, -dim_y), + Vector2_f(-dim_x, dim_y), + Vector2_f(dim_x, dim_y), + Vector2_f(dim_x, -dim_y), + }; + + for (Vector2_f& corner : *bounding_box) { + corner += center; + } +} + +void CopyToEmptyFeatureList(RegionFlowFeatureList* src, + RegionFlowFeatureList* dst) { + CHECK(src != nullptr); + CHECK(dst != nullptr); + + // Swap out features for empty list. 
+ RegionFlowFeatureList empty_list; + empty_list.mutable_feature()->Swap(src->mutable_feature()); + + // Copy. + dst->CopyFrom(*src); + + // Swap back. + src->mutable_feature()->Swap(empty_list.mutable_feature()); + + // src_features should be empty as in the beginning. + CHECK_EQ(0, empty_list.feature_size()); +} + +void IntersectRegionFlowFeatureList( + const RegionFlowFeatureList& to, + std::function to_location_eval, + RegionFlowFeatureList* from, RegionFlowFeatureList* result, + std::vector* source_indices) { + CHECK(from != nullptr); + CHECK(result != nullptr); + CHECK(from->long_tracks()) << "Intersection only works for long features"; + CHECK(to.long_tracks()) << "Intersection only works for long features"; + + // Hash features in to, based on track_id. + std::unordered_map track_map; + for (const auto& feature : to.feature()) { + track_map[feature.track_id()] = &feature; + } + + // Initialize result. + CopyToEmptyFeatureList(from, result); + const int num_from_features = from->feature_size(); + result->mutable_feature()->Reserve(num_from_features); + + int feature_idx = 0; + for (const auto& feature : from->feature()) { + auto find_location = track_map.find(feature.track_id()); + if (find_location != track_map.end()) { + const Vector2_f diff = + to_location_eval(*find_location->second) - FeatureLocation(feature); + + RegionFlowFeature* new_feature = result->add_feature(); + new_feature->CopyFrom(feature); + new_feature->set_dx(diff.x()); + new_feature->set_dy(diff.y()); + if (source_indices != nullptr) { + source_indices->push_back(feature_idx); + } + } + + ++feature_idx; + } +} + +void LongFeatureStream::AddFeatures(const RegionFlowFeatureList& feature_list, + bool check_connectivity, + bool purge_non_present_features) { + if (!feature_list.long_tracks()) { + LOG(ERROR) << "Feature stream should be used only used with long feature " + << "tracks. 
Ensure POLICY_LONG_FEATURE was used for " + << "RegionFlowComputation."; + return; + } + + if (feature_list.match_frame() == 0) { + // Skip first frame. + return; + } + + if (std::abs(feature_list.match_frame()) != 1) { + LOG(ERROR) << "Only matching frames one frame from current one are " + << "supported"; + return; + } + + // Record id of each track that is present in the current feature_list. + std::unordered_set present_tracks; + for (auto feature : feature_list.feature()) { // Copy feature. + if (feature.track_id() < 0) { + LOG_IF(WARNING, []() { + static int k = 0; + return k++ < 2; + }()) << "Feature does not have a valid track id assigned. Ignoring."; + continue; + } + present_tracks.insert(feature.track_id()); + if (check_connectivity) { + // A new feature should never have been erased before. + CHECK(old_ids_.find(feature.track_id()) == old_ids_.end()) + << "Feature : " << feature.track_id() << "was already removed."; + } + + // Invert the features to be foward or backward according to forward_ flag. + if ((!forward_ && feature_list.match_frame() > 0) || + (forward_ && feature_list.match_frame() < 0)) { + InvertRegionFlowFeature(&feature); + } + + auto find_pos = tracks_.find(feature.track_id()); + if (find_pos != tracks_.end()) { + // Track is present, add to it. 
+ if (check_connectivity) { + CHECK_LT((FeatureLocation(find_pos->second.back()) - + FeatureMatchLocation(feature)) + .Norm2(), + 1e-4); + } + find_pos->second.push_back(feature); + } else { + tracks_[feature.track_id()] = std::vector(1, feature); + } + } + + if (purge_non_present_features) { + std::vector to_be_removed; + for (const auto& track : tracks_) { + if (present_tracks.find(track.first) == present_tracks.end()) { + to_be_removed.push_back(track.first); + if (check_connectivity) { + old_ids_.insert(track.first); + } + } + } + for (int id : to_be_removed) { + tracks_.erase(id); + } + } +} + +void LongFeatureStream::FlattenTrack( + const std::vector& features, + std::vector* result, std::vector* irls_weight, + std::vector* flow) const { + CHECK(result != nullptr); + if (features.empty()) { + return; + } + + if (irls_weight) { + irls_weight->clear(); + } + + if (flow) { + flow->clear(); + } + + if (!forward_) { + // Backward tracking, add first match. + result->push_back(FeatureMatchLocation(features[0])); + } + + for (const auto& feature : features) { + result->push_back(FeatureLocation(feature)); + if (flow) { + flow->push_back(FeatureFlow(feature)); + } + if (irls_weight) { + irls_weight->push_back(feature.irls_weight()); + } + } + + if (forward_) { + // Forward tracking, add last match. + result->push_back(FeatureMatchLocation(features.back())); + } + + // Replicate last irls weight. 
+ if (irls_weight) { + irls_weight->push_back(irls_weight->back()); + } +} + +const std::vector* LongFeatureStream::TrackById( + int id) const { + auto track_pos = tracks_.find(id); + if (track_pos == tracks_.end()) { + return nullptr; + } else { + return &track_pos->second; + } +} + +std::vector LongFeatureStream::FlattenedTrackById(int id) const { + const auto* track = TrackById(id); + if (track != nullptr) { + std::vector points; + FlattenTrack(*track, &points, nullptr, nullptr); + return points; + } else { + return std::vector(); + } +} + +void LongFeatureInfo::AddFeatures(const RegionFlowFeatureList& feature_list) { + if (!feature_list.long_tracks()) { + LOG(ERROR) << "Passed feature list was not computed with long tracks. "; + return; + } + + for (const auto& feature : feature_list.feature()) { + AddFeature(feature); + } + + IncrementFrame(); +} + +void LongFeatureInfo::AddFeature(const RegionFlowFeature& feature) { + if (feature.irls_weight() == 0) { // Skip outliers. + return; + } + const int track_id = feature.track_id(); + if (track_id < 0) { // Skip unassigned ids. + return; + } + auto insert_pos = track_info_.find(track_id); + if (insert_pos == track_info_.end()) { + track_info_[track_id].start = num_frames_; + track_info_[track_id].length = 1; + } else { + ++insert_pos->second.length; + } +} + +void LongFeatureInfo::TrackLengths(const RegionFlowFeatureList& feature_list, + std::vector* track_lengths) const { + CHECK(track_lengths); + const int feature_size = feature_list.feature_size(); + track_lengths->resize(feature_size); + for (int k = 0; k < feature_size; ++k) { + (*track_lengths)[k] = TrackLength(feature_list.feature(k)); + } +} + +int LongFeatureInfo::TrackLength(const RegionFlowFeature& feature) const { + const auto insert_pos = track_info_.find(feature.track_id()); + return insert_pos != track_info_.end() ? 
insert_pos->second.length : 0; +} + +int LongFeatureInfo::TrackStart(const RegionFlowFeature& feature) const { + const auto insert_pos = track_info_.find(feature.track_id()); + return insert_pos != track_info_.end() ? insert_pos->second.start : -1; +} + +void LongFeatureInfo::Reset() { + track_info_.clear(); + num_frames_ = 0; +} + +int LongFeatureInfo::GlobalTrackLength(float percentile) const { + std::vector track_lengths; + track_lengths.reserve(track_info_.size()); + + for (const auto& pair : track_info_) { + track_lengths.push_back(pair.second.length); + } + + if (track_lengths.empty()) { + return 0; + } + + auto percentile_item = + track_lengths.begin() + percentile * track_lengths.size(); + + std::nth_element(track_lengths.begin(), percentile_item, track_lengths.end()); + + return *percentile_item; +} + +void GridTaps(int dim_x, int dim_y, int tap_radius, + std::vector>* taps) { + CHECK(taps); + const int grid_size = dim_x * dim_y; + const int diam = 2 * tap_radius + 1; + taps->resize(grid_size); + + for (int i = 0; i < dim_y; ++i) { + for (int j = 0; j < dim_x; ++j) { + std::vector& grid_bin = (*taps)[i * dim_x + j]; + grid_bin.clear(); + grid_bin.reserve(diam * diam); + for (int k = std::max(0, i - tap_radius), + end_k = std::min(dim_y - 1, i + tap_radius); + k <= end_k; ++k) { + for (int l = std::max(0, j - tap_radius), + end_l = std::min(dim_x - 1, j + tap_radius); + l <= end_l; ++l) { + grid_bin.push_back(k * dim_x + l); + } + } + } + } +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/region_flow.h b/mediapipe/util/tracking/region_flow.h new file mode 100644 index 000000000..2f9b34227 --- /dev/null +++ b/mediapipe/util/tracking/region_flow.h @@ -0,0 +1,636 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Small helper function for RegionFlow. +#ifndef MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_H_ +#define MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +namespace mediapipe { + +typedef RegionFlowFrame::RegionFlow RegionFlow; +typedef std::vector RegionFlowFeatureView; + +inline RegionFlowFeature FeatureFromFloats(float x, float y, float dx, + float dy) { + RegionFlowFeature feat; + feat.set_x(x); + feat.set_y(y); + feat.set_dx(dx); + feat.set_dy(dy); + return feat; +} + +inline RegionFlowFeature FeatureFromVec2f(const Vector2_f& loc, + const Vector2_f& flow) { + RegionFlowFeature feat; + feat.set_x(loc.x()); + feat.set_y(loc.y()); + feat.set_dx(flow.x()); + feat.set_dy(flow.y()); + return feat; +} + +inline Vector2_f FeatureFlow(const RegionFlowFeature& feature) { + return Vector2_f(feature.dx(), feature.dy()); +} + +inline Vector2_f FeatureLocation(const RegionFlowFeature& feature) { + return Vector2_f(feature.x(), feature.y()); +} + +inline Vector2_f FeatureMatchLocation(const RegionFlowFeature& feature) { + return FeatureLocation(feature) + FeatureFlow(feature); +} + +inline Vector2_i FeatureIntLocation(const RegionFlowFeature& feature) { + return Vector2_i::Cast(FeatureLocation(feature) + Vector2_f(0.5f, 0.5f)); +} + +inline Vector2_i FeatureMatchIntLocation(const RegionFlowFeature& 
feature) { + return Vector2_i::Cast(FeatureMatchLocation(feature) + Vector2_f(0.5, 0.5f)); +} + +// Returns L1 norm of color standard deviation of feature descriptor, +// -1 if descriptor information is not present +// (e.g. if ComputeRegionFlowFeatureDescriptors was not called previously). +// Specifically returns stdev_red + stdev_blue + stdev_green. +inline float PatchDescriptorColorStdevL1(const PatchDescriptor& descriptor) { + constexpr int kRedIdx = 3; + constexpr int kGreenIdx = 6; + constexpr int kBlueIdx = 8; + DCHECK_GE(descriptor.data(kRedIdx), 0); + DCHECK_GE(descriptor.data(kGreenIdx), 0); + DCHECK_GE(descriptor.data(kBlueIdx), 0); + + if (descriptor.data_size() > kBlueIdx) { + return std::sqrt(descriptor.data(kRedIdx)) + + std::sqrt(descriptor.data(kGreenIdx)) + + std::sqrt(descriptor.data(kBlueIdx)); + } else { + return -1.0f; + } +} + +// Extracts features from region flow. Set distance_from_border > 0 to ensure +// feature and matched location are at least the specified distance away +// from the frame rectangle (test is not executed if distance_from_border <= 0), +// so that feature descriptors can be computed (see function below). +void GetRegionFlowFeatureList(const RegionFlowFrame& flow_frame, + int distance_from_border, + RegionFlowFeatureList* flow_feature_list); + +// Returns L2 norm of difference of mean color (first 3 dimensions of +// feature descriptors). +float RegionFlowFeatureDistance(const PatchDescriptor& patch_desc_1, + const PatchDescriptor& patch_desc_2); + +// Resets IRLS weight of each RegionFlowFeature to value. +void ResetRegionFlowFeatureIRLSWeights( + float value, RegionFlowFeatureList* flow_feature_list); + +// Returns sum of feature's irls weights. +double RegionFlowFeatureIRLSSum(const RegionFlowFeatureList& feature_list); + +// Computes per region flow feature texturedness score. Score is within [0, 1], +// where 0 means low texture and 1 high texture. 
Requires for each feature +// descriptor to be computed (via ComputeRegionFlowFeatureDescriptors). If +// missing, LOG(WARNING) is issued and value defaults to 1. +// If use_15percent_as_max is set, score is scaled and threshold back to [0, 1] +// such that 1 is assumed at 15% of maximum PER channel variance. +void ComputeRegionFlowFeatureTexturedness( + const RegionFlowFeatureList& region_flow_feature_list, + bool use_15percent_as_max, std::vector* texturedness); + +// IRLS weights are multiplied by inverse texturedness (expressed as variance +// of feature descriptor), effectively upweighting outliers if in low textured +// areas. Features with texturedness below low_texture_threshold can be +// optionally clamped to low_texture_outlier_clamp (set to -1 for no clamping). +void TextureFilteredRegionFlowFeatureIRLSWeights( + float low_texture_threshold, float low_texture_outlier_clamp, + RegionFlowFeatureList* flow_feature_list); + +// Same as above but normalizes w.r.t. corner response. +void CornerFilteredRegionFlowFeatureIRLSWeights( + float low_corner_threshold, float low_corner_outlier_clamp, + RegionFlowFeatureList* flow_feature_list); + +// Simple setter and getter methods for irls weights. +void GetRegionFlowFeatureIRLSWeights( + const RegionFlowFeatureList& flow_feature_list, + std::vector* irls_weights); + +void SetRegionFlowFeatureIRLSWeights(const std::vector& irls_weights, + RegionFlowFeatureList* flow_feature_list); + +// Counts number of region flow features with an irls weight of less than or +// equal to threshold. +int CountIgnoredRegionFlowFeatures( + const RegionFlowFeatureList& flow_feature_list, float threshold); + +// Locates region with id region_id in RegionFlowFrame via binary search. +// Returns NULL if no region with specied region_id is present. +const RegionFlow* GetRegionFlowById(int region_id, + const RegionFlowFrame& flow_frame); + +// Same as above for mutable RegionFlow. 
+RegionFlow* GetMutableRegionFlowById(int region_id, + RegionFlowFrame* flow_frame); + +void SortRegionFlowById(RegionFlowFrame* flow_frame); + +// Switches each feature with its correspondence, i.e. +// (x, y) <-> (x + dx, x + dy), (dx, dy) -> (-dx, -dy). Same holds for centroid +// and mean flow. Note: Member fundamental_matrix is invalid after inversion, +// FeaturePointLists are not affected from inversion. +void InvertRegionFlow(const RegionFlowFrame& flow_frame, + RegionFlowFrame* inverted_flow_frame); + +// Same as above for feature lists. +void InvertRegionFlowFeatureList(const RegionFlowFeatureList& feature_list, + RegionFlowFeatureList* inverted_feature_list); + +// Inverts a single feature. +void InvertRegionFlowFeature(RegionFlowFeature* feature); + +// Removes features that are out bounds of the domain: +// [bounds, frame_width - bounds] x [bounds, frame_height - bounds]. +void LimitFeaturesToBounds(int frame_width, int frame_height, float bounds, + RegionFlowFeatureList* feature_list); + +// List of saliency points for each frame. +typedef std::deque SaliencyPointList; + +// Normalizes region flow by frame diameter, i.e. uniform downscale such that +// feature list fits within [0, 1]. +void NormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list); +// Inverse of above operation. +void DeNormalizeRegionFlowFeatureList(RegionFlowFeatureList* feature_list); + +// Templated implementations. +// Applies model to each feature and displacement vector. 
+template +void TransformRegionFlowFeatureList(const Model& model, + RegionFlowFeatureList* flow_feature_list) { + for (auto& feature : *flow_feature_list->mutable_feature()) { + Vector2_f pt = + ModelAdapter::TransformPoint(model, FeatureLocation(feature)); + Vector2_f match = ModelAdapter::TransformPoint( + model, FeatureMatchLocation(feature)); + feature.set_x(pt.x()); + feature.set_y(pt.y()); + feature.set_dx(match.x() - pt.x()); + feature.set_dy(match.y() - pt.y()); + } +} + +// Similar to above but applies transformation to each feature to derive +// matching location, according to the formula: +// (dx, dy) <-- a * (transformed location - location) + b * (dx, dy) +// e.g. with b = 0, and a = 1, (dx, dy) is replaced with distance +// between transformed location and original location. +// If set_match == true, the original feature location is replaced +// with its matching location. +template +void RegionFlowFeatureListViaTransform( + const Model& model, RegionFlowFeatureList* flow_feature_list, float a, + float b, bool set_match, const MixtureRowWeights* row_weights = nullptr) { + for (auto& feature : *flow_feature_list->mutable_feature()) { + Vector2_f match = + ModelAdapter::TransformPoint(model, FeatureLocation(feature)); + feature.set_dx(b * feature.dx() + a * (match.x() - feature.x())); + feature.set_dy(b * feature.dy() + a * (match.y() - feature.y())); + if (set_match) { + feature.set_x(match.x()); + feature.set_y(match.y()); + } + } +} + +template <> +inline void RegionFlowFeatureListViaTransform( + const MixtureHomography& mix, RegionFlowFeatureList* flow_feature_list, + float a, float b, bool set_match, const MixtureRowWeights* row_weights) { + CHECK(row_weights) << "Row weights required for mixtures."; + + for (auto& feature : *flow_feature_list->mutable_feature()) { + const float* weights = row_weights->RowWeights(feature.y()); + Vector2_f match = MixtureHomographyAdapter::TransformPoint( + mix, weights, FeatureLocation(feature)); + 
feature.set_dx(b * feature.dx() + a * (match.x() - feature.x())); + feature.set_dy(b * feature.dy() + a * (match.y() - feature.y())); + if (set_match) { + feature.set_x(match.x()); + feature.set_y(match.y()); + } + } +} + +// Helper implementation function for functions below. +// Returns pair of where predicate result is +// the boolean result of applying the predicate to the feature. +template +std::pair GetFilteredWeightImpl(const Predicate& predicate, + float reset_value, + const RegionFlowFeature& feature) { + if (feature.irls_weight() == 0.0f) { + return std::make_pair(0.0f, false); // Zero is false by default. + } else if (!predicate(feature)) { + return std::make_pair(reset_value, false); + } else { + return std::make_pair(feature.irls_weight(), true); + } +} + +// If predicate evaluates to false, corresponding irls weight is set to zero. +// Returns number of features with non-zero irls weight. +// Interface for predicate: bool operator()(const RegionFlowFeature&) const +// Example: Predicate compares registration error of a feature under some linear +// model and returns true if error is below some threshold. Consequently all +// features having registration error above the threshold are set to weight zero +// effectively ignoring those features during estimation. +template +int FilterRegionFlowFeatureList(const Predicate& predicate, float reset_value, + RegionFlowFeatureList* flow_feature_list) { + CHECK(flow_feature_list != nullptr); + int num_passing_features = 0; + for (auto& feature : *flow_feature_list->mutable_feature()) { + std::pair filter_result = + GetFilteredWeightImpl(predicate, reset_value, feature); + feature.set_irls_weight(filter_result.first); + if (filter_result.second) { + ++num_passing_features; + } + } + + return num_passing_features; +} + +// Same function as above, but instead of setting the corresponding weights, +// returns resulting weights in a float vector. 
+template +int FilterRegionFlowFeatureWeights(const Predicate& predicate, + float reset_value, + const RegionFlowFeatureList& feature_list, + std::vector* result_weights) { + CHECK(result_weights != nullptr); + result_weights->clear(); + + int num_passing_features = 0; + for (auto feature : feature_list.feature()) { + std::pair filter_result = + GetFilteredWeightImpl(predicate, reset_value, feature); + result_weights->push_back(filter_result.first); + if (filter_result.second) { + ++num_passing_features; + } + } + + return num_passing_features; +} + +// Select features from the passed list for which the predicate is true. +// Returned view contains pointers to mutable features. +template +void SelectFeaturesFromList(const Predicate& predicate, + RegionFlowFeatureList* feature_list, + RegionFlowFeatureView* feature_view) { + CHECK(feature_list != nullptr); + CHECK(feature_view != nullptr); + for (auto& feature : *feature_list->mutable_feature()) { + if (predicate(feature)) { + feature_view->push_back(&feature); + } + } +} + +inline void SelectAllFeaturesFromList(RegionFlowFeatureList* feature_list, + RegionFlowFeatureView* feature_view) { + CHECK(feature_list != nullptr); + CHECK(feature_view != nullptr); + for (auto& feature : *feature_list->mutable_feature()) { + feature_view->push_back(&feature); + } +} + +// Sorts region flow feature views, w.r.t. predicate. Predicate must define: +// bool operator()(const RegionFlowFeature* lhs, +// const RegionFlowFeature* rhs) const; +template +void SortRegionFlowFeatureView(const Predicate& predicate, + RegionFlowFeatureView* feature_view) { + CHECK(feature_view != nullptr); + std::sort(feature_view->begin(), feature_view->end(), predicate); +} + +// Clamps IRLS weight of each RegionFlowFeature to lie within [lower, upper]. +void ClampRegionFlowFeatureIRLSWeights( + float lower, float upper, RegionFlowFeatureView* flow_feature_list); + +// Makes a copy of src to dest without copying any features, i.e. 
dest will +// have the same values as src for all field except the actual features. +// Implemented by temporarily swapping features in and out, therefore source has +// to be mutable but will not be modified. +void CopyToEmptyFeatureList(RegionFlowFeatureList* src, + RegionFlowFeatureList* dst); + +// Intersects passed RegionFlowFeatureLists based on track_id, returning new +// RegionFlowFeatureList indicating new motion from features location in list +// 'from' to feature's location in list 'to' (specified by to_location +// function, e.g. pass FeatureLocation or FeatureMatchLocation here). +// Requires RegionFlowFeatureList's computed with long_tracks. +// Output result is initialized to contain all fields from input from (same +// holds for intersected features, minus their match location). +// For performance reasons, from is passed at mutable pointer to use above +// CopyToEmptyFeatureList, but is not modified on output. +// Optionally outputs source index for each feature in result into feature +// array of from. +void IntersectRegionFlowFeatureList( + const RegionFlowFeatureList& to, + std::function to_location_eval, + RegionFlowFeatureList* from, RegionFlowFeatureList* result, + std::vector* source_indices = nullptr); + +// Streaming representation for long feature tracks. Ingests +// RegionFlowFeatureList for passed frames and maps them to their corresponding +// track id. +// Usage example: +// LongFeatureStream stream; +// for (int f = 0; f < frames; ++f) { +// RegionFlowFeatureList feature_list = ... // from input. +// stream.AddFeatures(feature_list, true, true); +// +// // Traverse tracks starting at the current frame f (going backwards in +// // time). +// for (const auto& track : stream) { +// track.first // Holds id. +// track.second // Holds vector, most recent one are at +// // the end. +// Convert track to a list of points. 
+// vector poly_line; +// stream.FlattenTrack(track.second, &poly_line, nullptr, nullptr); +// +// // Returned track where poly_line[0] is the beginning (oldest) and +// // poly_line[poly_line.size() - 1] the end of the track (most recent +// // point). +// +// for (const Vector2_f point : poly_line) { +// // ... do something ... +// } +// } +class LongFeatureStream { + private: + // Buffers features according to their track id. Most recent region flow + // features are added last. + typedef std::unordered_map> TrackBuffer; + + public: + // Default constructor for LongFeatureStream. The default long feature stream + // is backward. + LongFeatureStream() = default; + + // Constructor for LongFeatureStream. The param forward indicates if the long + // feature stream is forward or backward. + explicit LongFeatureStream(bool forward) : forward_(forward) {} + + // Adds new features for the current frame. Region flow must be computed + // w.r.t to previous or next frame (i.e. inter-frame distance = 1, CHECKED). + // If check_connectivity is specified, CHECKS if a feature's match location + // equals it last known location in the buffer. + // Optionally removes features from the buffer that are not present + // in the current list. + void AddFeatures(const RegionFlowFeatureList& feature_list, + bool check_connectivity, bool purge_non_present_features); + + // Traversal example: + // LongFeatureStream stream; + // for (auto track : stream) { + // track.first // Holds id. + // track.second // Holds vector. + // // Note: These are always backward flow features + // // even if you added forward ones. Ordered in time, + // // oldest features come first. + // vector poly_line; + // stream.FlattenTrack(track.second, &poly_line, nullptr, nullptr); + // } + typename TrackBuffer::const_iterator begin() const { return tracks_.begin(); } + typename TrackBuffer::const_iterator end() const { return tracks_.end(); } + + // Extract track as poly-line (vector of positions). 
+ // Specifically, tracks[0] is the beginning (oldest) and + // tracks[tracks.size() - 1] the end of the track (most recent point). + // Optionally, returns irls weight for each point pair along the track, + // i.e weight at position N, specifies weight of track between points + // N and N + 1. For convenience, weight at last position is replicated, i.e. + // length of tracks and irls_weight is identical. + // Optionally, returns the flow vector associated with each point on the + // track with a direction as requested by the flow direction of the + // constructor. Note: For N points, N - 1 flow vectors are returned. + void FlattenTrack(const std::vector& features, + std::vector* tracks, + std::vector* irls_weight, // optional. + std::vector* flow) const; // optional. + + // Random access. Returns nullptr if not found. + const std::vector* TrackById(int id) const; + + // Convenience function calling TrackById and FlattenTrack. Returns empty + // vector if track id is not present. + std::vector FlattenedTrackById(int id) const; + + private: + // Long Feature tracks indexed by id. + TrackBuffer tracks_; + + // Stores old ids that have been removed. Used during check_connectivity. + std::unordered_set old_ids_; + + // A flag indicating if the long feature stream is forward or backward. + bool forward_ = false; +}; + +// Helper class for testing which features are present, computing overall track +// length and other statistics. +// Usage: +// LongFeatureInfo lfi; +// std::vector feature_lists = FROM_OUTSIDE +// for (const auto& feature_list : feature_lists) { +// lfi.AddFeatures(feature_list); +// // Get track length for each feature, so far. +// std::vector track_length; +// lfi.TrackLenghts(feature_list, &track_length); +// } +class LongFeatureInfo { + public: + // Adds features to current info state. + void AddFeatures(const RegionFlowFeatureList& feature_list); + + // Adds a single feature. 
If used instead of above function, requires + // IncrementFrame to be called manually. + void AddFeature(const RegionFlowFeature& feature); + + // Returns track length for each passed feature. + // Note: If feature is not yet present, zero is returned as length. + void TrackLengths(const RegionFlowFeatureList& feature_list, + std::vector* track_lengths) const; + + // Same as above for an individual feature. + int TrackLength(const RegionFlowFeature& feature) const; + + // Returns starting frame for a feature. + int TrackStart(const RegionFlowFeature& feature) const; + + int NumFrames() const { return num_frames_; } + + void Reset(); + + // Returns track length at passed percentile across all tracks added so far. + int GlobalTrackLength(float percentile) const; + + void IncrementFrame() { ++num_frames_; } + + private: + struct TrackInfo { + int length = 0; + int start = 0; + }; + + // Maps track id to above info struct. + std::unordered_map track_info_; + + int num_frames_ = 0; +}; + +// Scales a salient point in x and y by specified scales. For example, use to +// map a salient point to a specific frame width and height. +void ScaleSalientPoint(float scale_x, float scale_y, + SalientPoint* salient_point); + +// Scales salient points in saliency by factor scale. If normalize_to_scale +// is true, the weights of salient points per frame sum up to scale. +void ScaleSalientPointFrame(float scale, bool normalize_to_scale, + SalientPointFrame* saliency); + +// Convenience function for SaliencyPointList's invoking above function on each +// frame. +void ScaleSaliencyList(float scale, bool normalize_to_scale, + SaliencyPointList* saliency_list); + +// Resets the normalized bounds of salient points in saliency list to +// specified bounds. 
+void ResetSaliencyBounds(float left, float bottom, float right, float top, + SaliencyPointList* saliency_list); + +// Returns major and minor axis from covariance matrix (scaled by one sigma) in +// each direction) and angle of major axis (in radians, counter clockwise). +// Returns false if degenerate (ellipses with less than 2 pixels in diameter +// in either direction). +// Assuming 2x2 Covariance matrix of the form [a bc +// bc d] +// for variances specified in pixels^2. +bool EllipseFromCovariance(float a, float bc, float d, + Vector2_f* axis_magnitude, float* angle); + +// Calculate the bounding box from an axis aligned ellipse defined by the major +// and minor axis. Returns 4 corners of a minimal bounding box. +void BoundingBoxFromEllipse(const Vector2_f& center, float norm_major_axis, + float norm_minor_axis, float angle, + std::vector* bounding_box); + +// Helper function used by BuildFeature grid to determine the sample taps +// for a domain of size dim_x x dim_y with a specified tap_radius. +void GridTaps(int dim_x, int dim_y, int tap_radius, + std::vector>* taps); + +// Generic function to bin features (of generic type Feature, specified for a +// set of frames) over the domain frame_width x frame_height into equally +// sized square bins of size grid_resolution x grid_resolution. +// Function outputs for each bin the indicies of neighboring bins according +// 3x3 or 5x5 neighborhood centered at that bin. Also output number of bins +// created along each dimension as 2 dim vector (x, y). +// FeatureEvaluator should implement: +// Vector2_f operator()(const Feature& f) const { +// // Returns spatial location for feature f; +// } +template +using FeatureFrame = std::vector; + +template +using FeatureGrid = std::vector>; + +template +void BuildFeatureGrid( + float frame_width, float frame_height, float grid_resolution, + const std::vector>& feature_views, + const FeatureEvaluator& evaluator, + std::vector>* feature_taps_3, // Optional. 
+ std::vector>* feature_taps_5, // Optional. + Vector2_i* num_grid_bins, // Optional. + std::vector>* feature_grids) { + CHECK(feature_grids); + CHECK_GT(grid_resolution, 0.0f); + + const int num_frames = feature_views.size(); + const int grid_dim_x = std::ceil(frame_width / grid_resolution); + const int grid_dim_y = std::ceil(frame_height / grid_resolution); + const int grid_size = grid_dim_x * grid_dim_y; + const float grid_scale = 1.0f / grid_resolution; + + // Pre-compute neighbor grids. + feature_grids->clear(); + feature_grids->resize(num_frames); + for (int f = 0; f < num_frames; ++f) { + // Populate. + auto& curr_grid = (*feature_grids)[f]; + curr_grid.resize(grid_size); + const FeatureFrame& curr_view = feature_views[f]; + for (int i = 0, size = curr_view.size(); i < size; ++i) { + Feature* feature = curr_view[i]; + Vector2_f feature_loc = evaluator(*feature); + const int x = feature_loc.x() * grid_scale; + const int y = feature_loc.y() * grid_scale; + DCHECK_LT(y, grid_dim_y); + DCHECK_LT(x, grid_dim_x); + const int grid_loc = y * grid_dim_x + x; + curr_grid[grid_loc].push_back(feature); + } + } + + if (feature_taps_3 != NULL) { + GridTaps(grid_dim_x, grid_dim_y, 1, feature_taps_3); + } + if (feature_taps_5 != NULL) { + GridTaps(grid_dim_x, grid_dim_y, 2, feature_taps_5); + } + + if (num_grid_bins) { + *num_grid_bins = Vector2_i(grid_dim_x, grid_dim_y); + } +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_H_ diff --git a/mediapipe/util/tracking/region_flow.proto b/mediapipe/util/tracking/region_flow.proto new file mode 100644 index 000000000..351396771 --- /dev/null +++ b/mediapipe/util/tracking/region_flow.proto @@ -0,0 +1,295 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mediapipe;

option java_package = "com.google.mediapipe.tracking";
option java_multiple_files = true;

// Captures additional information about a RegionFlowFeature's
// surrounding patch.
// Using MotionEstimation::RetrieveRegionFlowFeatureList or
// ComputeRegionFlowFeatureDescriptors the patch descriptor has the following
// layout:
// (9 dimensional: 3 mean intensities, 3x3 covariance matrix, (only store upper
// half (6 elems) in column major order, i.e. indices for data in patch
// descriptor refer to:
// mean: 0 1 2,  covariance: 3 4 5
//                             6 7
//                               8
message PatchDescriptor {
  repeated float data = 1;  // The actual feature descriptor.
}

// Binary feature descriptor for a particular feature.
// For example: orb
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.370.4395&rep=rep1&type=pdf
message BinaryFeatureDescriptor {
  optional bytes data = 1;
}

// Internal datastructure used during temporal IRLS smoothing.
message TemporalIRLSSmoothing {
  optional float weight_sum = 1 [default = 0];
  optional float value_sum = 2 [default = 0];
}

// Tracked feature at location (x,y) with flow (dx, dy) and patch based
// error (sum of absolute value of intensity difference).
// Next tag: 19
message RegionFlowFeature {
  optional float x = 1 [default = 0];
  optional float y = 2 [default = 0];
  optional float dx = 3 [default = 0];
  optional float dy = 4 [default = 0];

  // Features that belong to the same feature track are assigned a unique id
  // and are identified via it.
  // Note, this id is only unique within the lifetime of a RegionFlowComputation
  // object. That is, if distribution or parallelization using multiple
  // instances was used, the ids are only unique within that instance context.
  optional int32 track_id = 13 [default = -1];  // no id.

  // Tracking error as patch intensity residual (SSD).
  optional float tracking_error = 5 [default = 0];

  // Inverse of registration error (in pixels), after parametric motion model
  // fitting. Values are in [0, 1e6].
  // Low values correspond to outliers, high values to inliers.
  // Set by MotionEstimation::EstimateMotions*
  optional float irls_weight = 6 [default = 1.0];

  // Corner response (computed as minimum eigenvalue of
  // block filtered 2nd moment matrix).
  optional float corner_response = 11 [default = 0.0];

  // Patch feature descriptors. *For internal use only*. External clients should
  // not rely on their contents.
  optional PatchDescriptor feature_descriptor = 7;
  optional PatchDescriptor feature_match_descriptor = 8;

  // Internal datastructure used temporally during temporal IRLS smoothing.
  optional TemporalIRLSSmoothing internal_irls = 10;

  // Optional label for debugging purposes.
  optional string label = 14;

  // Flags indicating specific statuses.
  enum Flags {
    FLAG_BROKEN_TRACK = 1;  // Used for long feature tracks if track id
                            // was reset.
  }

  optional int32 flags = 15;

  // Unique feature id per RegionFlowComputation object.
  optional int32 feature_id = 16;

  // octave (pyramid layer) from which the keypoint has been extracted
  optional int32 octave = 17 [default = 0];

  // Feature descriptor for the current feature.
  optional BinaryFeatureDescriptor binary_feature_descriptor = 18;

  // Deprecated fields.
  extensions 9, 12;
}

// RegionFlowFrame is an optical flow representation where each region has a
// consistent optical flow (adheres to local translational model).
// Regions are arranged in a regular grid according to BlockDescriptor.
// Next tag: 11.
message RegionFlowFrame {
  // Next tag: 8
  message RegionFlow {
    required int32 region_id = 1;

    // Mean anchor point (centroid) of flow vector and mean flow.
    optional float centroid_x = 2 [default = 0];
    optional float centroid_y = 3 [default = 0];
    optional float flow_x = 4 [default = 0];
    optional float flow_y = 5 [default = 0];

    repeated RegionFlowFeature feature = 7;

    // Deprecated fields.
    extensions 6;
  }

  // Sorted by id for quick lookup.
  repeated RegionFlow region_flow = 1;

  // Total number of features in all RegionFlow's.
  optional int32 num_total_features = 2 [default = 0];

  // If set, indicates that the frame's region flow is unstable.
  // (not enough features or coverage too low).
  optional bool unstable_frame = 4 [default = false];

  // Blur score of the current frame is defined as the n-th percentile
  // of the cornerness of the input frame evaluated over regions of high
  // cornerness. For details see BlurScoreOptions in
  // region_flow_computation.proto.
  // The actual value is pretty meaningless, but relative to the blur score
  // of other frames one can detect blurry frames, e.g. by a 'significant'
  // local maxima in a sequence of blur_scores.
  optional float blur_score = 7;

  optional int32 frame_width = 8;
  optional int32 frame_height = 9;

  // Region flow is estimated using a grid of equal sized bins as regions.
  // BlockDescriptor specifies size of bins/blocks.
  message BlockDescriptor {
    optional int32 block_width = 1;
    optional int32 block_height = 2;
    optional int32 num_blocks_x = 3 [default = 0];
    optional int32 num_blocks_y = 4 [default = 0];
  }
  optional BlockDescriptor block_descriptor = 10;

  // Deprecated fields.
  extensions 3, 5, 6;
}

// Encapsulates a list of features with associated flow.
+// Can be extracted from RegionFlow via GetRegionFlowFeatureList +// declared in region_flow.h. This is the essential (additional) information +// required by Cropper using wobble_suppression with displacements. +// Next tag: 13 +message RegionFlowFeatureList { + repeated RegionFlowFeature feature = 1; + optional int32 frame_width = 2; + optional int32 frame_height = 3; + + // Set from corresponding RegionFlowFrame field. + optional bool unstable = 4 [default = false]; + + // Records the minimum distance from the image border for each feature and + // matching feature (if enforced > 0). + optional int32 distance_from_border = 5 [default = 0]; + + // Set from corresponding RegionFlowFrame field. + optional float blur_score = 6; + + // If set, indicates, that features represent long tracks, i.e. each feature + // has a valid track_id() >= 0. + optional bool long_tracks = 7 [default = false]; + + // If long_tracks, stores number of long feature tracks that got rejected in + // this frame, as their patches were deemed inconsistent with the track's very + // first extracted patch. + optional float frac_long_features_rejected = 8 [default = 0]; + + // Measures visual consistency between adjacent frames. In particular, stores + // the absolute *change* in visual difference between two adjancent frame + // pairs, i.e. the modulus of the 2nd derivative of the frame appearance. + // Normalized w.r.t. number of channels and total pixels of the underlying + // frame. + // In particular for sudden changes (e.g. shot boundaries) this value will + // be significantly non-zero (> 0.05). + // Negative value per default indicates no consistency has been computed. + optional float visual_consistency = 9 [default = -1]; + + // Timestamp in micro seconds of the underlying frame, that is the frame + // for which the source features (not matching features) were computed. + optional int64 timestamp_usec = 10 [default = 0]; + + // Denotes the frame that flow was computed w.r.t. 
to, locally to the current + // frame. For example, if current frame is N, N + match_frame is the matching + // frame that flow was computed to. + // Values < 0 indicate backward tracking, while values > 0 indicate forward + // tracking. By default, for empty feature lists, matching frame is the + // same as current frame, i.e. match_frame = 0. + optional int32 match_frame = 11 [default = 0]; + + // Set, if frame is estimated to be an exact duplicate of the previous frame. + optional bool is_duplicated = 12 [default = false]; +} + +// Salient point location (normalized w.r.t. frame_width and frame_height, i.e. +// specified in the domain [0, 1] x [0, 1]). + +// For TYPE_INCLUDE: +// During retargeting and stabilization salient points introduce constraints +// that will try to keep the normalized location in the rectangle +// frame_size - normalized bounds. +// For this soft constraints are used, therefore the weight specifies +// how "important" the salient point is (higher is better). +// In particular for each point p the retargeter introduces two pairs of +// constraints of the form: +// x - slack < width - right +// and x + slack > 0 + left, with slack > 0 +// where the weight specifies the importance of the slack. +// +// For TYPE_EXCLUDE_*: +// Similar to above, but constraints are introduced to keep +// the point to the left of the left bound OR the right of the right bound. +// In particular: +// x - slack < left OR +// x + slack >= right +// Similar to above, the weight specifies the importance of the slack. +// +// Note: Choosing a too high weight can lead to +// jerkiness as the stabilization essentially starts tracking the salient point. +message SalientPoint { + // Normalized location of the point (within domain [0, 1] x [0, 1]. 
+ optional float norm_point_x = 1 [default = 0.0]; + optional float norm_point_y = 2 [default = 0.0]; + + enum SalientPointType { + TYPE_INCLUDE = 1; + TYPE_EXCLUDE_LEFT = 2; + TYPE_EXCLUDE_RIGHT = 3; + } + + // Salient point type. By default we try to frame the salient point within + // the bounding box specified by left, bottom, right, top. Alternatively, one + // can choose to exclude the point. For details, see discussion above. + optional SalientPointType type = 11 [default = TYPE_INCLUDE]; + + // Bounds are specified in normalized coordinates [0, 1], FROM the specified + // border. Opposing bounds (e.g. left and right) may not add to values + // larger than 1. + // Default bounds center salient point within centering third of the frame. + optional float left = 3 [default = 0.3]; + optional float bottom = 4 [default = 0.3]; + optional float right = 9 [default = 0.3]; + optional float top = 10 [default = 0.3]; + + optional float weight = 5 [default = 15]; + + // In addition salient point can represent a region of interest (defined as + // ellipse of size norm_major x norm_minor (normalized to [0, 1] domain) + // which orientation is given by angle (in radians in [0, pi]). + // Due to aspect ratio change of the normalized domain, it is recommended that + // transformations to other domains are done via the ScaleSalientPoint + // function. + optional float norm_major = 6; + optional float norm_minor = 7; + + // Angle of major axis with x-axis (counter-clock wise, in radians). + optional float angle = 8; + + extensions 20000 to max; +} + +// Aggregates SalientPoint's for a frame. 
+message SalientPointFrame { + repeated SalientPoint point = 1; + + extensions 20000 to max; +} diff --git a/mediapipe/util/tracking/region_flow_computation.cc b/mediapipe/util/tracking/region_flow_computation.cc new file mode 100644 index 000000000..331246220 --- /dev/null +++ b/mediapipe/util/tracking/region_flow_computation.cc @@ -0,0 +1,3560 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/region_flow_computation.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "absl/container/node_hash_set.h" +#include "absl/memory/memory.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_features2d_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_video_inc.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/camera_motion.pb.h" +#include "mediapipe/util/tracking/image_util.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/motion_estimation.h" +#include "mediapipe/util/tracking/motion_estimation.pb.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/parallel_invoker.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/tone_estimation.h" +#include 
"mediapipe/util/tracking/tone_estimation.pb.h" +#include "mediapipe/util/tracking/tone_models.h" +#include "mediapipe/util/tracking/tone_models.pb.h" + +using std::max; +using std::min; + +namespace mediapipe { + +typedef RegionFlowFrame::RegionFlow RegionFlow; +typedef RegionFlowFeature Feature; +constexpr float kZeroMotion = 0.25f; // Quarter pixel average motion. + +// Helper struct used by RegionFlowComputation and MotionEstimation. +// Feature position, flow and error. Unique id per track, set to -1 if no such +// id can be assigned. +struct TrackedFeature { + TrackedFeature(const Vector2_f& point_, const Vector2_f& flow_, + float tracking_error_, float corner_response_, int octave_, + int track_id_ = -1, float verify_dist_ = 0) + : point(point_), + flow(flow_), + tracking_error(tracking_error_), + corner_response(corner_response_), + octave(octave_), + irls_weight(1.0f), + num_bins(1), + track_id(track_id_), + verify_dist(verify_dist_) {} + + Vector2_f point; + Vector2_f flow; + float tracking_error = 0; + float corner_response = 0; + int octave = 0; + float irls_weight = 1.0f; + int num_bins = 1; // Total number of bins feature is binned into. + int track_id = -1; // Unique id, assigned to each feature belonging to the + // same track. Negative values indicates no id. + float verify_dist = 0; + + // Flags as defined by RegionFlowFeature. + int flags = 0; + + // Descriptors of this feature (single row). + cv::Mat descriptors; + + // Original neighborhood of the feature. Refers to the patch that the feature + // was extracted the very first time. + // Using shared_ptr instead of unique_ptr to make TrackingData copyable. + // We don't use cv::Mat directly (which is reference counted) as + // neighborhoods are only used for long feature verification (which is + // optional). + std::shared_ptr orig_neighborhood; + + void Invert() { + point += flow; + flow = -flow; + } +}; + +// Inverts features (swaps location and matches). In-place operation OK, i.e. 
+// inverted_list == &list acceptable and faster. +void InvertFeatureList(const TrackedFeatureList& list, + TrackedFeatureList* inverted_list) { + if (inverted_list != &list) { + *inverted_list = list; + } + + for (auto& feature : *inverted_list) { + feature.Invert(); + } +} + +// Allocates pyramid images of sufficient size (suggested OpenCV settings, +// independent of number of pyramid levels). +void AllocatePyramid(int frame_width, int frame_height, cv::Mat* pyramid) { + const int pyramid_width = frame_width + 8; + const int pyramid_height = frame_height / 2 + 1; + pyramid->create(pyramid_height, pyramid_width, CV_8UC1); +} + +namespace { + +// lab_window is used as scratch space only, to avoid allocations. +void GetPatchDescriptorAtPoint(const cv::Mat& rgb_frame, const Vector2_i& pt, + const int radius, cv::Mat* lab_window, + PatchDescriptor* descriptor) { + CHECK(descriptor); + descriptor->clear_data(); + + // Reserve enough data for mean and upper triangular part of + // covariance matrix. + descriptor->mutable_data()->Reserve(3 + 6); + + // Extract a window of the RGB frame for Lab color conversion. We know that at + // this point the window doesn't overlap with the frame boundary. The + // windowing operation just generates a reference and doesn't copy the values. + const int diameter = 2 * radius + 1; + const cv::Mat rgb_window = + rgb_frame(cv::Rect(pt.x() - radius, pt.y() - radius, diameter, diameter)); + + // Compute channel sums and means. + int sum[3] = {0, 0, 0}; + for (int y = 0; y < diameter; ++y) { + const uint8* data = rgb_window.ptr(y); + for (int x = 0; x < diameter; ++x, data += 3) { + for (int c = 0; c < 3; ++c) { + sum[c] += data[c]; + } + } + } + const float scale = 1.f / (diameter * diameter); + for (int c = 0; c < 3; ++c) { + descriptor->add_data(sum[c] * scale); // Mean value. + } + + const float denom = 1.0f / (diameter * diameter); + + // Compute the channel dot products, after centering around the respective + // channel means. 
Only computing upper triangular part. + int product[3][3]; + for (int c = 0; c < 3; ++c) { + for (int d = c; d < 3; ++d) { + // We want to compute + // sum_{x,y}[(data[c] - mean[c]) * (data[d] - mean[d])], + // which simplifies to + // sum_{x,y}[data[c] * data[d]] - sum[c] * sum[d] / N + // using N = diameter * diameter and sum[c] = N * mean[c]. + product[c][d] = -sum[c] * sum[d] * denom; + for (int y = 0; y < diameter; ++y) { + const uint8* data = rgb_window.ptr(y); + for (int x = 0; x < diameter; ++x, data += 3) { + product[c][d] += static_cast(data[c]) * data[d]; + } + } + } + } + + // Finally, add the descriptors only storring upper triangular part. + for (int c = 0; c < 3; ++c) { + for (int d = c; d < 3; ++d) { + descriptor->add_data(product[c][d] * scale); + } + } +} + +class PatchDescriptorInvoker { + public: + PatchDescriptorInvoker(const cv::Mat& rgb_frame, + const cv::Mat* prev_rgb_frame, int radius, + RegionFlowFeatureList* features) + : rgb_frame_(rgb_frame), + prev_rgb_frame_(prev_rgb_frame), + radius_(radius), + features_(features) {} + + void operator()(const BlockedRange& range) const { + cv::Mat lab_window; // To avoid repeated allocations below. 
+ for (int feature_idx = range.begin(); feature_idx != range.end(); + ++feature_idx) { + RegionFlowFeature* feature = features_->mutable_feature(feature_idx); + Vector2_i pt(FeatureIntLocation(*feature)); + DCHECK_GE(pt.x(), radius_); + DCHECK_GE(pt.y(), radius_); + DCHECK_LT(pt.x(), rgb_frame_.cols - radius_); + DCHECK_LT(pt.y(), rgb_frame_.rows - radius_); + GetPatchDescriptorAtPoint(rgb_frame_, pt, radius_, &lab_window, + feature->mutable_feature_descriptor()); + + if (prev_rgb_frame_) { + Vector2_i pt_match(FeatureMatchIntLocation(*feature)); + DCHECK_GE(pt_match.x(), radius_); + DCHECK_GE(pt_match.y(), radius_); + DCHECK_LT(pt_match.x(), rgb_frame_.cols - radius_); + DCHECK_LT(pt_match.y(), rgb_frame_.rows - radius_); + GetPatchDescriptorAtPoint(*prev_rgb_frame_, pt_match, radius_, + &lab_window, + feature->mutable_feature_match_descriptor()); + } + } + } + + private: + const cv::Mat& rgb_frame_; + const cv::Mat* prev_rgb_frame_; + int radius_; + RegionFlowFeatureList* features_; +}; + +} // namespace. + +// Computes patch descriptor in color domain (LAB), see region_flow.proto for +// specifics. +// If optional parameter prev_rgb_frame is set, also computes corresponding +// feature_match_descriptor. +// IMPORTANT: Ensure that patch_descriptor_rad <= distance_from_border in +// GetRegionFlowFeatureList. Checked by function. 
+void ComputeRegionFlowFeatureDescriptors( + const cv::Mat& rgb_frame, const cv::Mat* prev_rgb_frame, + int patch_descriptor_radius, RegionFlowFeatureList* flow_feature_list) { + const int rows = rgb_frame.rows; + const int cols = rgb_frame.cols; + CHECK_EQ(rgb_frame.depth(), CV_8U); + CHECK_EQ(rgb_frame.channels(), 3); + + if (prev_rgb_frame) { + CHECK_EQ(prev_rgb_frame->depth(), CV_8U); + CHECK_EQ(prev_rgb_frame->channels(), 3); + CHECK_EQ(prev_rgb_frame->rows, rows); + CHECK_EQ(prev_rgb_frame->cols, cols); + } + + CHECK_LE(patch_descriptor_radius, flow_feature_list->distance_from_border()); + + ParallelFor( + 0, flow_feature_list->feature_size(), 1, + PatchDescriptorInvoker(rgb_frame, prev_rgb_frame, patch_descriptor_radius, + flow_feature_list)); +} + +// Stores 2D location's of feature points and their corresponding descriptors, +// where the i'th row in descriptors corresponds to the i'th entry in +// key_points. +struct RegionFlowComputation::ORBFeatureDescriptors { + cv::Mat descriptors; + std::vector key_points; + bool computed; + + ORBFeatureDescriptors() { Reset(); } + + void Reset() { + key_points.clear(); + computed = false; + } +}; + +struct RegionFlowComputation::FrameTrackingData { + cv::Mat frame; + + // Pyramid used for tracking. Just contains the a single image if old + // c-interface is used. + std::vector pyramid; + cv::Mat blur_data; + cv::Mat tiny_image; // Used if visual consistency verification is performed. + cv::Mat mask; // Features need to be extracted only where mask value > 0. + float mean_intensity = 0; // Mean intensity of the frame. + + // Pyramid used during feature extraction at multiple levels. + std::vector extraction_pyramid; + + // Records number of pyramid levels stored by member pyramid. If zero, pyramid + // has not been computed yet. + int pyramid_levels = 0; + + // Features extracted in this frame or tracked from a source frame. + std::vector features; + + // FrameTrackingData that resulting features were tracked from. 
+ FrameTrackingData* source = nullptr; + + // Indicates for each feature, corresponding source feature index (index into + // above features vector for source FrameTrackingData). + std::vector feature_source_map; + + // If set, indicates that member features was pre-initialized. + bool features_initialized = false; + + // Time (in frames) when the last feature extraction was carried out for the + // features used in this frame. Time increases by 1 every time we reuse + // tracked features as the features for this frame. + int last_feature_extraction_time = -1; + + // Number of extracted and tracked features in the original extraction frame + // (referred to by last_feature_extraction_time) and the current frame. + // That is stores the number inliers according to function + // GetFeatureTrackInliers. + int num_original_extracted_and_tracked = -1; // Negative for not set. + int num_extracted_and_tracked = -1; // Negative for not set. + + // 1:1 mapping w.r.t. features. + std::vector corner_responses; + + // 1:1 mapping w.r.t. features. Records octave each feature belongs to. + std::vector octaves; + + // 1:1 mapping w.r.t. features. Records track id each feature belongs to, + // use -1 if no such track id exists. Only used for long feature tracks. + std::vector track_ids; + + // 1:1 mapping w.r.t. features. Stores the original patch neighborhood when + // the feature was extracted for the first time, that is for features with + // assigned track_id (>= 0) the data refers to a neighborhood in an earlier + // frame or else the neighborhood in the current frame. + // Stores a CV_8U grayscale square patch with length + // TrackingOptions::tracking_window_size(). + std::shared_ptr> neighborhoods; + + // Absolute frame number of this FrameTrackingData. + int frame_num = 0; + + // Timestamp of the underlying frame. + int64 timestamp_usec = 0; + + // Difference of this FrameTrackingData's tiny_image w.r.t. previous one, + // i.e. one frame earlier. 
+ float tiny_image_diff = 0.0f; + + // Initial transform for matching features. Optional. Always stores the + // transform from current to previous frame. + std::shared_ptr initial_transform; + + ORBFeatureDescriptors orb; + + bool use_cv_tracking = false; + + FrameTrackingData(int width, int height, int extraction_levels, + bool _use_cv_tracking) + : use_cv_tracking(_use_cv_tracking) { + // Extraction pyramid. + extraction_pyramid.clear(); + for (int i = 0, iwidth = width, iheight = height; i < extraction_levels; + ++i) { + extraction_pyramid.push_back(cv::Mat(iheight, iwidth, CV_8UC1)); + iwidth = (iwidth + 1) / 2; + iheight = (iheight + 1) / 2; + } + CHECK_GE(extraction_levels, 1); + // Frame is the same as first extraction level. + frame = extraction_pyramid[0]; + + if (!use_cv_tracking) { + // Tracking pyramid for old c-interface. + pyramid.resize(1); + AllocatePyramid(width, height, &pyramid[0]); + } + } + + void BuildPyramid(int levels, int window_size, bool with_derivative) { + if (use_cv_tracking) { +#if CV_MAJOR_VERSION == 3 + // No-op if not called for opencv 3.0 (c interface computes + // pyramids in place). + // OpenCV changed how window size gets specified from our radius setting + // < 2.2 to diameter in 2.2+. + cv::buildOpticalFlowPyramid( + frame, pyramid, cv::Size(2 * window_size + 1, 2 * window_size + 1), + levels, with_derivative); + // Store max level for above pyramid. 
+ pyramid_levels = levels; +#endif + } + } + + void Reset(int frame_num_, int64 timestamp_) { + frame_num = frame_num_; + timestamp_usec = timestamp_; + pyramid_levels = 0; + ResetFeatures(); + neighborhoods.reset(); + orb.Reset(); + } + + void ResetFeatures() { + features.clear(); + corner_responses.clear(); + octaves.clear(); + track_ids.clear(); + feature_source_map.clear(); + if (neighborhoods != nullptr) { + neighborhoods->clear(); + } + source = nullptr; + features_initialized = false; + last_feature_extraction_time = 0; + num_original_extracted_and_tracked = -1; + num_extracted_and_tracked = -1; + } + + void PreAllocateFeatures(int num_features) { + features.reserve(num_features); + octaves.reserve(num_features); + corner_responses.reserve(num_features); + track_ids.reserve(num_features); + } + + // Adds new feature and with required information. + void AddFeature(const cv::Point2f& location, float corner_response, + int octave, + int track_id, // optional, -1 for none. + const cv::Mat* neighborhood) { // optional. + features.push_back(location); + corner_responses.push_back(corner_response); + octaves.push_back(octave); + track_ids.push_back(track_id); + if (neighborhoods != nullptr) { + if (neighborhood) { + neighborhoods->push_back(*neighborhood); + } else { + neighborhoods->push_back(cv::Mat()); + } + } + } + + void RemoveFeature(int pos) { + DCHECK_LT(pos, features.size()); + features.erase(features.begin() + pos); + feature_source_map.erase(feature_source_map.begin() + pos); + corner_responses.erase(corner_responses.begin() + pos); + octaves.erase(octaves.begin() + pos); + track_ids.erase(track_ids.begin() + pos); + if (neighborhoods) { + neighborhoods->erase(neighborhoods->begin() + pos); + } + } + + // Stores grayscale square patch with length patch_size extracted at center in + // image frame and stores result in patch. 
+ void ExtractPatch(const cv::Point2f& center, int patch_size, cv::Mat* patch) { + CHECK(patch != nullptr); + patch->create(patch_size, patch_size, CV_8UC1); + cv::getRectSubPix(frame, cv::Size(patch_size, patch_size), center, *patch); + } +}; + +// Data to be used across AddImage calls for long feature tracking. +struct RegionFlowComputation::LongTrackData { + LongTrackData() = default; + + // Returns next id and records its start frame. + int CreateNextTrackId(int start_frame, float motion_mag) { + track_info[next_track_id] = TrackInfo(start_frame, motion_mag); + const int result = next_track_id; + + // Advance. + ++next_track_id; + if (next_track_id < 0) { + LOG(ERROR) << "Exhausted maximum possible ids. RegionFlowComputation " + << "instance lifetime is likely to be too long. Consider " + << "chunking the input."; + next_track_id = 0; + } + + return result; + } + + // Returns last id that was created or -1 if an id was never created. + int LastTrackId() const { return next_track_id - 1; } + + // Returns -1 if id is not present. + int StartFrameForId(int id) const { + auto id_iter = track_info.find(id); + if (id_iter == track_info.end()) { + return -1; + } else { + return id_iter->second.start_frame; + } + } + + // Clears buffered information for all features that are not present + // anymore, i.e. not within the specified hash_set. + void RemoveAbsentFeatureEntries( + const absl::node_hash_set& present_features) { + auto entry = track_info.begin(); + while (entry != track_info.end()) { + if (present_features.find(entry->first) == present_features.end()) { + // Not present anymore, remove! 
+ auto to_erase = entry; + ++entry; + track_info.erase(to_erase); + } else { + ++entry; + } + } + } + + float MotionMagForId(int id) const { + auto id_iter = track_info.find(id); + DCHECK(id_iter != track_info.end()); + return id_iter->second.motion_mag; + } + + void UpdateMotion(int id, float motion_mag) { + auto id_iter = track_info.find(id); + DCHECK(id_iter != track_info.end()); + if (id_iter->second.motion_mag >= 0) { + id_iter->second.motion_mag = + id_iter->second.motion_mag * 0.5f + 0.5f * motion_mag; + } + } + + // Next id to be assigned to a new track. + int next_track_id = 0; + + // Holds the previous result to seed the next frame. + TrackedFeatureList prev_result; + + // Records for each track id some additional information. + struct TrackInfo { + TrackInfo() {} + TrackInfo(int _start_frame, float _motion_mag) + : start_frame(_start_frame), motion_mag(_motion_mag) {} + int start_frame = 0; // Start frame of track. + float motion_mag = 0; // Smoothed average motion. -1 for unknown. + }; + + std::unordered_map track_info; +}; + +template +std::unique_ptr MakeUnique(T* ptr) { + return std::unique_ptr(ptr); +} + +RegionFlowComputation::RegionFlowComputation( + const RegionFlowComputationOptions& options, int frame_width, + int frame_height) + : options_(options), + frame_width_(frame_width), + frame_height_(frame_height) { + switch (options_.gain_correct_mode()) { + case RegionFlowComputationOptions::GAIN_CORRECT_DEFAULT_USER: + // Do nothing, simply use supplied bounds. 
+ break; + + case RegionFlowComputationOptions::GAIN_CORRECT_VIDEO: { + auto* gain_bias = options_.mutable_gain_bias_bounds(); + gain_bias->Clear(); + gain_bias->set_lower_gain(0.8f); + gain_bias->set_upper_gain(1.2f); + gain_bias->set_lower_bias(-0.2f); + gain_bias->set_upper_bias(0.2f); + gain_bias->set_min_inlier_weight(0.2f); + gain_bias->set_min_inlier_fraction(0.6f); + break; + } + + case RegionFlowComputationOptions::GAIN_CORRECT_HDR: { + auto* gain_bias = options_.mutable_gain_bias_bounds(); + gain_bias->Clear(); + gain_bias->set_lower_gain(0.8f); + gain_bias->set_lower_gain(0.33f); + gain_bias->set_upper_gain(3.0f); + gain_bias->set_lower_bias(-0.5f); + gain_bias->set_upper_bias(0.5f); + gain_bias->set_min_inlier_weight(0.15f); + gain_bias->set_min_inlier_fraction(0.6f); + break; + } + + case RegionFlowComputationOptions::GAIN_CORRECT_PHOTO_BURST: { + auto* gain_bias = options_.mutable_gain_bias_bounds(); + gain_bias->Clear(); + gain_bias->set_min_inlier_fraction(0.6f); + gain_bias->set_min_inlier_weight(0.1f); + gain_bias->set_lower_gain(0.4f); + gain_bias->set_upper_gain(2.5f); + gain_bias->set_lower_bias(-0.6f); + gain_bias->set_upper_bias(0.6f); + break; + } + } + + CHECK_NE(options.tracking_options().output_flow_direction(), + TrackingOptions::CONSECUTIVELY) + << "Output direction must be either set to FORWARD or BACKWARD."; + use_downsampling_ = options_.downsample_mode() != + RegionFlowComputationOptions::DOWNSAMPLE_NONE; + downsample_scale_ = 1; + original_width_ = frame_width_; + original_height_ = frame_height_; + + switch (options_.downsample_mode()) { + case RegionFlowComputationOptions::DOWNSAMPLE_NONE: + break; + case RegionFlowComputationOptions::DOWNSAMPLE_TO_MAX_SIZE: { + const float max_size = std::max(frame_width_, frame_height_); + if (max_size > 1.03f * options_.downsampling_size()) { + downsample_scale_ = max_size / options_.downsampling_size(); + if (options_.round_downsample_factor()) { + downsample_scale_ = 
std::round(downsample_scale_); + } + } + break; + } + case RegionFlowComputationOptions::DOWNSAMPLE_TO_MIN_SIZE: { + const float min_size = std::min(frame_width_, frame_height_); + if (min_size > 1.03f * options_.downsampling_size()) { + downsample_scale_ = min_size / options_.downsampling_size(); + if (options_.round_downsample_factor()) { + downsample_scale_ = std::round(downsample_scale_); + } + } + break; + } + case RegionFlowComputationOptions::DOWNSAMPLE_BY_FACTOR: + case RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE: { + CHECK_GE(options_.downsample_factor(), 1); + downsample_scale_ = options_.downsample_factor(); + break; + } + case RegionFlowComputationOptions::DOWNSAMPLE_BY_SCHEDULE: { + const int frame_area = frame_width_ * frame_height_; + if (frame_area <= (16 * 1.03 / 9 * 360 * 360)) { + downsample_scale_ = + options_.downsample_schedule().downsample_factor_360p(); + } else if (frame_area <= (16 * 1.03 / 9 * 480 * 480)) { + downsample_scale_ = + options_.downsample_schedule().downsample_factor_480p(); + } else if (frame_area <= (16 * 1.03 / 9 * 720 * 720)) { + downsample_scale_ = + options_.downsample_schedule().downsample_factor_720p(); + } else { + downsample_scale_ = + options_.downsample_schedule().downsample_factor_1080p(); + } + break; + } + } + + frame_width_ = std::round(frame_width_ / downsample_scale_); + frame_height_ = std::round(frame_height_ / downsample_scale_); + + if (use_downsampling_ && + options_.downsample_mode() != + RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE) { + // Make downscaled size even. + frame_width_ += frame_width_ % 2; + frame_height_ += frame_height_ % 2; + + LOG(INFO) << "Using a downsampling scale of " << downsample_scale_; + } + + // Make sure value is equal to local variable, in case someone uses that on + // accident below. + frame_width = frame_width_; + frame_height = frame_height_; + + // Allocate temporary frames. 
+ switch (options_.image_format()) { + case RegionFlowComputationOptions::FORMAT_RGB: + case RegionFlowComputationOptions::FORMAT_BGR: + curr_color_image_.reset( + new cv::Mat(frame_height_, frame_width_, CV_8UC3)); + break; + + case RegionFlowComputationOptions::FORMAT_RGBA: + case RegionFlowComputationOptions::FORMAT_BGRA: + curr_color_image_.reset( + new cv::Mat(frame_height_, frame_width_, CV_8UC4)); + break; + + case RegionFlowComputationOptions::FORMAT_GRAYSCALE: + // Do nothing. + break; + } + + if (options_.compute_blur_score()) { + corner_values_.reset(new cv::Mat(frame_height_, frame_width_, CV_32F)); + corner_filtered_.reset(new cv::Mat(frame_height_, frame_width_, CV_32F)); + corner_mask_.reset(new cv::Mat(frame_height_, frame_width_, CV_8U)); + } + + max_long_track_length_ = 1; + switch (options_.tracking_options().tracking_policy()) { + case TrackingOptions::POLICY_SINGLE_FRAME: + if (options_.tracking_options().multi_frames_to_track() > 1) { + LOG(ERROR) << "TrackingOptions::multi_frames_to_track is > 1, " + << "but tracking_policy is set to POLICY_SINGLE_FRAME. " + << "Consider using POLICY_MULTI_FRAME instead."; + } + + frames_to_track_ = 1; + break; + case TrackingOptions::POLICY_MULTI_FRAME: + CHECK_GT(options_.tracking_options().multi_frames_to_track(), 0); + frames_to_track_ = options_.tracking_options().multi_frames_to_track(); + break; + case TrackingOptions::POLICY_LONG_TRACKS: + if (options_.tracking_options().multi_frames_to_track() > 1) { + LOG(ERROR) << "TrackingOptions::multi_frames_to_track is > 1, " + << "but tracking_policy is set to POLICY_LONG_TRACKS. " + << "Use TrackingOptions::long_tracks_max_frames to set " + << "length of long feature tracks."; + } + + if (options_.tracking_options().internal_tracking_direction() != + TrackingOptions::FORWARD) { + LOG(ERROR) << "Long tracks are only supported if tracking direction " + << "is set to FORWARD. Adjusting direction to FORWARD. 
" + << "This does not affect the expected " + << "output_flow_direction"; + options_.mutable_tracking_options()->set_internal_tracking_direction( + TrackingOptions::FORWARD); + } + + frames_to_track_ = 1; + max_long_track_length_ = + options_.tracking_options().long_tracks_max_frames(); + long_track_data_.reset(new LongTrackData()); + break; + } + + CHECK(!options_.gain_correction() || !IsVerifyLongFeatures()) + << "Gain correction mode with verification of long features is not " + << "supported."; + + // Tracking algorithm dependent on cv support and flag. + use_cv_tracking_ = options_.tracking_options().use_cv_tracking_algorithm(); +#if CV_MAJOR_VERSION != 3 + if (use_cv_tracking_) { + LOG(WARNING) << "Compiled without OpenCV 3.0 but cv_tracking_algorithm " + << "was requested. Falling back to older algorithm"; + use_cv_tracking_ = false; + } +#endif + + if (options_.gain_correction()) { + gain_image_.reset(new cv::Mat(frame_height_, frame_width_, CV_8UC1)); + if (!use_cv_tracking_) { + gain_pyramid_.reset(new cv::Mat()); + AllocatePyramid(frame_width_, frame_height_, gain_pyramid_.get()); + } + } + + // Determine number of levels at which to extract features. If lowest image + // size for extraction is given, it overrides extraction levels. 
+ extraction_levels_ = options_.tracking_options().adaptive_extraction_levels(); + const int lowest_extraction_size = + options_.tracking_options().adaptive_extraction_levels_lowest_size(); + if (lowest_extraction_size > 0) { + const float frame_size = max(frame_width_, frame_height_); + extraction_levels_ = + 1 + std::ceil(std::log2(frame_size / lowest_extraction_size) - 0.01); + } + extraction_levels_ = max(1, extraction_levels_); + VLOG(1) << "Feature extraction will be done over " << extraction_levels_ + << " levels, starting at size (width, height): (" << frame_width_ + << ", " << frame_height_ << ")"; + + feature_tmp_image_1_.reset(new cv::Mat(frame_height_, frame_width_, CV_32F)); + feature_tmp_image_2_.reset(new cv::Mat(frame_height_, frame_width_, CV_32F)); + + // Allocate feature point arrays. + max_features_ = options_.tracking_options().max_features(); + + // Compute number of pyramid levels. + float track_distance = + hypot(frame_width_, frame_height_) * + options_.tracking_options().fractional_tracking_distance(); + pyramid_levels_ = PyramidLevelsFromTrackDistance(track_distance); + VLOG(1) << "Using pyramid levels: " << pyramid_levels_; + + // Compute settings for block based flow. + const float block_size = options_.fast_estimation_block_size(); + CHECK_GT(block_size, 0) << "Need positive block size"; + + block_width_ = block_size < 1 ? block_size * original_width_ : block_size; + block_height_ = block_size < 1 ? block_size * original_height_ : block_size; + // Ensure block_[width|height] is not zero. + block_width_ = max(1, block_width_); + block_height_ = max(1, block_height_); + + // Compute block pyramid levels. 
+ double min_block_dim = max(block_width_, block_height_); + + // Choose last_level such that + // min_block_dim * 0.5^(last_level - 1) = min_block_size + double last_level = + (log(static_cast(options_.fast_estimation_min_block_size())) - + log(min_block_dim)) / + log(0.5) + + 1; + block_levels_ = max(2.0, floor(last_level)); + + Reset(); +} + +RegionFlowComputation::~RegionFlowComputation() {} + +bool RegionFlowComputation::AddImage(const cv::Mat& source, + int64 timestamp_usec) { + return AddImageAndTrack(source, cv::Mat(), timestamp_usec, Homography()); +} + +bool RegionFlowComputation::AddImageWithSeed( + const cv::Mat& source, int64 timestamp_usec, + const Homography& initial_transform) { + return AddImageAndTrack(source, cv::Mat(), timestamp_usec, initial_transform); +} + +bool RegionFlowComputation::AddImageWithMask(const cv::Mat& source, + const cv::Mat& source_mask, + int64 timestamp_usec) { + return AddImageAndTrack(source, source_mask, timestamp_usec, Homography()); +} + +RegionFlowFeatureList* RegionFlowComputation::RetrieveRegionFlowFeatureList( + bool compute_feature_descriptor, bool compute_match_descriptor, + const cv::Mat* curr_color_image, const cv::Mat* prev_color_image) { + return (RetrieveRegionFlowFeatureListImpl( + 0, compute_feature_descriptor, compute_match_descriptor, + curr_color_image ? curr_color_image : nullptr, + prev_color_image ? 
prev_color_image : nullptr) + .release()); +} + +RegionFlowFrame* RegionFlowComputation::RetrieveRegionFlow() { + return RetrieveMultiRegionFlow(0); +} + +std::unique_ptr +RegionFlowComputation::RetrieveRegionFlowFeatureListImpl( + int track_index, bool compute_feature_descriptor, + bool compute_match_descriptor, const cv::Mat* curr_color_image, + const cv::Mat* prev_color_image) { + CHECK_GT(region_flow_results_.size(), track_index); + CHECK(region_flow_results_[track_index].get()); + + std::unique_ptr feature_list( + std::move(region_flow_results_[track_index])); + + if (compute_feature_descriptor) { + CHECK(curr_color_image != nullptr); + CHECK_EQ(3, curr_color_image->channels()); + if (compute_match_descriptor) { + CHECK(prev_color_image != nullptr); + CHECK_EQ(3, prev_color_image->channels()); + } + + ComputeRegionFlowFeatureDescriptors( + *curr_color_image, + compute_match_descriptor ? prev_color_image : nullptr, + options_.patch_descriptor_radius(), feature_list.get()); + } else { + CHECK(!compute_match_descriptor) << "Set compute_feature_descriptor also " + << "if setting compute_match_descriptor"; + } + + return feature_list; +} + +RegionFlowFrame* RegionFlowComputation::RetrieveMultiRegionFlow(int frame) { + std::unique_ptr feature_list( + RetrieveRegionFlowFeatureListImpl(frame, + false, // No descriptors. + false, // No match descriptors. + nullptr, nullptr)); + + std::unique_ptr flow_frame(new RegionFlowFrame()); + + RegionFlowFeatureListToRegionFlow(*feature_list, flow_frame.get()); + return flow_frame.release(); +} + +RegionFlowFeatureList* +RegionFlowComputation::RetrieveMultiRegionFlowFeatureList( + int track_index, bool compute_feature_descriptor, + bool compute_match_descriptor, const cv::Mat* curr_color_image, + const cv::Mat* prev_color_image) { + return (RetrieveRegionFlowFeatureListImpl( + track_index, compute_feature_descriptor, compute_match_descriptor, + curr_color_image ? curr_color_image : nullptr, + prev_color_image ? 
prev_color_image : nullptr) + .release()); +} + +bool RegionFlowComputation::InitFrame(const cv::Mat& source, + const cv::Mat& source_mask, + FrameTrackingData* data) { + // Destination frame, CV_8U grayscale of dimension frame_width_ x + // frame_height_. + cv::Mat& dest_frame = data->frame; + cv::Mat& dest_mask = data->mask; + + // Do we need to downsample image? + const cv::Mat* source_ptr = &source; + if (use_downsampling_ && + options_.downsample_mode() != + RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE) { + // Area based method best for downsampling. + // For color images to temporary buffer. + cv::Mat& resized = source.channels() == 1 ? dest_frame : *curr_color_image_; + cv::resize(source, resized, resized.size(), 0, 0, CV_INTER_AREA); + source_ptr = &resized; + // Resize feature extraction mask if needed. + if (!source_mask.empty()) { + dest_mask.create(resized.rows, resized.cols, CV_8UC1); + cv::resize(source_mask, dest_mask, dest_mask.size(), 0, 0, CV_INTER_NN); + } + } else if (!source_mask.empty()) { + source_mask.copyTo(dest_mask); + } + + // Stores as tiny frame before color conversion if requested. + const auto& visual_options = options_.visual_consistency_options(); + if (visual_options.compute_consistency()) { + // Allocate tiny image. + const int type = source_ptr->type(); + const int dimension = visual_options.tiny_image_dimension(); + data->tiny_image.create(dimension, dimension, type); + cv::resize(*source_ptr, data->tiny_image, data->tiny_image.size(), 0, 0, + CV_INTER_AREA); + } + + if (source_ptr->channels() == 1 && + options_.image_format() != + RegionFlowComputationOptions::FORMAT_GRAYSCALE) { + options_.set_image_format(RegionFlowComputationOptions::FORMAT_GRAYSCALE); + LOG(WARNING) << "#channels = 1, but image_format was not set to " + "FORMAT_GRAYSCALE. Assuming GRAYSCALE input."; + } + + // Convert image to grayscale. 
+ switch (options_.image_format()) { + case RegionFlowComputationOptions::FORMAT_RGB: + if (3 != source_ptr->channels()) { + LOG(ERROR) << "Expecting 3 channel input for RGB."; + return false; + } + cv::cvtColor(*source_ptr, dest_frame, CV_RGB2GRAY); + break; + + case RegionFlowComputationOptions::FORMAT_BGR: + if (3 != source_ptr->channels()) { + LOG(ERROR) << "Expecting 3 channel input for BGR."; + return false; + } + cv::cvtColor(*source_ptr, dest_frame, CV_BGR2GRAY); + break; + + case RegionFlowComputationOptions::FORMAT_RGBA: + if (4 != source_ptr->channels()) { + LOG(ERROR) << "Expecting 4 channel input for RGBA."; + return false; + } + cv::cvtColor(*source_ptr, dest_frame, CV_RGBA2GRAY); + break; + + case RegionFlowComputationOptions::FORMAT_BGRA: + if (4 != source_ptr->channels()) { + LOG(ERROR) << "Expecting 4 channel input for BGRA."; + return false; + } + cv::cvtColor(*source_ptr, dest_frame, CV_BGRA2GRAY); + break; + + case RegionFlowComputationOptions::FORMAT_GRAYSCALE: + if (1 != source_ptr->channels()) { + LOG(ERROR) << "Expecting 1 channel input for GRAYSCALE."; + return false; + } + CHECK_EQ(1, source_ptr->channels()); + if (source_ptr != &dest_frame) { + source_ptr->copyTo(dest_frame); + } + break; + } + + // Do histogram equalization. + if (options_.histogram_equalization()) { + cv::equalizeHist(dest_frame, dest_frame); + } + + // Compute mean for gain correction. + if (options_.gain_correction()) { + data->mean_intensity = cv::mean(dest_frame)[0]; + } + + // Consistency checks; not input governed. 
+ CHECK_EQ(dest_frame.cols, frame_width_); + CHECK_EQ(dest_frame.rows, frame_height_); + + data->BuildPyramid(pyramid_levels_, + options_.tracking_options().tracking_window_size(), + options_.compute_derivative_in_pyramid()); + + return true; +} + +bool RegionFlowComputation::AddImageAndTrack( + const cv::Mat& source, const cv::Mat& source_mask, int64 timestamp_usec, + const Homography& initial_transform) { + VLOG(1) << "Processing frame " << frame_num_ << " at " << timestamp_usec; + MEASURE_TIME << "AddImageAndTrack"; + + if (options_.downsample_mode() == + RegionFlowComputationOptions::DOWNSAMPLE_TO_INPUT_SIZE) { + if (frame_width_ != source.cols || frame_height_ != source.rows) { + LOG(ERROR) << "Source input dimensions incompatible with " + << "DOWNSAMPLE_TO_INPUT_SIZE. frame_width_: " << frame_width_ + << ", source.cols: " << source.cols + << ", frame_height_: " << frame_height_ + << ", source.rows: " << source.rows; + return false; + } + + if (!source_mask.empty()) { + if (frame_width_ != source_mask.cols || + frame_height_ != source_mask.rows) { + LOG(ERROR) << "Input mask dimensions incompatible with " + << "DOWNSAMPLE_TO_INPUT_SIZE"; + return false; + } + } + } else { + if (original_width_ != source.cols || original_height_ != source.rows) { + LOG(ERROR) << "Source input dimensions differ from those specified " + << "in the constructor"; + return false; + } + if (!source_mask.empty()) { + if (original_width_ != source_mask.cols || + original_height_ != source_mask.rows) { + LOG(ERROR) << "Input mask dimensions incompatible with those " + << "specified in the constructor"; + return false; + } + } + } + + // Create data queue element for current frame. If queue is full, reuse the + // data field from front in a circular buffer, otherwise add a new one. 
+ if (data_queue_.size() > frames_to_track_) { + data_queue_.push_back(std::move(data_queue_.front())); + data_queue_.pop_front(); + } else { + data_queue_.push_back(MakeUnique(new FrameTrackingData( + frame_width_, frame_height_, extraction_levels_, use_cv_tracking_))); + } + + FrameTrackingData* curr_data = data_queue_.back().get(); + curr_data->Reset(frame_num_, timestamp_usec); + + if (!IsModelIdentity(initial_transform)) { + CHECK_EQ(1, frames_to_track_) << "Initial transform is not supported " + << "for multi frame tracking"; + Homography transform = initial_transform; + if (downsample_scale_ != 1) { + const float scale = 1.0f / downsample_scale_; + transform = CoordinateTransform(initial_transform, scale); + } + curr_data->initial_transform.reset(new Homography(transform)); + } + + if (!InitFrame(source, source_mask, curr_data)) { + LOG(ERROR) << "Could not init frame."; + return false; + } + + // Precompute blur score from original (not pre-blurred) frame. + cv::Mat& curr_frame = curr_data->frame; + curr_blur_score_ = + options_.compute_blur_score() ? ComputeBlurScore(curr_frame) : -1; + + if (options_.pre_blur_sigma() > 0) { + cv::GaussianBlur(curr_frame, curr_frame, cv::Size(0, 0), + options_.pre_blur_sigma(), options_.pre_blur_sigma()); + } + + // By default, create empty region flows for as many frames as we want to + // track. + region_flow_results_.clear(); + for (int i = 0; i < frames_to_track_; ++i) { + region_flow_results_.push_back(MakeUnique(new RegionFlowFeatureList())); + InitializeRegionFlowFeatureList(region_flow_results_.back().get()); + } + + // Do we have enough frames to start tracking? + const bool synthetic_tracks = + options_.use_synthetic_zero_motion_tracks_all_frames() || + (frame_num_ == 0 && + options_.use_synthetic_zero_motion_tracks_first_frame()); + + int curr_frames_to_track = frames_to_track_; + // Update frames-to-track to match actual frames that we can track. 
+ if (!synthetic_tracks) { + curr_frames_to_track = min(frame_num_, frames_to_track_); + } + + // Compute region flows for all frames being tracked. + const auto internal_flow_direction = + options_.tracking_options().internal_tracking_direction(); + const bool invert_flow = internal_flow_direction != + options_.tracking_options().output_flow_direction(); + + switch (internal_flow_direction) { + case TrackingOptions::FORWARD: + if (long_track_data_ != nullptr && curr_frames_to_track > 0) { + // Long feature tracking. + TrackedFeatureList curr_result; + ComputeRegionFlow(-1, 0, synthetic_tracks, invert_flow, + &long_track_data_->prev_result, &curr_result, + region_flow_results_[0].get()); + long_track_data_->prev_result.swap(curr_result); + } else { + // Track from the closest frame last, so that the last set of features + // updated in FrameTrackingData are from the closest one. + for (int i = curr_frames_to_track; i >= 1; --i) { + ComputeRegionFlow(-i, 0, synthetic_tracks, invert_flow, nullptr, + nullptr, region_flow_results_[i - 1].get()); + } + } + break; + case TrackingOptions::BACKWARD: + for (int i = 1; i <= curr_frames_to_track; ++i) { + if (!synthetic_tracks && i > 1) { + InitializeFeatureLocationsFromPreviousResult(-i + 1, -i); + } + ComputeRegionFlow(0, -i, synthetic_tracks, invert_flow, nullptr, + nullptr, region_flow_results_[i - 1].get()); + } + break; + case TrackingOptions::CONSECUTIVELY: + const bool invert_flow_forward = + TrackingOptions::FORWARD != + options_.tracking_options().output_flow_direction(); + const bool invert_flow_backward = !invert_flow_forward; + for (int i = curr_frames_to_track; i >= 1; --i) { + // Compute forward flow. + ComputeRegionFlow(-i, 0, synthetic_tracks, invert_flow_forward, nullptr, + nullptr, region_flow_results_[i - 1].get()); + if (region_flow_results_[i - 1]->unstable()) { + // If forward flow is unstable, compute backward flow. 
+ ComputeRegionFlow(0, -i, synthetic_tracks, invert_flow_backward, + nullptr, nullptr, + region_flow_results_[i - 1].get()); + } + } + break; + } + + if (frames_to_track_ == 1) { + const int num_features = region_flow_results_.front()->feature_size(); + if (frame_num_ == 0) { + curr_num_features_avg_ = num_features; + } else { + // Low pass filter number of current features. + constexpr float kAlpha = 0.3f; + curr_num_features_avg_ = + (1.0f - kAlpha) * curr_num_features_avg_ + kAlpha * num_features; + } + } + + ++frame_num_; + + return true; +} + +cv::Mat RegionFlowComputation::GetGrayscaleFrameFromResults() { + CHECK_GT(data_queue_.size(), 0) << "Empty queue, was AddImage* called?"; + FrameTrackingData* curr_data = data_queue_.back().get(); + CHECK(curr_data); + return curr_data->frame; +} + +void RegionFlowComputation::GetFeatureTrackInliers( + bool skip_estimation, TrackedFeatureList* features, + TrackedFeatureView* inliers) const { + CHECK(features != nullptr); + CHECK(inliers != nullptr); + inliers->clear(); + if (skip_estimation) { + inliers->reserve(features->size()); + for (auto& feature : *features) { + inliers->push_back(&feature); + } + } else { + ComputeBlockBasedFlow(features, inliers); + } +} + +float RegionFlowComputation::ComputeVisualConsistency( + FrameTrackingData* previous, FrameTrackingData* current) const { + CHECK_EQ(previous->frame_num + 1, current->frame_num); + const int total = previous->tiny_image.total(); + CHECK_GT(total, 0) << "Tiny image dimension set to zero."; + current->tiny_image_diff = + FrameDifferenceMedian(previous->tiny_image, current->tiny_image) * + (1.0f / total); + + return fabs(previous->tiny_image_diff - current->tiny_image_diff); +} + +void RegionFlowComputation::ComputeRegionFlow( + int from, int to, bool synthetic_tracks, bool invert_flow, + const TrackedFeatureList* prev_result, TrackedFeatureList* curr_result, + RegionFlowFeatureList* feature_list) { + MEASURE_TIME << "Compute RegionFlow."; + // feature_tracks 
should be in the outer scope since the inliers form a view + // on them (store pointers to features stored in feature_tracks). + TrackedFeatureList feature_tracks; + TrackedFeatureView feature_inliers; + + FrameTrackingData* data1 = nullptr; + FrameTrackingData* data2 = nullptr; + float frac_long_features_rejected = 0; + float visual_consistency = 0; + + if (synthetic_tracks) { + const float step = + options_.tracking_options().synthetic_zero_motion_grid_step(); + ZeroMotionGridTracks(original_width_, original_height_, step, step, + &feature_tracks); + GetFeatureTrackInliers(true /* skip_estimation */, &feature_tracks, + &feature_inliers); + } else { + const int index1 = data_queue_.size() + from - 1; + const int index2 = data_queue_.size() + to - 1; + CHECK_GE(index1, 0); + CHECK_LT(index1, data_queue_.size()); + CHECK_GE(index2, 0); + CHECK_LT(index2, data_queue_.size()); + data1 = data_queue_[index1].get(); + data2 = data_queue_[index2].get(); + + std::unique_ptr initial_transform; + if (index1 + 1 == index2) { + // Forward track, check if initial transform present. + if (data2->initial_transform != nullptr) { + initial_transform = absl::make_unique( + ModelInvert(*data2->initial_transform)); + } + } else if (index1 - 1 == index2) { + // Backward track, check if initial transform present. 
+ if (data1->initial_transform != nullptr) { + initial_transform.reset(new Homography(*data1->initial_transform)); + } + } + + if (std::abs(from - to) == 1 && + options_.visual_consistency_options().compute_consistency()) { + FrameTrackingData* later_data = data1; + FrameTrackingData* earlier_data = data2; + if (from < to) { + std::swap(later_data, earlier_data); + } + + visual_consistency = ComputeVisualConsistency(earlier_data, later_data); + } + + bool track_features = true; + bool force_feature_extraction_next_frame = false; + if (options_.tracking_options().wide_baseline_matching()) { + CHECK(initial_transform == nullptr) + << "Can't use wide baseline matching and initial transform as the " + << "same time."; + + WideBaselineMatchFeatures(data1, data2, &feature_tracks); + track_features = + options_.tracking_options().refine_wide_baseline_matches(); + if (track_features) { + initial_transform = absl::make_unique( + HomographyAdapter::Embed(AffineModelFromFeatures(&feature_tracks))); + feature_tracks.clear(); + } else { + GetFeatureTrackInliers(options_.no_estimation_mode(), &feature_tracks, + &feature_inliers); + } + } + + if (track_features) { + ExtractFeatures(prev_result, data1); + + if (initial_transform != nullptr) { + InitializeFeatureLocationsFromTransform(from, to, *initial_transform); + } + + // Compute tracks with gain correction if requested. + bool gain_correction = options_.gain_correction(); + const float triggering_ratio = + options_.gain_correction_triggering_ratio(); + if (options_.gain_correction() && triggering_ratio > 0) { + // Only compute gain if change in intensity across frames + // is sufficiently large. 
+ const float intensity_ratio = + std::max(data1->mean_intensity, data2->mean_intensity) / + (std::min(data1->mean_intensity, data2->mean_intensity) + 1e-6f); + gain_correction = intensity_ratio > triggering_ratio; + } + + const bool gain_hypotheses = + options_.gain_correction_multiple_hypotheses(); + + // Trigger feature extraction on next frame every time we perform + // gain correction. + force_feature_extraction_next_frame = gain_correction; + + // Backup FrameTrackingData if needed for reset when using multiple + // hypothesis. + std::unique_ptr wo_gain_data2; + if (gain_correction && gain_hypotheses) { + wo_gain_data2.reset(new FrameTrackingData(*data2)); + } + + TrackFeatures(data1, data2, &gain_correction, + &frac_long_features_rejected, &feature_tracks); + GetFeatureTrackInliers(options_.no_estimation_mode(), &feature_tracks, + &feature_inliers); + + // Second pass: If gain correction was successful and multiple hypotheses, + // are requested run again without it. + if (gain_correction && gain_hypotheses) { + // Re-run without gain correction. + TrackedFeatureList wo_gain_tracks; + TrackedFeatureView wo_gain_inliers; + + gain_correction = false; + TrackFeatures(data1, wo_gain_data2.get(), &gain_correction, nullptr, + &wo_gain_tracks); + GetFeatureTrackInliers(options_.no_estimation_mode(), &wo_gain_tracks, + &wo_gain_inliers); + + // Only use gain correction if it is better than tracking + // without gain correction, i.e gain correction should result in more + // inliers, at least by the specified fractional improvement. + const float improvement_weight = + 1.0f + options_.gain_correction_inlier_improvement_frac(); + const int gain_count = feature_inliers.size(); + const int wo_gain_count = wo_gain_inliers.size(); + if (gain_count < wo_gain_count * improvement_weight) { + // Reject gain result, insufficient improvement. Use result without + // gain correction instead. 
+ feature_tracks.swap(wo_gain_tracks); + feature_inliers.swap(wo_gain_inliers); + std::swap(*data2, *wo_gain_data2); + VLOG(1) << "Rejecting gain correction. Number of inliers with " + << "gain: " << gain_count + << ", without gain: " << wo_gain_count; + force_feature_extraction_next_frame = false; + } + } + } // end if track features. + + if (data1->num_original_extracted_and_tracked < 0) { + // Record initial track. + data1->num_original_extracted_and_tracked = feature_inliers.size(); + } + + if (force_feature_extraction_next_frame) { + data2->num_extracted_and_tracked = 0; + } else { + data2->num_extracted_and_tracked = feature_inliers.size(); + } + + // Forward initial number of tracked features. + data2->num_original_extracted_and_tracked = + data1->num_original_extracted_and_tracked; + } + + // Convert tracks to region flow. + if (invert_flow) { + InvertFeatureList(feature_tracks, &feature_tracks); + } + + const float flow_magnitude = TrackedFeatureViewToRegionFlowFeatureList( + feature_inliers, curr_result, feature_list); + + // Assign unique ids to the features. + for (auto& feature : *feature_list->mutable_feature()) { + feature.set_feature_id(++feature_count_); + } + + if (from != to) { + // Record average flow magnitude (normalized w.r.t. tracking distance in + // frames). + flow_magnitudes_.push_back(flow_magnitude / std::abs(from - to)); + const int kMaxMagnitudeRecords = 10; + // Limit to only most recent observations. + while (flow_magnitudes_.size() > kMaxMagnitudeRecords) { + flow_magnitudes_.pop_front(); + } + + // Adaptively size pyramid based on previous observations. + // 130% of previous maximum. + if (options_.tracking_options().adaptive_tracking_distance() && + flow_magnitudes_.size() > 2) { + pyramid_levels_ = PyramidLevelsFromTrackDistance( + *std::max_element(flow_magnitudes_.begin(), flow_magnitudes_.end()) * + 1.3f); + } + } + + // Check if sufficient features found, set corresponding flags. 
+ if (!HasSufficientFeatures(*feature_list)) { + feature_list->set_unstable(true); + // If region flow is unstable, then the tracked features in the "to" frame + // should not be relied upon for reuse. + if (data2 != nullptr) { + data2->ResetFeatures(); + } + } + + // Store additional information in feature_list. + feature_list->set_frac_long_features_rejected(frac_long_features_rejected); + feature_list->set_visual_consistency(visual_consistency); + if (invert_flow) { + if (data2 != nullptr) { + feature_list->set_timestamp_usec(data2->timestamp_usec); + } + } else { + if (data1 != nullptr) { + feature_list->set_timestamp_usec(data1->timestamp_usec); + } + } + + feature_list->set_match_frame((to - from) * (invert_flow ? -1 : 1)); +} + +// Resets computation by setting frame_num_ == 0. No need to clear other data +// structures, since previous frames are used only once frame_num_ > 0. +void RegionFlowComputation::Reset() { + frame_num_ = 0; + data_queue_.clear(); + flow_magnitudes_.clear(); +} + +namespace { + +struct FloatPointerComparator { + bool operator()(const float* lhs, const float* rhs) const { + return *lhs > *rhs; + } +}; + +// Invoker for ParallelFor. Needs to be copyable. +// Extracts features from a 2nd moment gradient response image (eig_image) +// by grid-based thresholding (removing feature responses below +// local_quality_level * maximum cell value or lowest_quality_level) and +// non-maxima suppression via dilation. Results are output in corner_pointers +// and partially sorted (limited to max_cell_features, highest first). 
+class GridFeatureLocator { + public: + GridFeatureLocator(int frame_width, int frame_height, int block_width, + int block_height, int bins_per_row, + float local_quality_level, float lowest_quality_level, + int max_cell_features, + std::vector>* corner_pointers, + cv::Mat* eig_image, cv::Mat* tmp_image) + : frame_width_(frame_width), + frame_height_(frame_height), + block_width_(block_width), + block_height_(block_height), + bins_per_row_(bins_per_row), + local_quality_level_(local_quality_level), + lowest_quality_level_(lowest_quality_level), + max_cell_features_(max_cell_features), + corner_pointers_(corner_pointers), + eig_image_(eig_image), + tmp_image_(tmp_image) {} + + void operator()(const BlockedRange2D& range) const { + // Iterate over views. + for (int bin_y = range.rows().begin(); bin_y != range.rows().end(); + ++bin_y) { + for (int bin_x = range.cols().begin(); bin_x != range.cols().end(); + ++bin_x) { + const int view_x = bin_x * block_width_; + const int view_y = bin_y * block_height_; + const int view_end_x = min(frame_width_, (bin_x + 1) * block_width_); + const int view_end_y = min(frame_height_, (bin_y + 1) * block_height_); + + // Guarantee at least one pixel in each dimension. + if (view_x >= view_end_x || view_y >= view_end_y) { + continue; + } + + cv::Mat eig_view(*eig_image_, cv::Range(view_y, view_end_y), + cv::Range(view_x, view_end_x)); + cv::Mat tmp_view(*tmp_image_, cv::Range(view_y, view_end_y), + cv::Range(view_x, view_end_x)); + + // Ignore features below quality level. + double maximum = 0; + cv::minMaxLoc(eig_view, nullptr, &maximum, nullptr, nullptr); + double lowest_quality = std::max(maximum * local_quality_level_, + lowest_quality_level_); + + // Copy borders that do not get dilated below. 
+ cv::Rect borders[4] = { + cv::Rect(0, 0, eig_view.cols, 1), // top + cv::Rect(0, 0, 1, eig_view.rows), // left + cv::Rect(0, eig_view.rows - 1, eig_view.cols, 1), // bottom + cv::Rect(eig_view.cols - 1, 0, 1, eig_view.rows)}; + + for (int k = 0; k < 4; ++k) { + cv::Mat dst_view(tmp_view, borders[k]); + cv::Mat(eig_view, borders[k]).copyTo(dst_view); + } + + // Non-maxima suppression. + if (tmp_view.rows > 2 && tmp_view.cols > 2) { + // Remove border from dilate, as cv::dilate reads out of + // bound values otherwise. + cv::Mat dilate_src = + cv::Mat(eig_view, cv::Range(1, eig_view.rows - 1), + cv::Range(1, eig_view.cols - 1)); + + cv::Mat dilate_dst = + cv::Mat(tmp_view, cv::Range(1, tmp_view.rows - 1), + cv::Range(1, tmp_view.cols - 1)); + cv::Mat kernel(3, 3, CV_32F); + kernel.setTo(1.0); + cv::dilate(dilate_src, dilate_dst, kernel); + } + + const int grid_pos = bin_y * bins_per_row_ + bin_x; + std::vector& grid_cell = (*corner_pointers_)[grid_pos]; + + // Iterate over view in image domain as we store feature location + // pointers w.r.t. original frame. + for (int i = view_y; i < view_end_y; ++i) { + const float* tmp_ptr = tmp_image_->ptr(i); + const float* eig_ptr = eig_image_->ptr(i); + for (int j = view_x; j < view_end_x; ++j) { + const float max_supp_value = tmp_ptr[j]; + if (max_supp_value > lowest_quality && + max_supp_value == eig_ptr[j]) { + // This is a local maxima -> store in list. 
+ grid_cell.push_back(eig_ptr + j); + } + } + } + + const int level_max_elems = + min(max_cell_features_, grid_cell.size()); + std::partial_sort(grid_cell.begin(), + grid_cell.begin() + level_max_elems, grid_cell.end(), + FloatPointerComparator()); + } + } + } + + private: + int frame_width_; + int frame_height_; + int block_width_; + int block_height_; + int bins_per_row_; + float local_quality_level_; + float lowest_quality_level_; + int max_cell_features_; + std::vector>* corner_pointers_; + cv::Mat* eig_image_; + cv::Mat* tmp_image_; +}; + +// Sets (2 * N + 1) x (2 * N + 1) neighborhood of the passed mask to K +// or adds K to the existing mask if add is set to true. +template +inline void SetMaskNeighborhood(int mask_x, int mask_y, cv::Mat* mask) { + DCHECK_EQ(mask->type(), CV_8U); + const int mask_start_x = max(0, mask_x - N); + const int mask_end_x = min(mask->cols - 1, mask_x + N); + const int mask_dx = mask_end_x - mask_start_x + 1; + const int mask_start_y = max(0, mask_y - N); + const int mask_end_y = min(mask->rows - 1, mask_y + N); + DCHECK_LE(mask_start_x, mask_end_x); + DCHECK_LE(mask_start_y, mask_end_y); + + if (!add) { + for (int i = mask_start_y; i <= mask_end_y; ++i) { + uint8* mask_ptr = mask->ptr(i) + mask_start_x; + memset(mask_ptr, K, mask_dx * sizeof(*mask_ptr)); + } + } else { + for (int i = mask_start_y; i <= mask_end_y; ++i) { + uint8* mask_ptr = mask->ptr(i); + for (int j = mask_start_x; j <= mask_end_x; ++j) { + mask_ptr[j] = (mask_ptr[j] & 0x7F) + K; // Limit to 128. + } + } + } +} + +} // namespace. 
+ +void RegionFlowComputation::AdaptiveGoodFeaturesToTrack( + const std::vector& extraction_pyramid, int max_features, + float mask_scale, cv::Mat* mask, FrameTrackingData* data) { + CHECK(data != nullptr); + CHECK(feature_tmp_image_1_.get() != nullptr); + CHECK(feature_tmp_image_2_.get() != nullptr); + + cv::Mat* eig_image = feature_tmp_image_1_.get(); + cv::Mat* tmp_image = feature_tmp_image_2_.get(); + + const auto& tracking_options = options_.tracking_options(); + + // Setup grid information. + const float block_size = tracking_options.adaptive_features_block_size(); + CHECK_GT(block_size, 0) << "Need positive block size"; + + int block_width = block_size < 1 ? block_size * frame_width_ : block_size; + int block_height = block_size < 1 ? block_size * frame_height_ : block_size; + + // Ensure valid block width and height regardless of settings. + block_width = max(1, block_width); + block_height = max(1, block_height); + + bool use_harris = tracking_options.corner_extraction_method() == + TrackingOptions::EXTRACTION_HARRIS; + + const int adaptive_levels = + options_.tracking_options().adaptive_features_levels(); + + // For Harris negative values are possible, so lowest_quality level + // is being ignored. + const float lowest_quality_level = + use_harris ? -100.0f + : tracking_options.min_eig_val_settings() + .adaptive_lowest_quality_level(); + + const float local_quality_level = + use_harris + ? tracking_options.harris_settings().feature_quality_level() + : tracking_options.min_eig_val_settings().feature_quality_level(); + + bool use_fast = tracking_options.corner_extraction_method() == + TrackingOptions::EXTRACTION_FAST; + cv::Ptr fast_detector; + if (use_fast) { + fast_detector = cv::FastFeatureDetector::create( + tracking_options.fast_settings().threshold()); + } + + // Extract features at multiple scales and adaptive block sizes. 
+ for (int e = 0, step = 1; e < extraction_pyramid.size(); ++e) { + if (data->features.size() >= max_features) { + break; + } + + const cv::Mat& image = extraction_pyramid[e]; + const int rows = image.rows; + const int cols = image.cols; + + // Compute corner response. + constexpr int kBlockSize = 3; + constexpr double kHarrisK = 0.04; // Harris magical constant as + // set by OpenCV. + std::vector fast_keypoints; + if (e == 0) { + MEASURE_TIME << "Corner extraction"; + CHECK_EQ(rows, frame_height_); + CHECK_EQ(cols, frame_width_); + + if (use_fast) { + fast_detector->detect(image, fast_keypoints); + } else if (use_harris) { + cv::cornerHarris(image, *eig_image, kBlockSize, kBlockSize, kHarrisK); + } else { + cv::cornerMinEigenVal(image, *eig_image, kBlockSize); + } + } else { + // Compute corner response on a down-scaled image and upsample. + step *= 2; + CHECK_EQ(rows, (extraction_pyramid[e - 1].rows + 1) / 2); + CHECK_EQ(cols, (extraction_pyramid[e - 1].cols + 1) / 2); + + if (use_fast) { + fast_detector->detect(image, fast_keypoints); + + for (int j = 0; j < fast_keypoints.size(); ++j) { + fast_keypoints[j].pt.x *= step; + fast_keypoints[j].pt.y *= step; + } + } else { + // Use tmp_image to compute eigen-values on resized images. + cv::Mat eig_view(*tmp_image, cv::Range(0, rows), cv::Range(0, cols)); + + if (use_harris) { + cv::cornerHarris(image, eig_view, kBlockSize, kBlockSize, kHarrisK); + } else { + cv::cornerMinEigenVal(image, eig_view, kBlockSize); + } + + // Upsample (without interpolation) eig_view to match frame size. + eig_image->setTo(0); + for (int r = 0, r_up = 0; r < rows && r_up < frame_height_; + ++r, r_up += step) { + const float* ptr = eig_view.ptr(r); + float* up_ptr = eig_image->ptr(r_up); + for (int c = 0, c_up = 0; c < cols && c_up < frame_width_; + ++c, c_up += step) { + up_ptr[c_up] = ptr[c]; + } + } + } // end if use_fast + } // end if e. + + if (use_fast) { + // TODO: Perform grid based feature detection. 
+ std::sort(fast_keypoints.begin(), fast_keypoints.end(), + [](const cv::KeyPoint& a, const cv::KeyPoint& b) { + return b.response < a.response; + }); + + for (int j = 0; j < fast_keypoints.size(); ++j) { + const int corner_y = fast_keypoints[j].pt.y; + const int corner_x = fast_keypoints[j].pt.x; + const int mask_x = corner_x * mask_scale; + const int mask_y = corner_y * mask_scale; + + // Test if neighboring element is already set. + if (mask->at(mask_y, mask_x) >= 1) { + continue; + } + + SetMaskNeighborhood<2, 1, false>(mask_x, mask_y, mask); + + // `e` stands for which pyramid layer this feature is extracted from. + data->AddFeature(cv::Point2f(corner_x, corner_y), + min(1.0f, fast_keypoints[j].response), e, + -1, // No track id assigned yet. Only assign ids + // to successfully tracked features. + nullptr); + } + } else { + // Iterate over adaptive pyramid levels. + int level_width = block_width; + int level_height = block_height; + for (int level = 0; level < adaptive_levels; ++level) { + // Perform grid based threshold and non-maxima suppression. + const int bins_per_column = + std::ceil(static_cast(frame_height_) / level_height); + const int bins_per_row = + std::ceil(static_cast(frame_width_) / level_width); + const int num_bins = bins_per_row * bins_per_column; + const int level_max_features = max_features - data->features.size(); + if (level_max_features < 0) { + break; + } + + std::vector> corner_pointers(num_bins); + for (int k = 0; k < num_bins; ++k) { + corner_pointers[k].reserve(level_max_features); + } + + GridFeatureLocator locator( + frame_width_, frame_height_, level_width, level_height, + bins_per_row, local_quality_level, lowest_quality_level, + level_max_features, &corner_pointers, eig_image, tmp_image); + + ParallelFor2D(0, bins_per_column, 0, bins_per_row, 1, locator); + + // Round robin across bins, add one feature per bin, until + // max_features is hit. 
+ bool more_features_available = true; + + // Index of next to be processed corner in corner_points[k] array. + std::vector corner_index(num_bins, 0); + while (more_features_available && + data->features.size() < max_features) { + more_features_available = false; + + // Add one feature per bin (stratified sampling). + for (int k = 0; k < num_bins; ++k) { + if (corner_index[k] >= corner_pointers[k].size()) { + continue; + } + + const float* corner_ptr = corner_pointers[k][corner_index[k]]; + // Advance. + ++corner_index[k]; + if (corner_index[k] < corner_pointers[k].size() - 1) { + more_features_available = true; + } + + // Map corner pointer to x and y location. + const int offset = reinterpret_cast(corner_ptr) - + eig_image->ptr(0); + + const int corner_y = offset / eig_image->step[0]; + const int corner_x = + (offset - eig_image->step[0] * corner_y) / sizeof(*corner_ptr); + + // Ensure corner is at least 2 pixel away from boundary. + if (corner_x < 2 || corner_x > frame_width_ - 2 || corner_y < 2 || + corner_y > frame_height_ - 2) { + continue; + } + + const int mask_x = corner_x * mask_scale; + const int mask_y = corner_y * mask_scale; + + // Test if neighboring element is already set. + if (mask->at(mask_y, mask_x) >= 1) { + continue; + } + + SetMaskNeighborhood<2, 1, false>(mask_x, mask_y, mask); + + // `e` stands for which pyramid layer the feature is extracted from. + data->AddFeature( + cv::Point2f(corner_x, corner_y), + min(1.0f, *corner_ptr * options_.corner_response_scale()), e, + -1, // No track id assigned yet. Only assign ids + // to successfully tracked features. + nullptr); + } // end bins. + } // end while. + + if (level + 1 < adaptive_levels) { + level_width = (level_width + 1) / 2; + level_height = (level_height + 1) / 2; + } + } // end adaptive level. + } // end use_fast + } // end extraction level. + + // If adaptive_levels or extraction_levels > 1, for 2nd or larger level, we + // can potentially add corners above the max_features threshold. 
In this case + // truncation performs well without having to resort to sorting the features, + // as 2nd or larger level add features of lower corner response that pass the + // locality test (as the neighborhood size decreases). + if (data->features.size() > max_features) { + data->features.resize(max_features); + data->corner_responses.resize(max_features); + data->octaves.resize(max_features); + data->track_ids.resize(max_features); + } +} + +AffineModel RegionFlowComputation::AffineModelFromFeatures( + TrackedFeatureList* features) const { + CHECK(features != nullptr); + + // Downscaled domain as output. + MotionEstimation motion_estimation(MotionEstimationOptions(), frame_width_, + frame_height_); + + RegionFlowFrame region_flow; + region_flow.set_frame_width(original_width_); + region_flow.set_frame_height(original_height_); + + TrackedFeatureView feature_view; + ComputeBlockBasedFlow(features, &feature_view); + + RegionFlowFeatureList feature_list; + TrackedFeatureViewToRegionFlowFeatureList(feature_view, nullptr, + &feature_list); + + return FitAffineModel(feature_list); +} + +void RegionFlowComputation::ZeroMotionGridFeatures( + int frame_width, int frame_height, float frac_grid_step_x, + float frac_grid_step_y, RegionFlowFeatureList* result) { + CHECK(result != nullptr); + result->Clear(); + + TrackedFeatureList features; + const int border_dist = ZeroMotionGridTracks( + frame_width, frame_height, frac_grid_step_x, frac_grid_step_y, &features); + + result->set_frame_width(frame_width); + result->set_frame_height(frame_height); + result->set_distance_from_border(border_dist); + + for (const auto& feature : features) { + RegionFlowFeature* new_feature = result->add_feature(); + new_feature->set_x(feature.point.x()); + new_feature->set_y(feature.point.y()); + new_feature->set_dx(feature.flow.x()); + new_feature->set_dy(feature.flow.y()); + } +} + +void RegionFlowComputation::DenseZeroMotionSamples( + int frame_width, int frame_height, float frac_diameter, 
float frac_steps_x, + float frac_steps_y, RegionFlowFeatureList* result) { + CHECK(result != nullptr); + + // Ensure patch fits into frame. + const int radius = + max(1, min(min(frame_width / 2 - 1, frame_height / 2 - 1), + hypot(frame_width, frame_height) * frac_diameter) / + 2); + result->Clear(); + result->set_frame_width(frame_width); + result->set_frame_height(frame_height); + result->set_distance_from_border(radius); + + const int start = radius; + const int end_y = frame_height - radius; + const int end_x = frame_width - radius; + + const int steps_x = max(1, frame_width * frac_steps_x); + const int steps_y = max(1, frame_height * frac_steps_y); + for (int y = start; y < end_y; y += steps_y) { + for (int x = start; x < end_x; x += steps_x) { + RegionFlowFeature* new_feature = result->add_feature(); + new_feature->set_x(x); + new_feature->set_y(y); + new_feature->set_dx(0); + new_feature->set_dy(0); + } + } +} + +namespace { + +bool PointOutOfBound(const Vector2_f& point, int frame_width, + int frame_height) { + if (point.x() < 0 || point.y() < 0 || point.x() > frame_width - 1 || + point.y() > frame_height - 1) { + return true; + } + return false; +} + +} // namespace. + +int RegionFlowComputation::ZeroMotionGridTracks(int frame_width, + int frame_height, + float frac_grid_step_x, + float frac_grid_step_y, + TrackedFeatureList* results) { + CHECK(results); + auto& tracked_features = *results; + tracked_features.clear(); + + const int grid_step_x = + max(1, static_cast(frac_grid_step_x * frame_width)); + const int grid_step_y = + max(1, static_cast(frac_grid_step_y * frame_height)); + + const int num_features_x = (frame_width - 1) / grid_step_x; + const int num_features_y = (frame_height - 1) / grid_step_y; + const int max_features = num_features_x * num_features_y; + + // Only track features in one frame for synthetic features. 
+ tracked_features.reserve(max_features); + const int border_dist_x = grid_step_x / 2; + const int border_dist_y = grid_step_y / 2; + for (int i = 0, y = border_dist_y; i < num_features_y; + ++i, y += grid_step_y) { + for (int j = 0, x = border_dist_x; j < num_features_x; + ++j, x += grid_step_x) { + TrackedFeature tracked_feature(Vector2_f(x, y), Vector2_f(0, 0), 0.0f, + 0.0f, + -1); // No track id assigned. + tracked_features.push_back(tracked_feature); + } + } + + return min(border_dist_x, border_dist_y); +} + +bool RegionFlowComputation::GainCorrectFrame(const cv::Mat& reference_frame, + const cv::Mat& input_frame, + float reference_mean, + float input_mean, + cv::Mat* calibrated_frame) const { + CHECK(calibrated_frame); + CHECK_EQ(reference_frame.rows, input_frame.rows); + CHECK_EQ(reference_frame.cols, input_frame.cols); + + // Do not attempt gain correction for tiny images. + if (std::min(reference_frame.rows, reference_frame.cols) < 10) { + VLOG(1) << "Tiny image, aborting gain correction."; + return false; + } + + GainBiasModel gain_bias; + if (options_.fast_gain_correction()) { + const int kMinMean = 5; + if (input_mean < kMinMean) { + return false; // Badly exposed. + } + const float gain = reference_mean / input_mean; + if (gain < options_.gain_bias_bounds().lower_gain() || + gain > options_.gain_bias_bounds().upper_gain()) { + return false; // Unstable: Out of bound. + } + + gain_bias.set_gain_c1(gain); + } + + constexpr float kMaxFastGain = 1.12f; + if (!options_.fast_gain_correction() || gain_bias.gain_c1() > kMaxFastGain) { + // Estimate tone change w.r.t. reference_frame. 
+ RegionFlowFeatureList zero_features; + DenseZeroMotionSamples( + frame_width_, frame_height_, options_.frac_gain_feature_size(), + options_.frac_gain_step(), options_.frac_gain_step(), &zero_features); + + ClipMask<1> reference_mask; + ClipMask<1> input_mask; + ToneEstimation::ComputeClipMask(ClipMaskOptions(), reference_frame, + &reference_mask); + + ToneEstimation::ComputeClipMask(ClipMaskOptions(), input_frame, + &input_mask); + + ColorToneMatches tone_matches; + ToneMatchOptions tone_match_options; + tone_match_options.set_patch_radius(zero_features.distance_from_border() - + 1); + + // Nothing to extract. + if (tone_match_options.patch_radius() < 1) { + VLOG(1) << "Patch radius is < 1, aborting gain correction."; + return false; + } + + ToneEstimation::ComputeToneMatches<1>( + tone_match_options, zero_features, input_frame, reference_frame, + input_mask, reference_mask, &tone_matches); + + // Only attempt estimation if not too much frame area is clipped. + if (tone_matches[0].size() <= 0.5 * zero_features.feature_size()) { + VLOG(1) << "Too much frame area is clipped for gain correction."; + return false; + } + + ToneEstimation::EstimateGainBiasModel(5, // number of irls iterations. + &tone_matches, &gain_bias); + + if (!ToneEstimation::IsStableGainBiasModel( + options_.gain_bias_bounds(), gain_bias, tone_matches, nullptr)) { + VLOG(1) << "Unstable gain-bias model."; + return false; + } + } + + GainBiasModelMethods::MapImageIndependent<1>(gain_bias, + false, // log_domain. + true, // normalized_model. + input_frame, calibrated_frame); + return true; +} + +void RegionFlowComputation::WideBaselineMatchFeatures( + FrameTrackingData* from_data_ptr, FrameTrackingData* to_data_ptr, + TrackedFeatureList* results) { +#if (defined(__ANDROID__) || defined(__APPLE__) || defined(__EMSCRIPTEN__)) && \ + !defined(CV_WRAPPER_3X) + LOG(FATAL) << "Supported on only with OpenCV 3.0. 
"
+             << "Use bazel build flag : --define CV_WRAPPER=3X";
+#else   // (defined(__ANDROID__) || defined(__APPLE__) ||
+        // defined(__EMSCRIPTEN__)) && !defined(CV_WRAPPER_3X)
+  results->clear();
+
+  const auto& frame1 = from_data_ptr->frame;
+  auto& data1 = from_data_ptr->orb;
+
+  const auto& frame2 = to_data_ptr->frame;
+  auto& data2 = to_data_ptr->orb;
+
+  cv::Ptr<cv::ORB> orb = cv::ORB::create(max_features_);
+
+  // Compute ORB features in frame1.
+  if (!data1.computed) {
+    orb->detect(frame1, data1.key_points);
+    orb->compute(frame1, data1.key_points, data1.descriptors);
+    data1.computed = true;
+  }
+
+  const int num_features = data1.key_points.size();
+  if (num_features == 0) {
+    // No features found, probably black or extremely blurry frame. Return empty
+    // results.
+    VLOG(1) << "Couldn't extract any features. Frame probably empty.";
+    return;
+  }
+
+  // Compute ORB features in frame2.
+  if (!data2.computed) {
+    orb->detect(frame2, data2.key_points);
+    orb->compute(frame2, data2.key_points, data2.descriptors);
+    data2.computed = true;
+  }
+
+  // Match feature descriptors.
+  cv::BFMatcher matcher(cv::NORM_HAMMING);
+  std::vector<std::vector<cv::DMatch>> matches;
+  matcher.knnMatch(data2.descriptors,  // Query.
+                   data1.descriptors,  // Train.
+                   matches,
+                   2);  // 2 closest matches per descriptor.
+
+  results->reserve(matches.size());
+
+  // Get successfully matched features.
+  for (const auto& match : matches) {
+    if (match.size() > 1 &&
+        match[0].distance < options_.tracking_options().ratio_test_threshold() *
+                                match[1].distance) {
+      // Passed ratio test.
+      const cv::Point2f& feature_location =
+          data1.key_points[match[0].trainIdx].pt;
+      const cv::Point2f& match_location =
+          data2.key_points[match[0].queryIdx].pt;
+
+      const Vector2_f feature_point(feature_location.x, feature_location.y);
+      const Vector2_f flow =
+          Vector2_f(match_location.x, match_location.y) - feature_point;
+
+      TrackedFeature tracked_feature(feature_point * downsample_scale_,
+                                     flow * downsample_scale_,
+                                     match[0].distance, 0.0f,
+                                     -1);  // no track id assigned.
+
+      if (PointOutOfBound(tracked_feature.point, original_width_,
+                          original_height_)) {
+        continue;
+      }
+
+      VLOG(2) << "Flow: " << tracked_feature.flow << " @ "
+              << tracked_feature.point;
+      results->push_back(tracked_feature);
+    }
+  }
+#endif  // (defined(__ANDROID__) || defined(__APPLE__) ||
+        // defined(__EMSCRIPTEN__)) && !defined(CV_WRAPPER_3X)
+}
+
+void RegionFlowComputation::RemoveAbsentFeatures(
+    const TrackedFeatureList& prev_result, FrameTrackingData* data) {
+  CHECK(long_track_data_ != nullptr);
+
+  // Build hash set of track ids.
+  absl::node_hash_set<int> track_ids;
+  for (const auto& feature : prev_result) {
+    DCHECK_NE(feature.track_id, -1);
+    track_ids.insert(feature.track_id);
+  }
+
+  long_track_data_->RemoveAbsentFeatureEntries(track_ids);
+
+  // Remove indices (backwards to not destroy index positions).
+  for (int k = data->track_ids.size() - 1; k >= 0; --k) {
+    if (track_ids.find(data->track_ids[k]) == track_ids.end()) {
+      data->RemoveFeature(k);
+    }
+  }
+}
+
+void RegionFlowComputation::RemoveFeaturesOutsideMask(FrameTrackingData* data) {
+  if (data->mask.empty()) {
+    return;
+  }
+
+  // Remove indices (backwards to not destroy index positions).
+ for (int k = data->features.size() - 1; k >= 0; --k) { + const int x = static_cast(data->features[k].x + 0.5); + const int y = static_cast(data->features[k].y + 0.5); + if (data->mask.at(y, x) == 0) { + data->RemoveFeature(k); + } + } +} + +void RegionFlowComputation::ExtractFeatures( + const TrackedFeatureList* prev_result, FrameTrackingData* data) { + MEASURE_TIME << "ExtractFeatures"; + if (!options_.tracking_options().adaptive_good_features_to_track()) { + LOG(FATAL) << "Deprecated! Activate adaptive_good_features_to_track " + << "in TrackingOptions"; + } + + // Check if features can simply be re-used. + if (!data->features.empty()) { + if (prev_result) { + // Long feature tracking case, remove features, + // whose ids are not present anymore (these are mainly outliers + // that got removed during DetermineRegionFlowInliers call). + RemoveAbsentFeatures(*prev_result, data); + } + + if (data->last_feature_extraction_time == 0) { + // Features already extracted from this frame. + CHECK_EQ(data->corner_responses.size(), data->features.size()); + CHECK_EQ(data->octaves.size(), data->features.size()); + VLOG(1) << "Features already present (extracted from this frame)"; + return; + } + + // Remove features that lie outside feature extraction mask. + RemoveFeaturesOutsideMask(data); + + CHECK_EQ(data->corner_responses.size(), data->features.size()); + CHECK_EQ(data->octaves.size(), data->features.size()); + + float feature_fraction = 0; + if (data->num_original_extracted_and_tracked > 0) { + feature_fraction = data->num_extracted_and_tracked * 1.0f / + data->num_original_extracted_and_tracked; + } + + // If features were not tracked from too far away, reuse them unless the + // number of tracked features is below a threshold percentage of the + // original source features. 
+ const int max_frame_distance = + options_.tracking_options().reuse_features_max_frame_distance(); + const float min_survived_frac = + options_.tracking_options().reuse_features_min_survived_frac(); + + if (data->last_feature_extraction_time <= max_frame_distance && + feature_fraction > min_survived_frac) { + VLOG(1) << "Features already present, (tracked " + << data->last_feature_extraction_time << " times)"; + return; + } + } + // If execution reaches this point, new features will be extracted. + + // Scale feature_distance by sqrt(scale). Sqrt is purely a heuristic choice. + float min_feature_distance = + options_.tracking_options().min_feature_distance(); + if (min_feature_distance < 1) { + min_feature_distance *= hypot(frame_width_, frame_height_); + } + if (options_.tracking_options().distance_downscale_sqrt()) { + min_feature_distance = + std::round(min_feature_distance / std::sqrt(downsample_scale_)); + } + + // Result mask that ensures we don't place features too closely. + const float mask_dim = max(1.0f, min_feature_distance * 0.5f); + const float mask_scale = 1.0f / mask_dim; + cv::Mat mask = cv::Mat::zeros(std::ceil(frame_height_ * mask_scale), + std::ceil(frame_width_ * mask_scale), CV_8U); + + // Initialize mask from frame's feature extraction mask, by downsampling and + // negating the latter mask. + if (!data->mask.empty()) { + cv::resize(data->mask, mask, mask.size(), 0, 0, CV_INTER_NN); + for (int y = 0; y < mask.rows; ++y) { + uint8* mask_ptr = mask.ptr(y); + for (int x = 0; x < mask.cols; ++x) { + mask_ptr[x] = mask_ptr[x] == 0 ? 1 : 0; + } + } + } + + data->ResetFeatures(); + const int features_to_allocate = + prev_result ? 
prev_result->size() * 1.2f : max_features_ / 2; + data->PreAllocateFeatures(features_to_allocate); + + if (IsVerifyLongFeatures()) { + if (data->neighborhoods == nullptr) { + data->neighborhoods.reset(new std::vector()); + } + data->neighborhoods->reserve(features_to_allocate); + } + + CHECK_EQ(data->extraction_pyramid.size(), extraction_levels_); + for (int i = 1; i < extraction_levels_; ++i) { + // Need factor 2 as OpenCV stores image + gradient pairs when + // "with_derivative" is set to true. + const int layer_stored_in_pyramid = + options_.compute_derivative_in_pyramid() ? 2 * i : i; + const bool index_within_limit = + (layer_stored_in_pyramid < data->pyramid.size()); + if (index_within_limit && options_.compute_derivative_in_pyramid() && + i <= data->pyramid_levels) { + // Just re-use from already computed pyramid. + data->extraction_pyramid[i] = data->pyramid[layer_stored_in_pyramid]; + } else { + cv::pyrDown(data->extraction_pyramid[i - 1], data->extraction_pyramid[i], + data->extraction_pyramid[i].size()); + } + } + + if (prev_result) { + // Seed feature mask and results with tracking ids. + CHECK(long_track_data_ != nullptr); + const int max_track_length = + options_.tracking_options().long_tracks_max_frames(); + // Drop a feature with a propability X, such that all qualifying + // features are dropped with a 95% probability within the interval + // [.8, 1.2] * long_tracks_max_frames. + const int lower_max_track_length = max(1.0f, 0.8f * max_track_length); + const int upper_max_track_length = 1.2f * max_track_length; + + // Ensure interval is positive. + const int interval_length = + upper_max_track_length - lower_max_track_length + 1; + // Drop probability: p == > survival: 1 - p + // (1 - p) ^ interval_length >= 5% // only 5% survival chance across + // // all frames + // ==> p <= 1 - (0.05 ^ (1.0 / interval_length) + // Ensure positive probability. 
+ const float drop_permil = + max(1.0, (1.0 - pow(0.05, 1.0 / interval_length))); + + unsigned int seed = 900913; // = Google in leet :) + std::default_random_engine rand_gen(seed); + std::uniform_real_distribution distribution(0.0f, 1.0f); + + // Mask out locations. + // For FORWARD output flow, we need to add flow to obtain the match + // position, for BACKWARD output flow, flow is inverted, so that feature + // locations already point to locations in the current frame. + CHECK_EQ(options_.tracking_options().internal_tracking_direction(), + TrackingOptions::FORWARD); + float match_sign = options_.tracking_options().output_flow_direction() == + TrackingOptions::FORWARD + ? 1.0f + : 0.0f; + const float inv_downsample_scale_ = 1.0f / downsample_scale_; + + for (const auto& feature : *prev_result) { + // Need to convert to downsampled domain. + const Vector2_f pos = + (feature.point + feature.flow * match_sign) * inv_downsample_scale_; + + const int track_id = feature.track_id; + if (track_id < 0) { + // TODO: Use LOG_FIRST_N here. + LOG_IF(WARNING, + []() { + static int k = 0; + return k++ < 2; + }()) + << "Expecting an assigned track id, " + << "skipping feature."; + continue; + } + + // Skip features for which the track would get too long. + const int start_frame = long_track_data_->StartFrameForId(track_id); + if (start_frame < 0) { + LOG(ERROR) << "Id is not present, skipping feature."; + continue; + } + + if (data->frame_num - start_frame >= lower_max_track_length && + distribution(rand_gen) <= drop_permil) { + continue; + } + + const int mask_x = pos.x() * mask_scale; + const int mask_y = pos.y() * mask_scale; + + // Skip if already occupied by too many features. This allows paths + // to "join", without having to explicitly represent this. + // Value of 2 improves number of connected features. 
+      constexpr int kMaxFeaturesPerBin = 1;
+      if (mask.at<uint8>(mask_y, mask_x) >= kMaxFeaturesPerBin) {
+        continue;
+      }
+
+      SetMaskNeighborhood<2, 1, false>(mask_x, mask_y, &mask);
+
+      // Copy results to features.
+      const cv::Mat* neighborhood = (options_.verify_long_features() &&
+                                     feature.orig_neighborhood != nullptr)
+                                        ? feature.orig_neighborhood.get()
+                                        : nullptr;
+      data->AddFeature(cv::Point2f(pos.x(), pos.y()), feature.corner_response,
+                       feature.octave, feature.track_id, neighborhood);
+    }
+  }
+
+  // Extracts additional features in regions excluding the mask and adds them to
+  // data.
+  AdaptiveGoodFeaturesToTrack(data->extraction_pyramid, max_features_,
+                              mask_scale, &mask, data);
+
+  const int num_features = data->features.size();
+  CHECK_EQ(num_features, data->octaves.size());
+  CHECK_EQ(num_features, data->corner_responses.size());
+  CHECK_EQ(num_features, data->track_ids.size());
+}
+
+// Selects features based on lambda evaluator: bool (int index)
+// Performs inplace moves and final resize operation.
+template <class Eval>
+int RegionFlowComputation::InplaceFeatureSelection(
+    FrameTrackingData* data, std::vector<std::vector<int>*> int_vecs,
+    std::vector<std::vector<float>*> float_vecs, const Eval& eval) {
+  int num_selected_features = 0;
+  const int num_features = data->features.size();
+  DCHECK_EQ(num_features, data->corner_responses.size());
+  DCHECK_EQ(num_features, data->octaves.size());
+  DCHECK_EQ(num_features, data->track_ids.size());
+  DCHECK_EQ(num_features, data->feature_source_map.size());
+  if (data->neighborhoods != nullptr) {
+    DCHECK_EQ(num_features, data->neighborhoods->size());
+  }
+
+  for (const auto vec_ptr : int_vecs) {
+    DCHECK_EQ(num_features, vec_ptr->size());
+  }
+  for (const auto vec_ptr : float_vecs) {
+    DCHECK_EQ(num_features, vec_ptr->size());
+  }
+
+  for (int i = 0; i < num_features; ++i) {
+    DCHECK_LE(num_selected_features, i);
+    if (eval(i)) {
+      data->features[num_selected_features] = data->features[i];
+      data->corner_responses[num_selected_features] = data->corner_responses[i];
+      data->octaves[num_selected_features] = data->octaves[i];
+      data->track_ids[num_selected_features] = data->track_ids[i];
+      data->feature_source_map[num_selected_features] =
+          data->feature_source_map[i];
+      if (data->neighborhoods != nullptr) {
+        (*data->neighborhoods)[num_selected_features] =
+            (*data->neighborhoods)[i];
+      }
+
+      for (auto* vec_ptr : int_vecs) {
+        (*vec_ptr)[num_selected_features] = (*vec_ptr)[i];
+      }
+      for (auto* vec_ptr : float_vecs) {
+        (*vec_ptr)[num_selected_features] = (*vec_ptr)[i];
+      }
+      ++num_selected_features;
+    }
+  }
+
+  // Trim to number of selected features.
+ data->features.resize(num_selected_features); + data->corner_responses.resize(num_selected_features); + data->octaves.resize(num_selected_features); + data->track_ids.resize(num_selected_features); + data->feature_source_map.resize(num_selected_features); + if (data->neighborhoods != nullptr) { + data->neighborhoods->resize(num_selected_features); + } + + for (const auto vec_ptr : int_vecs) { + vec_ptr->resize(num_selected_features); + } + for (const auto vec_ptr : float_vecs) { + vec_ptr->resize(num_selected_features); + } + + return num_selected_features; +} + +void RegionFlowComputation::TrackFeatures(FrameTrackingData* from_data_ptr, + FrameTrackingData* to_data_ptr, + bool* gain_correction_ptr, + float* frac_long_features_rejected, + TrackedFeatureList* results_ptr) { + MEASURE_TIME << "TrackFeatures"; + + FrameTrackingData& data1 = *from_data_ptr; + const cv::Mat& frame1 = data1.frame; + const std::vector& features1 = data1.features; + const std::vector& corner_responses1 = data1.corner_responses; + const std::vector& octaves1 = data1.octaves; + // New features will be assigned new track ids. + std::vector& track_ids1 = data1.track_ids; + + FrameTrackingData& data2 = *to_data_ptr; + cv::Mat& frame2 = data2.frame; + std::vector& features2 = data2.features; + std::vector& corner_responses2 = data2.corner_responses; + std::vector& octaves2 = data2.octaves; + std::vector& track_ids2 = data2.track_ids; + + bool& gain_correction = *gain_correction_ptr; + TrackedFeatureList& results = *results_ptr; + std::vector& feature_source_map = data2.feature_source_map; + + // Start frame for new features. Minimum of from and to. + const int min_frame = + std::min(from_data_ptr->frame_num, to_data_ptr->frame_num); + + feature_source_map.clear(); + results.clear(); + + const int num_features = features1.size(); + if (num_features == 0) { + // No features found, probably black or extremely blurry frame. + // Return empty results. 
+ VLOG(1) << "Couldn't find any features to track. Frame probably empty."; + return; + } + + int tracking_flags = 0; + // Check if features in the destination are already initialized. If so, use + // their location as initial guess. + if (!data2.features_initialized) { + data2.ResetFeatures(); + features2.resize(num_features); + corner_responses2.resize(num_features); + octaves2.resize(num_features); + data2.source = from_data_ptr; + } else { + CHECK_EQ(data2.source, from_data_ptr); + CHECK_EQ(num_features, features2.size()); + tracking_flags |= cv::OPTFLOW_USE_INITIAL_FLOW; + } + + const int track_win_size = options_.tracking_options().tracking_window_size(); + CHECK_GT(track_win_size, 1) << "Needs to be at least 2 pixels in each " + << "direction"; + + // Proceed with gain correction only if it succeeds, and set flag accordingly. + bool frame1_gain_reference = true; + if (gain_correction) { + cv::Mat reference_frame = frame1; + cv::Mat input_frame = frame2; + float reference_mean = data1.mean_intensity; + float input_mean = data2.mean_intensity; + // Use brighter frame as reference, if requested. + if (options_.gain_correction_bright_reference() && + data1.mean_intensity < data2.mean_intensity) { + std::swap(input_frame, reference_frame); + std::swap(input_mean, reference_mean); + frame1_gain_reference = false; + } + + // Gain correct and update output with success result. + gain_correction = + GainCorrectFrame(reference_frame, input_frame, reference_mean, + input_mean, gain_image_.get()); + } + +#if CV_MAJOR_VERSION == 3 + // OpenCV changed how window size gets specified from our radius setting + // < 2.2 to diameter in 2.2+. 
+ const cv::Size cv_window_size(track_win_size * 2 + 1, track_win_size * 2 + 1); + + cv::TermCriteria cv_criteria( + cv::TermCriteria::COUNT + cv::TermCriteria::EPS, + options_.tracking_options().tracking_iterations(), 0.02f); + + cv::_InputArray input_frame1(data1.pyramid); + cv::_InputArray input_frame2(data2.pyramid); +#endif + + // Using old c-interface for OpenCV's 2.2 tracker. + CvTermCriteria criteria; + criteria.type = CV_TERMCRIT_EPS | CV_TERMCRIT_ITER; + criteria.max_iter = options_.tracking_options().tracking_iterations(); + criteria.epsilon = 0.02f; + + feature_track_error_.resize(num_features); + feature_status_.resize(num_features); + if (use_cv_tracking_) { +#if CV_MAJOR_VERSION == 3 + if (gain_correction) { + if (!frame1_gain_reference) { + input_frame1 = cv::_InputArray(*gain_image_); + } else { + input_frame2 = cv::_InputArray(*gain_image_); + } + } + + if (options_.tracking_options().klt_tracker_implementation() == + TrackingOptions::KLT_OPENCV) { + cv::calcOpticalFlowPyrLK(input_frame1, input_frame2, features1, features2, + feature_status_, feature_track_error_, + cv_window_size, pyramid_levels_, cv_criteria, + tracking_flags); + } else { + LOG(ERROR) << "Tracking method unspecified."; + return; + } +#endif + } else { + LOG(ERROR) << "only cv tracking is supported."; + return; + } + + // Inherit corner response and octaves from extracted features. + corner_responses2 = corner_responses1; + octaves2 = octaves1; + + // Remember mapping from destination to source index; + // Fill feature_source_map with 0 ... num_features - 1. + feature_source_map.resize(num_features); + std::iota(feature_source_map.begin(), feature_source_map.end(), 0); + + // Init track ids. + track_ids2 = std::vector(num_features, -1); // Unassigned by default. + + // Select features that were successfully tracked from data1 to data2. 
+ int num_valid_features = InplaceFeatureSelection( + to_data_ptr, {&feature_source_map}, {&feature_track_error_}, + [this](int i) -> bool { return feature_status_[i] == 1; }); + + // Init neighborhoods if needed. + if (IsVerifyLongFeatures()) { + // data1 should be initialized at this point. + CHECK(data1.neighborhoods != nullptr); + if (data2.neighborhoods == nullptr) { + data2.neighborhoods.reset(new std::vector()); + data2.neighborhoods->resize(num_valid_features); + } + } + + // Remember last id threshold before assigning new ones. + const int prev_id_threshold = + long_track_data_ != nullptr ? long_track_data_->LastTrackId() : 0; + + std::vector ids_to_verify; + std::vector motions_to_verify; + if (long_track_data_) { + // Compute motion for each feature and average magnitude. + std::vector motion_mag(num_valid_features, 0); + float avg_motion_mag = 0; + + for (int i = 0; i < num_valid_features; ++i) { + const int match_idx = feature_source_map[i]; + const cv::Point2f diff = + (features2[i] - features1[match_idx]) * downsample_scale_; + const float norm = std::abs(diff.x) + std::abs(diff.y); + motion_mag[i] = norm; + avg_motion_mag += norm; + } + + if (num_valid_features > 0) { + avg_motion_mag /= num_valid_features; + } + + bool is_duplicated = num_valid_features > 0 && avg_motion_mag < kZeroMotion; + + const float max_acc = options_.max_long_feature_acceleration(); + + // Minimum motion for stable ratio test. + constexpr float kMinMotion = 1.0f; + + // Initialize all track_ids of data2. + int num_restarted_tracks = 0; + for (int i = 0; i < num_valid_features; ++i) { + const int match_idx = feature_source_map[i]; + if (track_ids1[match_idx] < 0) { + const float motion_mag_arg = is_duplicated ? -1 : motion_mag[i]; + track_ids1[match_idx] = + long_track_data_->CreateNextTrackId(min_frame, motion_mag_arg); + track_ids2[i] = track_ids1[match_idx]; + } else if (!is_duplicated) { + // Check for change in acceleration for non duplicated frame. 
+ const float prev_motion_mag = + long_track_data_->MotionMagForId(track_ids1[match_idx]); + + // Test for acceleration or deacceleration, but only if previous + // motion was known (from non duplicated frame). + if (prev_motion_mag >= 0 && + (motion_mag[i] > max_acc * std::max(kMinMotion, prev_motion_mag) || + prev_motion_mag > max_acc * std::max(kMinMotion, motion_mag[i]))) { + // Start new track or flag for testing. + if (options_.verify_long_feature_acceleration()) { + // Keep track id and update motion. If feature is removed and + // therefore motion was updated incorrectly, motion wont be used + // again. + track_ids2[i] = track_ids1[match_idx]; + long_track_data_->UpdateMotion(track_ids1[match_idx], + motion_mag[i]); + ids_to_verify.push_back(i); + motions_to_verify.push_back(motion_mag[i]); + } else { + ++num_restarted_tracks; + track_ids2[i] = + long_track_data_->CreateNextTrackId(min_frame, motion_mag[i]); + } + } else { + long_track_data_->UpdateMotion(track_ids1[match_idx], motion_mag[i]); + track_ids2[i] = track_ids1[match_idx]; + } + } else { + // Duplicated frame with existing track, re-use id without updating + // motion. + track_ids2[i] = track_ids1[match_idx]; + } + + if (IsVerifyLongFeatures()) { + cv::Mat& mat1 = (*data1.neighborhoods)[match_idx]; + cv::Mat& mat2 = (*data2.neighborhoods)[i]; + if (mat1.empty()) { + data1.ExtractPatch(features1[match_idx], track_win_size, &mat1); + } + data2.ExtractPatch(features2[i], track_win_size, &mat2); + } + } + VLOG(1) << "Restarted tracks: " << num_restarted_tracks; + } // end long track data. + + if (!ids_to_verify.empty() && + ids_to_verify.size() < + options_.verify_long_feature_trigger_ratio() * num_valid_features) { + // Reset feature ids, instead of triggering verification. 
+    VLOG(1) << "Canceling feature verification, resetting tracks: "
+            << ids_to_verify.size() << " of " << num_valid_features;
+    for (int k = 0; k < ids_to_verify.size(); ++k) {
+      const int id = ids_to_verify[k];
+      track_ids2[id] =
+          long_track_data_->CreateNextTrackId(min_frame, motions_to_verify[k]);
+    }
+    ids_to_verify.clear();
+    motions_to_verify.clear();
+  }
+
+  // Distance between source location x and tracked-back f^(-1)(y) location
+  // starting at the tracked location y = f(x): x - f^(-1)(f(x)).
+  // Close to zero for tracks that could be verified.
+  std::vector<float> verify_distance(num_valid_features, 0);
+
+  // Compile list of indices we need to verify via backtracking.
+  std::vector<int> feat_ids_to_verify;
+  if (options_.verify_features()) {
+    feat_ids_to_verify.resize(num_valid_features);
+    std::iota(feat_ids_to_verify.begin(), feat_ids_to_verify.end(), 0);
+  } else if (options_.verify_long_feature_acceleration()) {
+    feat_ids_to_verify = ids_to_verify;
+  }
+
+  VLOG(1) << "Verifying: " << feat_ids_to_verify.size() << " out of "
+          << num_valid_features;
+  if (!feat_ids_to_verify.empty()) {
+    const int num_to_verify = feat_ids_to_verify.size();
+    std::vector<cv::Point2f> verify_features;
+    std::vector<cv::Point2f> verify_features_tracked;
+    verify_features.reserve(num_to_verify);
+    verify_features_tracked.reserve(num_to_verify);
+    for (int idx : feat_ids_to_verify) {
+      const int match_idx = feature_source_map[idx];
+      verify_features.push_back(features2[idx]);
+      verify_features_tracked.push_back(features1[match_idx]);
+    }
+
+    tracking_flags |= cv::OPTFLOW_USE_INITIAL_FLOW;
+
+    // Unused track error.
+ std::vector verify_track_error(num_to_verify); + feature_status_.resize(num_to_verify); + + if (use_cv_tracking_) { +#if CV_MAJOR_VERSION == 3 + cv::calcOpticalFlowPyrLK(input_frame2, input_frame1, verify_features, + verify_features_tracked, feature_status_, + verify_track_error, cv_window_size, + pyramid_levels_, cv_criteria, tracking_flags); +#endif + } else { + LOG(ERROR) << "only cv tracking is supported."; + return; + } + + // Check feature destinations, that when tracked back to from data1 to + // data2, don't differ more than a threshold from their original location + // in data1. + std::vector verify_result(num_valid_features, 1); // 1 for accept. + int num_accepted = 0; + for (int k = 0; k < num_to_verify; ++k) { + const int idx = feat_ids_to_verify[k]; + const int match_idx = feature_source_map[idx]; + const cv::Point2f diff = + features1[match_idx] - verify_features_tracked[k]; + const float dist = std::sqrt(diff.dot(diff)); + verify_distance[idx] = dist; + verify_result[idx] = + dist < options_.verification_distance() && feature_status_[k] == 1; + num_accepted += verify_result[idx]; + } + VLOG(1) << "Accepted number of verified features " << num_accepted; + + num_valid_features = InplaceFeatureSelection( + to_data_ptr, {&feature_source_map}, + {&feature_track_error_, &verify_distance}, + [this, verify_result](int i) -> bool { return verify_result[i]; }); + } + + if (frac_long_features_rejected != nullptr) { + *frac_long_features_rejected = 0; + } + + // Verify long features if requested. + if (IsVerifyLongFeatures() && num_valid_features > 0) { + // track_win_size > 0, checked above. + const float denom = 1.0f / (track_win_size * track_win_size * 255.0); + int new_tracks = 0; + for (int track_id : data2.track_ids) { + if (track_id > prev_id_threshold) { + ++new_tracks; + } + } + + // Select features that don't differ from the original extracted path by + // more than a threshold. If that is the case, propagate original patch from + // data1 to data2. 
Used for subsequent verifications. + int num_selected_features = InplaceFeatureSelection( + to_data_ptr, {&feature_source_map}, + {&feature_track_error_, &verify_distance}, + [this, denom, &data1, &data2, &feature_source_map](int i) -> bool { + cv::Mat& mat1 = (*data1.neighborhoods)[feature_source_map[i]]; + cv::Mat& mat2 = (*data2.neighborhoods)[i]; + const float norm = cv::norm(mat1, mat2, cv::NORM_L1) * denom; + if (norm < options_.long_feature_verification_threshold()) { + // Store original patch. + mat2 = mat1; + return true; + } else { + return false; + } + }); + + if (frac_long_features_rejected) { + // There needs to be a reasonable number of features to test this + // criteria. + constexpr int kMinPrevValidFeatures = 10; + if (num_valid_features > kMinPrevValidFeatures) { + *frac_long_features_rejected = + (1.0f - num_selected_features * (1.0f / num_valid_features)); + // Note: One could add number of new tracks here, i.e. + // += new_tracks / num_valid_features. + // Concern is that is very shaky video where a lot of tracks get created + // this criteria overpowers the current one. + } + } + num_valid_features = num_selected_features; + } + + // How many tracking steps were used to generate these features. + data2.last_feature_extraction_time = 1 + data1.last_feature_extraction_time; + // Features are resized, so they cannot be considered initialized. + data2.features_initialized = false; + + // Copy verified features to results. 
+ results.reserve(num_valid_features); + for (int i = 0; i < num_valid_features; ++i) { + const int match_idx = feature_source_map[i]; + const Vector2_f point1 = + Vector2_f(features1[match_idx].x, features1[match_idx].y) * + downsample_scale_; + const Vector2_f point2 = + Vector2_f(features2[i].x, features2[i].y) * downsample_scale_; + + if (PointOutOfBound(point1, original_width_, original_height_) || + PointOutOfBound(point2, original_width_, original_height_)) { + continue; + } + + const Vector2_f flow = point2 - point1; + results.emplace_back(point1, flow, feature_track_error_[i], + corner_responses2[i], octaves2[i], track_ids2[i], + verify_distance[i]); + + if (long_track_data_ != nullptr && track_ids1[match_idx] != track_ids2[i]) { + results.back().flags |= RegionFlowFeature::FLAG_BROKEN_TRACK; + } + + if (IsVerifyLongFeatures()) { + const cv::Mat& orig_patch = (*data1.neighborhoods)[match_idx]; + results.back().orig_neighborhood.reset(new cv::Mat(orig_patch)); + } + + if (data1.orb.computed) { + results.back().descriptors = data1.orb.descriptors.row(match_idx); + } + } +} + +void RegionFlowComputation::AppendUniqueFeaturesSorted( + const TrackedFeatureView& to_be_added, TrackedFeatureView* features) const { + for (auto feature_ptr : to_be_added) { + auto insert_pos = + std::lower_bound(features->begin(), features->end(), feature_ptr); + if (insert_pos == features->end() || *insert_pos != feature_ptr) { + // Not present yet. + features->insert(insert_pos, feature_ptr); + feature_ptr->irls_weight = 1; + } + + // Present, increase score. 
+ ++feature_ptr->irls_weight; + } +} + +void RegionFlowComputation::InitializeFeatureLocationsFromTransform( + int from, int to, const Homography& transform) { + const int index1 = data_queue_.size() + from - 1; + const int index2 = data_queue_.size() + to - 1; + FrameTrackingData& data1 = *data_queue_[index1]; + FrameTrackingData* data2 = data_queue_[index2].get(); + + data2->features = data1.features; + + for (cv::Point2f& feature : data2->features) { + const Vector2_f trans_pt = + TransformPoint(transform, Vector2_f(feature.x, feature.y)); + feature = cv::Point2f(trans_pt.x(), trans_pt.y()); + } + + data2->source = &data1; + data2->features_initialized = true; +} + +void RegionFlowComputation::InitializeFeatureLocationsFromPreviousResult( + int from, int to) { + CHECK_NE(from, to) << "Cannot initialize FrameTrackingData from itself."; + + const int index1 = data_queue_.size() + from - 1; + const int index2 = data_queue_.size() + to - 1; + CHECK_GE(index1, 0); + CHECK_LT(index1, data_queue_.size()); + CHECK_GE(index2, 0); + CHECK_LT(index2, data_queue_.size()); + const FrameTrackingData& data1 = *data_queue_[index1]; + FrameTrackingData* data2 = data_queue_[index2].get(); + CHECK(data1.source != nullptr); + + if (!data1.features_initialized) { + data2->features = data1.source->features; + for (int k = 0; k < data1.feature_source_map.size(); ++k) { + data2->features[data1.feature_source_map[k]] = data1.features[k]; + } + } else { + data2->features = data1.features; + CHECK_EQ(data1.features.size(), data1.source->features.size()); + } + data2->source = data1.source; + data2->features_initialized = true; +} + +namespace { + +void ComputeMeanForRegionFlow(RegionFlowFrame::RegionFlow* region_flow) { + // Compute mean for this region. 
+ Vector2_f centroid(0, 0); + Vector2_f mean_flow(0, 0); + + for (const auto& feature : region_flow->feature()) { + centroid += Vector2_f(feature.x(), feature.y()); + mean_flow += Vector2_f(feature.dx(), feature.dy()); + } + + const float denom = 1.0f / region_flow->feature_size(); + centroid *= denom; + mean_flow *= denom; + + region_flow->set_centroid_x(centroid.x()); + region_flow->set_centroid_y(centroid.y()); + region_flow->set_flow_x(mean_flow.x()); + region_flow->set_flow_y(mean_flow.y()); +} + +} // namespace. + +void RegionFlowComputation::ComputeBlockBasedFlow( + TrackedFeatureList* feature_list, + TrackedFeatureView* inlier_features) const { + MEASURE_TIME << "Block based flow"; + TrackedFeatureView inlier_view; + inlier_view.reserve(feature_list->size()); + + const float frame_diam = hypot(original_width_, original_height_); + const float max_magnitude_threshold = + frame_diam * options_.max_magnitude_threshold_ratio(); + float sq_max_magnitude_threshold = + max_magnitude_threshold * max_magnitude_threshold; + + // Refine threshold, for magnitudes over N times the mean motion, if + // requested. + if (!feature_list->empty() && options_.median_magnitude_bounds() > 0) { + std::vector motion_magnitudes; + motion_magnitudes.reserve(feature_list->size()); + for (auto feature : *feature_list) { + motion_magnitudes.push_back(feature.flow.Norm2()); + } + auto median_iter = motion_magnitudes.begin() + motion_magnitudes.size() / 2; + std::nth_element(motion_magnitudes.begin(), median_iter, + motion_magnitudes.end()); + const float median = *median_iter; + // Only apply if non-zero motion is present (at least one pixel). + if (median > 1.0) { + const float outlier_threshold = median * + options_.median_magnitude_bounds() * + options_.median_magnitude_bounds(); + // Refine threshold. + sq_max_magnitude_threshold = + min(sq_max_magnitude_threshold, outlier_threshold); + } + } + + for (auto& feature : *feature_list) { + // Skip features with exceeding motions. 
+ if (feature.flow.Norm2() < sq_max_magnitude_threshold) { + inlier_view.push_back(&feature); + inlier_view.back()->num_bins = 0; + } + } + + const int num_overlaps = options_.fast_estimation_overlap_grids(); + const int num_grids = block_levels_ * num_overlaps * num_overlaps; + + // Put all features into region bins. + std::vector grid_feature_views(num_grids); + + int grid_idx = 0; + int block_width = block_width_; + int block_height = block_height_; + + for (int level = 0; level < block_levels_; ++level) { + const float inv_block_width = 1.0f / block_width; + const float inv_block_height = 1.0f / block_height; + + for (int overlap_y = 0; overlap_y < num_overlaps; ++overlap_y) { + // | | | | <- unshifted + // | | | | | <- shifted + // ^ + // block_height * overlap_y / num_overlaps := t + // We need to add block_height - t to each value, so that at t + // we get bin 1. + // Special case is overlap_y == 0 -> shift = 0. + const int grid_shift_y = + overlap_y == 0 + ? 0 + : (block_height - block_height * overlap_y / num_overlaps); + + for (int overlap_x = 0; overlap_x < num_overlaps; + ++overlap_x, ++grid_idx) { + const int grid_shift_x = + overlap_x == 0 + ? 0 + : (block_width - block_width * overlap_x / num_overlaps); + + const int bins_per_row = + std::ceil((original_width_ + grid_shift_x) * inv_block_width); + const int bins_per_column = + std::ceil((original_height_ + grid_shift_y) * inv_block_height); + TrackedFeatureMap& feature_view = grid_feature_views[grid_idx]; + feature_view.resize(bins_per_row * bins_per_column); + + for (auto feature_ptr : inlier_view) { + const int x = feature_ptr->point.x() + 0.5f + grid_shift_x; + const int y = feature_ptr->point.y() + 0.5f + grid_shift_y; + const int block_x = x * inv_block_width; + const int block_y = y * inv_block_height; + + int block_id = block_y * bins_per_row + block_x; + feature_view[block_id].push_back(feature_ptr); + } + } + } + + // We use smallest block width and height later on. 
+ if (level + 1 < block_levels_) { + block_width = (block_width + 1) / 2; + block_height = (block_height + 1) / 2; + } + } + + for (int k = 0; k < num_grids; ++k) { + TrackedFeatureMap& region_features = grid_feature_views[k]; + const int min_inliers = GetMinNumFeatureInliers(region_features); + for (TrackedFeatureView& feature_view : region_features) { + if (feature_view.size() >= min_inliers) { + for (auto feature_ptr : feature_view) { + ++feature_ptr->num_bins; + } + } else { + feature_view.clear(); + } + } + } + + if (num_grids == 1) { + TrackedFeatureView all_inliers; + DetermineRegionFlowInliers(grid_feature_views[0], &all_inliers); + AppendUniqueFeaturesSorted(all_inliers, inlier_features); + } else { + // Get all features across all grids without duplicates. + std::vector grid_inliers(num_grids); + ParallelFor( + 0, num_grids, 1, + [&grid_feature_views, &grid_inliers, this](const BlockedRange& range) { + for (int k = range.begin(); k < range.end(); ++k) { + grid_inliers[k].reserve(grid_feature_views[k].size()); + DetermineRegionFlowInliers(grid_feature_views[k], &grid_inliers[k]); + } + }); + + for (int grid = 0; grid < num_grids; ++grid) { + AppendUniqueFeaturesSorted(grid_inliers[grid], inlier_features); + } + } +} + +void RegionFlowComputation::DetermineRegionFlowInliers( + const TrackedFeatureMap& region_feature_map, + TrackedFeatureView* inliers) const { + CHECK(inliers); + inliers->clear(); + + // Run RANSAC on each region. + const int max_iterations = options_.ransac_rounds_per_region(); + float absolute_err_threshold = + max(options_.absolute_inlier_error_threshold(), + options_.frac_inlier_error_threshold() * + hypot(original_width_, original_height_)); + absolute_err_threshold *= absolute_err_threshold; + + // Save ptr to each inlier feature. 
+  TrackedFeatureView inlier_set;
+  TrackedFeatureView best_inlier_set;
+  unsigned int seed = 900913;  // = Google in leet :)
+  std::default_random_engine rand_gen(seed);
+
+  const int min_features = GetMinNumFeatureInliers(region_feature_map);
+
+  for (const TrackedFeatureView& region_features : region_feature_map) {
+    if (region_features.empty()) {
+      continue;
+    }
+    // Select top N inlier sets.
+    int loop_count = options_.top_inlier_sets();
+
+    // Get local copy if necessary and sort.
+    const TrackedFeatureView* all_features = nullptr;
+    TrackedFeatureView all_features_storage;
+
+    if (loop_count > 1) {
+      all_features_storage = region_features;
+      all_features = &all_features_storage;
+      std::sort(all_features_storage.begin(), all_features_storage.end());
+    } else {
+      all_features = &region_features;
+    }
+
+    const int num_features = all_features->size();
+
+    // Extract inlier sets as long as more than 20% of original features remain.
+    int last_inlier_set_size = 0;
+
+    while (all_features->size() >= max(min_features, num_features / 5) &&
+           loop_count-- > 0) {
+      best_inlier_set.clear();
+      std::uniform_int_distribution<> distribution(0, all_features->size() - 1);
+
+      for (int i = 0; i < max_iterations; ++i) {
+        // Pick a random vector.
+        const int rand_idx = distribution(rand_gen);
+        Vector2_f vec = (*all_features)[rand_idx]->flow;
+
+        float relative_err_threshold =
+            options_.relative_inlier_error_threshold() * vec.Norm();
+        relative_err_threshold *= relative_err_threshold;
+        const float err_threshold =
+            std::max(relative_err_threshold, absolute_err_threshold);
+
+        // Determine inlier vectors.
+ inlier_set.clear(); + + for (auto feature_ptr : *all_features) { + if ((feature_ptr->flow - vec).Norm2() < err_threshold) { + inlier_set.push_back(feature_ptr); + } + } + + if (inlier_set.size() >= best_inlier_set.size()) { + best_inlier_set.swap(inlier_set); + } + } + + if (best_inlier_set.size() >= + max(options_.min_feature_inliers(), last_inlier_set_size / 2)) { + last_inlier_set_size = best_inlier_set.size(); + inliers->insert(inliers->end(), best_inlier_set.begin(), + best_inlier_set.end()); + + if (loop_count > 0) { + // Remove inliers from all feature set. + TrackedFeatureView remaining_features; + std::set_difference(all_features_storage.begin(), + all_features_storage.end(), + best_inlier_set.begin(), best_inlier_set.end(), + back_inserter(remaining_features)); + all_features_storage.swap(remaining_features); + } + } else { + break; + } + } + } // end traverse region bins. +} + +int RegionFlowComputation::GetMinNumFeatureInliers( + const TrackedFeatureMap& region_feature_map) const { + // Determine number of features. + int total_features = 0; + for (const TrackedFeatureView& region_features : region_feature_map) { + total_features += region_features.size(); + } + + CHECK(!region_feature_map.empty()) + << "Empty grid passed. 
Check input dimensions"; + + const float threshold = + std::max(options_.min_feature_inliers(), + options_.relative_min_feature_inliers() * total_features / + region_feature_map.size()); + + return threshold; +} + +void RegionFlowComputation::RegionFlowFeatureListToRegionFlow( + const RegionFlowFeatureList& feature_list, RegionFlowFrame* frame) const { + CHECK(frame != nullptr); + + frame->set_num_total_features(feature_list.feature_size()); + frame->set_unstable_frame(feature_list.unstable()); + if (feature_list.has_blur_score()) { + frame->set_blur_score(feature_list.blur_score()); + } + frame->set_frame_width(feature_list.frame_width()); + frame->set_frame_height(feature_list.frame_height()); + + RegionFlowFrame::BlockDescriptor* block_descriptor = + frame->mutable_block_descriptor(); + + // Compute minimum block size. + int min_block_width = block_width_; + int min_block_height = block_height_; + + for (int level = 0; level < block_levels_; ++level) { + if (level + 1 < block_levels_) { + min_block_width = (min_block_width + 1) / 2; + min_block_height = (min_block_height + 1) / 2; + } + } + block_descriptor->set_block_width(min_block_width); + block_descriptor->set_block_height(min_block_height); + const int bins_per_row = + std::ceil(original_width_ * (1.0f / min_block_width)); + const int bins_per_col = + std::ceil(original_height_ * (1.0f / min_block_height)); + block_descriptor->set_num_blocks_x(bins_per_row); + block_descriptor->set_num_blocks_y(bins_per_col); + + const int num_regions = bins_per_row * bins_per_col; + frame->mutable_region_flow()->Reserve(num_regions); + for (int k = 0; k < num_regions; ++k) { + frame->add_region_flow()->set_region_id(k); + } + + // Add feature according smallest block width and height to regions. + for (auto feature : feature_list.feature()) { + const int x = static_cast(feature.x()); + const int y = static_cast(feature.y()); + // Guard, in case equation is wrong. 
+    int region_id = min(
+        num_regions, y / min_block_height * bins_per_row + x / min_block_width);
+    *frame->mutable_region_flow(region_id)->add_feature() = feature;
+  }
+
+  for (auto& region_flow : *frame->mutable_region_flow()) {
+    ComputeMeanForRegionFlow(&region_flow);
+  }
+}
+
+void RegionFlowComputation::InitializeRegionFlowFeatureList(
+    RegionFlowFeatureList* region_flow_feature_list) const {
+  region_flow_feature_list->set_frame_width(original_width_);
+  region_flow_feature_list->set_frame_height(original_height_);
+  if (curr_blur_score_ >= 0.0f) {
+    region_flow_feature_list->set_blur_score(curr_blur_score_);
+  }
+
+  region_flow_feature_list->set_distance_from_border(std::max(
+      options_.patch_descriptor_radius(), options_.distance_from_border()));
+  region_flow_feature_list->set_long_tracks(long_track_data_ != nullptr);
+}
+
+namespace {
+
+bool IsPointWithinBounds(const Vector2_f& pt, int bounds, int frame_width,
+                         int frame_height) {
+  // Ensure stability under float -> rounding operations.
+  if (pt.x() - 0.5f >= bounds && pt.x() + 0.5f <= frame_width - 1 - bounds &&
+      pt.y() - 0.5f >= bounds && pt.y() + 0.5f <= frame_height - 1 - bounds) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+}  // namespace.
+ +float RegionFlowComputation::TrackedFeatureViewToRegionFlowFeatureList( + const TrackedFeatureView& region_feature_view, + TrackedFeatureList* flattened_feature_list, + RegionFlowFeatureList* region_flow_feature_list) const { + const int border = region_flow_feature_list->distance_from_border(); + + region_flow_feature_list->mutable_feature()->Reserve( + region_feature_view.size()); + + float sq_flow_sum = 0; + + for (auto feature_ptr : region_feature_view) { + const Vector2_f& location = feature_ptr->point; + const Vector2_f& match_location = feature_ptr->point + feature_ptr->flow; + + if (border > 0) { + if (!IsPointWithinBounds(location, border, original_width_, + original_height_) || + !IsPointWithinBounds(match_location, border, original_width_, + original_height_)) { + continue; + } + } + + const Vector2_f& flow = feature_ptr->flow; + sq_flow_sum += flow.Norm2(); + + RegionFlowFeature* feature = region_flow_feature_list->add_feature(); + feature->set_x(location.x()); + feature->set_y(location.y()); + feature->set_dx(flow.x()); + feature->set_dy(flow.y()); + feature->set_tracking_error(feature_ptr->tracking_error); + feature->set_corner_response(feature_ptr->corner_response); + + if (long_track_data_ != nullptr) { + feature->set_track_id(feature_ptr->track_id); + } + feature->set_flags(feature_ptr->flags); + + switch (options_.irls_initialization()) { + case RegionFlowComputationOptions::INIT_UNIFORM: + feature->set_irls_weight(1.0f); + break; + + case RegionFlowComputationOptions::INIT_CONSISTENCY: + feature->set_irls_weight(2.0f * feature_ptr->irls_weight / + feature_ptr->num_bins); + break; + } + + // Remember original TrackedFeature if requested. 
+ if (flattened_feature_list) { + flattened_feature_list->push_back(*feature_ptr); + } + + if (feature_ptr->descriptors.cols != 0) { + feature->mutable_binary_feature_descriptor()->set_data( + static_cast(feature_ptr->descriptors.data), + feature_ptr->descriptors.cols); + } + } + + const int num_features = region_flow_feature_list->feature_size(); + float avg_motion = 0; + if (num_features > 0) { + avg_motion = std::sqrt(sq_flow_sum / num_features); + if (avg_motion < kZeroMotion) { + region_flow_feature_list->set_is_duplicated(true); + } + } + + return avg_motion; +} + +// Check if enough features as specified by +// MotionStabilizationOptions::min_feature_requirement are present, and if +// features cover a sufficient area. +bool RegionFlowComputation::HasSufficientFeatures( + const RegionFlowFeatureList& feature_list) { + const int area_size = options_.min_feature_cover_grid(); + const float scaled_width = static_cast(area_size) / original_width_; + const float scaled_height = static_cast(area_size) / original_height_; + std::vector area_mask(area_size * area_size); + + for (const auto& feature : feature_list.feature()) { + int x = feature.x() * scaled_width; + int y = feature.y() * scaled_height; + area_mask[y * area_size + x] = 1; + } + + int covered_bins = std::accumulate(area_mask.begin(), area_mask.end(), 0); + float area_covered = + static_cast(covered_bins) / (area_size * area_size); + + const int num_features = feature_list.feature_size(); + bool has_sufficient_features = + num_features >= options_.min_feature_requirement() && + area_covered > options_.min_feature_cover(); + + if (has_sufficient_features) { + VLOG(1) << "Sufficient features: " << num_features; + } else { + VLOG(1) << "!! Insufficient features: " << num_features + << " required: " << options_.min_feature_requirement() + << " cover: " << area_covered + << " required: " << options_.min_feature_cover(); + } + + VLOG(1) << (has_sufficient_features ? 
"Has sufficient " : "Insufficient ") + << " features: " << num_features; + + return has_sufficient_features; +} + +int RegionFlowComputation::PyramidLevelsFromTrackDistance( + float track_distance) { + // The number of pyramid levels l is chosen such that + // 2^l * tracking_window_size / 2 >= track_distance. + int pyramid_levels = + std::ceil(std::log2(std::max(track_distance, 1.0f) * 2.f / + options_.tracking_options().tracking_window_size())); + // The maximum pyramid level that we are targeting. The minimum size in the + // pyramid is intended to be 2x2. + const int max_pyramid_levels = + max(1, std::log2(min(frame_height_, frame_width_)) - 1); + pyramid_levels = min(max_pyramid_levels, max(pyramid_levels, 2)); + return pyramid_levels; +} + +void RegionFlowComputation::ComputeBlurMask(const cv::Mat& input, + cv::Mat* min_eig_vals, + cv::Mat* mask) { + MEASURE_TIME << "Computing blur score"; + const auto& blur_options = options_.blur_score_options(); + cv::cornerMinEigenVal(input, *corner_values_, 3); + + // Create over-exposure mask to mask out corners in high exposed areas. + // Reason is, that motion blur does not affect lights in the same manner as + // diffuse surfaces due to the limited dynamic range of the camera. + // Blurring a light tends not to lower the corner score but simply changes the + // shape of the light with the corner score being unaffected. + cv::compare(input, 245, *corner_mask_, cv::CMP_GE); + + // Dilate reads out of bound values. + if (corner_mask_->rows > 5 && corner_mask_->cols > 5) { + cv::Mat dilate_domain(*corner_mask_, cv::Range(2, corner_mask_->rows - 2), + cv::Range(2, corner_mask_->cols - 2)); + cv::Mat kernel(5, 5, CV_8U); + kernel.setTo(1.0); + cv::dilate(dilate_domain, dilate_domain, kernel); + } + corner_values_->setTo(cv::Scalar(0), *corner_mask_); + + // Box filter corner score to diffuse and suppress noise. 
+ cv::boxFilter( + *corner_values_, *corner_filtered_, CV_32F, + cv::Size(blur_options.box_filter_diam(), blur_options.box_filter_diam())); + + // Determine maximum cornerness in robust manner over bins. + // Specular reflections or colored lights might not be filtered out by + // the above operation. + const int max_blocks = 8; + const int block_width = + std::ceil(static_cast(corner_filtered_->cols) / max_blocks); + const int block_height = + std::ceil(static_cast(corner_filtered_->rows) / max_blocks); + std::vector block_maximums; + for (int block_y = 0; block_y < max_blocks; ++block_y) { + if (block_y * block_height >= corner_filtered_->rows) { + continue; + } + + cv::Range y_range(block_y * block_height, min((block_y + 1) * block_height, + corner_filtered_->rows)); + + for (int block_x = 0; block_x < max_blocks; ++block_x) { + if (block_x * block_width >= corner_filtered_->cols) { + continue; + } + + cv::Range x_range(block_x * block_width, min((block_x + 1) * block_width, + corner_filtered_->cols)); + cv::Mat block(*corner_filtered_, y_range, x_range); + double min_val, max_val; + cv::minMaxLoc(block, &min_val, &max_val); + block_maximums.push_back(max_val); + } + } + + auto block_median = + block_maximums.begin() + static_cast(block_maximums.size() * 0.75); + std::nth_element(block_maximums.begin(), block_median, block_maximums.end()); + const float max_val = *block_median; + + const float thresh = + max(blur_options.absolute_cornerness_threshold(), + blur_options.relative_cornerness_threshold() * max_val); + + cv::compare(*corner_filtered_, thresh, *corner_mask_, cv::CMP_GE); +} + +float RegionFlowComputation::ComputeBlurScore(const cv::Mat& input) { + // TODO: Already computed during feature extraction, re-use! + cv::cornerMinEigenVal(input, *corner_values_, 3); + ComputeBlurMask(input, corner_values_.get(), corner_mask_.get()); + + // Compute median corner score over masked area. 
+ std::vector corner_score; + corner_score.reserve(frame_width_ * frame_height_); + for (int i = 0; i < corner_mask_->rows; ++i) { + const uint8_t* mask_ptr = corner_mask_->ptr(i); + const float* corner_ptr = corner_values_->ptr(i); + for (int j = 0; j < corner_mask_->cols; ++j) { + if (mask_ptr[j]) { + corner_score.push_back(corner_ptr[j]); + } + } + } + + const auto& blur_options = options_.blur_score_options(); + std::vector::iterator median_iter = + corner_score.begin() + + static_cast(corner_score.size() * blur_options.median_percentile()); + + float blur_score = 1e10f; + if (median_iter != corner_score.end()) { + std::nth_element(corner_score.begin(), median_iter, corner_score.end()); + if (*median_iter > 1e-10f) { + blur_score = 1.0f / *median_iter; + } + } + + return blur_score; +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/region_flow_computation.h b/mediapipe/util/tracking/region_flow_computation.h new file mode 100644 index 000000000..1d9f1159a --- /dev/null +++ b/mediapipe/util/tracking/region_flow_computation.h @@ -0,0 +1,463 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Computes the RegionFlow for a set of frames. +// Specifically, extracts Harris-like features from each frame, tracks these +// between frames and regularizes the tracked features locally (outlier +// rejection) by leveraging fast per-frame segmentation. 
+// Optionally, features can be assigned to either foreground or background based +// on the computation of the fundamental matrix for a pair of frames, +// +// Basic usage: +// RegionFlowComputation flow_computation(RegionFlowComputationOptions(), +// frame_width, +// frame_height); +// +// std::vector input_images; // Supplied by caller. +// for (int i = 0; i < num_frames; ++i) { +// flow_computation.AddImage(input_images[i]); +// +// // Result is owned by this caller. +// std::unique_ptr result( +// flow_computation.RetrieveRegionFlow()); +// +// // OR +// std::unique_ptr result( +// flow_computation.RetrieveRegionFlowFeatureList( +// true, // Compute feature descriptor. +// false, // no match descriptor for this example. +// &input_images[i], +// nullptr); +// +// // Do your custom processing or pass on to MotionEstimation. +// +// } + +#ifndef MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_COMPUTATION_H_ +#define MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_COMPUTATION_H_ + +#include +#include +#include +#include + +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" +#include "mediapipe/util/tracking/region_flow_computation.pb.h" + +namespace mediapipe { +class RegionFlowFeatureList; +class RegionFlowFrame; +} // namespace mediapipe + +namespace mediapipe { + +struct TrackedFeature; +typedef std::vector TrackedFeatureList; +class MotionAnalysis; + +class RegionFlowComputation { + public: + RegionFlowComputation(const RegionFlowComputationOptions& options, + int frame_width, int frame_height); + virtual ~RegionFlowComputation(); + RegionFlowComputation(const RegionFlowComputation&) = delete; + RegionFlowComputation& operator=(const RegionFlowComputation&) = delete; + + // Performs motion analysis on source w.r.t. to source passed in previous + // call. 
Therefore, first call will compute empty flow. If + // RegionFlowComputationOptions::frame_to_track := ftt > 0, motion analysis + // performed w.r.t. the previous ftt source passed via AddImage. + // Motion analysis uses grid-based regions to enforce locally consistent flow. + // Source is expected to be 3-channel RGB 8bit image (24bit in total), OR + // 1-channel Grayscale 8bit image, compatible with + // RegionFlowComputationOptions::ImageFormat. + // Pass the frame's timestamp to have it stored in the result or zero if not + // needed. + // Returns true on success, false otherwise. + virtual bool AddImage(const cv::Mat& source, int64 timestamp_usec); + + // Same as above, but seed initial feature position in the matching frame + // with initial_transform. + virtual bool AddImageWithSeed(const cv::Mat& source, int64 timestamp_usec, + const Homography& initial_transform); + + // Same as AddImage but also accepts an optional source_mask (pass empty + // cv::Mat to get the same behavior as AddImage). If non-empty, features are + // only extracted in regions where the mask value is > 0. Mask should be 8-bit + // grayscale of the same size as source, unless empty. + virtual bool AddImageWithMask(const cv::Mat& source, + const cv::Mat& source_mask, + int64 timestamp_usec); + + // Call after AddImage* to retrieve last downscaled, grayscale image. + cv::Mat GetGrayscaleFrameFromResults(); + + // Returns result as RegionFlowFrame. Result is owned by caller. + // Will return NULL if called twice without AddImage* call. + virtual RegionFlowFrame* RetrieveRegionFlow(); + + // Returns result as RegionFlowFeatureList. Result is owned by caller. + // Will return NULL if called twice without AddImage* call. + // Computes optionally feature descriptors (if compute_feature_descriptor + // is set, in that case curr_color_image must not be NULL) + // and additionally matching descriptor (if compute_match_descriptor is set, + // in this case prev_color_image must not be NULL). 
+ // Passed images should be in sync with those passed to AddImage, i.e. + // source in AddImage and parameter curr_color_image should refer to the same + // image. + virtual RegionFlowFeatureList* RetrieveRegionFlowFeatureList( + bool compute_feature_descriptor, bool compute_match_descriptor, + const cv::Mat* curr_color_image, // optional. + const cv::Mat* prev_color_image); // optional. + + // Same as above, but returns specific tracked result from current frame C + // to C - track_index - 1. + virtual RegionFlowFeatureList* RetrieveMultiRegionFlowFeatureList( + int track_index, bool compute_feature_descriptor, + bool compute_match_descriptor, + const cv::Mat* curr_color_image, // optional. + const cv::Mat* prev_color_image); // optional. + + // Returns result of a specific RegionFlowFrame in case + // RegionFlowComputationOptions::frames_to_track() > 1. Result is owned by + // caller. + virtual RegionFlowFrame* RetrieveMultiRegionFlow(int frame); + + // Resets computation to ignore any previously added frames. Next frame passed + // via AddImageXXX() routines will be treated as the first frame in the + // sequence. + virtual void Reset(); + + // Creates synthetic tracks with feature points in a grid with zero motion + // w.r.t. prev frame. Points are located at the center of each grid. Step size + // is fractional w.r.t. image size. + static void ZeroMotionGridFeatures(int frame_width, int frame_height, + float frac_grid_step_x, + float frac_grid_step_y, + RegionFlowFeatureList* result); + + // Returns densly sampled motions zero motion features. + // Features are centered in a box of size frac_diameter that is shifted by + // frac_steps_x * frame_width and frac_steps_y * frame_height. 
+ static void DenseZeroMotionSamples(int frame_width, int frame_height, + float frac_diameter, float frac_steps_x, + float frac_steps_y, + RegionFlowFeatureList* result); + + private: + typedef std::vector> + RegionFlowFeatureListVector; + + typedef std::vector TrackedFeatureView; + + // Indexed via grid bin, each bin contains list of its corresponding features. + typedef std::vector TrackedFeatureMap; + + struct FrameTrackingData; + struct LongTrackData; + struct ORBFeatureDescriptors; + + // Implementation function to retrieve the i-th RegionFlowFeatureList + // (specified track_index). Specifically, i-th feature list, denotes the flow + // from the current frame N to the previous frame N - 1 - track_index. + // Casts arguments to cv. + virtual std::unique_ptr + RetrieveRegionFlowFeatureListImpl(int track_index, + bool compute_feature_descriptor, + bool compute_match_descriptor, + const cv::Mat* curr_color_image, + const cv::Mat* prev_color_image); + + // Initializes the FrameTrackingData's members from source and source_mask. + // Returns true on success. + bool InitFrame(const cv::Mat& source, const cv::Mat& source_mask, + FrameTrackingData* data); + + // Adds image to the current buffer and starts tracking. + bool AddImageAndTrack(const cv::Mat& source, const cv::Mat& source_mask, + int64 timestamp_usec, + const Homography& initial_transform); + + // Computes *change* in visual difference between adjacent frames. Normalized + // w.r.t. number of channels and number of pixels. For this to be meaningful + // it is expected that passed FrameTrackingData's are exactly one frame apart + // (CHECKED). + float ComputeVisualConsistency(FrameTrackingData* previous, + FrameTrackingData* current) const; + + // Computes flow regularized based on regions and other options, from frame + // index "from" to index "to", specified relative to current frame, i.e. index + // of current frame = 0, prev frame = -1, next frame = 1, etc. 
Set invert_flow + // to true if the flow should be inverted after tracking. + // Optionally, can input the previous result to link features via ids, + // effectively creating long feature tracks. In this case you usually want to + // request the current result (same as returned in feature_list) in form of + // a TrackedFeatureList. + void ComputeRegionFlow(int from, int to, bool synthetic_tracks, + bool invert_flow, + const TrackedFeatureList* prev_result, // optional. + TrackedFeatureList* curr_result, // optional. + RegionFlowFeatureList* feature_list); + + // Gain corrects input frame w.r.t. reference frame. Returns true iff gain + // correction succeeds. If false, calibrated_frame is left untouched. + bool GainCorrectFrame(const cv::Mat& reference_frame, + const cv::Mat& input_frame, float reference_mean, + float input_mean, cv::Mat* calibrated_frame) const; + + // Feature extraction method. + // Expects as input an image pyramid of gray scale image (each subsequent + // level should be downsampled by a factor of 2 (always rounding up), CHECKED + // against). + // For each level extracts corner features across a grid by considering all + // locations that have a corner response above + // options_.feature_quality_level() * maximum within the grid bin. + // Features with high corner response are output first (but corner response is + // not necessarily monotonic). Feature locations are binned into mask + // (via downscaling by mask_scale), using a 5x5 patch, to discard features + // that are too close to each other. + // Features and corner responses are added to the corresponding vectors in + // data, i.e. passed data is not cleared and expected to be initialized. + virtual void AdaptiveGoodFeaturesToTrack( + const std::vector& extraction_pyramid, int max_features, + float mask_scale, cv::Mat* mask, FrameTrackingData* data); + + // Uses prev_result to remove all features that are not present in data. + // Uses track_ids, i.e.
only works with long feature processing. + void RemoveAbsentFeatures(const TrackedFeatureList& prev_result, + FrameTrackingData* data); + + // Remove features in data that lie outside the feature extraction mask for + // that frame. + void RemoveFeaturesOutsideMask(FrameTrackingData* data); + + // Extracts features for tracking from frame corresponding to data. + // Optionally, may reuse tracked features if available, based on options. + // Optionally, if new features are extracted, can use feature_list to mask out + // feature locations that should not be extracted again. + void ExtractFeatures(const TrackedFeatureList* prev_result, + FrameTrackingData* data); + + // Performs inplace feature selection, by evaluating the range + // [0, data->features.size()] via an Evaluator implementing + // [](int) -> bool. Only feature indices for which eval returns true are kept + // (using in place moves) the remainder is discarded. Applies moves operation + // to FrameTrackingData's feature, track_idx, feature_source_map and + // neighborhoods. Also applies moves to any vector and vector + // that can be optionally supplied. + // Note: All vectors are assumed to of the same size (checked in debug + // mode). + template + int InplaceFeatureSelection(FrameTrackingData* data, + std::vector*> int_vecs, + std::vector*> float_vecs, + const Evaluator& eval); + + // Tracks features between two frames (from -> to). Operates on internal data + // structure FrameTrackingData which stores all frame information relavant for + // tracking. + // + // If gain_correct is true, tracking is carried out between the "from" and the + // gain-corrected "to" image. It is also an output variable indicating whether + // gain correction succeeded or failed. + // + // Updates internal data structure, so any computation can be reused in + // successive calls to feature extraction or tracking. 
+ void TrackFeatures(FrameTrackingData* from_data_ptr, + FrameTrackingData* to_data_ptr, bool* gain_correct, + float* frac_long_features_rejected, + TrackedFeatureList* results); + + // Wide-baseline version of above function, using feature descriptor matching + // instead of tracking. + void WideBaselineMatchFeatures(FrameTrackingData* from_data_ptr, + FrameTrackingData* to_data_ptr, + TrackedFeatureList* results); + + // Fits affine model to TrackedFeatureList via direct call of + // MotionEstimation::EstimateAffineModelIRLS. + AffineModel AffineModelFromFeatures(TrackedFeatureList* features) const; + + // Creates synthetic tracks with feature points in a grid with zero motion + // w.r.t. prev frame. Points are located at the center of each grid. Step size + // is fractional w.r.t. image size. + // Returns minimum distance from border across all features. + static int ZeroMotionGridTracks(int frame_width, int frame_height, + float frac_grid_step_x, + float frac_grid_step_y, + TrackedFeatureList* results); + + // Computes region flow using a rectangular grid of square regions. + void ComputeBlockBasedFlow(TrackedFeatureList* feature_list, + TrackedFeatureView* inlier_features) const; + + // Initializes feature locations for FrameTrackingData at index to, + // from resulting tracks in from. + void InitializeFeatureLocationsFromPreviousResult(int from, int to); + + // Initializes feature locations in "to" from initial transform by applying + // it to every feature of "from". + void InitializeFeatureLocationsFromTransform(int from, int to, + const Homography& transform); + + // Enforces a translational model within each region, only retaining inliers + // that are output to inliers. + void DetermineRegionFlowInliers(const TrackedFeatureMap& region_feature_map, + TrackedFeatureView* inliers) const; + + // Determines number of minimum inliers based on absolute and relative + // thresholds. 
+ int GetMinNumFeatureInliers( + const TrackedFeatureMap& region_feature_map) const; + + // Internal conversion function from a feature list to corresponding frame. + void RegionFlowFeatureListToRegionFlow( + const RegionFlowFeatureList& feature_list, RegionFlowFrame* frame) const; + + // Initializes all members except actual features in a RegionFlowFeatureList. + void InitializeRegionFlowFeatureList( + RegionFlowFeatureList* region_flow_feature_list) const; + + // Converts TrackedFeatureView to RegionFlowFeatureList, flattening over + // all bins. Returns average motion magnitude. + // Optionally TrackedFeature's corresponding to each feature output in + // region_flow_feature_list can be recorded via flattened_feature_list. + float TrackedFeatureViewToRegionFlowFeatureList( + const TrackedFeatureView& region_feature_view, + TrackedFeatureList* flattened_feature_list, + RegionFlowFeatureList* region_flow_feature_list) const; + + // Determines if sufficient (spatially distributed) features are available. + bool HasSufficientFeatures(const RegionFlowFeatureList& feature_list); + + // Returns number of required pyramid levels to track the specified distance. + int PyramidLevelsFromTrackDistance(float track_distance); + + // Returns blur score (inverse of average corner measure) for input image. + // The higher the value the blurrier the frame. + float ComputeBlurScore(const cv::Mat& image); + + // Computes binary mask of pixels, for which the corner score (passed in + // min_eig_vals) can be used as a measure to quantify the amount of blur. + // For pixels not part of the mask the corner score is not a reliable measure + // to quantify blur. For example, discards over-exposed regions and regions + // that do not have sufficient cornerness. + // Note: Modifies the corner values! + void ComputeBlurMask(const cv::Mat& input, cv::Mat* min_eig_vals, + cv::Mat* mask); + + // Appends features in a sorted manner (by pointer location) while discarding + // duplicates.
+ void AppendUniqueFeaturesSorted(const TrackedFeatureView& to_be_added, + TrackedFeatureView* features) const; + + void GetFeatureTrackInliers(bool skip_estimation, + TrackedFeatureList* features, + TrackedFeatureView* inliers) const; + + bool IsVerifyLongFeatures() const { + return long_track_data_ != nullptr && options_.verify_long_features(); + } + + int DownsampleWidth() const { return frame_width_; } + int DownsampleHeight() const { return frame_height_; } + + // Returns 1.0 / scale that is being applied to the features for downscaling. + float DownsampleScale() const { return downsample_scale_; } + + private: + RegionFlowComputationOptions options_; + + // Frame width and height after downsampling. + int frame_width_; + int frame_height_; + + // Number of frames w.r.t each frame is tracked. + int frames_to_track_; + // Maximum length of long feature tracks in frames. + int max_long_track_length_; + + // Original frame width and height. + int original_width_; + int original_height_; + + // Scale and state of downsampling. + float downsample_scale_; + bool use_downsampling_; + + int pyramid_levels_; + int extraction_levels_; + + int frame_num_ = 0; + int max_features_ = 0; + float curr_blur_score_ = 0; + // Moving average of number of features across recently computed tracks. + float curr_num_features_avg_ = 0; + + // Count used to generate unique feature ids. + int feature_count_ = 0; + + // List of RegionFlow frames of size options_.frames_to_track. + RegionFlowFeatureListVector region_flow_results_; + + // Gain adapted version. + std::unique_ptr gain_image_; + std::unique_ptr gain_pyramid_; + + // Temporary buffers. + std::unique_ptr corner_values_; + std::unique_ptr corner_filtered_; + std::unique_ptr corner_mask_; + + std::unique_ptr curr_color_image_; + + // Temporary images for feature extraction. + std::unique_ptr feature_tmp_image_1_; + std::unique_ptr feature_tmp_image_2_; + + std::vector feature_status_; // Indicates if point could be + // tracked. 
+ std::vector feature_track_error_; // Patch-based error. + + // Circular queue to buffer tracking data. + std::deque> data_queue_; + + // Global settings for block based flow. + int block_width_; + int block_height_; + int block_levels_; + + // Stores average flow magnitudes for recently processed frames. + std::deque flow_magnitudes_; + + // Records data for long feature tracks. + std::unique_ptr long_track_data_; + + bool use_cv_tracking_ = false; + + // Counter used for controlling how ofter do we run descriptor extraction. + // Count from 0 to options_.extract_descriptor_every_n_frame() - 1. + // Extract descriptors only when counter == 0. + int cnt_extract_descriptors_ = 0; + + friend class MotionAnalysis; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_COMPUTATION_H_ diff --git a/mediapipe/util/tracking/region_flow_computation.proto b/mediapipe/util/tracking/region_flow_computation.proto new file mode 100644 index 000000000..800e430a6 --- /dev/null +++ b/mediapipe/util/tracking/region_flow_computation.proto @@ -0,0 +1,576 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Options for tracking by pyramidical Lucas-Kanade. Local outlier rejection +// is performed by enforcing translation models over regions (grid-based +// or obtained from segmentation). 
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/tone_estimation.proto"; + +// Next tag: 33 +message TrackingOptions { + // Describes direction of flow during feature tracking and for the output + // region flow. + enum FlowDirection { + FORWARD = 1; // Tracks are forward, from frame N-k -> frame N (k > 0). + BACKWARD = 2; // Tracks are backward, from frame N -> frame N-k + // (k > 0). + CONSECUTIVELY = 3; // Try forward and backward tracking consecutively. + // Forward tracking is done first. If the flow is + // stable, i.e. there are sufficient tracked features, + // the region flow computation ends. Otherwise, backward + // tracking is performed. + } + + // Flow direction used internally during tracking features. Forward tracking + // allows reusing tracked features instead of explicitly tracking them in + // every frame, and can therefore be faster. See the reuse_features_XXX + // options below. However, if not reusing features, then it is best to match + // the direction for both internal tracking and output flow, for peformance + // reasons. + optional FlowDirection internal_tracking_direction = 19 [default = BACKWARD]; + + // Direction of flow vectors that are computed and output by calls to retrieve + // region flow, tracked features, etc. Note when this is BACKWARD, then the + // returned flow for frame N contains features tracked *from* frame N to a + // previous frame N-k. When this is FORWARD, the flow for frame N contains + // the flow from features in a previous frame N-k, tracked *to* frame N. + // Note that the output flow direction can only be set to FORWARD or BACKWARD. + optional FlowDirection output_flow_direction = 20 [default = BACKWARD]; + + // Specifies how a feature is tracked w.r.t. previous or next frames + // (dependent on the FlowDirection options above). + // Per default, each frame is tracked w.r.t. a single neighboring frame + // (TRACK_SINGLE_FRAME). 
If associations across multiple frames are desired, + // TRACK_MULTI_FRAME creates multiple results for the current frame, by + // tracking features w.r.t. multiple neighbors. Number of neighbors is + // specified by multi_frames_to_track. + // If long feature tracks are desired (i.e. a track across a frame pair + // that is identified to belong to an earlier known feature), use + // TRACK_ACROSS_FRAMES. Maximum track length can be specified by + // long_tracks_max_frames. + enum TrackingPolicy { + POLICY_SINGLE_FRAME = 1; // Tracks w.r.t. previous or next frame. + POLICY_MULTI_FRAME = 2; // Tracks w.r.t. multiple frames. + POLICY_LONG_TRACKS = 3; // Create long feature tracks. + // Requires internal_tracking_direction to be + // FORWARD. Checked against. + } + + optional TrackingPolicy tracking_policy = 25 [default = POLICY_SINGLE_FRAME]; + + // Number of frame-pairs used for POLICY_MULTI_FRAME, ignored for other + // policies. + // Value of 1 means we are tracking features in the current frame, w.r.t. + // the previous one. Value of 2 denotes tracking of features in current + // w.r.t the previous one and the one before the previous one, etc. + optional int32 multi_frames_to_track = 1 [default = 1]; + + // Maximum length of long feature tracks for POLICY_LONG_TRACKS in frames. + // Note: This maximum is not hard enforced, to avoid that many long + // tracks are dropped at the same time. Instead if a feature reaches + // long_tracks_max_frames * 0.8, it will get dropped with a probability of X, + // where X is calculated, such that 95% of all qualifying features are + // dropped within the interval [.8, 1.2] * long_tracks_max_frames. + optional int32 long_tracks_max_frames = 26 [default = 300]; + + // Hard limit of maximum number of features. Control density of features, with + // min_feature_distance option. This limit is to guarantee that the + // run-time of RegionFlowComputation does not spiral out of control. 
+ optional int32 max_features = 2 [default = 2000]; + + // Specifies the extraction method for features. + enum CornerExtractionMethod { + EXTRACTION_HARRIS = 1; // Using Harris' approximation of + // EXTRACTION_MIN_EIG_VAL. + EXTRACTION_MIN_EIG_VAL = 2; // Exact smallest eigenvalue computation. + EXTRACTION_FAST = 3; // Extract using FAST feature detector. + } + + optional CornerExtractionMethod corner_extraction_method = 27 + [default = EXTRACTION_MIN_EIG_VAL]; + + // Settings for above corner extraction methods. + message MinEigValExtractionSettings { + // Quality level of features (features with + // min_eig_value < quality_level * max_eig_value are rejected). + // Here [min|max]_eig_value denote the minimum and maximum eigen value of + // the auto-correlation matrix of the patch centered at a feature point. The + // ratio of eigenvalues denotes the "cornerness", lower means more + // pronounced corners. + // (see http://en.wikipedia.org/wiki/Harris-Affine for details.) + optional float feature_quality_level = 1 [default = 0.01]; + + // Features below this quality level are always discarded, even if their + // score is above feature_quality_level() * local maximum within that grid + // cell. This prevents us from including very poor features. + optional float adaptive_lowest_quality_level = 2 [default = 8e-5]; + } + + optional MinEigValExtractionSettings min_eig_val_settings = 28; + + message HarrisExtractionSettings { + // Same as in MinEigValExtractionSettings. + optional float feature_quality_level = 1 [default = 2.5e-4]; + + // Note, due to Harris response being negative for some pixels, + // no lowest quality level is enforced. + } + + optional HarrisExtractionSettings harris_settings = 29; + + message FastExtractionSettings { + // threshold on difference between intensity of the central pixel and pixels + // of a circle around this pixel. Empirically, the larger the threshold, the + // fewer the keypoints will be detected. 
+ // Default value set as the same with OpenCV. + optional int32 threshold = 1 [default = 10]; + } + + optional FastExtractionSettings fast_settings = 31; + + optional int32 tracking_window_size = 4 [default = 10]; + + optional int32 tracking_iterations = 5 [default = 10]; + + // Fractional tracking distance w.r.t. to frame diameter d. The number of + // pyramid levels l is chosen such that + // 2^l * tracking_window_size / 2 >= fractional_tracking_distance * d. + // Therefore, theoretically it is guaranteed that objects moving less than + // fractional_tracking_distance * d can be tracked. + optional float fractional_tracking_distance = 6 [default = 0.15]; + + // If set, modifies tracking distance to be 130% of maximum average + // tracking distances of previous frames. + optional bool adaptive_tracking_distance = 24 [default = false]; + + // Minimum feature distance in pixels. Close features are suppressed. If value + // < 1, the distance is computed as a fraction of the frame diameter. + optional float min_feature_distance = 7 [default = 7]; + + // By default, when downscaling by factor x, the minimum feature distance + // is downscaled by a factor of sqrt(x). If set false, no scaling is + // performed. + optional bool distance_downscale_sqrt = 21 [default = true]; + + // Uses grid based extraction of features. Quality level is local within a + // grid cell and results are combined over all cells and multiple scales and + // grid offsets. + // Default option, setting it to false is deprecated and will fail. + optional bool adaptive_good_features_to_track = 8 [default = true]; + + // Size of each grid cell. Values < 1 are interpreted to be relative to + // frame_width_ x frame_height_. + optional float adaptive_features_block_size = 9 [default = .26]; + + // Scales / levels employed for feature extraction. Grid cell size is scaled + // by 0.5 for each level. 
+ optional int32 adaptive_features_levels = 10 [default = 1]; + + // If > 1, feature extraction is carried out at multiple scales by downscaling + // the image repeatedly, extracting features (eigenvalue images) and upscaling + // them. + optional int32 adaptive_extraction_levels = 22 [default = 1]; + + // Alternate way of specifying extraction levels: number of levels is + // automatically computed by downsampling the image until its maximum + // dimension (width or height) reaches this value. Overrides + // adaptive_extraction_levels if > 0. + optional int32 adaptive_extraction_levels_lowest_size = 23 [default = 0]; + + // Grid step-size in fraction of width or height used for creating synthetic + // zero motion tracks with feature points lying on a grid. Can be set based on + // desired number of total features as 1/sqrt(num_features), + // e.g. .04 ~= 1/sqrt(600). + optional float synthetic_zero_motion_grid_step = 13 [default = .04]; + + // If set, uses ORB features with brute force matching and ratio test + // to track frames across larger perspective changes than possible with + // default KLT features. + optional bool wide_baseline_matching = 14 [default = false]; + + // Only brute force matches with + // best_match_distance < ratio_test_threshold * second_best_match_distance + // are retained. + optional float ratio_test_threshold = 15 [default = 0.8]; + + // Refines wide baseline matches by estimating affine transform to + // wide-baseline matches which is used to seed initial positions for KLT + // matches. + optional bool refine_wide_baseline_matches = 16 [default = false]; + + // When tracking features, features tracked from frame A to frame B may be + // reused as the features for frame B when tracking from it (instead of + // extracting features). The max_frame_distance flag limits the distance + // between A and B for the features to be reused. Setting it to 0 => no + // re-use. 
+ optional int32 reuse_features_max_frame_distance = 17 [default = 0]; + + // In conjunction with above, the features are reused in frame B only if they + // are at-least this fraction of the original features in frame A. Otherwise + // they are reset and extracted from scratch. + optional float reuse_features_min_survived_frac = 18 [default = 0.7]; + + // If set uses newer OpenCV tracking algorithm. + // Recommended to be set for all new projects. + optional bool use_cv_tracking_algorithm = 30 [default = true]; + + enum KltTrackerImplementation { + UNSPECIFIED = 0; + KLT_OPENCV = 1; // Use OpenCV's implementation of KLT tracker. + } + + // Implementation choice of KLT tracker. + optional KltTrackerImplementation klt_tracker_implementation = 32 + [default = KLT_OPENCV]; + + // Deprecated fields. + extensions 3, 11, 12; +} + +// Next tag: 67 +message RegionFlowComputationOptions { + optional TrackingOptions tracking_options = 1; + + // Features are binned into grids of different resolutions (see + // fast_estimation_block_size below) and retained if they survive a localized + // translation based RANSAC algorithm and at the survivors are at least of + // size min_feature_inliers. Must be at least 3! + optional int32 min_feature_inliers = 2 [default = 3]; + + // Relative number of inlier features w.r.t. average number of features + // per grid bin. Maximum of both thresholds is used as actual threshold. + optional float relative_min_feature_inliers = 46 [default = 0.2]; + + // Pre-blur before computing features to reduce noise. Set to zero for no + // blurring. + optional float pre_blur_sigma = 33 [default = 0.8]; + + // Number of ransac rounds to estimate per region flow vector. This could be + // adaptive, but the required number of rounds is so low, that estimating + // the bound is more costly than just running it for a fixed number of times. 
+ optional int32 ransac_rounds_per_region = 3 [default = 15]; + + // Error thresholds for a feature to be considered as an inlier in + // pixel-distance. The max of all three thresholds below is used as the actual + // threshold. + // Absolute in pixels. + optional float absolute_inlier_error_threshold = 4 [default = 2]; + // Scaled w.r.t. frame diameter. + optional float frac_inlier_error_threshold = 52 [default = 0]; + // Scaled w.r.t model estimated during each RANSAC round. + optional float relative_inlier_error_threshold = 44 [default = 0.1]; + // Returns for each grid only the top N inlier sets. + optional int32 top_inlier_sets = 45 [default = 2]; + + // For debugging purposes, uses all tracked features regardless of the above + // setting. + optional bool no_estimation_mode = 40 [default = false]; + + // Block size in pixels. If fractional block_size is used (0 < size < 1), + // it is interpreted as fraction of the image dimensions. + // We use 4 blocks in each dimension by standard. + optional float fast_estimation_block_size = 6 [default = .25]; + + // Minimum block size in pixels (larger dimension) to perform fast estimation + // on. Pyramid levels are allocated such that + // block_size * 0.5^(level - 1) = min_block_size. + // At least two levels are used. + optional int32 fast_estimation_min_block_size = 25 [default = 100]; + + // We use overlapping versions of the grid, next parameters specifies how + // many in each dimensions (total is therefore, the value squared!). + optional int32 fast_estimation_overlap_grids = 22 [default = 3]; + + // Flow features with motion above this thresholds (w.r.t. frame diameter) + // are rejected. + optional float max_magnitude_threshold_ratio = 23 [default = 0.2]; + + // Flow features that have a motion that is larger than + // median_magnitude_bounds times the median magnitude are discarded. + // If set to zero, test is not enforced. 
+ optional float median_magnitude_bounds = 51 [default = 0.0]; + + // Determines how irls weights for computed features are initialized. + // In general, more stable features are given higher weight. + enum IrlsInitialization { + INIT_UNIFORM = 1; // All weights equal 1 + + // Feature's irls weight is initialized to a value in [0, 2] + // indicating how consistent the feature's motion is w.r.t. neighboring + // features (high values = very consistent). Determined by counting how + // often a feature is part of the inlier set for a particular bin. + INIT_CONSISTENCY = 2; + } + + // If this option is activated, feature's irls weight is initialized to the + // inverse of its computed flow. + optional IrlsInitialization irls_initialization = 49 + [default = INIT_CONSISTENCY]; + + // We support down-sampling of an incoming frame before running the + // resolution dependent part of the region flow computation (feature + // extraction and tracking if desired). + // Note that in all downsampling modes except for DOWNSAMPLE_TO_INPUT_SIZE, + // for uneven dimensions after downsampling, we always round up to + // the nearest even dimension, i.e. 350p with a downsample_factor of 2.0 + // would expect an input of size 176p. + enum DownsampleMode { + // No downsampling. + DOWNSAMPLE_NONE = 1; + // Downsizes the input frame such that frame_size == downsampling_size, + // where frame_size := max(width, height). + DOWNSAMPLE_TO_MAX_SIZE = 2; + // Downsizes frame by pre-defined factor, downsample_factor below. + DOWNSAMPLE_BY_FACTOR = 3; + // Downsampling based on downsampling schedule, see DownsampleSchedule below + // for details. + DOWNSAMPLE_BY_SCHEDULE = 4; + // Downsizes the input frame such that frame_size == downsampling_size, + // where frame_size := min(width, height). + DOWNSAMPLE_TO_MIN_SIZE = 5; + // Input frame is assumed to be already downsampled by the factor specified + // by downsample_factor below. 
For example if the original frame is 720p, + // and downsample_factor is set to 2.0, then we expect as input 360p. + DOWNSAMPLE_TO_INPUT_SIZE = 6; + } + + optional DownsampleMode downsample_mode = 11 [default = DOWNSAMPLE_NONE]; + + // Specify the size of either dimension here, the frame will be + // downsampled to fit downsampling_size. + optional int32 downsampling_size = 12 [default = 256]; + optional float downsample_factor = 18 [default = 2.0]; + + // If set, we will force the computed downsampling factor to be the nearest + // integer, resulting in faster downsampling. This will have no effect for + // DOWNSAMPLE_TO_INPUT_SIZE, DOWNSAMPLE_BY_FACTOR, and DOWNSAMPLE_BY_SCHEDULE, + // which should have exact values defined. + optional bool round_downsample_factor = 62 [default = false]; + + // Downsampling schedule. Frame sizes up to which a particular downsampling + // factor is applied. Factor chosen by comparing actual frame area against + // standard area (standard_width * standard_height), where standard_width = + // 16/9 X standard_height. + message DownSampleSchedule { + optional float downsample_factor_360p = 1 [default = 1]; // For <= 360p. + optional float downsample_factor_480p = 2 [default = 1]; // For <= 480p. + optional float downsample_factor_720p = 3 [default = 2]; // For <= 720p. + optional float downsample_factor_1080p = 4 [default = 2]; // >= 720p. + } + // Used if downsample_mode is DOWNSAMPLE_BY_SCHEDULE. + optional DownSampleSchedule downsample_schedule = 19; + + // Minimum number of good features that we require to be present. + // Without good features, the estimated motion models will do more harm than + // good, so it is better to use simply the identity transform for this frame, + // and set the flag unstable_models to true in RegionFlow. + optional int32 min_feature_requirement = 13 [default = 20]; + + // We also require features to cover a minimum percentage area of the frame. 
+ // We use downsampling and plot each feature by a 1 in a grid, this is + // equivalent to plotting each feature by a rectangle in the original frame. + optional float min_feature_cover = 14 [default = 0.15]; + + // Grid size for above min feature cover. + optional int32 min_feature_cover_grid = 20 [default = 8]; + + // Computes blur score for each frame. Score is proportional to amount of + // blur present in a frame, i.e. higher scores reflect more blurred frames. + // Note that the score is dependent on the gradient distribution of the image + // content, i.e. the score itself is rather meaningless but needs to be + // compared to scores of neighboring frames. + optional bool compute_blur_score = 17 [default = false]; + + message BlurScoreOptions { + // Blur score is only computed over image regions of high cornerness + // (as blur in any direction will always alter these regions). First, the + // corner image (smallest eigenvalue of 2nd moment matrix) is box filtered, + // and then thresholded. + optional int32 box_filter_diam = 1 [default = 3]; + + // Specifies relative (w.r.t. maximum) and absolute cornerness threshold + // for threshold operation. + optional float relative_cornerness_threshold = 2 [default = 3e-2]; + optional float absolute_cornerness_threshold = 3 [default = 1e-4]; + + // Blur score is defined as 1.0 / <median cornerness>, where + // <median cornerness> is the n-th percentile of the cornerness evaluated + // over the image regions of high cornerness as specified above. + optional float median_percentile = 5 [default = 0.85]; + } + + optional BlurScoreOptions blur_score_options = 31; + + // Determines how/if visual consistency is computed. If activated, + // computes the absolute *change* in visual difference between two adjacent + // frame pairs, i.e. the modulus of the 2nd derivative of the frame + // appearance. Stores result in RegionFlowFeatureList::visual_consistency. + message VisualConsistencyOptions { + // Computation of visual consistency is only performed if activated.
+    optional bool compute_consistency = 1 [default = true];
+
+    // Incoming color or gray scale image is scaled to a tiny square image of
+    // the specified dimension. Used to compare adjacent images via SSD.
+    optional int32 tiny_image_dimension = 2 [default = 20];
+  }
+
+  optional VisualConsistencyOptions visual_consistency_options = 55;
+
+  // Radius of patch descriptor computed during RetrieveRegionFlowFeatureList
+  // call.
+  optional int32 patch_descriptor_radius = 21 [default = 3];
+  // Minimum distance from image border. Must be greater or equal to
+  // patch_descriptor_radius.
+  optional int32 distance_from_border = 50 [default = 3];
+
+  // Corner response is scaled by scalar below and normalized to lie within
+  // [0, 1], where 0 is low corner score and 1 high corner score.
+  optional float corner_response_scale = 26 [default = 1500];
+
+  // Verifies reliability of features, by back-tracking operation from matched
+  // location. If returned location is within verification_distance feature is
+  // accepted otherwise discarded.
+  optional bool verify_features = 27 [default = false];
+  optional float verification_distance = 28 [default = 0.5];
+
+  // If set, consistency of long features is verified (in case tracking_policy
+  // is set to POLICY_LONG_FEATURES) by extracting a patch
+  // around the feature during the very first observation and comparing the
+  // matching patch along the long feature trajectory via SSD. If the
+  // difference is above the long_feature_verification_threshold the feature is
+  // removed.
+  optional bool verify_long_features = 53 [default = true];
+
+  // Maximum average per pixel error (in L1 norm) in the normalized intensity
+  // domain for matching patches to be considered to be consistent.
+  optional float long_feature_verification_threshold = 54 [default = 0.04];
+
+  // Long features are expected to have limited acceleration over time.
+ // If acceleration exceeds specified value based on the setting in + // verify_long_feature_acceleration either: + // a) verify_long_feature_acceleration = false + // A new track is started instead of continuing the old one. + // The track itself is not removed in this case. + // + // b) verify_long_feature_acceleration = true + // The track is flagged for verification, by back-tracking operation from + // matched location. If track fails verification test it is + // discarded. This only triggers if at least + // verify_long_feature_trigger_ratio of features have been flagged, + // otherwise option a is used. + optional float max_long_feature_acceleration = 56 [default = 5.0]; + optional bool verify_long_feature_acceleration = 63 [default = false]; + optional float verify_long_feature_trigger_ratio = 64 [default = 0.0]; + + // If true, histogram equalization is performed to the input image sequence + // before registration. + optional bool histogram_equalization = 57 [default = false]; + + // If true, synthetic region flows with zero motion are used for all (or just + // the first) frame. + optional bool use_synthetic_zero_motion_tracks_all_frames = 34 + [default = false]; + optional bool use_synthetic_zero_motion_tracks_first_frame = 35 + [default = false]; + + // Optional gain correction before tracking features. Improves robustness when + // lighting is changing. + optional bool gain_correction = 36 [default = false]; + + // If set performs gain correction by simply equalizing mean intensity + // between frames, instead of using ToneEstimation. + optional bool fast_gain_correction = 61 [default = false]; + + // If the multiple hypothesis flag is set, features are tracked using both + // with and without gain correction, and the hypothesis with more inliers + // is selected. 
+ optional bool gain_correction_multiple_hypotheses = 47 [default = true]; + + // This flag, when used together with the multiple hypotheses flag, specifies + // that gain correction should increase the number of inliers by at least this + // fraction for it to be used instead of default tracking. + optional float gain_correction_inlier_improvement_frac = 48 [default = 0.1]; + + // If set, always uses the brighter frame as reference. This is the + // preferred direction of correction, to avoid overexposed regions from + // being corrected which leads to spurious matches. + optional bool gain_correction_bright_reference = 59 [default = false]; + + // Only performs gain correction if number of tracked features falls under + // specified ratio (w.r.t. previous frame). + // Set to zero, to always perform gain correction if requested. + optional float gain_correction_triggering_ratio = 60 [default = 0.0]; + + // Gain correction is based on a grid of zero motion features, independent of + // the underlying motion. Fractional parameter specifies resolution of the + // grid w.r.t. frame size. + optional float frac_gain_feature_size = 37 [default = 0.3]; + optional float frac_gain_step = 38 [default = 0.1]; + + enum GainCorrectMode { + GAIN_CORRECT_DEFAULT_USER = 1; // Uses default or user supplied bounds, + // i.e. gain_bias_bounds is left untouched. + GAIN_CORRECT_VIDEO = 2; // Uses defaults for video (most strict). + GAIN_CORRECT_HDR = 3; // Uses most relaxed settings to track + // across HDR frames, taken at different + // exposures. + GAIN_CORRECT_PHOTO_BURST = 4; // More relaxed than video but stricter + // than HDR; use for photo burst where + // exposure between frames can change. + } + + optional GainCorrectMode gain_correct_mode = 41 + [default = GAIN_CORRECT_DEFAULT_USER]; + + // Bounds for the estimated model. If not set externally, will be set + // based on GainCorrectMode. 
+ optional ToneEstimationOptions.GainBiasBounds gain_bias_bounds = 39; + + // Supported image formats. All images are converted to grayscale + // before processing. These image formats only concern AddImage. + // IMPORTANT: All the Retrieve* methods expect RGB when the descriptors + // are computed. + enum ImageFormat { + FORMAT_GRAYSCALE = 1; + FORMAT_RGB = 2; + FORMAT_RGBA = 3; + FORMAT_BGR = 4; + FORMAT_BGRA = 5; + } + + // Image format of the input. + optional ImageFormat image_format = 58 [default = FORMAT_RGB]; + + enum DescriptorExtractorType { + ORB = + 0; // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.370.4395&rep=rep1&type=pdf + } + + // The descriptor extractor type used. + optional DescriptorExtractorType descriptor_extractor_type = 65 + [default = ORB]; + + // Whether to compute derivatives when building the pyramid. When set to + // true, it's building a Laplacian pyramid. When set to false, it's building + // a Gaussian pyramid. + optional bool compute_derivative_in_pyramid = 66 [default = true]; + + // Deprecated fields. + extensions 5, 7, 8, 9, 10, 15, 16, 24, 29, 30, 32, 42, 43; +} diff --git a/mediapipe/util/tracking/region_flow_computation_test.cc b/mediapipe/util/tracking/region_flow_computation_test.cc new file mode 100644 index 000000000..256cfefbd --- /dev/null +++ b/mediapipe/util/tracking/region_flow_computation_test.cc @@ -0,0 +1,304 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/region_flow_computation.h" + +#include + +#include +#include +#include +#include + +#include "absl/time/clock.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/port/commandlineflags.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" + +// To ensure that the selected thresholds are robust, it is recommend +// to run this test mutiple times with time seed, if changes are made. +DEFINE_bool(time_seed, false, "Activate to test thresholds"); + +namespace mediapipe { +namespace { + +using RandomEngine = std::mt19937_64; + +struct FlowDirectionParam { + TrackingOptions::FlowDirection internal_direction; + TrackingOptions::FlowDirection output_direction; +}; + +class RegionFlowComputationTest + : public ::testing::TestWithParam { + protected: + virtual void SetUp() { + // Setup tracking direction options based on testing parameter. + const auto& param = GetParam(); + auto* tracking_options = base_options_.mutable_tracking_options(); + tracking_options->set_internal_tracking_direction(param.internal_direction); + tracking_options->set_output_flow_direction(param.output_direction); + + // Load bee image. 
+ data_dir_ = file::JoinPath("./", "/mediapipe/util/tracking/testdata/"); + + std::string png_data; + MEDIAPIPE_CHECK_OK( + file::GetContents(data_dir_ + "stabilize_test.png", &png_data)); + std::vector buffer(png_data.begin(), png_data.end()); + original_frame_ = cv::imdecode(cv::Mat(buffer), 1); + ASSERT_FALSE(original_frame_.empty()); + ASSERT_EQ(original_frame_.type(), CV_8UC3); + } + + // Creates a movie in the specified format by displacing original_frame_ to + // random positions. + void MakeMovie(int frames, RegionFlowComputationOptions::ImageFormat format, + std::vector* movie, + std::vector* positions); + + // Outputs allocated resized input frame. + void GetResizedFrame(int width, int height, cv::Mat* result) const; + + // Runs frame pair test using RGB, RGBA or grayscale input. + void RunFramePairTest(RegionFlowComputationOptions::ImageFormat format); + + protected: + RegionFlowComputationOptions base_options_; + + private: + std::string data_dir_; + cv::Mat original_frame_; +}; + +std::vector FlowDirectionCombinations() { + return {{TrackingOptions::FORWARD, TrackingOptions::FORWARD}, + {TrackingOptions::FORWARD, TrackingOptions::BACKWARD}, + {TrackingOptions::BACKWARD, TrackingOptions::FORWARD}, + {TrackingOptions::BACKWARD, TrackingOptions::BACKWARD}}; +} + +INSTANTIATE_TEST_SUITE_P(FlowDirection, RegionFlowComputationTest, + ::testing::ValuesIn(FlowDirectionCombinations())); + +void RegionFlowComputationTest::MakeMovie( + int num_frames, RegionFlowComputationOptions::ImageFormat format, + std::vector* movie, std::vector* positions) { + CHECK(positions != nullptr); + CHECK(movie != nullptr); + + const int border = 40; + int frame_width = original_frame_.cols - 2 * border; + int frame_height = original_frame_.rows - 2 * border; + ASSERT_GT(frame_width, 0); + ASSERT_GT(frame_height, 0); + + // First generate random positions. + int seed = 900913; // google. 
+ if (FLAGS_time_seed) { + seed = ToUnixMillis(absl::Now()) % (1 << 16); + LOG(INFO) << "Using time seed: " << seed; + } + + RandomEngine random(seed); + std::uniform_int_distribution<> uniform_dist(-10, 10); + positions->resize(num_frames); + (*positions)[0] = Vector2_f(border, border); + for (int f = 1; f < num_frames; ++f) { + Vector2_f pos = (*positions)[f - 1] + + Vector2_f(uniform_dist(random), uniform_dist(random)); + + // Clamp to valid positions. + pos.x(std::max(0, pos.x())); + pos.y(std::max(0, pos.y())); + pos.x(std::min(2 * border, pos.x())); + pos.y(std::min(2 * border, pos.y())); + + (*positions)[f] = pos; + } + + // Create movie by copying. + movie->resize(num_frames); + cv::Mat original_frame = original_frame_; + + auto convert = [&](int channel_format, int conversion_code) { + original_frame.create(original_frame_.rows, original_frame_.cols, + channel_format); + cv::cvtColor(original_frame_, original_frame, conversion_code); + }; + + switch (format) { + case RegionFlowComputationOptions::FORMAT_RGB: + break; + + case RegionFlowComputationOptions::FORMAT_BGR: + convert(CV_8UC3, CV_RGB2BGR); + break; + + case RegionFlowComputationOptions::FORMAT_GRAYSCALE: + convert(CV_8UC1, CV_RGB2GRAY); + break; + + case RegionFlowComputationOptions::FORMAT_RGBA: + convert(CV_8UC4, CV_RGB2RGBA); + break; + + case RegionFlowComputationOptions::FORMAT_BGRA: + convert(CV_8UC4, CV_RGB2BGRA); + break; + } + for (int f = 0; f < num_frames; ++f) { + (*movie)[f].create(frame_height, frame_width, original_frame.type()); + const auto& pos = (*positions)[f]; + cv::Mat tmp_view(original_frame, cv::Range(pos.y(), pos.y() + frame_height), + cv::Range(pos.x(), pos.x() + frame_width)); + tmp_view.copyTo((*movie)[f]); + } +} + +void RegionFlowComputationTest::GetResizedFrame(int width, int height, + cv::Mat* result) const { + CHECK(result != nullptr); + cv::resize(original_frame_, *result, cv::Size(width, height)); +} + +void RegionFlowComputationTest::RunFramePairTest( + 
RegionFlowComputationOptions::ImageFormat format) { + std::vector movie; + std::vector positions; + const int num_frames = 10; + MakeMovie(num_frames, format, &movie, &positions); + + const int frame_width = movie[0].cols; + const int frame_height = movie[0].rows; + + base_options_.set_image_format(format); + + RegionFlowComputation flow_computation(base_options_, frame_width, + frame_height); + + for (int i = 0; i < num_frames; ++i) { + flow_computation.AddImage(movie[i], 0); + + if (i > 0) { + float inliers = 0; + std::unique_ptr region_flow_frame( + flow_computation.RetrieveRegionFlow()); + // Get flow vector based on actual motion applied to frames. Direction + // based on output flow direction. + Vector2_f flow_vector; + switch (base_options_.tracking_options().output_flow_direction()) { + case TrackingOptions::BACKWARD: + flow_vector = positions[i] - positions[i - 1]; + break; + case TrackingOptions::FORWARD: + flow_vector = positions[i - 1] - positions[i]; + break; + case TrackingOptions::CONSECUTIVELY: + FAIL(); + break; + } + + // We expect all flow vectors to be similar to flow_vector. + for (const auto& region_flow : region_flow_frame->region_flow()) { + for (const auto& feature : region_flow.feature()) { + // Half a pixel error is very reasonable. + if (fabs(flow_vector.x() - FeatureFlow(feature).x()) < 0.5f && + fabs(flow_vector.y() - FeatureFlow(feature).y()) < 0.5f) { + ++inliers; + } + } + } + // 95% of all features should be tracked reliably. + EXPECT_GE(inliers / region_flow_frame->num_total_features(), 0.95f); + } + } +} + +TEST_P(RegionFlowComputationTest, FramePairTest) { + // The output flow direction should be either forward or backward. + EXPECT_NE(base_options_.tracking_options().output_flow_direction(), + TrackingOptions::CONSECUTIVELY); + // Test on grayscale input. + RunFramePairTest(RegionFlowComputationOptions::FORMAT_GRAYSCALE); + // Test on RGB input. 
+ RunFramePairTest(RegionFlowComputationOptions::FORMAT_RGB); + // Test on BGR input. + RunFramePairTest(RegionFlowComputationOptions::FORMAT_BGR); + // Test on RGBA input. + RunFramePairTest(RegionFlowComputationOptions::FORMAT_RGBA); + // Test on BGRA input. + RunFramePairTest(RegionFlowComputationOptions::FORMAT_BGRA); +} + +TEST_P(RegionFlowComputationTest, ResolutionTests) { + // Test all kinds of resolutions (disregard resulting flow). + // Square test, synthetic tracks. + for (int dim = 1; dim <= 50; ++dim) { + RegionFlowComputationOptions options = base_options_; + // Force synthetic features, as images are too small to track anything. + options.set_use_synthetic_zero_motion_tracks_all_frames(true); + RegionFlowComputation flow_computation(options, dim, dim); + + cv::Mat input_frame; + GetResizedFrame(dim, dim, &input_frame); + // Add frame several times. + for (int i = 0; i < 5; ++i) { + flow_computation.AddImage(input_frame, 0); + + // Don't care about the result here, simply test for segfaults. + delete flow_computation.RetrieveRegionFlow(); + } + } + + // Larger frames with tracking. + for (int dim = 50; dim <= 100; ++dim) { + RegionFlowComputation flow_computation(base_options_, dim, dim); + + cv::Mat input_frame; + GetResizedFrame(dim, dim, &input_frame); + for (int i = 0; i < 5; ++i) { + flow_computation.AddImage(input_frame, 0); + delete flow_computation.RetrieveRegionFlow(); + } + } + + // Different aspect ratios, first frame synthetic only. 
+ for (int y = 1; y <= 50; y += 3) { + for (int x = 1; x <= 100; x += 7) { + RegionFlowComputationOptions options = base_options_; + options.set_use_synthetic_zero_motion_tracks_first_frame(true); + RegionFlowComputation flow_computation(options, x, y); + + cv::Mat input_frame; + GetResizedFrame(x, y, &input_frame); + for (int i = 0; i < 5; ++i) { + flow_computation.AddImage(input_frame, 0); + delete flow_computation.RetrieveRegionFlow(); + } + } + } +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/util/tracking/region_flow_visualization.cc b/mediapipe/util/tracking/region_flow_visualization.cc new file mode 100644 index 000000000..8d36b92a5 --- /dev/null +++ b/mediapipe/util/tracking/region_flow_visualization.cc @@ -0,0 +1,196 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/region_flow_visualization.h" + +#include + +#include +#include + +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/parallel_invoker.h" +#include "mediapipe/util/tracking/region_flow.h" + +namespace mediapipe { + +void VisualizeRegionFlowImpl(const RegionFlowFrame& region_flow_frame, + cv::Mat* output) { + // Just draw a green line for each feature. 
+ for (const auto& region_flow : region_flow_frame.region_flow()) { + cv::Scalar color(0, 255, 0); + + for (const auto& feature : region_flow.feature()) { + cv::Point p1(static_cast(feature.x() + 0.5f), + static_cast(feature.y() + 0.5f)); + + cv::Point p2(static_cast(feature.x() + feature.dx() + 0.5f), + static_cast(feature.y() + feature.dy() + 0.5f)); + + cv::line(*output, p1, p2, color); + } + } +} + +void VisualizeRegionFlow(const RegionFlowFrame& region_flow_frame, + cv::Mat* output) { + CHECK(output); + VisualizeRegionFlowImpl(region_flow_frame, output); +} + +void VisualizeRegionFlowFeaturesImpl(const RegionFlowFeatureList& feature_list, + const cv::Scalar& color, + const cv::Scalar& outlier, + bool irls_visualization, float scale_x, + float scale_y, cv::Mat* output) { + const float kLineScaleOneRows = 1080.0; + const int line_size = + std::max(std::min(output->rows / kLineScaleOneRows, 4), 1); + + const float text_scale = std::max(output->rows, output->cols) * 3e-4; + for (const auto& feature : feature_list.feature()) { + Vector2_i v1 = FeatureIntLocation(feature); + Vector2_i v2 = FeatureMatchIntLocation(feature); + cv::Point p1(v1.x() * scale_x, v1.y() * scale_y); + cv::Point p2(v2.x() * scale_x, v2.y() * scale_y); + + float alpha = 0; + if (irls_visualization) { + if (feature.irls_weight() < 0) { + continue; + } + + if (feature.irls_weight() == 0.0f) { + // Ignored feature, draw as circle. + cv::circle(*output, p1, 4.0 * line_size, outlier, 2 * line_size); + continue; + } + + // Scale irls_weight, such that error of one pixel equals alpha of 0.5 + // (evenly mix inlier and outlier color). + alpha = std::min(1.0f, feature.irls_weight() * 0.5f); + + } else { + const float feature_stdev_l1 = + PatchDescriptorColorStdevL1(feature.feature_descriptor()); + if (feature_stdev_l1 >= 0.0f) { + // Normalize w.r.t. 
maximum per channel standard deviation (128.0) + // and scale (scale could be user-defined and was chosen based on some + // test videos such that only very low textured features are colored + // with outlier color). + alpha = std::min(1.0f, feature_stdev_l1 / 128.f * 6.f); + } else { + alpha = 0.5; // Dummy value indicating, descriptor is not present. + } + } + +#ifdef __APPLE__ + cv::Scalar color_scaled(color.mul(alpha) + outlier.mul(1.0f - alpha)); +#else + cv::Scalar color_scaled(color * alpha + outlier * (1.0f - alpha)); +#endif + cv::line(*output, p1, p2, color_scaled, line_size, CV_AA); + cv::circle(*output, p1, 2.0 * line_size, color_scaled, line_size); + + if (feature.has_label()) { + cv::putText(*output, absl::StrCat(" ", feature.label()), p1, + cv::FONT_HERSHEY_SIMPLEX, text_scale, color_scaled, + 3.0 * text_scale, CV_AA); + } + } +} + +void VisualizeRegionFlowFeatures(const RegionFlowFeatureList& feature_list, + const cv::Scalar& color, + const cv::Scalar& outlier, + bool irls_visualization, float scale_x, + float scale_y, cv::Mat* output) { + CHECK(output); + VisualizeRegionFlowFeaturesImpl(feature_list, color, outlier, + irls_visualization, scale_x, scale_y, output); +} + +void VisualizeLongFeatureStreamImpl(const LongFeatureStream& stream, + const cv::Scalar& color, + const cv::Scalar& outlier, + int min_track_length, + int max_points_per_track, float scale_x, + float scale_y, cv::Mat* output) { + const float text_scale = std::max(output->cols, output->rows) * 3e-4; + for (const auto& track : stream) { + std::vector pts; + std::vector irls_weights; + stream.FlattenTrack(track.second, &pts, &irls_weights, nullptr); + + if (min_track_length > 0 && pts.size() < min_track_length) { + continue; + } + CHECK_GT(pts.size(), 1); // Should have at least two points per track. + + // Tracks are ordered with oldest point first, most recent one last. + const int start_k = + max_points_per_track > 0 + ? 
std::max(0, pts.size() - max_points_per_track) + : 0; + // Draw points in last to first order. + for (int k = start_k; k < pts.size(); ++k) { + cv::Point p1(pts[k].x() * scale_x, pts[k].y() * scale_y); + + if (irls_weights[k] == 0.0f) { + // Ignored feature, draw as circle. + cv::circle(*output, p1, 4.0, outlier, 2); + continue; + } + + const float alpha = std::min(1.0f, irls_weights[k] * 0.5f); +#ifdef __APPLE__ + cv::Scalar color_scaled(color.mul(alpha) + outlier.mul(1.0f - alpha)); +#else + cv::Scalar color_scaled(color * alpha + outlier * (1.0f - alpha)); +#endif + + if (k < pts.size() - 1) { + // Draw line connecting points. + cv::Point p2(pts[k + 1].x() * scale_x, pts[k + 1].y() * scale_y); + cv::line(*output, p1, p2, color_scaled, 1.0, CV_AA); + } + if (k + 1 == pts.size()) { // Last iteration. + cv::circle(*output, p1, 2.0, color_scaled, 1.0); + + const RegionFlowFeature& latest_feature = track.second.back(); + if (latest_feature.has_label()) { + cv::putText(*output, absl::StrCat(" ", latest_feature.label()), p1, + cv::FONT_HERSHEY_SIMPLEX, text_scale, color_scaled, + 3 * text_scale, CV_AA); + } + } + } + } +} + +void VisualizeLongFeatureStream(const LongFeatureStream& stream, + const cv::Scalar& color, + const cv::Scalar& outlier, int min_track_length, + int max_points_per_track, float scale_x, + float scale_y, cv::Mat* output) { + CHECK(output); + VisualizeLongFeatureStreamImpl(stream, color, outlier, min_track_length, + + max_points_per_track, scale_x, scale_y, + output); +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/region_flow_visualization.h b/mediapipe/util/tracking/region_flow_visualization.h new file mode 100644 index 000000000..b28ce9ca8 --- /dev/null +++ b/mediapipe/util/tracking/region_flow_visualization.h @@ -0,0 +1,78 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Small helper function for RegionFlow Visualization. +#ifndef MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_VISUALIZATION_H_ +#define MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_VISUALIZATION_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/region_flow.pb.h" // NOLINT + +namespace mediapipe { + +typedef RegionFlowFrame::RegionFlow RegionFlow; +typedef std::vector RegionFlowFeatureView; + +// Visualizes each tracked feature by a line connecting original and tracked +// points. Uses red for foreground features and blue for background. +// Output is expected to be a VALID 3 channel 8 bit image, clipping of lines +// is performed if points are out of bound. Image should be allocated with the +// same size that was used to compute flow_frame. +void VisualizeRegionFlow(const RegionFlowFrame& region_flow_frame, + cv::Mat* output); + +// Visualizes tracked features by lines connecting original and tracked +// points. Image is expected to be a VALID 3 channel 8 bit image, +// clipping of lines is performed if points are out of bound. +// For color, use for example cv::Scalar(0, 255, 0). If outlier_color != color, +// each feature is colored by linearly blending the two colors w.r.t. 
its +// irls_weight (if irls_visualization is set), otherwise feature descriptor +// patch variance is used for visualization (color for high variance, +// outlier_color for low variance). +// Parameters scale_x and scale_y allow for scaling features and matches +// uniformly before plotting to output. +// TODO: Retire irls_visualization. +void VisualizeRegionFlowFeatures(const RegionFlowFeatureList& feature_list, + const cv::Scalar& color, + const cv::Scalar& outlier, + bool irls_visualization, + float scale_x, // use 1.0 for no scaling + float scale_y, // ditto. + cv::Mat* output); + +// Similar to above, visualizes tracks as lines over time. +// Tracks with length smaller than min_track_length are not visualized. Track +// visualization is limited to max_points_per_track. +class LongFeatureStream; + +void VisualizeLongFeatureStream(const LongFeatureStream& stream, + const cv::Scalar& color, + const cv::Scalar& outlier, int min_track_length, + int max_points_per_track, float scale_x, + float scale_y, cv::Mat* output); + +} // namespace mediapipe +#endif // MEDIAPIPE_UTIL_TRACKING_REGION_FLOW_VISUALIZATION_H_ diff --git a/mediapipe/util/tracking/streaming_buffer.cc b/mediapipe/util/tracking/streaming_buffer.cc new file mode 100644 index 000000000..e089c367f --- /dev/null +++ b/mediapipe/util/tracking/streaming_buffer.cc @@ -0,0 +1,149 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "mediapipe/util/tracking/streaming_buffer.h"
+
+#include "absl/strings/str_cat.h"
+
+namespace mediapipe {
+
+StreamingBuffer::StreamingBuffer(
+    const std::vector<TaggedType>& data_configuration, int overlap)
+    : overlap_(overlap) {
+  CHECK_GE(overlap, 0);
+  for (auto& item : data_configuration) {
+    CHECK(data_config_.find(item.first) == data_config_.end())
+        << "Tag " << item.first << " already exists";
+    data_config_[item.first] = item.second;
+    // Init deque.
+    data_[item.first].clear();
+  }
+}
+
+bool StreamingBuffer::HasTag(const std::string& tag) const {
+  return data_config_.find(tag) != data_config_.end();
+}
+
+bool StreamingBuffer::HasTags(const std::vector<std::string>& tags) const {
+  for (const auto& tag : tags) {
+    if (!HasTag(tag)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+int StreamingBuffer::BufferSize(const std::string& tag) const {
+  CHECK(HasTag(tag));
+  return data_.find(tag)->second.size();
+}
+
+int StreamingBuffer::MaxBufferSize() const {
+  int max_buffer = 0;
+  for (const auto& elem : data_) {
+    max_buffer = std::max(max_buffer, BufferSize(elem.first));
+  }
+  return max_buffer;
+}
+
+bool StreamingBuffer::HaveEqualSize(
+    const std::vector<std::string>& tags) const {
+  if (tags.size() < 2) {
+    return true;
+  }
+  int first_size = BufferSize(tags[0]);
+  for (int k = 1; k < tags.size(); ++k) {
+    // BUGFIX: compare every tag's size (previously re-checked tags[1] only,
+    // so mismatches in tags[2..] were never detected).
+    if (BufferSize(tags[k]) != first_size) {
+      return false;
+    }
+  }
+  return true;
+}
+
+std::vector<std::string> StreamingBuffer::AllTags() const {
+  std::vector<std::string> all_tags;
+  for (auto& item : data_config_) {
+    all_tags.push_back(item.first);
+  }
+  return all_tags;
+}
+
+bool StreamingBuffer::TruncateBuffer(bool flush) {
+  // Only truncate if sufficient elements have been buffered.
+  const int elems_to_clear =
+      std::max(0, MaxBufferSize() - (flush ? 0 : overlap_));
+
+  if (elems_to_clear == 0) {
+    return true;
+  }
+
+  bool is_consistent = true;
+  for (auto& item : data_) {
+    auto& buffer = item.second;
+    const int buffer_elems_to_clear =
+        std::min<int>(elems_to_clear, buffer.size());
+    if (buffer_elems_to_clear < elems_to_clear) {
+      LOG(WARNING) << "For tag " << item.first << " got "
+                   << elems_to_clear - buffer_elems_to_clear
+                   << " fewer elements than buffer can hold.";
+      is_consistent = false;
+    }
+    buffer.erase(buffer.begin(), buffer.begin() + buffer_elems_to_clear);
+  }
+
+  first_frame_index_ += elems_to_clear;
+
+  const int remaining_elems = flush ? 0 : overlap_;
+  // Iterate by const reference; copying would duplicate each buffered deque.
+  for (const auto& item : data_) {
+    const auto& buffer = item.second;
+    if (buffer.size() != remaining_elems) {
+      LOG(WARNING) << "After truncation, for tag " << item.first << " got "
+                   << buffer.size() << " elements, "
+                   << "expected " << remaining_elems;
+      is_consistent = false;
+    }
+  }
+
+  return is_consistent;
+}
+
+void StreamingBuffer::DiscardDatum(const std::string& tag, int num_frames) {
+  CHECK(HasTag(tag));
+  auto& queue = data_[tag];
+  if (queue.empty()) {
+    return;
+  }
+  queue.erase(queue.begin(),
+              queue.begin() + std::min<int>(queue.size(), num_frames));
+}
+
+void StreamingBuffer::DiscardDatumFromEnd(const std::string& tag,
+                                          int num_frames) {
+  CHECK(HasTag(tag));
+  auto& queue = data_[tag];
+  if (queue.empty()) {
+    return;
+  }
+  queue.erase(queue.end() - std::min<int>(queue.size(), num_frames),
+              queue.end());
+}
+
+void StreamingBuffer::DiscardData(const std::vector<std::string>& tags,
+                                  int num_frames) {
+  for (const std::string& tag : tags) {
+    DiscardDatum(tag, num_frames);
+  }
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/util/tracking/streaming_buffer.h b/mediapipe/util/tracking/streaming_buffer.h
new file mode 100644
index 000000000..c2be024f4
--- /dev/null
+++ b/mediapipe/util/tracking/streaming_buffer.h
@@ -0,0 +1,519 @@
+// Copyright 2019 The MediaPipe Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_STREAMING_BUFFER_H_ +#define MEDIAPIPE_UTIL_TRACKING_STREAMING_BUFFER_H_ + +#include +#include +#include +#include +#include +#include + +#include "absl/container/node_hash_map.h" +#include "absl/types/any.h" +#include "mediapipe/framework/port/logging.h" + +namespace mediapipe { + +// Streaming buffer to store arbitrary data over a chunk of frames with overlap +// between chunks. +// Useful to compute solutions that require as input buffered inputs I_ij, of +// type T_j for all frames i of a chunk. Output solutions S_ik of type T_k +// for each frame i (T_k is not associated with input types T_i in any way) +// can than be stored in the buffer as well and made available to the next +// chunk. +// After solution S_ik has been computed, buffered results (I_ij, S_ik) can be +// output and buffer is truncated to discard all elements minus the overlap. +// Remaining elements (I_ij, S_ik) form the basis for the next chunk. +// Again, arbitrary input data I_ij is buffered until buffer is full. +// However, previous solutions S_ik from the overlap still remain in the buffer +// and can be used to compute new solutions in a temporally constrained manner. + +// Detailed usage example: +// Setup streaming buffer, that will buffer cv::Mat's and AffineModel's over a +// set of 100 frames. 
Some algorithm will then compute for each input pair the +// foreground saliency in the form of a SaliencyPointList proto. +// +// // Prepare streaming buffer with 2 inputs and one solution, i.e. 3 different +// // data types. +// vector data_config = { +// TaggedPointerType("frame"), +// TaggedPointerType("motion"), +// TaggedPointerType("saliency") }; +// +// // Create new buffer with overlap of 10 frames between chunks. +// StreamingBuffer streaming_buffer(data_config, 10); +// +// // Buffer inputs. +// for (int k = 0; k < 100; ++k) { +// std::unique_ptr input_frame = ... +// std::unique_ptr affine_model = ... +// streaming_buffer.AddDatum("frame", std::move(input_frame)); +// streaming_buffer.AddDatum("motion", std::move(affine_model); +// // OR: +// streaming_buffer.AddData({"frame", "motion"}, +// std::move(input_frame), +// std::move(affine_model)); +// // Note: WrapUnique from util::gtl is imported into this namespace when +// // including this file. +// +// // Get maximum buffer size. +// int buffer_size = streaming_buffer.MaxBufferSize("frame"); +// +// // Reached chunk boundary? +// if (buffer_size == 100) { +// // Check that we buffered one frame for each motion. +// CHECK(streaming_buffer.HaveEqualSize({"frame", "motion"})); +// +// // Compute saliency. +// for (int k = 0; k < 100; ++k) { +// cv::Mat& frame = streaming_buffer.GetDatum("frame", k); +// AffineModel* model = streaming_buffer.GetMutableDatum( +// "motion", k); +// +// // OR: +// std::tuple data = +// streaming_buffer.GetMutableData({"frame", "motion"}, k); +// +// // Resulting saliency. +// std::unique_ptr saliency(new SaliencyPointList); +// +// // Some Computation using frame and model from above ... +// +// // Store computed saliency. +// streaming_buffer.AddDatum("saliency", saliency.release()); +// } +// +// // Output elements, transfers ownership. 
+// streaming_buffer.OutputDatum( +// false /*is_flush*/, "frame", [](int frame_idx, +// std::unique_ptr frame) { +// }); +// +// // OR: +// streaming_buffer.OutputData(/*is_flush*/, {"frame", "motion", "saliency"}, +// [](int frame_idx, +// std::unique_ptr frame, +// std::unique_ptr model, +// std::unique_ptr saliency) { +// }); +// +// // Truncate buffers for next chunk. +// streaming_buffer.TruncateBuffer(false /* is_flush */); +// +// // End chunk boundary processing. +// } + +// Stores pair (tag, TypeId of type). +typedef std::pair TaggedType; + +// Returns TaggedType for type T* tagged with passed std::string. +template +TaggedType TaggedPointerType(const std::string& tag); + +// Helper function to create unique_ptr's until std::make_unique is allowed. +template +std::unique_ptr MakeUnique(T* ptr); + +template +inline size_t TypeId() { + static_assert(sizeof(char*) <= sizeof(size_t), + "ptr size too large for size_t"); + static char dummy_var; + return reinterpret_cast(&dummy_var); +} + +// Note: If any of the function below are called with a tag not registered by +// the constructor, the function will fail with CHECK. +// Also, if any of the functions below is called with an existing tag but +// incompatible type, the function will fail with CHECK. +class StreamingBuffer { + public: + // Constructs a new buffer with passed mappings (TAG_NAME, DATA_TYPE) of + // maximum size buffer_size. + // Data_configuration must have unique tag for each type. + StreamingBuffer(const std::vector& data_configuration, + int overlap); + + // Call will transfer ownership to StreamingBuffer. + // Returns true if datum was successfully stored, false otherwise. + template + void AddDatum(const std::string& tag, std::unique_ptr pointer); + + // Same as above but forwards pointer T* to a unique_ptr. + // Transfers ownership. + template + void EmplaceDatum(const std::string& tag, T* pointer); + + // Creates a deep copy and stores it in the StreamingBuffer. 
+ template + void AddDatumCopy(const std::string& tag, const T& datum); + + // Convenience function to store multiple pointers at once. + // Returns true if all data was successfully stored, false if at least one + // failed. + template + void AddData(const std::vector& tags, + std::unique_ptr... pointers); + + // Convenience function to add a whole vector of objects to the buffer. + // For each datum a copy will be created. + template + void AddDatumVector(const std::string& tag, const std::vector& datum_vec); + + // Retrieves datum with specified tag and frame index. Returns nullptr if + // datum does not exist. + template + const T* GetDatum(const std::string& tag, int frame_index) const; + + // Gets a reference on the datum. + template + T& GetDatumRef(const std::string& tag, int frame_index) const; + + // Same as above for mutable pointer. + template + T* GetMutableDatum(const std::string& tag, int frame_index) const; + + // Access all elements for a tag as vector, recommended usage: + // for (auto ptr : buffer.GetDatumVector("some_tag")) { ... } + template + std::vector GetDatumVector(const std::string& tag) const; + template + std::vector GetMutableDatumVector(const std::string& tag) const; + + // Gets a vector of references. + template + std::vector> GetReferenceVector( + const std::string& tag) const; + + template + std::vector> GetConstReferenceVector( + const std::string& tag) const; + + // Returns number of buffered inputs for specified tag. + int BufferSize(const std::string& tag) const; + + // Returns maximum over all tags. + int MaxBufferSize() const; + + // Returns true if the buffers for all passed tags have equal size. + // Call with HaveEqualSize(AllTags()) to check if all buffers have equal size. + bool HaveEqualSize(const std::vector& tags) const; + + // Check if all items buffered for the specified tag are initialized (i.e. not + // equal to nullptr). + template + bool IsInitialized(const std::string& tag) const; + + // Returns all tags. 
+ std::vector AllTags() const; + + // Output function to be called after output values have been computed from + // input values and stored in the buffer as well. + // Use to transfer ownership for buffered content out of the streaming buffer, + // by iteratively calling an output functor for each frame. + // + // Specifically, functor is repeatedly called with: + // void operator()(int frame_index, std::unique_ptr pointer) + // + // If flush is set, functor will be called with all frames in the half-open + // interval [0, MaxBufferSize()), otherwise interval is limited to + // [0, MaxBufferSize() - overlap). + // Note that the interval is independent of the tag, i.e. for consistency + // the interval is constant for all tags (if items are not buffered or + // exists a nullptr is passed instead). + // Ownership is transferred to caller via unique_ptr. + // Element stored in buffer is reset to nullptr. + // Note: Does not truncate the actual buffer, use TruncateBuffer after + // outputting all desired data. + template + void OutputDatum(bool flush, const std::string& tag, const Functor& functor); + + // Same as above for multiple elements. + // Functor is repeatedly called with: + // void operator()(int frame_index, std::unique_ptr... pointers) + template + void OutputData(bool flush, const std::vector& tags, + const Functor& functor); + + // Releases and returns the input at the specified tag and frame_index. + // Returns nullptr if datum does not exist. + // Element stored in buffer is reset to nullptr. + // It is recommended that above OutputFunctions are used instead. + template + std::unique_ptr ReleaseDatum(const std::string& tag, int frame_index); + + // Truncates the buffer by discarding all its elements within the interval + // [0, MaxBufferSize() - overlap] if flush is set to false, or + // [0, MaxBufferSize()] otherwise.
+ // Should be called after chunk boundary has been reached, computation of + // outputs from inputs is done and data has been Output via + // Output[Datum|Data]. + // Returns true if each element within the truncated interval exists, + // and all buffers have the same number of remaining elements + // (#overlap if flush is false, zero otherwise). + bool TruncateBuffer(bool flush); + + // Discards first num_frames of data for specified tag. + // This is different from ReleaseDatum, as it will shrink the buffer for + // the specified tag. + void DiscardDatum(const std::string& tag, int num_frames); + + // Same as above, but removes num_frames items from the end of the buffer. + void DiscardDatumFromEnd(const std::string& tag, int num_frames); + + // Same as above for a list of tags. + void DiscardData(const std::vector& tags, int num_frames); + + // Returns true if tag exists. + bool HasTag(const std::string& tag) const; + // Returns true if all passed tags exist. + bool HasTags(const std::vector& tags) const; + + // Returns frame index of the first item in the buffer. This is useful to + // determine how many frames in total went through the buffer after several + // *successful* calls of Truncate(). + int FirstFrameIndex() const { return first_frame_index_; } + + template + using PointerType = std::shared_ptr>; + template + PointerType CreatePointer(T* t); + + private: + // Implementation function that acquires ownership in case a failure occurs + // while adding a single argument pointer. + template + void AddDataImpl(const std::vector& tags, + std::unique_ptr pointer, + std::unique_ptr... pointers); + // Terminates recursive template expansion for AddDataImpl. Will never be + // called. + void AddDataImpl(const std::vector& tags) { + CHECK(tags.empty()); + } + + private: + int overlap_ = 0; + int first_frame_index_ = 0; + absl::node_hash_map> data_; + + // Stores tag, TypeId of corresponding type.
+ absl::node_hash_map data_config_; +}; + +//// Implementation details. +template +TaggedType TaggedPointerType(const std::string& tag) { + return std::make_pair(tag, TypeId>()); +} + +template +StreamingBuffer::PointerType StreamingBuffer::CreatePointer(T* t) { + return PointerType(new std::unique_ptr(t)); +} + +template +void StreamingBuffer::AddDatum(const std::string& tag, + std::unique_ptr pointer) { + CHECK(HasTag(tag)); + CHECK_EQ(data_config_[tag], TypeId>()); + auto& buffer = data_[tag]; + absl::any packet(PointerType(CreatePointer(pointer.release()))); + buffer.push_back(packet); +} + +template +void StreamingBuffer::EmplaceDatum(const std::string& tag, T* pointer) { + std::unique_ptr forwarded(pointer); + AddDatum(tag, std::move(forwarded)); +} + +template +void StreamingBuffer::AddDatumCopy(const std::string& tag, const T& datum) { + AddDatum(tag, std::unique_ptr(new T(datum))); +} + +template +void StreamingBuffer::AddData(const std::vector& tags, + std::unique_ptr... pointers) { + CHECK_EQ(tags.size(), sizeof...(pointers)) + << "Number of tags and data pointers is inconsistent"; + return AddDataImpl(tags, std::move(pointers)...); +} + +template +void StreamingBuffer::AddDatumVector(const std::string& tag, + const std::vector& datum_vec) { + for (const auto& datum : datum_vec) { + AddDatumCopy(tag, datum); + } +} + +template +void StreamingBuffer::AddDataImpl(const std::vector& tags, + std::unique_ptr pointer, + std::unique_ptr... 
pointers) { + AddDatum(tags[0], std::move(pointer)); + if (sizeof...(pointers) > 0) { + return AddDataImpl(std::vector(tags.begin() + 1, tags.end()), + std::move(pointers)...); + } +} + +template +std::unique_ptr MakeUnique(T* t) { + return std::unique_ptr(t); +} + +template +const T* StreamingBuffer::GetDatum(const std::string& tag, + int frame_index) const { + return GetMutableDatum(tag, frame_index); +} + +template +T& StreamingBuffer::GetDatumRef(const std::string& tag, int frame_index) const { + return *GetMutableDatum(tag, frame_index); +} + +template +T* StreamingBuffer::GetMutableDatum(const std::string& tag, + int frame_index) const { + CHECK_GE(frame_index, 0); + CHECK(HasTag(tag)); + auto& buffer = data_.find(tag)->second; + if (frame_index > buffer.size()) { + return nullptr; + } else { + const absl::any& packet = buffer[frame_index]; + if (absl::any_cast>(&packet) == nullptr) { + LOG(ERROR) << "Stored item is not of requested type. " + << "Check data configuration."; + return nullptr; + } + + // Unpack and return. 
+ const PointerType& pointer = + *absl::any_cast>(&packet); + return pointer->get(); + } +} + +template +std::vector StreamingBuffer::GetDatumVector( + const std::string& tag) const { + std::vector result(GetMutableDatumVector(tag)); + return std::vector(result.begin(), result.end()); +} + +template +std::vector> StreamingBuffer::GetReferenceVector( + const std::string& tag) const { + std::vector ptrs(GetMutableDatumVector(tag)); + std::vector> refs; + refs.reserve(ptrs.size()); + for (T* ptr : ptrs) { + refs.push_back(std::ref(*ptr)); + } + return refs; +} + +template +std::vector> +StreamingBuffer::GetConstReferenceVector(const std::string& tag) const { + std::vector ptrs(GetDatumVector(tag)); + std::vector> refs; + refs.reserve(ptrs.size()); + for (const T* ptr : ptrs) { + refs.push_back(std::cref(*ptr)); + } + return refs; +} + +template +bool StreamingBuffer::IsInitialized(const std::string& tag) const { + CHECK(HasTag(tag)); + const auto& buffer = data_.find(tag)->second; + int idx = 0; + for (const auto& item : buffer) { + const PointerType* pointer = absl::any_cast>(&item); + CHECK(pointer != nullptr); + if (*pointer == nullptr) { + LOG(ERROR) << "Data for " << tag << " at frame " << idx + << " is not initialized."; + return false; + } + } + return true; +} + +template +std::vector StreamingBuffer::GetMutableDatumVector( + const std::string& tag) const { + CHECK(HasTag(tag)); + auto& buffer = data_.find(tag)->second; + std::vector result; + for (const auto& packet : buffer) { + if (absl::any_cast>(&packet) == nullptr) { + LOG(ERROR) << "Stored item is not of requested type. " + << "Check data configuration."; + result.push_back(nullptr); + } else { + result.push_back( + absl::any_cast>(&packet)->get()->get()); + } + } + return result; +} + +template +void StreamingBuffer::OutputDatum(bool flush, const std::string& tag, + const Functor& functor) { + CHECK(HasTag(tag)); + const int end_frame = MaxBufferSize() - (flush ? 
0 : overlap_); + for (int k = 0; k < end_frame; ++k) { + functor(k, ReleaseDatum(tag, k)); + } +} + +template +std::unique_ptr StreamingBuffer::ReleaseDatum(const std::string& tag, + int frame_index) { + CHECK(HasTag(tag)); + CHECK_GE(frame_index, 0); + + auto& buffer = data_.find(tag)->second; + if (frame_index >= buffer.size()) { + return nullptr; + } else { + const absl::any& packet = buffer[frame_index]; + if (absl::any_cast>(&packet) == nullptr) { + LOG(ERROR) << "Stored item is not of requested type. " + << "Check data configuration."; + return nullptr; + } + + // Unpack and return. + const PointerType& pointer = + *absl::any_cast>(&packet); + return std::move(*pointer); + } +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_STREAMING_BUFFER_H_ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0000 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0000 new file mode 100644 index 000000000..3da0a3784 Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0000 differ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0001 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0001 new file mode 100644 index 000000000..dddde7033 Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0001 differ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0002 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0002 new file mode 100644 index 000000000..30fe6cfd5 Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0002 differ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0003 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0003 new file mode 100644 index 000000000..5e481e56d Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0003 differ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0004 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0004 new file mode 100644 index 
000000000..b6d55e3d8 Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0004 differ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0005 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0005 new file mode 100644 index 000000000..3a4609500 Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0005 differ diff --git a/mediapipe/util/tracking/testdata/box_tracker/chunk_0006 b/mediapipe/util/tracking/testdata/box_tracker/chunk_0006 new file mode 100644 index 000000000..3992696d5 Binary files /dev/null and b/mediapipe/util/tracking/testdata/box_tracker/chunk_0006 differ diff --git a/mediapipe/util/tracking/testdata/stabilize_test.png b/mediapipe/util/tracking/testdata/stabilize_test.png new file mode 100644 index 000000000..239a3ff9e Binary files /dev/null and b/mediapipe/util/tracking/testdata/stabilize_test.png differ diff --git a/mediapipe/util/tracking/tone_estimation.cc b/mediapipe/util/tracking/tone_estimation.cc new file mode 100644 index 000000000..fd6e8edfe --- /dev/null +++ b/mediapipe/util/tracking/tone_estimation.cc @@ -0,0 +1,377 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/util/tracking/tone_estimation.h" + +#include + +#include +#include +#include +#include + +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/tone_models.pb.h" + +namespace mediapipe { + +ToneEstimation::ToneEstimation(const ToneEstimationOptions& options, + int frame_width, int frame_height) + : options_(options), + frame_width_(frame_width), + frame_height_(frame_height), + original_width_(frame_width), + original_height_(frame_height) { + switch (options_.downsample_mode()) { + case ToneEstimationOptions::DOWNSAMPLE_NONE: + break; + case ToneEstimationOptions::DOWNSAMPLE_TO_MAX_SIZE: { + const float max_size = std::max(frame_width_, frame_height_); + if (max_size > 1.03f * options_.downsampling_size()) { + downsample_scale_ = max_size / options_.downsampling_size(); + frame_height_ /= downsample_scale_; + frame_width_ /= downsample_scale_; + use_downsampling_ = true; + } + break; + } + case ToneEstimationOptions::DOWNSAMPLE_TO_MIN_SIZE: { + const float min_size = std::min(frame_width_, frame_height_); + if (min_size > 1.03f * options_.downsampling_size()) { + downsample_scale_ = min_size / options_.downsampling_size(); + frame_height_ /= downsample_scale_; + frame_width_ /= downsample_scale_; + use_downsampling_ = true; + } + + break; + } + case ToneEstimationOptions::DOWNSAMPLE_BY_FACTOR: { + CHECK_GE(options_.downsample_factor(), 1); + frame_width_ /= options_.downsample_factor(); + frame_height_ /= options_.downsample_factor(); + downsample_scale_ = options_.downsample_factor(); + use_downsampling_ = true; + break; + } + } + + if (use_downsampling_) { + resized_input_.reset(new cv::Mat(frame_height_, frame_width_, CV_8UC3)); + prev_resized_input_.reset( + new cv::Mat(frame_height_, frame_width_, CV_8UC3)); + } +} + +ToneEstimation::~ToneEstimation() {} + +void ToneEstimation::EstimateToneChange( + const RegionFlowFeatureList& feature_list_input, + const cv::Mat& curr_frame_input, const cv::Mat* 
prev_frame_input, + ToneChange* tone_change, cv::Mat* debug_output) { + CHECK_EQ(original_height_, curr_frame_input.rows); + CHECK_EQ(original_width_, curr_frame_input.cols); + CHECK(tone_change != nullptr); + + const cv::Mat& curr_frame = + use_downsampling_ ? *resized_input_ : curr_frame_input; + const cv::Mat* prev_frame = (use_downsampling_ && prev_frame_input) + ? prev_resized_input_.get() + : prev_frame_input; + + RegionFlowFeatureList scaled_feature_list; + const RegionFlowFeatureList& feature_list = + use_downsampling_ ? scaled_feature_list : feature_list_input; + + if (use_downsampling_) { + cv::resize(curr_frame_input, *resized_input_, resized_input_->size()); + if (prev_frame_input) { + cv::resize(*prev_frame_input, *prev_resized_input_, + prev_resized_input_->size()); + } + LinearSimilarityModel scale_transform; + scale_transform.set_a(1.0f / downsample_scale_); + scaled_feature_list = feature_list_input; + TransformRegionFlowFeatureList(scale_transform, &scaled_feature_list); + } + + CHECK_EQ(frame_height_, curr_frame.rows); + CHECK_EQ(frame_width_, curr_frame.cols); + + ClipMask<3> curr_clip; + ComputeClipMask<3>(options_.clip_mask_options(), curr_frame, &curr_clip); + + // Compute tone statistics. + tone_change->set_frac_clipped(cv::sum(curr_clip.mask)[0] / + (frame_height_ * frame_width_)); + + IntensityPercentiles(curr_frame, curr_clip.mask, + options_.tone_match_options().log_domain(), tone_change); + + ColorToneMatches color_tone_matches; + // TODO: Buffer clip mask. 
+ if (prev_frame) { + ClipMask<3> prev_clip; + ComputeClipMask<3>(options_.clip_mask_options(), *prev_frame, &prev_clip); + ComputeToneMatches<3>(options_.tone_match_options(), feature_list, + curr_frame, *prev_frame, curr_clip, prev_clip, + &color_tone_matches, debug_output); + + EstimateGainBiasModel(options_.irls_iterations(), &color_tone_matches, + tone_change->mutable_gain_bias()); + + if (!IsStableGainBiasModel(options_.stable_gain_bias_bounds(), + tone_change->gain_bias(), color_tone_matches, + tone_change->mutable_stability_stats())) { + VLOG(1) << "Warning: Estimated gain-bias is unstable."; + // Reset to identity. + tone_change->mutable_gain_bias()->CopyFrom(GainBiasModel()); + tone_change->set_type(ToneChange::INVALID); + } + + // TODO: EstimateMixtureGainBiasModel(); + } +} + +void ToneEstimation::IntensityPercentiles(const cv::Mat& frame, + const cv::Mat& clip_mask, + bool log_domain, + ToneChange* tone_change) const { + cv::Mat intensity(frame.rows, frame.cols, CV_8UC1); + cv::cvtColor(frame, intensity, CV_RGB2GRAY); + + std::vector histogram(256, 0.0f); + + for (int i = 0; i < intensity.rows; ++i) { + const uint8* intensity_ptr = intensity.ptr(i); + const uint8* clip_ptr = clip_mask.ptr(i); + + for (int j = 0; j < intensity.cols; ++j) { + if (!clip_ptr[j]) { + ++histogram[intensity_ptr[j]]; + } + } + } + + // Construct cumulative histogram. + std::partial_sum(histogram.begin(), histogram.end(), histogram.begin()); + + // Normalize histogram. + const float histogram_sum = histogram.back(); + if (histogram_sum == 0) { + // Frame is of solid color. Use default values. 
+ return; + } + + const float denom = 1.0f / histogram_sum; + for (auto& entry : histogram) { + entry *= denom; + } + + std::vector percentiles; + percentiles.push_back(options_.stats_low_percentile()); + percentiles.push_back(options_.stats_low_mid_percentile()); + percentiles.push_back(options_.stats_mid_percentile()); + percentiles.push_back(options_.stats_high_mid_percentile()); + percentiles.push_back(options_.stats_high_percentile()); + + std::vector percentile_values(percentiles.size()); + + const float log_denom = 1.0f / LogDomainLUT().MaxLogDomainValue(); + for (int k = 0; k < percentile_values.size(); ++k) { + const int percentile_bin = + std::lower_bound(histogram.begin(), histogram.end(), percentiles[k]) - + histogram.begin(); + percentile_values[k] = percentile_bin; + if (log_domain) { + percentile_values[k] = + LogDomainLUT().Map(percentile_values[k]) * log_denom; + } else { + percentile_values[k] *= (1.0f / 255.0f); + } + } + + tone_change->set_low_percentile(percentile_values[0]); + tone_change->set_low_mid_percentile(percentile_values[1]); + tone_change->set_mid_percentile(percentile_values[2]); + tone_change->set_high_mid_percentile(percentile_values[3]); + tone_change->set_high_percentile(percentile_values[4]); +} + +void ToneEstimation::EstimateGainBiasModel(int irls_iterations, + ColorToneMatches* color_tone_matches, + GainBiasModel* gain_bias_model) { + CHECK(color_tone_matches != nullptr); + CHECK(gain_bias_model != nullptr); + + // Effectively estimate each model independently. + float solution_ptr[6] = {1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f}; + + const int num_channels = color_tone_matches->size(); + CHECK_GT(num_channels, 0); + CHECK_LE(num_channels, 3); + + // TODO: One IRLS weight per color match. + for (int c = 0; c < num_channels; ++c) { + std::deque& patch_tone_matches = (*color_tone_matches)[c]; + // Reset irls weight. 
+ int num_matches = 0; + for (auto& patch_tone_match : patch_tone_matches) { + patch_tone_match.set_irls_weight(1.0); + num_matches += patch_tone_match.tone_match_size(); + } + + // Do not attempt solution if no matches have been found. + if (num_matches < 3) { + continue; + } + + cv::Mat model_mat(num_matches, 2, CV_32F); + cv::Mat rhs(num_matches, 1, CV_32F); + cv::Mat solution(2, 1, CV_32F); + + for (int iteration = 0; iteration < irls_iterations; ++iteration) { + // Setup matrix. + int row = 0; + for (const auto& patch_tone_match : patch_tone_matches) { + const float irls_weight = patch_tone_match.irls_weight(); + for (const auto& tone_match : patch_tone_match.tone_match()) { + float* row_ptr = model_mat.ptr(row); + float* rhs_ptr = rhs.ptr(row); + row_ptr[0] = tone_match.curr_val() * irls_weight; + row_ptr[1] = irls_weight; + rhs_ptr[0] = tone_match.prev_val() * irls_weight; + ++row; + } + } + + // Solve. + if (!cv::solve(model_mat, rhs, solution, cv::DECOMP_QR)) { + // Fallback to identity. + solution_ptr[2 * c] = 1; + solution_ptr[2 * c + 1] = 0; + break; // Break to next color channel. + } + + float a = solution.at(0, 0); + float b = solution.at(1, 0); + + // Copy to solution. + solution_ptr[2 * c] = a; + solution_ptr[2 * c + 1] = b; + + // Evaluate error. + for (auto& patch_tone_match : patch_tone_matches) { + const int num_tone_matches = patch_tone_match.tone_match_size(); + + if (num_tone_matches == 0) { + continue; + } + + float summed_error = 0.0f; + for (const auto& tone_match : patch_tone_match.tone_match()) { + // Express tone registration error in 0 .. 100. + const float error = + 100.0f * (tone_match.curr_val() * a + b - tone_match.prev_val()); + summed_error += error * error; + } + + // Compute RMSE. + const float patch_error = + std::sqrt(static_cast(summed_error / num_tone_matches)); + // TODO: L1 instead of L0?
+ patch_tone_match.set_irls_weight(1.0f / (patch_error + 1e-6f)); + } + } + } + + gain_bias_model->CopyFrom( + GainBiasModelAdapter::FromPointer(solution_ptr, false)); + + // Test invertability, reset if failed. + const float det = gain_bias_model->gain_c1() * gain_bias_model->gain_c2() * + gain_bias_model->gain_c3(); + if (fabs(det) < 1e-6f) { + LOG(WARNING) << "Estimated gain bias model is not invertible. " + << "Falling back to identity model."; + gain_bias_model->CopyFrom(GainBiasModel()); + } +} + +bool ToneEstimation::IsStableGainBiasModel( + const ToneEstimationOptions::GainBiasBounds& bounds, + const GainBiasModel& model, const ColorToneMatches& color_tone_matches, + ToneChange::StabilityStats* stats) { + if (stats != nullptr) { + stats->Clear(); + } + + // Test each channel for stability. + if (model.gain_c1() < bounds.lower_gain() || + model.gain_c1() > bounds.upper_gain() || + model.bias_c1() < bounds.lower_bias() || + model.bias_c1() > bounds.upper_bias()) { + return false; + } + + if (model.gain_c2() < bounds.lower_gain() || + model.gain_c2() > bounds.upper_gain() || + model.bias_c2() < bounds.lower_bias() || + model.bias_c2() > bounds.upper_bias()) { + return false; + } + + if (model.gain_c3() < bounds.lower_gain() || + model.gain_c3() > bounds.upper_gain() || + model.bias_c3() < bounds.lower_bias() || + model.bias_c3() > bounds.upper_bias()) { + return false; + } + + // Test each channel independently. + int total_inliers = 0; + int total_tone_matches = 0; + double total_inlier_weight = 0.0; + for (const auto& patch_tone_matches : color_tone_matches) { + int num_inliers = 0; + for (const auto& patch_tone_match : patch_tone_matches) { + if (patch_tone_match.irls_weight() > bounds.min_inlier_weight()) { + ++num_inliers; + // Clamp the weight to a registration error of 1 intensity value + // difference (out of 255). Since weight are inversely proportional to + // registration errors in the range 0..100, this corresponds to a max + // weight of 2.55. 
+ total_inlier_weight += std::min(2.55f, patch_tone_match.irls_weight()); + } + } + + if (num_inliers < + bounds.min_inlier_fraction() * patch_tone_matches.size()) { + return false; + } + + total_inliers += num_inliers; + total_tone_matches += patch_tone_matches.size(); + } + + if (stats != nullptr && total_tone_matches > 0) { + stats->set_num_inliers(total_inliers); + stats->set_inlier_fraction(total_inliers * 1.0f / total_tone_matches); + stats->set_inlier_weight(total_inlier_weight); + } + + return true; +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/tone_estimation.h b/mediapipe/util/tracking/tone_estimation.h new file mode 100644 index 000000000..0fa049e2b --- /dev/null +++ b/mediapipe/util/tracking/tone_estimation.h @@ -0,0 +1,409 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Fits tone models to intensity matches +// (gathered from order statistics of matching patches supplied by +// RegionFlowFeatureList's). 
+ +#ifndef MEDIAPIPE_UTIL_TRACKING_TONE_ESTIMATION_H_ +#define MEDIAPIPE_UTIL_TRACKING_TONE_ESTIMATION_H_ + +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/region_flow.h" +#include "mediapipe/util/tracking/region_flow.pb.h" +#include "mediapipe/util/tracking/tone_estimation.pb.h" +#include "mediapipe/util/tracking/tone_models.h" + +namespace mediapipe { + +class GainBiasModel; +class RegionFlowFeatureList; + +typedef std::deque PatchToneMatches; + +// Each vector element presents its own channel. +typedef std::vector ColorToneMatches; + +// Clip mask for C channels. +template +struct ClipMask { + ClipMask() { + min_exposure_threshold.resize(C); + max_exposure_threshold.resize(C); + } + + cv::Mat mask; + std::vector min_exposure_threshold; + std::vector max_exposure_threshold; +}; + +class ToneEstimation { + public: + ToneEstimation(const ToneEstimationOptions& options, int frame_width, + int frame_height); + virtual ~ToneEstimation(); + ToneEstimation(const ToneEstimation&) = delete; + ToneEstimation& operator=(const ToneEstimation&) = delete; + + // Estimates ToneChange models from matching feature points. + // Input RegionFlowFeatureList supplies (x, y) matches, where x is a feature + // point in curr_frame, and y the matching feature point in prev_frame. The + // estimated ToneChange describes the change from curr_frame to + // prev_frame in the tone domain using a variety of linear models as specified + // by ToneEstimationOptions. + // If debug_output is not nullptr, contains visualization of inlier patches + // and clip masks for debugging: current and previous frames are rendered + // side-by-side (left:current, right:previous). 
+ // TODO: Parallel estimation across frames. + void EstimateToneChange(const RegionFlowFeatureList& feature_list, + const cv::Mat& curr_frame, const cv::Mat* prev_frame, + ToneChange* tone_change, + cv::Mat* debug_output = nullptr); + + // Returns mask of clipped pixels for input frame + // (pixels that are clipped due to limited dynamic range are set to 1), + // as well as per channel value denoting the min and max exposure threshold. + template + static void ComputeClipMask(const ClipMaskOptions& options, + const cv::Mat& frame, ClipMask* clip_mask); + + // Returns color tone matches of size C from passed feature list. Tone matches + // are obtained from order statistics of sampled patches at feature locations. + // Matches are obtained for each color channel, with over and under-exposed + // patches being discarded. + // If debug_output is not NULL, contains visualization of inlier patches and + // clip masks for debugging. + template + static void ComputeToneMatches(const ToneMatchOptions& tone_match_options, + const RegionFlowFeatureList& feature_list, + const cv::Mat& curr_frame, + const cv::Mat& prev_frame, + const ClipMask& curr_clip_mask, + const ClipMask& prev_clip_mask, + ColorToneMatches* color_tone_matches, + cv::Mat* debug_output = nullptr); + + // Can be called with color tone matches for 1 - 3 channels, will simply set + // missing channels in GainBiasModel to identity. + // Returns the estimated gain and bias model for tone change, + // that maps colors in current frame to those in the prev frame + // using color_tone_matches as corresponding samples. Modifies + // the irls_weights of samples in color_tone_matches based on their + // fit to the estimated gain_bias_model. 
+ static void EstimateGainBiasModel(int irls_iterations, + ColorToneMatches* color_tone_matches, + GainBiasModel* gain_bias_model); + + // Tests if the estimated gain-bias model is stable based on: + // (1) whether the model parameters are within gain and bias bounds, and + // (2) there are enough inliers in the tone matches. + // Some statistics related to stability analysis are returned in stats (if + // not NULL). + static bool IsStableGainBiasModel( + const ToneEstimationOptions::GainBiasBounds& gain_bias_bounds, + const GainBiasModel& model, const ColorToneMatches& tone_matches, + ToneChange::StabilityStats* stats = nullptr); + + private: + // Computes normalized intensity percentiles. + void IntensityPercentiles(const cv::Mat& frame, const cv::Mat& clip_mask, + bool log_domain, ToneChange* tone_change) const; + + private: + ToneEstimationOptions options_; + int frame_width_; + int frame_height_; + + int original_width_; + int original_height_; + + float downsample_scale_ = 1.0f; + bool use_downsampling_ = false; + + std::unique_ptr resized_input_; + std::unique_ptr prev_resized_input_; +}; + +// Template implementation functions. +template +void ToneEstimation::ComputeClipMask(const ClipMaskOptions& options, + const cv::Mat& frame, + ClipMask* clip_mask) { + CHECK(clip_mask != nullptr); + CHECK_EQ(frame.channels(), C); + + // Over / Underexposure handling. + // Masks pixels affected by clipping. 
+ clip_mask->mask.create(frame.rows, frame.cols, CV_8U); + + const float min_exposure_thresh = options.min_exposure() * 255.0f; + const float max_exposure_thresh = options.max_exposure() * 255.0f; + const int max_clipped_channels = options.max_clipped_channels(); + + std::vector planes; + cv::split(frame, planes); + CHECK_EQ(C, planes.size()); + float min_exposure[C]; + float max_exposure[C]; + for (int c = 0; c < C; ++c) { + min_exposure[c] = min_exposure_thresh; + max_exposure[c] = max_exposure_thresh; + } + + for (int c = 0; c < C; ++c) { + clip_mask->min_exposure_threshold[c] = min_exposure[c]; + clip_mask->max_exposure_threshold[c] = max_exposure[c]; + } + + for (int i = 0; i < frame.rows; ++i) { + const uint8* img_ptr = frame.ptr(i); + uint8* clip_ptr = clip_mask->mask.template ptr(i); + + for (int j = 0; j < frame.cols; ++j) { + const int idx = C * j; + int clipped_channels = 0; // Count clipped channels. + + for (int p = 0; p < C; ++p) { + clipped_channels += + static_cast(img_ptr[idx + p] < min_exposure[p] || + img_ptr[idx + p] > max_exposure[p]); + } + + if (clipped_channels > max_clipped_channels) { + clip_ptr[j] = 1; + } else { + clip_ptr[j] = 0; + } + } + } + + // Dilate to address blooming. + const int dilate_diam = options.clip_mask_diameter(); + const int dilate_rad = ceil(dilate_diam * 0.5); + // Remove border from dilate, as cv::dilate reads out of + // bound values otherwise. 
+ if (clip_mask->mask.rows > 2 * dilate_rad && + clip_mask->mask.cols > 2 * dilate_rad) { + cv::Mat dilate_domain = + cv::Mat(clip_mask->mask, + cv::Range(dilate_rad, clip_mask->mask.rows - dilate_rad), + cv::Range(dilate_rad, clip_mask->mask.cols - dilate_rad)); + cv::Mat kernel(options.clip_mask_diameter(), options.clip_mask_diameter(), + CV_8U); + kernel.setTo(1.0); + cv::dilate(dilate_domain, dilate_domain, kernel); + } +} + +template +void ToneEstimation::ComputeToneMatches( + const ToneMatchOptions& options, const RegionFlowFeatureList& feature_list, + const cv::Mat& curr_frame, const cv::Mat& prev_frame, + const ClipMask& curr_clip_mask, // Optional. + const ClipMask& prev_clip_mask, // Optional. + ColorToneMatches* color_tone_matches, cv::Mat* debug_output) { + CHECK(color_tone_matches != nullptr); + CHECK_EQ(curr_frame.channels(), C); + CHECK_EQ(prev_frame.channels(), C); + + color_tone_matches->clear(); + color_tone_matches->resize(C); + + const int patch_diam = 2 * options.patch_radius() + 1; + const int patch_area = patch_diam * patch_diam; + const float patch_denom = 1.0f / (patch_area); + const float log_denom = 1.0f / LogDomainLUT().MaxLogDomainValue(); + + int num_matches = 0; + std::vector curr_intensities[C]; + std::vector prev_intensities[C]; + for (int c = 0; c < C; ++c) { + curr_intensities[c].resize(256, 0); + prev_intensities[c].resize(256, 0); + } + + // Debugging output. Horizontally concatenate current and previous frames. 
+ cv::Mat curr_debug; + cv::Mat prev_debug; + if (debug_output != nullptr) { + const int rows = std::max(curr_frame.rows, prev_frame.rows); + const int cols = curr_frame.cols + prev_frame.cols; + const cv::Rect curr_rect(cv::Point(0, 0), curr_frame.size()); + const cv::Rect prev_rect(cv::Point(curr_frame.cols, 0), prev_frame.size()); + debug_output->create(rows, cols, CV_8UC3); + debug_output->setTo(cv::Scalar(255, 0, 0)); + curr_debug = (*debug_output)(curr_rect); + prev_debug = (*debug_output)(prev_rect); + curr_frame.copyTo(curr_debug, curr_clip_mask.mask ^ cv::Scalar(1)); + prev_frame.copyTo(prev_debug, prev_clip_mask.mask ^ cv::Scalar(1)); + } + + const int patch_radius = options.patch_radius(); + const int frame_width = curr_frame.cols; + const int frame_height = curr_frame.rows; + for (const auto& feature : feature_list.feature()) { + // Extract matching masks at this feature. + Vector2_i curr_loc = FeatureIntLocation(feature); + Vector2_i prev_loc = FeatureMatchIntLocation(feature); + + // Start bound inclusive, end bound exclusive. + Vector2_i curr_start = Vector2_i(std::max(0, curr_loc.x() - patch_radius), + std::max(0, curr_loc.y() - patch_radius)); + + Vector2_i curr_end = + Vector2_i(std::min(frame_width, curr_loc.x() + patch_radius + 1), + std::min(frame_height, curr_loc.y() + patch_radius + 1)); + + Vector2_i prev_start = Vector2_i(std::max(0, prev_loc.x() - patch_radius), + std::max(0, prev_loc.y() - patch_radius)); + + Vector2_i prev_end = + Vector2_i(std::min(frame_width, prev_loc.x() + patch_radius + 1), + std::min(frame_height, prev_loc.y() + patch_radius + 1)); + + Vector2_i curr_size = curr_end - curr_start; + Vector2_i prev_size = prev_end - prev_start; + + if (prev_size != curr_size || + (curr_size.x() * curr_size.y()) != patch_area) { + continue; // Ignore border patches. + } + + // TODO: Instead of summing masks, perform box filter over mask + // and just grab element at feature location. 
+ cv::Mat curr_patch_mask(curr_clip_mask.mask, + cv::Range(curr_start.y(), curr_end.y()), + cv::Range(curr_start.x(), curr_end.x())); + cv::Mat prev_patch_mask(prev_clip_mask.mask, + cv::Range(prev_start.y(), prev_end.y()), + cv::Range(prev_start.x(), prev_end.x())); + + // Skip patch if too many clipped pixels. + if (cv::sum(curr_patch_mask)[0] * patch_denom > + options.max_frac_clipped() || + cv::sum(prev_patch_mask)[0] * patch_denom > + options.max_frac_clipped()) { + continue; + } + + // Extract matching patches at this feature. + cv::Mat curr_patch(curr_frame, cv::Range(curr_start.y(), curr_end.y()), + cv::Range(curr_start.x(), curr_end.x())); + cv::Mat prev_patch(prev_frame, cv::Range(prev_start.y(), prev_end.y()), + cv::Range(prev_start.x(), prev_end.x())); + + // Reset histograms to zero. + for (int c = 0; c < C; ++c) { + std::fill(curr_intensities[c].begin(), curr_intensities[c].end(), 0); + std::fill(prev_intensities[c].begin(), prev_intensities[c].end(), 0); + } + + // Build histogram (to sidestep sorting). + // Note: We explicitly add over and under-exposed pixels to the + // histogram, as we are going to sample the histograms at specific + // percentiles modeling the shift in intensity between frames. + // (If the average intensity increases by N, histogram shifts by N + // bins to the right). However, matches that are over or underexposed + // are discarded afterwards. + for (int i = 0; i < patch_diam; ++i) { + const uint8* prev_ptr = prev_patch.ptr(i); + const uint8* curr_ptr = curr_patch.ptr(i); + for (int j = 0; j < patch_diam; ++j) { + const int j_c = C * j; + for (int c = 0; c < C; ++c) { + ++curr_intensities[c][curr_ptr[j_c + c]]; + ++prev_intensities[c][prev_ptr[j_c + c]]; + } + } + } + + // Sample at percentiles. + for (int c = 0; c < C; ++c) { + // Accumulate. 
+ for (int k = 1; k < 256; ++k) { + curr_intensities[c][k] += curr_intensities[c][k - 1]; + prev_intensities[c][k] += prev_intensities[c][k - 1]; + } + + float percentile = options.min_match_percentile(); + float percentile_step = + (options.max_match_percentile() - options.min_match_percentile()) / + options.match_percentile_steps(); + + PatchToneMatch patch_tone_match; + for (int k = 0; k < options.match_percentile_steps(); + ++k, percentile += percentile_step) { + const auto& curr_iter = std::lower_bound(curr_intensities[c].begin(), + curr_intensities[c].end(), + percentile * patch_area); + + const auto& prev_iter = std::lower_bound(prev_intensities[c].begin(), + prev_intensities[c].end(), + percentile * patch_area); + + const int curr_int = curr_iter - curr_intensities[c].begin(); + const int prev_int = prev_iter - prev_intensities[c].begin(); + + // If either clipped, discard match. + if (curr_int < curr_clip_mask.min_exposure_threshold[c] || + curr_int > curr_clip_mask.max_exposure_threshold[c] || + prev_int < prev_clip_mask.min_exposure_threshold[c] || + prev_int > prev_clip_mask.max_exposure_threshold[c]) { + continue; + } + + ToneMatch* tone_match = patch_tone_match.add_tone_match(); + if (options.log_domain()) { + tone_match->set_curr_val(LogDomainLUT().Map(curr_int) * log_denom); + tone_match->set_prev_val(LogDomainLUT().Map(prev_int) * log_denom); + } else { + tone_match->set_curr_val(curr_int * (1.0f / 255.0f)); + tone_match->set_prev_val(prev_int * (1.0f / 255.0f)); + } + } + + (*color_tone_matches)[c].push_back(patch_tone_match); + + // Debug output. 
+ if (debug_output != nullptr) { + cv::rectangle(curr_debug, cv::Point(curr_start.x(), curr_start.y()), + cv::Point(curr_end.x(), curr_end.y()), + cv::Scalar(0, 0, 255)); + cv::rectangle(prev_debug, cv::Point(prev_start.x(), prev_start.y()), + cv::Point(prev_end.x(), prev_end.y()), + cv::Scalar(0, 0, 255)); + } + } + + ++num_matches; + } + + VLOG(1) << "Extracted fraction: " + << static_cast(num_matches) / + std::max(1, feature_list.feature_size()); +} + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_TONE_ESTIMATION_H_ diff --git a/mediapipe/util/tracking/tone_estimation.proto b/mediapipe/util/tracking/tone_estimation.proto new file mode 100644 index 000000000..711765a08 --- /dev/null +++ b/mediapipe/util/tracking/tone_estimation.proto @@ -0,0 +1,196 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/tone_models.proto"; + +// Capture tone change between two frames and per-frame tone statistics. +// The estimated tone change describes the transformation of color intensities +// from the current to the previous frame. +// Next tag: 16 +message ToneChange { + optional GainBiasModel gain_bias = 1; + optional AffineToneModel affine = 2; + + optional MixtureGainBiasModel mixture_gain_bias = 3; + optional MixtureAffineToneModel mixture_affine = 4; + + // TODO: Implement. 
+ optional float mixture_domain_sigma = 5; + + // Tone statistics, describing statistics of intensities of the current frame + // (independent from the previous frame). + + // Fraction of clipped pixels in [0, 1]. + // A pixel is considered clipped if more than + // ToneEstimationOptions::max_clipped_channels are over- + // or under exposed. + optional float frac_clipped = 6 [default = 0.0]; + + // Intensity percentiles of the current frame. Normalized to [0, 1]. + // We sort the intensities in the image (RGB to intensity conversion) and + // select the intensities that fall at the ranks specified by the + // stats_[low|mid|high]_percentile options below as the + /* + // Minimum fractional number of inlier gain features for gain correction + // deemed to be successful. + optional float min_frac_gain_inliers = 39 [default = 0.8]; + + // On average accept up to 10% intensity registration error. + optional float min_gain_inlier_weight = 40 [default = 0.1]; + */ + // [low|mid|high]_percentile's. + optional float low_percentile = 8; + optional float low_mid_percentile = 9; + optional float mid_percentile = 10; + optional float high_mid_percentile = 11; + optional float high_percentile = 12; + + // If set, all models are estimated in log domain, specifically + // intensity I is transformed via log(1.0 + I) := I' + // Consequently after apply the models, intensity needs to be transformed + // back to visible range via exp(I') - 1.0. + optional bool log_domain = 13 [default = false]; + + // ToneChange type indicates whether highest degree of freedom (DOF) + // model estimation was deemed stable, in which case ToneChange::Type is set + // to VALID. + // If a model was deemed not stable (according to *StabilityBounds in + // ToneEstimationOptions), it is set to the lower dof type which was deemed + // stable. + enum Type { + VALID = 0; + INVALID = 10; // Identity model, gain bias unrealiable. 
+ } + + optional Type type = 14 [default = VALID]; + + // Stats based on stability analysis. + message StabilityStats { + // Number of tone matches that were iniliers (used for tone estimation). + optional int32 num_inliers = 1; + // Fraction of tone matches that were inliers. + optional float inlier_fraction = 2; + // Total IRLS weight summed over all inliers. + optional double inlier_weight = 3; + } + optional StabilityStats stability_stats = 15; +} + +message ToneMatchOptions { + // ToneChange's are fit to ToneMatches extracted from matching patches, using + // order statistics of their corresponding intensities. Matches are defined by + // having the same percentile of ordered intensities. If any member of the + // ToneMatch is below under or above over-exposed the match is discarded + // (based on parameters min and max_exposure above). + // Matches are extracted from min_match_percentile to max_match_percentile in + // #match_percentile_steps equidistant steps. + optional float min_match_percentile = 1 [default = 0.01]; + optional float max_match_percentile = 2 [default = 0.99]; + optional int32 match_percentile_steps = 3 [default = 10]; + + // Patch radius from which order statistics are collected. + optional int32 patch_radius = 4 [default = 18]; + + // Only matches with not too many pixels over- or underexposed are used. + optional float max_frac_clipped = 5 [default = 0.4]; + + // If set matches will be collected in the log domain. + optional bool log_domain = 8 [default = false]; +} + +message ClipMaskOptions { + // Over/Under exposure setting. Pixels that are clipped due to limited + // dynamic range are masked out from analysis. Values specified w.r.t. + // [0, 1] range. + optional float min_exposure = 1 [default = 0.02]; + optional float max_exposure = 2 [default = 0.98]; + + // A pixel can have clipped color values in atmost max_clipped_channels before + // it will be labeled as clipped. 
+ optional int32 max_clipped_channels = 4 [default = 1]; + + // Over-exposure tends to show blooming (neighboring pixels are affected by + // over-exposure as well). For robustness mask of clipped pixels is dilated + // with structuring element of diameter clip_mask_diam. + optional int32 clip_mask_diameter = 5 [default = 5]; +} + +// Next tag: 13 +message ToneEstimationOptions { + optional ToneMatchOptions tone_match_options = 1; + optional ClipMaskOptions clip_mask_options = 2; + + // Percentiles for tone statistics. + optional float stats_low_percentile = 3 [default = 0.05]; + optional float stats_low_mid_percentile = 4 [default = 0.2]; + optional float stats_mid_percentile = 5 [default = 0.5]; + optional float stats_high_mid_percentile = 6 [default = 0.8]; + optional float stats_high_percentile = 7 [default = 0.95]; + + optional int32 irls_iterations = 8 [default = 10]; + + message GainBiasBounds { + optional float min_inlier_fraction = 1 [default = 0.75]; + + // Accept 2% intensity difference as valid inlier. + optional float min_inlier_weight = 2 [default = 0.5]; + + optional float lower_gain = 3 [default = 0.75]; + optional float upper_gain = 4 [default = 1.334]; + + optional float lower_bias = 5 [default = -0.2]; + optional float upper_bias = 6 [default = 0.2]; + } + + optional GainBiasBounds stable_gain_bias_bounds = 9; + + // We support down-sampling of an incoming frame before running the + // resolution dependent part of the tone estimation. + // tracking if desired). + enum DownsampleMode { + DOWNSAMPLE_NONE = 1; // no downsampling. + DOWNSAMPLE_TO_MAX_SIZE = 2; // downsizes frame such that frame_size == + // downsampling_size. + // frame_size := max(width, height). + DOWNSAMPLE_BY_FACTOR = 3; // downsizes frame by pre-defined factor. + DOWNSAMPLE_TO_MIN_SIZE = 4; // downsizes frame such that frame_size == + // downsampling_size. + // frame_size := min(width, height). 
+ } + + optional DownsampleMode downsample_mode = 10 [default = DOWNSAMPLE_NONE]; + + // Specify the size of either dimension here, the frame will be + // downsampled to fit downsampling_size. + optional int32 downsampling_size = 11 [default = 256]; + optional float downsample_factor = 12 [default = 2.0]; +} + +message ToneMatch { + // Intensity in current frame. + optional float curr_val = 1; + // Matching intensity in previous frame. + optional float prev_val = 2; +} + +message PatchToneMatch { + // Several intensity matches computed from equal percentiles of matching patch + // pairs. No number or particular ordering is assumed. + repeated ToneMatch tone_match = 1; + optional float irls_weight = 2 [default = 1.0]; +} diff --git a/mediapipe/util/tracking/tone_models.cc b/mediapipe/util/tracking/tone_models.cc new file mode 100644 index 000000000..ab98bfbaf --- /dev/null +++ b/mediapipe/util/tracking/tone_models.cc @@ -0,0 +1,144 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/tone_models.h" + +#include + +#include "absl/strings/str_format.h" + +namespace mediapipe { + +LogDomainLUTImpl::LogDomainLUTImpl() { + log_lut_.resize(256); + const float alpha = 0.05f; + + // Mapping: log(1 + x) = y \in [0, log(256.0)]. 
+ for (int k = 0; k < 256; ++k) { + log_lut_[k] = std::log(1.0f + alpha * k); + } + + exp_lut_.resize(kExpBins); + max_log_value_ = std::log(1.0f + alpha * 255.0f) * 1.001; + inv_max_log_value_ = 1.0f / max_log_value_; + + const float denom = (1.0f / (kExpBins - 2)); + // Inverse operation: exp(y) - 1.0 = x \in [0, 255]. + for (int k = 0; k < kExpBins; ++k) { + exp_lut_[k] = + (std::exp(k * denom * max_log_value_) - 1.0f) * (1.0f / alpha); + } +} + +template +void ToneModelMethods::MapImage(const Model& model, + bool log_domain, + bool normalized_model, + const cv::Mat& input, + cv::Mat* output) { + CHECK(output != nullptr); + + const int out_channels = output->channels(); + CHECK_EQ(input.channels(), 3); + CHECK_LE(out_channels, 3); + CHECK_EQ(input.rows, output->rows); + CHECK_EQ(input.cols, output->cols); + + float norm_scale = + normalized_model + ? (log_domain ? (1.0f / LogDomainLUT().MaxLogDomainValue()) + : (1.0f / 255.0f)) + : 1.0f; + + const float inv_norm_scale = 1.0f / norm_scale; + + for (int i = 0; i < input.rows; ++i) { + const uint8* input_ptr = input.ptr(i); + uint8* output_ptr = output->ptr(i); + for (int j = 0; j < input.cols; + ++j, input_ptr += 3, output_ptr += out_channels) { + Vector3_f color_vec(input_ptr[0], input_ptr[1], input_ptr[2]); + Vector3_f mapped; + if (log_domain) { + mapped = Adapter::TransformColor( + model, LogDomainLUT().MapVec(color_vec) * norm_scale); + mapped = LogDomainLUT().UnMapVec(mapped * inv_norm_scale); + } else { + mapped = Adapter::TransformColor(model, color_vec * norm_scale) * + inv_norm_scale; + } + // Clamp to visible range and output. 
+ const Vector3_i result = RoundAndClampColor(mapped); + for (int c = 0; c < out_channels; ++c) { + output_ptr[c] = result[c]; + } + } + } +} + +GainBiasModel ToneModelAdapter::AddIdentity( + const GainBiasModel& model) { + GainBiasModel result = model; + result.set_gain_c1(result.gain_c1() + 1.0f); + result.set_gain_c2(result.gain_c2() + 1.0f); + result.set_gain_c3(result.gain_c3() + 1.0f); + return result; +} + +GainBiasModel ToneModelAdapter::ScaleParameters( + const GainBiasModel& model, float scale) { + GainBiasModel result = model; + result.set_gain_c1(result.gain_c1() * scale); + result.set_gain_c2(result.gain_c2() * scale); + result.set_gain_c3(result.gain_c3() * scale); + result.set_bias_c1(result.bias_c1() * scale); + result.set_bias_c2(result.bias_c2() * scale); + result.set_bias_c3(result.bias_c3() * scale); + return result; +} + +std::string ToneModelAdapter::ToString( + const GainBiasModel& model) { + return absl::StrFormat("%f %f | %f %f | %f %f", model.gain_c1(), + model.bias_c1(), model.gain_c2(), model.bias_c2(), + model.gain_c3(), model.bias_c3()); +} + +AffineToneModel ToneModelAdapter::AddIdentity( + const AffineToneModel& model) { + AffineToneModel result = model; + result.set_g_00(result.g_00() + 1.0f); + result.set_g_11(result.g_11() + 1.0f); + result.set_g_22(result.g_22() + 1.0f); + return result; +} + +AffineToneModel ToneModelAdapter::ScaleParameters( + const AffineToneModel& model, float scale) { + float elems[kNumParameters]; + ToPointer(model, elems); + for (int k = 0; k < kNumParameters; ++k) { + elems[k] *= scale; + } + return FromPointer(elems, false); +} + +// Explicit instantiations. 
+template class MixtureToneAdapter; +template class MixtureToneAdapter; + +template class ToneModelMethods; +template class ToneModelMethods; + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/tone_models.h b/mediapipe/util/tracking/tone_models.h new file mode 100644 index 000000000..266257e1f --- /dev/null +++ b/mediapipe/util/tracking/tone_models.h @@ -0,0 +1,628 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Implements various tone-models describing tonal change of color intensities +// across frame pairs. + +#ifndef MEDIAPIPE_UTIL_TRACKING_TONE_MODELS_H_ +#define MEDIAPIPE_UTIL_TRACKING_TONE_MODELS_H_ + +#include +#include +#include +#include + +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/tone_models.pb.h" + +namespace mediapipe { +// Abstract Adapter for tone models. +template +class ToneModelAdapter { + public: + // Initialized Model from pointer to arguments. If identitiy_parametrization + // is set, args = {0,0, ... 0} correspond to identity model. + template + static Model FromPointer(const T* args, bool identity_parametrization); + + // Outputs parameters to pointer args. 
+ template + static void ToPointer(const GainBiasModel& model, T* args); + + // Transforms color (RGB space) according to model. + static Vector3_f TransformColor(const Model& model, const Vector3_f& color); + + // Inverts color mode, if not invertible success is set to zero. + static Model InvertChecked(const Model& model, bool* success); + static Model Compose(const Model& lhs, const Model& rhs); + + static int NumParameters(); + static float GetParameter(const Model& model, int idx); + + static float Determinant(const Model& model); +}; + +// LogDomain LUT: +// Converts from intensity domain to log-domain via Map, inverse mapping via +// UnMap. +// Use as Singleton only, via inline LogDomainLUT() +class LogDomainLUTImpl { + public: + LogDomainLUTImpl(const LogDomainLUTImpl&) = delete; + LogDomainLUTImpl& operator=(const LogDomainLUTImpl&) = delete; + + // Maps intensity between [0, 255] to log-domain by applying log(1 + x). + // Result will be within [0, log(256)]. Truncation occurs for non-integer + // inputs. Discritization error is at most 0.5 intensity levels. + inline float Map(float value) const { + return log_lut_[std::max(0, std::min(255, value + 0.5f))]; + } + + inline Vector3_f MapVec(Vector3_f vec) const { + return Vector3_f(Map(vec.x()), Map(vec.y()), Map(vec.z())); + } + + // Unmaps intensity from log-domain in [0, log(256)] = [0, 2.4] to regular + // domain, via exp(x) - 1.0. Discritization error of at most 0.01 intensity + // levels might occur. + inline float UnMap(float value) const { + const int bin = value * inv_max_log_value_ * (kExpBins - 2); + return exp_lut_[std::max(0, std::min(bin, kExpBins - 1))]; + } + + inline Vector3_f UnMapVec(Vector3_f vec) const { + return Vector3_f(UnMap(vec.x()), UnMap(vec.y()), UnMap(vec.z())); + } + + float MaxLogDomainValue() const { return max_log_value_; } // ~ log(256). 
+ + private: + friend const LogDomainLUTImpl& LogDomainLUT(); + + LogDomainLUTImpl(); + std::vector log_lut_; + std::vector exp_lut_; + float max_log_value_; + float inv_max_log_value_; + enum { + kExpBins = 2560 + }; // Allocated such that exp(2.4) - + // exp(2.4 * (1.0 - 1.0 / kExpBins)) < 0.01. +}; + +inline const LogDomainLUTImpl& LogDomainLUT() { + static auto p = new LogDomainLUTImpl(); + return *p; +} + +inline Vector3_i RoundAndClampColor(const Vector3_f& vec) { + return Vector3_i( + std::max(0, std::min(255, static_cast(vec.x() + 0.5f))), + std::max(0, std::min(255, static_cast(vec.y() + 0.5f))), + std::max(0, std::min(255, static_cast(vec.z() + 0.5f)))); +} + +template <> +class ToneModelAdapter { + public: + template + static GainBiasModel FromPointer(const T* args, + bool identity_parametrization); + template + static void ToPointer(const GainBiasModel& model, T* args); + + inline static Vector3_f TransformColor(const GainBiasModel& model, + const Vector3_f& color); + + inline static GainBiasModel InvertChecked(const GainBiasModel& model, + bool* success); + + inline static GainBiasModel Compose(const GainBiasModel& lhs, + const GainBiasModel& rhs); + + enum { kNumParameters = 6 }; + static int NumParameters() { return kNumParameters; } + inline static float GetParameter(const GainBiasModel& model, int idx); + + static float Determinant(const GainBiasModel& model); + + static GainBiasModel AddIdentity(const GainBiasModel& model); + static GainBiasModel ScaleParameters(const GainBiasModel& model, float scale); + + static std::string ToString(const GainBiasModel& model); +}; + +template <> +class ToneModelAdapter { + public: + template + static AffineToneModel FromPointer(const T* args, + bool identity_parametrization); + template + static void ToPointer(const AffineToneModel& model, T* args) { + return ToPointerPad(model, false, args); + } + + // If pad_square is set the 12 DOF 3x4 model is embedded in a 4x4 model + // (last row = [ 0 0 0 1]) and output 
to args (row major ordering), + // otherwise the original 12 DOF 3x4 model is written to args (row major). + template + static void ToPointerPad(const AffineToneModel& model, bool pad_square, + T* args); + + inline static Vector3_f TransformColor(const AffineToneModel& model, + const Vector3_f& color); + + inline static AffineToneModel InvertChecked(const AffineToneModel& model, + bool* success); + + inline static AffineToneModel Compose(const AffineToneModel& lhs, + const AffineToneModel& rhs); + + enum { kNumParameters = 12 }; + static int NumParameters() { return kNumParameters; } + inline static float GetParameter(const AffineToneModel& model, int idx); + + // Used during stabilization: Adds identity model (model + I) + // and scales each parameter by scale. + static AffineToneModel AddIdentity(const AffineToneModel& model); + static AffineToneModel ScaleParameters(const AffineToneModel& model, + float scale); +}; + +typedef ToneModelAdapter GainBiasModelAdapter; +typedef ToneModelAdapter AffineToneModelAdapter; + +// Mixture Models. 
+template +struct MixtureToneTraits { + typedef BaseModel BaseModelType; + typedef MixtureModel ModelType; +}; + +typedef MixtureToneTraits + GainBiasModelTraits; +typedef MixtureToneTraits + AffineToneModelTraits; + +template +class MixtureToneAdapter { + typedef typename Traits::ModelType MixtureModel; + typedef typename Traits::BaseModelType BaseModel; + typedef ToneModelAdapter BaseModelAdapter; + enum { kBaseNumParameters = BaseModelAdapter::kNumParameters }; + + public: + template + static MixtureModel FromPointer(const T* args, bool identity_parametrization, + int skip, int num_models); + + template + static void ToPointer(const MixtureModel& model, T* args); + + static int NumParameters(const MixtureModel& model) { + return model.model_size() * kBaseNumParameters; + } + + inline static float GetParameter(const MixtureModel& model, int model_id, + int param_id); + + inline static MixtureModel IdentityModel(int num_mixtures); + + inline static BaseModel ToBaseModel(const MixtureModel& mixture_model, + const float* weights); + + inline static Vector3_f TransformColor(const MixtureModel& model, + const float* weights, + const Vector3_f& pt); + + // Mixtures are not invertible via closed form, but invertible for a specific + // set of weights by computing the underlying BaseModel via ToBaseModel. + // Resulting BaseModel is applied to pt, if BaseModel is not invertible + // success is set to false. + inline static Vector3_f SolveForColorChecked(const MixtureModel& model, + const float* weights, + const Vector3_f& pt, + bool* success); +}; + +typedef MixtureToneAdapter MixtureGainBiasModelAdapter; +typedef MixtureToneAdapter MixtureAffineToneModelAdapter; + +template +GainBiasModel ToneModelAdapter::FromPointer(const T* args, + bool identity) { + DCHECK(args); + GainBiasModel model; + const float id_shift = identity ? 
1.0f : 0.0f; + model.set_gain_c1(args[0] + id_shift); + model.set_bias_c1(args[1]); + model.set_gain_c2(args[2] + id_shift); + model.set_bias_c2(args[3]); + model.set_gain_c3(args[4] + id_shift); + model.set_bias_c3(args[5]); + return model; +} + +template +void ToneModelAdapter::ToPointer(const GainBiasModel& model, + T* args) { + args[0] = model.gain_c1(); + args[1] = model.bias_c1(); + args[2] = model.gain_c2(); + args[3] = model.bias_c2(); + args[4] = model.gain_c3(); + args[5] = model.bias_c3(); +} + +inline Vector3_f ToneModelAdapter::TransformColor( + const GainBiasModel& model, const Vector3_f& color) { + return Vector3_f(model.gain_c1() * color.x() + model.bias_c1(), + model.gain_c2() * color.y() + model.bias_c2(), + model.gain_c3() * color.z() + model.bias_c3()); +} + +inline float ToneModelAdapter::Determinant( + const GainBiasModel& model) { + return model.gain_c1() * model.gain_c2() * model.gain_c3(); +} + +inline GainBiasModel ToneModelAdapter::InvertChecked( + const GainBiasModel& model, bool* success) { + // (g_1 0 0 b_1 == (1/g_1 0 0 -b_1/g_1 + // 0 g_2 0 b_2 0 1/g_2 0 -b_2/g_2 + // 0 0 g_3 b_3 0 0 1/g_3 -b_3/g_3 + // 0 0 0 1)^(-1) 0 0 0 1 + + // Compute determinant. 
+ const float det = GainBiasModelAdapter::Determinant(model); + if (fabs(det) < 1e-10f) { + *success = false; + LOG(ERROR) << "Model not invertible."; + return GainBiasModel(); + } + + *success = true; + GainBiasModel result; + const float inv_gain_c1 = 1.0f / model.gain_c1(); + const float inv_gain_c2 = 1.0f / model.gain_c2(); + const float inv_gain_c3 = 1.0f / model.gain_c3(); + result.set_gain_c1(inv_gain_c1); + result.set_bias_c1(inv_gain_c1 * -model.bias_c1()); + result.set_gain_c2(inv_gain_c2); + result.set_bias_c2(inv_gain_c2 * -model.bias_c2()); + result.set_gain_c3(inv_gain_c3); + result.set_bias_c3(inv_gain_c3 * -model.bias_c3()); + return result; +} + +inline GainBiasModel ToneModelAdapter::Compose( + const GainBiasModel& lhs, const GainBiasModel& rhs) { + GainBiasModel result; + result.set_gain_c1(lhs.gain_c1() * rhs.gain_c1()); + result.set_bias_c1(lhs.gain_c1() * rhs.bias_c1() + lhs.bias_c1()); + result.set_gain_c2(lhs.gain_c2() * rhs.gain_c2()); + result.set_bias_c2(lhs.gain_c2() * rhs.bias_c2() + lhs.bias_c2()); + result.set_gain_c3(lhs.gain_c3() * rhs.gain_c3()); + result.set_bias_c3(lhs.gain_c3() * rhs.bias_c3() + lhs.bias_c3()); + return result; +} + +inline float ToneModelAdapter::GetParameter( + const GainBiasModel& model, int idx) { + switch (idx) { + case 0: + return model.gain_c1(); + case 1: + return model.bias_c1(); + case 2: + return model.gain_c2(); + case 3: + return model.bias_c2(); + case 4: + return model.gain_c3(); + case 5: + return model.bias_c3(); + default: + LOG(FATAL) << "Unknown parameter requested."; + } + + return 0.0f; +} +template +AffineToneModel ToneModelAdapter::FromPointer(const T* args, + bool identity) { + DCHECK(args); + AffineToneModel model; + const float id_shift = identity ? 
1.0f : 0.0f; + model.set_g_00(args[0] + id_shift); + model.set_g_01(args[1]); + model.set_g_02(args[2]); + model.set_g_03(args[3]); + + model.set_g_10(args[4]); + model.set_g_11(args[5] + id_shift); + model.set_g_12(args[6]); + model.set_g_13(args[7]); + + model.set_g_20(args[8]); + model.set_g_21(args[9]); + model.set_g_22(args[10] + id_shift); + model.set_g_23(args[11]); + return model; +} + +template +void ToneModelAdapter::ToPointerPad( + const AffineToneModel& model, bool pad_square, T* args) { + DCHECK(args); + args[0] = model.g_00(); + args[1] = model.g_01(); + args[2] = model.g_02(); + args[3] = model.g_03(); + + args[4] = model.g_10(); + args[5] = model.g_11(); + args[6] = model.g_12(); + args[7] = model.g_13(); + + args[8] = model.g_20(); + args[9] = model.g_21(); + args[10] = model.g_22(); + args[11] = model.g_23(); + + if (pad_square) { + args[12] = 0; + args[13] = 0; + args[14] = 0; + args[15] = 1; + } +} + +inline Vector3_f ToneModelAdapter::TransformColor( + const AffineToneModel& model, const Vector3_f& color) { + return Vector3_f(model.g_00() * color.x() + model.g_01() * color.y() + + model.g_02() * color.z() + model.g_03(), + model.g_10() * color.x() + model.g_11() * color.y() + + model.g_12() * color.z() + model.g_13(), + model.g_20() * color.x() + model.g_21() * color.y() + + model.g_22() * color.z() + model.g_23()); +} + +inline AffineToneModel ToneModelAdapter::InvertChecked( + const AffineToneModel& model, bool* success) { + double data[16]; + double inv_data[16]; + ToPointerPad(model, true, data); + + cv::Mat model_mat(4, 4, CV_64F, data); + cv::Mat inv_model_mat(4, 4, CV_64F, inv_data); + + if (cv::invert(model_mat, inv_model_mat) < 1e-10) { + LOG(ERROR) << "AffineToneModel not invertible, det is zero."; + *success = false; + return AffineToneModel(); + } + + *success = true; + return FromPointer(inv_data, false); +} + +inline AffineToneModel ToneModelAdapter::Compose( + const AffineToneModel& lhs, const AffineToneModel& rhs) { + 
AffineToneModel result; + double lhs_data[16]; + double rhs_data[16]; + double result_data[16]; + ToPointerPad(lhs, true, lhs_data); + ToPointerPad(rhs, true, rhs_data); + + cv::Mat lhs_mat(4, 4, CV_64F, lhs_data); + cv::Mat rhs_mat(4, 4, CV_64F, rhs_data); + cv::Mat result_mat(4, 4, CV_64F, result_data); + + cv::gemm(lhs_mat, rhs_mat, 1.0, cv::Mat(), 0, result_mat); + return FromPointer(result_data, false); +} + +inline float ToneModelAdapter::GetParameter( + const AffineToneModel& model, int idx) { + switch (idx) { + case 0: + return model.g_00(); + case 1: + return model.g_01(); + case 2: + return model.g_02(); + case 3: + return model.g_03(); + case 4: + return model.g_10(); + case 5: + return model.g_11(); + case 6: + return model.g_12(); + case 7: + return model.g_13(); + case 8: + return model.g_20(); + case 9: + return model.g_21(); + case 10: + return model.g_22(); + case 11: + return model.g_23(); + default: + LOG(FATAL) << "Unknown parameter requested."; + } + + return 0.0f; +} + +template +template +typename Traits::ModelType MixtureToneAdapter::FromPointer( + const T* args, bool identity_parametrization, int skip, int num_models) { + MixtureModel model; + const T* arg_ptr = args; + for (int i = 0; i < num_models; ++i, arg_ptr += kBaseNumParameters + skip) { + BaseModel base = + BaseModelAdapter::FromPointer(arg_ptr, identity_parametrization); + model.add_model()->CopyFrom(base); + } + + return model; +} + +template +template +void MixtureToneAdapter::ToPointer(const MixtureModel& model, T* args) { + const T* arg_ptr = args; + const int num_models = model.model_size(); + for (int i = 0; i < num_models; ++i, arg_ptr += kBaseNumParameters) { + BaseModel base = BaseModelAdapter::ToPointer(model.model(i), arg_ptr); + } +} + +template +inline float MixtureToneAdapter::GetParameter(const MixtureModel& model, + int model_id, + int param_id) { + return BaseModelAdapter::GetParameter(model.model(model_id), param_id); +} + +template +inline typename 
Traits::ModelType MixtureToneAdapter::IdentityModel( + int num_mixtures) { + MixtureModel model; + for (int i = 0; i < num_mixtures; ++i) { + model.add_model(); + } + return model; +} + +template +inline typename MixtureToneAdapter::BaseModel +MixtureToneAdapter::ToBaseModel(const MixtureModel& mixture_model, + const float* weights) { + const int num_models = mixture_model.model_size(); + + float params[kBaseNumParameters]; + memset(params, 0, sizeof(params[0]) * kBaseNumParameters); + + // Weighted combination of mixture models. + for (int m = 0; m < num_models; ++m) { + for (int k = 0; k < kBaseNumParameters; ++k) { + params[k] += BaseModelAdapter::GetParameter(mixture_model.model(m), k) * + weights[m]; + } + } + + return BaseModelAdapter::FromPointer(params, false); +} + +template +inline Vector3_f MixtureToneAdapter::TransformColor( + const MixtureModel& model, const float* weights, const Vector3_f& pt) { + const int num_models = model.model_size(); + Vector3_f result(0, 0, 0); + for (int i = 0; i < num_models; ++i) { + result += BaseModelAdapter::TransformColor(model.model(i), pt) * weights[i]; + } + + return result; +} + +template +inline Vector3_f MixtureToneAdapter::SolveForColorChecked( + const MixtureModel& model, const float* weights, const Vector3_f& pt, + bool* success) { + BaseModel base_model = ToBaseModel(model, weights); + BaseModel inv_base_model = + BaseModelAdapter::InvertChecked(base_model, success); + return BaseModelAdapter::TransformColor(inv_base_model, pt); +} + +template +class ToneModelMethods { + public: + // Maps image input to output by applying model to each pixel's intensity. + // Set log_domain to true, if model has been estimated in the log-domain. + // Set normalized_model to true, if model is normalized (expected input + // intensity range in [0, 1]). + // Number of output channels (N) should be <= number of input channels (M), + // in which case the only first N input channels are copied to the first N + // output channels. 
+ static void MapImage(const Model& model, bool log_domain, + bool normalized_model, const cv::Mat& input, + cv::Mat* output); + + // Fast mapping version of above function via LUT if color model is + // independent across specified C channels (that is model is a diagonal + // matrix). + // Otherwise incorrect results will be obtained. + template + static void MapImageIndependent(const Model& model, bool log_domain, + bool normalized_model, const cv::Mat& input, + cv::Mat* output); +}; + +typedef ToneModelMethods + GainBiasModelMethods; + +typedef ToneModelMethods + AffineToneModelMethods; + +template +template +void ToneModelMethods::MapImageIndependent( + const Model& model, bool log_domain, bool normalized_model, + const cv::Mat& input, cv::Mat* output) { + CHECK(output != nullptr); + CHECK_EQ(input.channels(), C); + CHECK_EQ(output->channels(), C); + + // Input LUT which will be mapped to the output LUT by the tone change model. + // Needs 3 channels to represent input RGB colors, but since they are assumed + // independent, we can assign the same value to each channel, which will be + // transformed by the respective channel's transform. + cv::Mat lut_input(1, 256, CV_8UC3); + uint8* lut_ptr = lut_input.ptr(0); + for (int k = 0; k < 256; ++k, lut_ptr += 3) { + for (int c = 0; c < 3; ++c) { + lut_ptr[c] = k; + } + } + + // Output LUT. Only needs C channels (as many in the input/output). + cv::Mat lut(1, 256, CV_8UC(C)); + MapImage(model, log_domain, normalized_model, lut_input, &lut); + + cv::LUT(input, lut, *output); +} + +// TODO: Implement for mixtures. 
+// typedef ToneModelMethods
+// MixtureGainBiasModelMethods;
+// typedef ToneModelMethods
+// MixtureAffineToneModelMethods;
+
+} // namespace mediapipe
+
+#endif // MEDIAPIPE_UTIL_TRACKING_TONE_MODELS_H_
diff --git a/mediapipe/util/tracking/tone_models.proto b/mediapipe/util/tracking/tone_models.proto
new file mode 100644
index 000000000..d53ad2172
--- /dev/null
+++ b/mediapipe/util/tracking/tone_models.proto
@@ -0,0 +1,64 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Models to describe color transforms between adjacent frames.
+// Color transforms are always specified for 3 channels (RGB).
+ +syntax = "proto2"; + +package mediapipe; + +// Transforms a 3D color vector x = (c1, c2, c3) according to +// [ gain_c1 0 0 bias_c1 * [ c1 +// 0 gain_c2 0 bias_c2 c2 +// 0 0 gain_c3 bias_c3 ] c3 +// 1 ] +message GainBiasModel { + optional float gain_c1 = 1 [default = 1.0]; + optional float bias_c1 = 2 [default = 0.0]; + optional float gain_c2 = 3 [default = 1.0]; + optional float bias_c2 = 4 [default = 0.0]; + optional float gain_c3 = 5 [default = 1.0]; + optional float bias_c3 = 6 [default = 0.0]; +} + +message MixtureGainBiasModel { + repeated GainBiasModel model = 1; +} + +// Transforms a 3D color vector x = (c1, c2, c3) according to +// [ g_00 g_01 g_02 g_03 * [ c1 +// g_10 g_11 g_12 g_13 c2 +// g_20 g_21 g_22 g_23 ] c3 +// 1 ] +message AffineToneModel { + optional float g_00 = 1 [default = 1.0]; + optional float g_01 = 2 [default = 0.0]; + optional float g_02 = 3 [default = 0.0]; + optional float g_03 = 4 [default = 0.0]; + + optional float g_10 = 5 [default = 0.0]; + optional float g_11 = 6 [default = 1.0]; + optional float g_12 = 7 [default = 0.0]; + optional float g_13 = 8 [default = 0.0]; + + optional float g_20 = 9 [default = 0.0]; + optional float g_21 = 10 [default = 0.0]; + optional float g_22 = 11 [default = 1.0]; + optional float g_23 = 12 [default = 0.0]; +} + +message MixtureAffineToneModel { + repeated AffineToneModel model = 1; +} diff --git a/mediapipe/util/tracking/tracked_detection.cc b/mediapipe/util/tracking/tracked_detection.cc new file mode 100644 index 000000000..3127a9e44 --- /dev/null +++ b/mediapipe/util/tracking/tracked_detection.cc @@ -0,0 +1,144 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/tracked_detection.h" + +#include "mediapipe/framework/formats/rect.pb.h" + +namespace mediapipe { + +namespace { + +// Struct for carrying boundary information. +struct NormalizedRectBounds { + float left, right, top, bottom; +}; + +// Computes the area of a NormalizedRect. +float BoxArea(const NormalizedRect& box) { return box.width() * box.height(); } + +// Computes the bounds of a NormalizedRect. +void GetNormalizedRectBounds(const NormalizedRect& box, + NormalizedRectBounds* bounds) { + bounds->left = box.x_center() - box.width() / 2.f; + bounds->right = box.x_center() + box.width() / 2.f; + bounds->top = box.y_center() - box.height() / 2.f; + bounds->bottom = box.y_center() + box.height() / 2.f; +} + +// Computes the overlapping area of two boxes, ignoring rotation. +float OverlapArea(const NormalizedRect& box1, const NormalizedRect& box2) { + NormalizedRectBounds bounds1, bounds2; + GetNormalizedRectBounds(box1, &bounds1); + GetNormalizedRectBounds(box2, &bounds2); + const float x_overlap = + std::max(0.f, std::min(bounds1.right, bounds2.right) - + std::max(bounds1.left, bounds2.left)); + const float y_overlap = + std::max(0.f, std::min(bounds1.bottom, bounds2.bottom) - + std::max(bounds1.top, bounds2.top)); + return x_overlap * y_overlap; +} + +std::array ComputeCorners(const NormalizedRect& normalized_box, + const Vector2_f& center, + int image_width, int image_height) { + NormalizedRectBounds bounds; + GetNormalizedRectBounds(normalized_box, &bounds); + // Rotate 4 corner w.r.t. center. 
+ std::array corners{{
+ Vector2_f(bounds.left * image_width, bounds.top * image_height),
+ Vector2_f(bounds.left * image_width, bounds.bottom * image_height),
+ Vector2_f(bounds.right * image_width, bounds.bottom * image_height),
+ Vector2_f(bounds.right * image_width, bounds.top * image_height),
+ }};
+ if (std::abs(normalized_box.rotation()) <= 1e-5) {
+ return corners;
+ }
+
+ const float cos_a = std::cos(normalized_box.rotation());
+ const float sin_a = std::sin(normalized_box.rotation());
+ for (int k = 0; k < 4; ++k) {
+ // Scale and rotate w.r.t. center.
+ const Vector2_f rad = corners[k] - center;
+ const Vector2_f rot_rad(cos_a * rad.x() - sin_a * rad.y(),
+ sin_a * rad.x() + cos_a * rad.y());
+ corners[k] = center + rot_rad;
+ }
+ return corners;
+}
+
+} // namespace
+
+void TrackedDetection::AddLabel(const std::string& label, float score) {
+ auto label_ptr = label_to_score_map_.find(label);
+ if (label_ptr == label_to_score_map_.end()) {
+ label_to_score_map_[label] = score;
+ } else {
+ label_ptr->second = label_ptr->second > score ? label_ptr->second : score;
+ }
+}
+
+bool TrackedDetection::IsSameAs(const TrackedDetection& other) const {
+ const auto box0 = bounding_box_;
+ const auto box1 = other.bounding_box_;
+ const double box0_area = BoxArea(box0);
+ const double box1_area = BoxArea(box1);
+ const double overlap_area = OverlapArea(box0, box1);
+
+ // For cases where a small object is in front of a big object.
+ // TODO: This is a hard threshold. Making the threshold smaller
+ // (e.g. 2.0) will cause issues when two detections of the same object are
+ // vertical to each other. For example, if we first get a detection (e.g. a
+ // long water bottle) vertically and then change the camera to horizontal
+ // quickly, then it will get another detection which will have a diamond shape
+ // that is much larger than the previous rectangle one.
+ const double kMaxAreaRatio = 3.0; + if (box0_area / box1_area > kMaxAreaRatio || + box1_area / box0_area > kMaxAreaRatio) + return false; + + const double kMaxOverlapRatio = 0.5; + if (overlap_area / box0_area > kMaxOverlapRatio || + overlap_area / box1_area > kMaxOverlapRatio) { + return true; + } + + return false; +} + +void TrackedDetection::MergeLabelScore(const TrackedDetection& other) { + for (const auto& label_score : other.label_to_score_map()) { + const auto label_score_ptr = label_to_score_map_.find(label_score.first); + if (label_score_ptr == label_to_score_map_.end()) { + AddLabel(label_score.first, label_score.second); + } else { + // TODO: Consider other strategy of merging scores, e.g. mean. + label_score_ptr->second = + std::max(label_score_ptr->second, label_score.second); + } + } +} + +std::array TrackedDetection::GetCorners( + float image_width, float image_height) const { + NormalizedRectBounds bounds; + GetNormalizedRectBounds(bounding_box_, &bounds); + Vector2_f center((bounds.right + bounds.left) / 2.0f * image_width, + (bounds.bottom + bounds.top) / 2.0f * image_height); + + return ComputeCorners(bounding_box_, center, image_width, image_height); +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/tracked_detection.h b/mediapipe/util/tracking/tracked_detection.h new file mode 100644 index 000000000..d4d0feee4 --- /dev/null +++ b/mediapipe/util/tracking/tracked_detection.h @@ -0,0 +1,127 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_UTIL_TRACKING_TRACKED_DETECTION_H_
+#define MEDIAPIPE_UTIL_TRACKING_TRACKED_DETECTION_H_
+
+#include
+
+#include "absl/container/node_hash_map.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/framework/port/vector.h"
+
+namespace mediapipe {
+
+// Class for keeping records of detected objects.
+// Each detection should be assigned an id to uniquely identify it.
+class TrackedDetection {
+ public:
+ TrackedDetection() = delete;
+ // Creates a detection that has a bounding box occupying the entire image.
+ TrackedDetection(int unique_id, int64 timestamp)
+ : unique_id_(unique_id),
+ initial_timestamp_(timestamp),
+ last_updated_timestamp_(timestamp),
+ previous_id_(-1) {}
+
+ // Creates a detection with the specified bounding box normalized by image
+ // dimensions.
+ TrackedDetection(int unique_id, int64 timestamp,
+ const ::mediapipe::NormalizedRect& bounding_box)
+ : unique_id_(unique_id),
+ initial_timestamp_(timestamp),
+ last_updated_timestamp_(timestamp),
+ previous_id_(-1) {
+ bounding_box_.CopyFrom(bounding_box);
+ set_bounds();
+ }
+
+ // Adds a (label, score) pair. If the label already exists then keep the
+ // higher score of both entries.
+ void AddLabel(const std::string& label, float score);
+
+ // Checks if two detections are in fact the same object.
+ // TODO: This is just the first step. It will be expanded to a family
+ // of functionalities that do reconciliation.
+ bool IsSameAs(const TrackedDetection& other) const;
+
+ // Merges labels and scores from another TrackedDetection.
+ void MergeLabelScore(const TrackedDetection& other); + + int64 initial_timestamp() const { return initial_timestamp_; } + int64 last_updated_timestamp() const { return last_updated_timestamp_; } + void set_last_updated_timestamp(int64 timestamp) { + last_updated_timestamp_ = timestamp; + } + + int unique_id() const { return unique_id_; } + + int previous_id() const { return previous_id_; } + void set_previous_id(int id) { previous_id_ = id; } + + const ::mediapipe::NormalizedRect& bounding_box() const { + return bounding_box_; + } + float left() const { return left_; } + float right() const { return right_; } + float top() const { return top_; } + float bottom() const { return bottom_; } + + void set_bounding_box(const ::mediapipe::NormalizedRect& bounding_box) { + bounding_box_ = bounding_box; + set_bounds(); + } + + // Gets corners of the bounding box of the object in image coordinates. + std::array GetCorners(float width, float height) const; + + // Gets corners of the bounding box of the object in normalized coordinates. + std::array GetCorners() const { return GetCorners(1.0f, 1.0f); } + + const absl::node_hash_map& label_to_score_map() const { + return label_to_score_map_; + } + + private: + void set_bounds() { + left_ = bounding_box_.x_center() - bounding_box_.width() / 2.f; + right_ = bounding_box_.x_center() + bounding_box_.width() / 2.f; + top_ = bounding_box_.y_center() - bounding_box_.height() / 2.f; + bottom_ = bounding_box_.y_center() + bounding_box_.height() / 2.f; + } + + int unique_id_; + + ::mediapipe::NormalizedRect bounding_box_; + float left_ = 0.f; + float right_ = 0.f; + float top_ = 0.f; + float bottom_ = 0.f; + + // The timestamp in milliseconds when this object is initialized. + int64 initial_timestamp_; + // The latest timestamp when this object is updated. + int64 last_updated_timestamp_; + + // The id of the previous detection that this detection is associated with. 
+ int previous_id_; + + // Each detection could have multiple labels, each with a corresponding + // detection score. + absl::node_hash_map label_to_score_map_; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_TRACKED_DETECTION_H_ diff --git a/mediapipe/util/tracking/tracked_detection_manager.cc b/mediapipe/util/tracking/tracked_detection_manager.cc new file mode 100644 index 000000000..4a822ea6b --- /dev/null +++ b/mediapipe/util/tracking/tracked_detection_manager.cc @@ -0,0 +1,191 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/util/tracking/tracked_detection_manager.h" + +#include + +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/util/tracking/tracked_detection.h" + +namespace { + +using ::mediapipe::TrackedDetection; + +// Checks if a point is out of view. +// x and y should both be in [0, 1] to be considered in view. +bool IsPointOutOfView(float x, float y) { + return x < 0.0f || x > 1.0f || y < 0.0f || y > 1.0f; +} + +// Checks if all corners of a bounding box of an object are out of view. 
+bool AreCornersOutOfView(const TrackedDetection& object) {
+ std::array corners = object.GetCorners();
+ for (int k = 0; k < 4; ++k) {
+ if (!IsPointOutOfView(corners[k].x(), corners[k].y())) {
+ return false;
+ }
+ }
+ return true;
+}
+} // namespace
+
+namespace mediapipe {
+
+std::vector TrackedDetectionManager::AddDetection(
+ std::unique_ptr detection) {
+ std::vector ids_to_remove;
+
+ int64 latest_duplicate_timestamp = 0;
+ // TODO: All detections should be fast-forwarded to the current
+ // timestamp before being added to the detection manager, i.e. only check
+ // that they are the same if the timestamps are the same.
+ for (auto& existing_detection_ptr : detections_) {
+ const auto& existing_detection = *existing_detection_ptr.second;
+ if (detection->IsSameAs(existing_detection)) {
+ // Merge previous labels into the new detection, because new detections
+ // usually have a better bounding box than the one from tracking.
+ // TODO: This might cause unstable change of the bounding box.
+ // TODO: Add a filter for the box using new detection as
+ // observation.
+ detection->MergeLabelScore(existing_detection);
+ // Pick the duplicated detection that has the latest initial timestamp as
+ // the previous detection of the new one.
+ if (existing_detection.initial_timestamp() > latest_duplicate_timestamp) {
+ latest_duplicate_timestamp = existing_detection.initial_timestamp();
+ if (existing_detection.previous_id() == -1) {
+ detection->set_previous_id(existing_detection.unique_id());
+ } else {
+ detection->set_previous_id(existing_detection.previous_id());
+ }
+ }
+ ids_to_remove.push_back(existing_detection_ptr.first);
+ }
+ }
+ // Erase old detections.
+ for (auto id : ids_to_remove) {
+ detections_.erase(id);
+ }
+ const int id = detection->unique_id();
+ detections_[id] = std::move(detection);
+ return ids_to_remove;
+}
+
+std::vector TrackedDetectionManager::UpdateDetectionLocation(
+ int id, const NormalizedRect& bounding_box, int64 timestamp) {
+ // TODO: Remove all boxes that are not updating.
+ auto detection_ptr = detections_.find(id);
+ if (detection_ptr == detections_.end()) {
+ return std::vector();
+ }
+ auto& detection = *detection_ptr->second;
+ detection.set_bounding_box(bounding_box);
+ detection.set_last_updated_timestamp(timestamp);
+
+ // It's required to do this here in addition to in AddDetection because during
+ // fast motion, two or more detections of the same object could coexist since
+ // the locations could be quite different before they are propagated to the
+ // same timestamp.
+ return RemoveDuplicatedDetections(detection.unique_id());
+}
+
+std::vector TrackedDetectionManager::RemoveObsoleteDetections(
+ int64 timestamp) {
+ std::vector ids_to_remove;
+ for (auto& existing_detection : detections_) {
+ if (existing_detection.second->last_updated_timestamp() < timestamp) {
+ ids_to_remove.push_back(existing_detection.first);
+ }
+ }
+ for (auto idx : ids_to_remove) {
+ detections_.erase(idx);
+ }
+ return ids_to_remove;
+}
+
+std::vector TrackedDetectionManager::RemoveOutOfViewDetections() {
+ std::vector ids_to_remove;
+ for (auto& existing_detection : detections_) {
+ if (AreCornersOutOfView(*existing_detection.second)) {
+ ids_to_remove.push_back(existing_detection.first);
+ }
+ }
+ for (auto idx : ids_to_remove) {
+ detections_.erase(idx);
+ }
+ return ids_to_remove;
+}
+
+std::vector TrackedDetectionManager::RemoveDuplicatedDetections(int id) {
+ std::vector ids_to_remove;
+ auto detection_ptr = detections_.find(id);
+ if (detection_ptr == detections_.end()) {
+ return ids_to_remove;
+ }
+ auto& detection = *detection_ptr->second;
+
+ // For duplicated detections, we keep the
one that's added most recently.
+ auto latest_detection = detection_ptr->second.get();
+ // For setting up the |previous_id| of the latest detection. For now, if there
+ // are multiple duplicated detections at the same timestamp, we will use the
+ // one that has the second latest initial timestamp.
+ const TrackedDetection* previous_detection = nullptr;
+ for (auto& existing_detection : detections_) {
+ auto other = *(existing_detection.second);
+ if (detection.unique_id() != other.unique_id()) {
+ // Only check if they are updated at the same timestamp. Comparing
+ // locations of detections at different timestamps is not correct.
+ if (detection.last_updated_timestamp() ==
+ other.last_updated_timestamp()) {
+ if (detection.IsSameAs(other)) {
+ const TrackedDetection* detection_to_remove = nullptr;
+ if (latest_detection->initial_timestamp() >=
+ other.initial_timestamp()) {
+ // Removes the earlier one.
+ ids_to_remove.push_back(other.unique_id());
+ detection_to_remove = existing_detection.second.get();
+ latest_detection->MergeLabelScore(other);
+ } else {
+ ids_to_remove.push_back(latest_detection->unique_id());
+ detection_to_remove = latest_detection;
+ existing_detection.second->MergeLabelScore(*latest_detection);
+ latest_detection = existing_detection.second.get();
+ }
+ if (!previous_detection ||
+ previous_detection->initial_timestamp() <
+ detection_to_remove->initial_timestamp()) {
+ previous_detection = detection_to_remove;
+ }
+ }
+ }
+ }
+ }
+
+ // If the latest detection is not the one passed into this function, it might
+ // already have its previous detection. In that case, we don't override it.
+ if (latest_detection->previous_id() == -1 && previous_detection) { + if (previous_detection->previous_id() == -1) { + latest_detection->set_previous_id(previous_detection->unique_id()); + } else { + latest_detection->set_previous_id(previous_detection->previous_id()); + } + } + + for (auto idx : ids_to_remove) { + detections_.erase(idx); + } + return ids_to_remove; +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/tracked_detection_manager.h b/mediapipe/util/tracking/tracked_detection_manager.h new file mode 100644 index 000000000..d42d7c17c --- /dev/null +++ b/mediapipe/util/tracking/tracked_detection_manager.h @@ -0,0 +1,82 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_DETECTION_MANAGER_H_ +#define MEDIAPIPE_UTIL_TRACKING_DETECTION_MANAGER_H_ + +#include "absl/container/node_hash_map.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/util/tracking/tracked_detection.h" + +namespace mediapipe { + +// Class for managing detections. +// The detections are from either server side or a mobile detector and are +// tracked using either 2D or 3D tracker. The TrackedDetectionManager is used to +// identify duplicated detections or obsolete detections to keep a set of +// active detections. 
+class TrackedDetectionManager {
+ public:
+ TrackedDetectionManager() = default;
+ ~TrackedDetectionManager() = default;
+
+ // Adds a new detection at a given timestamp. Returns the IDs of the
+ // detections that are removed due to duplication.
+ std::vector AddDetection(std::unique_ptr detection);
+
+ // Updates the location of a detection identified by its id.
+ // Returns an empty vector if the requested id doesn't have a corresponding
+ // detection. Otherwise returns the IDs of the detections that are removed
+ // due to duplication.
+ std::vector UpdateDetectionLocation(
+ int id, const ::mediapipe::NormalizedRect& bounding_box, int64 timestamp);
+
+ // Removes detections that are not updated after |timestamp|. Returns the IDs
+ // of the detections that are removed.
+ std::vector RemoveObsoleteDetections(int64 timestamp);
+
+ // TODO: Do we really need this? Pursuit tracker doesn't do well
+ // in loop closure. Boxes out of view are usually not attached to objects
+ // any more when they are in view again. Returns the IDs of the
+ // detections that are removed.
+ std::vector RemoveOutOfViewDetections();
+
+ int GetNumDetections() const { return detections_.size(); }
+
+ // Gets a TrackedDetection by its unique id.
+ const TrackedDetection* GetTrackedDetection(int id) const {
+ auto detection_ptr = detections_.find(id);
+ if (detection_ptr == detections_.end()) {
+ return nullptr;
+ }
+ return detection_ptr->second.get();
+ }
+
+ const absl::node_hash_map>&
+ GetAllTrackedDetections() const {
+ return detections_;
+ }
+
+ private:
+ // Finds all detections that are duplicated with the one of |id| and removes
+ // all detections except the one that is added most recently. Returns the IDs
+ // of the detections that are removed.
+  std::vector<int> RemoveDuplicatedDetections(int id);
+
+  absl::node_hash_map<int, std::unique_ptr<TrackedDetection>> detections_;
+};
+
+} // namespace mediapipe
+
+#endif // MEDIAPIPE_UTIL_TRACKING_DETECTION_MANAGER_H_
diff --git a/mediapipe/util/tracking/tracked_detection_test.cc b/mediapipe/util/tracking/tracked_detection_test.cc
new file mode 100644
index 000000000..60b9df1b1
--- /dev/null
+++ b/mediapipe/util/tracking/tracked_detection_test.cc
@@ -0,0 +1,109 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "mediapipe/util/tracking/tracked_detection.h" + +#include "mediapipe/framework/port/gtest.h" + +namespace mediapipe { + +const float kErrorMargin = 1e-4f; + +TEST(TrackedDetectionTest, ConstructorWithoutBox) { + TrackedDetection detection(0, 0); + EXPECT_EQ(detection.unique_id(), 0); + EXPECT_EQ(detection.previous_id(), -1); + EXPECT_NEAR(detection.left(), 0.f, kErrorMargin); + EXPECT_NEAR(detection.top(), 0.f, kErrorMargin); + EXPECT_NEAR(detection.right(), 0.f, kErrorMargin); + EXPECT_NEAR(detection.bottom(), 0.f, kErrorMargin); +} + +TEST(TrackedDetectionTest, ConstructorWithAnOutOfBoundBox) { + NormalizedRect box; + box.set_x_center(-0.15); + box.set_y_center(-0.15); + box.set_height(0.1); + box.set_width(0.1); + TrackedDetection detection(0, 0, box); + EXPECT_EQ(detection.unique_id(), 0); + EXPECT_EQ(detection.previous_id(), -1); + EXPECT_NEAR(detection.left(), -0.2f, kErrorMargin); + EXPECT_NEAR(detection.top(), -0.2f, kErrorMargin); + EXPECT_NEAR(detection.right(), -0.1f, kErrorMargin); + EXPECT_NEAR(detection.bottom(), -0.1f, kErrorMargin); + EXPECT_NEAR(detection.bounding_box().width(), 0.1f, kErrorMargin); + EXPECT_NEAR(detection.bounding_box().height(), 0.1f, kErrorMargin); +} + +TEST(TrackedDetectionTest, ConstructorWithAnOutOfBoundBox2) { + NormalizedRect box; + box.set_x_center(0.5); + box.set_y_center(0.5); + box.set_height(1.2); + box.set_width(1.2); + TrackedDetection detection(0, 0, box); + EXPECT_EQ(detection.unique_id(), 0); + EXPECT_EQ(detection.previous_id(), -1); + EXPECT_NEAR(detection.left(), -0.1f, kErrorMargin); + EXPECT_NEAR(detection.top(), -0.1f, kErrorMargin); + EXPECT_NEAR(detection.right(), 1.1f, kErrorMargin); + EXPECT_NEAR(detection.bottom(), 1.1f, kErrorMargin); + EXPECT_NEAR(detection.bounding_box().width(), 1.2f, kErrorMargin); + EXPECT_NEAR(detection.bounding_box().height(), 1.2f, kErrorMargin); +} + +TEST(TrackedDetectionTest, CheckSameDetections) { + NormalizedRect box0; + box0.set_x_center(0.5); + 
box0.set_y_center(0.5);
+  box0.set_height(0.5);
+  box0.set_width(0.5);
+  NormalizedRect box1;
+  box1.set_x_center(0.55);
+  box1.set_y_center(0.5);
+  box1.set_height(0.5);
+  box1.set_width(0.5);
+  NormalizedRect box2;
+  box2.set_x_center(0.75);
+  box2.set_y_center(0.55);
+  box2.set_height(0.5);
+  box2.set_width(0.5);
+  TrackedDetection detection0(0, 0, box0);
+  TrackedDetection detection1(1, 0, box1);
+  TrackedDetection detection2(2, 0, box2);
+  EXPECT_TRUE(detection0.IsSameAs(detection1));
+  EXPECT_FALSE(detection0.IsSameAs(detection2));
+}
+
+TEST(TrackedDetectionTest, MergeLabels) {
+  TrackedDetection detection0(0, 0);
+  detection0.AddLabel("1", 1.f);
+  detection0.AddLabel("2", 2.f);
+  detection0.AddLabel("3", 3.f);
+
+  TrackedDetection detection1(1, 0);
+  detection1.AddLabel("1", 0.f);
+  detection1.AddLabel("2", 3.f);
+  detection1.AddLabel("4", 4.f);
+
+  detection0.MergeLabelScore(detection1);
+  const auto& labels = detection0.label_to_score_map();
+  EXPECT_EQ(labels.find("1")->second, 1.f);
+  EXPECT_EQ(labels.find("2")->second, 3.f);
+  EXPECT_EQ(labels.find("3")->second, 3.f);
+  EXPECT_EQ(labels.find("4")->second, 4.f);
+}
+
+} // namespace mediapipe
diff --git a/mediapipe/util/tracking/tracking.cc b/mediapipe/util/tracking/tracking.cc
new file mode 100644
index 000000000..3d4ea39e8
--- /dev/null
+++ b/mediapipe/util/tracking/tracking.cc
@@ -0,0 +1,3386 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "mediapipe/util/tracking/tracking.h" + +#include +#include +#include +#include +#include +#include + +#include "Eigen/Core" +#include "Eigen/Dense" +#include "Eigen/SVD" +#include "absl/algorithm/container.h" +#include "absl/memory/memory.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/opencv_calib3d_inc.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/util/tracking/flow_packager.pb.h" +#include "mediapipe/util/tracking/measure_time.h" +#include "mediapipe/util/tracking/motion_models.h" + +namespace mediapipe { + +bool MotionBox::print_motion_box_warnings_ = true; + +namespace { + +static constexpr int kNormalizationGridSize = 10; +constexpr float kShortScale = 16383.0f; +constexpr float kInvShortScale = 1.0f / kShortScale; + +// Motion vectors with weights larger than kMinInlierWeight are classified as +// inliers. +constexpr float kMinInlierWeight = 0.5f; +// Motion vectors with weights smaller han kMaxOutlierWeight are classified as +// outliers. +constexpr float kMaxOutlierWeight = 0.1f; + +// Lexicographic (first x, then y) comparator for MotionVector::pos. 
+struct MotionVectorComparator { + bool operator()(const MotionVector& lhs, const MotionVector& rhs) const { + return (lhs.pos.x() < rhs.pos.x()) || + (lhs.pos.x() == rhs.pos.x() && lhs.pos.y() < rhs.pos.y()); + } +}; + +void StoreInternalState(const std::vector& vectors, + const std::vector& inlier_weights, + float aspect_ratio, MotionBoxInternalState* internal) { + const int num_vectors = vectors.size(); + CHECK_EQ(num_vectors, inlier_weights.size()); + + float scale_x = 1.0f; + float scale_y = 1.0f; + ScaleFromAspect(aspect_ratio, true, &scale_x, &scale_y); + + internal->Clear(); + for (int k = 0; k < num_vectors; ++k) { + internal->add_pos_x(vectors[k]->pos.x() * scale_x); + internal->add_pos_y(vectors[k]->pos.y() * scale_y); + internal->add_dx(vectors[k]->object.x() * scale_x); + internal->add_dy(vectors[k]->object.y() * scale_y); + internal->add_camera_dx(vectors[k]->background.x() * scale_x); + internal->add_camera_dy(vectors[k]->background.y() * scale_y); + internal->add_track_id(vectors[k]->track_id); + internal->add_inlier_score(inlier_weights[k]); + } +} + +// protolite compatible MotionBoxState_TrackStatus_Name +std::string TrackStatusToString(MotionBoxState::TrackStatus status) { + switch (status) { + case MotionBoxState::BOX_UNTRACKED: + return "BOX_UNTRACKED"; + case MotionBoxState::BOX_EMPTY: + return "BOX_EMPTY"; + case MotionBoxState::BOX_NO_FEATURES: + return "BOX_NO_FEATURES"; + case MotionBoxState::BOX_TRACKED: + return "BOX_TRACKED"; + case MotionBoxState::BOX_DUPLICATED: + return "BOX_DUPLICATED"; + case MotionBoxState::BOX_TRACKED_OUT_OF_BOUND: + return "BOX_TRACKED_OUT_OF_BOUND"; + } + LOG(FATAL) << "Should not happen."; + return "UNKNOWN"; +} + +void ClearInlierState(MotionBoxState* state) { + state->clear_inlier_ids(); + state->clear_inlier_length(); + state->clear_inlier_id_match_pos(); + state->clear_outlier_ids(); + state->clear_outlier_id_match_pos(); +} + +// Returns orthogonal error system from motion_vec scaled by irls_scale. 
+std::pair ComputeIrlsErrorSystem( + const Vector2_f& irls_scale, const Vector2_f& motion_vec) { + Vector2_f irls_vec = motion_vec.Normalize(); + Vector2_f irls_vec_ortho = irls_vec.Ortho(); + return std::make_pair(irls_vec * irls_scale.x(), + irls_vec_ortho * irls_scale.y()); +} + +// Returns error for a given difference vector and error system. +float ErrorDiff(const Vector2_f& diff, + const std::pair& error_system) { + // Error system is an orthogonal system of originally unit vectors that were + // pre-multiplied by the corresponding irls scale. + // One can think of this function here as L2 norm *after* scaling the whole + // vector space w.r.t. the error system. + // + // In particular, we will project the vector diff onto this system and then + // scale the magnitude along each direction with the corresponding irls scale. + // As scalar multiplication is commutative with the dot product of vectors + // pre-multiplication of the scale with the error system is sufficient. + return Vector2_f(diff.DotProd(error_system.first), + diff.DotProd(error_system.second)) + .Norm(); +} + +// Returns true if point is within the inlier extent of the passed state (within +// small bound of 5% of frame diameter). +bool PointWithinInlierExtent(const Vector2_f pt, const MotionBoxState& state) { + // No extent known, assume to be inside. + if (state.prior_weight() == 0) { + return true; + } + + const float width_radius = state.inlier_width() * 0.55f; + const float height_radius = state.inlier_height() * 0.55f; + const float left = state.inlier_center_x() - width_radius; + const float right = state.inlier_center_x() + width_radius; + const float top = state.inlier_center_y() - height_radius; + const float bottom = state.inlier_center_y() + height_radius; + + return pt.x() >= left && pt.x() <= right && pt.y() >= top && pt.y() <= bottom; +} + +// Taken from MotionEstimation::LinearSimilarityL2SolveSystem. 
+bool LinearSimilarityL2Solve( + const std::vector& motion_vectors, + const std::vector& weights, LinearSimilarityModel* model) { + CHECK(model); + if (motion_vectors.size() < 4) { + LOG(ERROR) << "Requiring at least 4 input vectors for sufficient solve."; + return false; + } + + cv::Mat matrix(4, 4, CV_32F); + cv::Mat solution(4, 1, CV_32F); + cv::Mat rhs(4, 1, CV_32F); + + matrix.setTo(0); + rhs.setTo(0); + + CHECK_EQ(motion_vectors.size(), weights.size()); + for (int k = 0; k < motion_vectors.size(); ++k) { + const float x = motion_vectors[k]->pos.x(); + const float y = motion_vectors[k]->pos.y(); + const float w = weights[k]; + + // double J[2 * 4] = {1, 0, x, -y, + // 0, 1, y, x}; + // Compute J^t * J * w = {1, 0, x, -y + // 0, 1, y, x, + // x, y, xx+yy, 0, + // -y, x, 0, xx+yy} * w; + + const float x_w = x * w; + const float y_w = y * w; + const float xx_yy_w = (x * x + y * y) * w; + float* matrix_ptr = matrix.ptr(0); + matrix_ptr[0] += w; + matrix_ptr[2] += x_w; + matrix_ptr[3] += -y_w; + + matrix_ptr += 4; + matrix_ptr[1] += w; + matrix_ptr[2] += y_w; + matrix_ptr[3] += x_w; + + matrix_ptr += 4; + matrix_ptr[0] += x_w; + matrix_ptr[1] += y_w; + matrix_ptr[2] += xx_yy_w; + + matrix_ptr += 4; + matrix_ptr[0] += -y_w; + matrix_ptr[1] += x_w; + matrix_ptr[3] += xx_yy_w; + + float* rhs_ptr = rhs.ptr(0); + + const float m_x = motion_vectors[k]->object.x() * w; + const float m_y = motion_vectors[k]->object.y() * w; + + rhs_ptr[0] += m_x; + rhs_ptr[1] += m_y; + rhs_ptr[2] += x * m_x + y * m_y; + rhs_ptr[3] += -y * m_x + x * m_y; + } + + // Solution parameters p. + if (cv::solve(matrix, rhs, solution)) { + const float* ptr = solution.ptr(0); + model->set_dx(ptr[0]); + model->set_dy(ptr[1]); + model->set_a(ptr[2] + 1.0); // Identity parametrization. 
+ model->set_b(ptr[3]); + return true; + } else { + return false; + } +} + +// Taken from MotionEstimation::HomographyL2NormalEquationSolve +bool HomographyL2Solve(const std::vector& motion_vectors, + const std::vector& weights, Homography* model) { + CHECK(model); + + cv::Mat matrix(8, 8, CV_32F); + cv::Mat solution(8, 1, CV_32F); + cv::Mat rhs(8, 1, CV_32F); + + matrix.setTo(0); + rhs.setTo(0); + + // Matrix multiplications are hand-coded for speed improvements vs. + // opencv's cvGEMM calls. + CHECK_EQ(motion_vectors.size(), weights.size()); + for (int k = 0; k < motion_vectors.size(); ++k) { + const float x = motion_vectors[k]->pos.x(); + const float y = motion_vectors[k]->pos.y(); + const float w = weights[k]; + + const float xw = x * w; + const float yw = y * w; + const float xxw = x * x * w; + const float yyw = y * y * w; + const float xyw = x * y * w; + const float mx = x + motion_vectors[k]->object.x(); + const float my = y + motion_vectors[k]->object.y(); + + const float mxxyy = mx * mx + my * my; + // Jacobian + // double J[2 * 8] = {x, y, 1, 0, 0, 0, -x * m_x, -y * m_x, + // {0, 0, 0, x, y, 1, -x * m_y, -y * m_y} + // + // // Compute J^t * J * w = + // ( xx xy x 0 0 0 -xx*mx -xy*mx ) + // ( xy yy y 0 0 0 -xy*mx -yy*mx ) + // ( x y 1 0 0 0 -x*mx -y*mx ) + // ( 0 0 0 xx xy x -xx*my -xy*my ) + // ( 0 0 0 xy yy y -xy*my -yy*my ) + // ( 0 0 0 x y 1 -x*my -y*my ) + // ( -xx*mx -xy*mx -x*mx -xx*my -xy*my -x*my xx*mxxyy xy*mxxyy ) + // ( -xy*mx -yy*mx -y*mx -xy*my -yy*my -y*my xy*mxxyy yy*mxxyy ) * w + + // 1st row: xx xy x 0 0 0 -xx*mx -xy*mx + float* matrix_ptr = matrix.ptr(0); + matrix_ptr[0] += xxw; + matrix_ptr[1] += xyw; + matrix_ptr[2] += xw; + matrix_ptr[6] += -xxw * mx; + matrix_ptr[7] += -xyw * mx; + + // 2nd row: xy yy y 0 0 0 -xy*mx -yy*mx + matrix_ptr += 8; + matrix_ptr[0] += xyw; + matrix_ptr[1] += yyw; + matrix_ptr[2] += yw; + matrix_ptr[6] += -xyw * mx; + matrix_ptr[7] += -yyw * mx; + + // 3rd row: x y 1 0 0 0 -x*mx -y*mx + matrix_ptr += 8; + 
matrix_ptr[0] += xw; + matrix_ptr[1] += yw; + matrix_ptr[2] += w; + matrix_ptr[6] += -xw * mx; + matrix_ptr[7] += -yw * mx; + + // 4th row: 0 0 0 xx xy x -xx*my -xy*my + matrix_ptr += 8; + matrix_ptr[3] += xxw; + matrix_ptr[4] += xyw; + matrix_ptr[5] += xw; + matrix_ptr[6] += -xxw * my; + matrix_ptr[7] += -xyw * my; + + // 5th row: 0 0 0 xy yy y -xy*my -yy*my + matrix_ptr += 8; + matrix_ptr[3] += xyw; + matrix_ptr[4] += yyw; + matrix_ptr[5] += yw; + matrix_ptr[6] += -xyw * my; + matrix_ptr[7] += -yyw * my; + + // 6th row: 0 0 0 x y 1 -x*my -y*my + matrix_ptr += 8; + matrix_ptr[3] += xw; + matrix_ptr[4] += yw; + matrix_ptr[5] += w; + matrix_ptr[6] += -xw * my; + matrix_ptr[7] += -yw * my; + + // 7th row: -xx*mx -xy*mx -x*mx -xx*my -xy*my -x*my xx*mxxyy xy*mxxyy + matrix_ptr += 8; + matrix_ptr[0] += -xxw * mx; + matrix_ptr[1] += -xyw * mx; + matrix_ptr[2] += -xw * mx; + matrix_ptr[3] += -xxw * my; + matrix_ptr[4] += -xyw * my; + matrix_ptr[5] += -xw * my; + matrix_ptr[6] += xxw * mxxyy; + matrix_ptr[7] += xyw * mxxyy; + + // 8th row: -xy*mx -yy*mx -y*mx -xy*my -yy*my -y*my xy*mxxyy yy*mxxyy + matrix_ptr += 8; + matrix_ptr[0] += -xyw * mx; + matrix_ptr[1] += -yyw * mx; + matrix_ptr[2] += -yw * mx; + matrix_ptr[3] += -xyw * my; + matrix_ptr[4] += -yyw * my; + matrix_ptr[5] += -yw * my; + matrix_ptr[6] += xyw * mxxyy; + matrix_ptr[7] += yyw * mxxyy; + + // Right hand side: + // b = ( x + // y ) + // Compute J^t * b * w = + // ( x*mx y*mx mx x*my y*my my -x*mxxyy -y*mxxyy ) * w + + float* rhs_ptr = rhs.ptr(0); + rhs_ptr[0] += xw * mx; + rhs_ptr[1] += yw * mx; + rhs_ptr[2] += mx * w; + rhs_ptr[3] += xw * my; + rhs_ptr[4] += yw * my; + rhs_ptr[5] += my * w; + rhs_ptr[6] += -xw * mxxyy; + rhs_ptr[7] += -yw * mxxyy; + } + + // Solution parameters p. 
+ if (cv::solve(matrix, rhs, solution)) { + const float* ptr = solution.ptr(0); + *model = HomographyAdapter::FromFloatPointer(ptr, false); + return true; + } + + return false; +} + +void TransformQuadInMotionBoxState(const MotionBoxState& curr_pos, + const Homography& homography, + MotionBoxState* next_pos) { + CHECK(next_pos != nullptr); + if (!curr_pos.has_pos_x() || !curr_pos.has_pos_y() || !curr_pos.has_width() || + !curr_pos.has_height()) { + LOG(ERROR) << "Previous box does not exist, cannot transform!"; + return; + } + const int kQuadVerticesSize = 8; + const MotionBoxState::Quad* curr_quad_ptr = nullptr; + auto quad = absl::make_unique(); + if (curr_pos.has_quad() && + curr_pos.quad().vertices_size() == kQuadVerticesSize) { + curr_quad_ptr = &curr_pos.quad(); + } else { + std::array corners = + GetCornersOfRotatedRect(curr_pos, Vector2_f(1.0f, 1.0f)); + for (const auto& vertex : corners) { + quad->add_vertices(vertex.x()); + quad->add_vertices(vertex.y()); + } + curr_quad_ptr = quad.get(); + } + + MotionBoxState::Quad* next_pos_quad = next_pos->mutable_quad(); + bool next_pos_quad_existed = true; + if (next_pos_quad->vertices_size() != kQuadVerticesSize) { + next_pos_quad_existed = false; + next_pos_quad->clear_vertices(); + } + for (int i = 0; i < kQuadVerticesSize / 2; ++i) { + const auto& curr_pos_quad_vertex = Vector2_f( + curr_quad_ptr->vertices(i * 2), curr_quad_ptr->vertices(i * 2 + 1)); + const auto& next_pos_quad_vertex_diff = + HomographyAdapter::TransformPoint(homography, curr_pos_quad_vertex) - + curr_pos_quad_vertex; + if (next_pos_quad_existed) { + next_pos_quad->set_vertices(i * 2, next_pos_quad->vertices(i * 2) + + next_pos_quad_vertex_diff.x()); + next_pos_quad->set_vertices( + i * 2 + 1, + next_pos_quad->vertices(i * 2 + 1) + next_pos_quad_vertex_diff.y()); + } else { + next_pos_quad->add_vertices(curr_pos_quad_vertex.x() + + next_pos_quad_vertex_diff.x()); + next_pos_quad->add_vertices(curr_pos_quad_vertex.y() + + 
next_pos_quad_vertex_diff.y()); + } + } +} + +void UpdateStatePositionAndSizeFromStateQuad(MotionBoxState* box_state) { + Vector2_f top_left, bottom_right; + MotionBoxBoundingBox(*box_state, &top_left, &bottom_right); + box_state->set_width(bottom_right.x() - top_left.x()); + box_state->set_height(bottom_right.y() - top_left.y()); + box_state->set_pos_x(top_left.x()); + box_state->set_pos_y(top_left.y()); +} + +void ApplyCameraTrackingDegrees(const MotionBoxState& prev_state, + const Homography& background_model, + const TrackStepOptions& options, + const Vector2_f& domain, + MotionBoxState* next_state) { + // Determine center translation. + Vector2_f center(MotionBoxCenter(prev_state)); + const Vector2_f background_motion = + HomographyAdapter::TransformPoint(background_model, center) - center; + + if (options.tracking_degrees() == + TrackStepOptions::TRACKING_DEGREE_TRANSLATION || + !options.track_object_and_camera()) { + SetMotionBoxPosition(MotionBoxPosition(*next_state) + background_motion, + next_state); + return; + } + + // Transform corners and fit similarity. + // Overall idea: + // We got corners, x0, x1, x2, x3 of the rect in the previous location + // transform by background_model H. + // Assuming H = [A | t], their target location in the next frame + // is: + // xi' = A * xi + t for i = 0..3 + // We want to express the location of ci' w.r.t. to the translated center c, + // to decouple H from the translation of the center. + // In particular, we are looking for the translation of the center: + // c* = c + t* and points + // xi* = xi + t* + // Express location of xi' w.r.t. 
c: + // xi' = A(xi* - c*) + c* + // Axi + t = A(xi - c) + c + t* + // Axi + t = Axi - Ac + c + t* + // t* = Ac - c + t + std::array corners = MotionBoxCorners(prev_state); + std::vector corner_vecs(4); + std::vector corner_vec_ptrs(4); + + for (int k = 0; k < 4; ++k) { + MotionVector v; + v.pos = corners[k]; + v.object = HomographyAdapter::TransformPoint(background_model, corners[k]) - + corners[k]; + corner_vecs[k] = v; + corner_vec_ptrs[k] = &corner_vecs[k]; + } + + LinearSimilarityModel linear_similarity; + LinearSimilarityL2Solve(corner_vec_ptrs, std::vector(4, 1.0f), + &linear_similarity); + + SimilarityModel similarity = + LinearSimilarityAdapter::ToSimilarity(linear_similarity); + + // See above derivation, motion of the center is t* = Ac + t - c; + // Could also make the point that background_model instead of + // linear_similarity is more accurate here due to the fitting operation above. + SetMotionBoxPosition(MotionBoxPosition(*next_state) + + TransformPoint(linear_similarity, center) - center, + next_state); + + switch (options.tracking_degrees()) { + case TrackStepOptions::TRACKING_DEGREE_TRANSLATION: + break; + case TrackStepOptions::TRACKING_DEGREE_CAMERA_SCALE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_SCALE: + next_state->set_scale(next_state->scale() * similarity.scale()); + break; + case TrackStepOptions::TRACKING_DEGREE_CAMERA_ROTATION: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION: + next_state->set_rotation(next_state->rotation() + similarity.rotation()); + break; + case TrackStepOptions::TRACKING_DEGREE_CAMERA_ROTATION_SCALE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE: + next_state->set_scale(next_state->scale() * similarity.scale()); + next_state->set_rotation(next_state->rotation() + similarity.rotation()); + break; + case TrackStepOptions::TRACKING_DEGREE_CAMERA_PERSPECTIVE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE: + TransformQuadInMotionBoxState(prev_state, background_model, 
next_state); + if (prev_state.has_pnp_homography()) { + *(next_state->mutable_pnp_homography()) = HomographyAdapter::Compose( + prev_state.pnp_homography(), background_model); + UpdateStatePositionAndSizeFromStateQuad(next_state); + } + break; + } +} + +void ApplyObjectMotion(const MotionBoxState& curr_pos, + const Vector2_f& object_translation, + const LinearSimilarityModel& object_similarity, + const Homography& object_homography, + const TrackStepOptions& options, + MotionBoxState* next_pos) { + switch (options.tracking_degrees()) { + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_SCALE: { + Vector2_f center(MotionBoxCenter(curr_pos)); + // See ApplyCameraTrackingDegrees for derivation. + SetMotionBoxPosition(MotionBoxPosition(*next_pos) + + TransformPoint(object_similarity, center) - + center, + next_pos); + SimilarityModel similarity = + LinearSimilarityAdapter::ToSimilarity(object_similarity); + if (options.tracking_degrees() != + TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION) { + next_pos->set_scale(next_pos->scale() * similarity.scale()); + } + if (options.tracking_degrees() != + TrackStepOptions::TRACKING_DEGREE_OBJECT_SCALE) { + next_pos->set_rotation(next_pos->rotation() + similarity.rotation()); + } + break; + } + + case TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE: { + Vector2_f center(MotionBoxCenter(curr_pos)); + SetMotionBoxPosition( + MotionBoxPosition(*next_pos) + + HomographyAdapter::TransformPoint(object_homography, center) - + center, + next_pos); + TransformQuadInMotionBoxState(curr_pos, object_homography, next_pos); + break; + } + default: + // Use translation per default. 
+ SetMotionBoxPosition(MotionBoxPosition(*next_pos) + object_translation, + next_pos); + } +} + +bool IsBoxValid(const MotionBoxState& state) { + const float kMaxBoxHeight = + 10000.0f; // as relative to normalized [0, 1] space + const float kMaxBoxWidth = + 10000.0f; // as relative to normalized [0, 1] space + if (state.width() > kMaxBoxWidth) { + LOG(ERROR) << "box width " << state.width() << " too big"; + return false; + } + if (state.height() > kMaxBoxHeight) { + LOG(ERROR) << "box height " << state.height() << " too big"; + return false; + } + + return true; +} + +Homography PnpHomographyFromRotationAndTranslation(const cv::Mat& rvec, + const cv::Mat& tvec) { + cv::Mat homography_matrix(3, 3, CV_64F); + cv::Rodrigues(rvec, homography_matrix); + + for (int c = 0; c < 3; ++c) { + homography_matrix.at(c, 2) = tvec.at(c); + } + + // check non zero + homography_matrix /= homography_matrix.at(2, 2); + + return HomographyAdapter::FromDoublePointer(homography_matrix.ptr(0), + false); +} + +// Translate CameraIntrinsics proto to cv format. +void ConvertCameraIntrinsicsToCvMat( + const TrackStepOptions::CameraIntrinsics& camera_intrinsics, + cv::Mat* camera_mat, cv::Mat* dist_coef) { + *camera_mat = cv::Mat::eye(3, 3, CV_64F); + *dist_coef = cv::Mat::zeros(1, 5, CV_64FC1); + camera_mat->at(0, 0) = camera_intrinsics.fx(); + camera_mat->at(1, 1) = camera_intrinsics.fy(); + camera_mat->at(0, 2) = camera_intrinsics.cx(); + camera_mat->at(1, 2) = camera_intrinsics.cy(); + dist_coef->at(0) = camera_intrinsics.k0(); + dist_coef->at(1) = camera_intrinsics.k1(); + dist_coef->at(4) = camera_intrinsics.k2(); +} + +} // namespace. + +void ScaleFromAspect(float aspect, bool invert, float* scale_x, + float* scale_y) { + *scale_x = aspect >= 1.0f ? 1.0f : aspect; + *scale_y = aspect >= 1.0f ? 
1.0f / aspect : 1.0f; + if (invert) { + *scale_x = 1.0f / *scale_x; + *scale_y = 1.0f / *scale_y; + } +} + +std::array MotionBoxCorners(const MotionBoxState& state, + const Vector2_f& scaling) { + std::array transformed_corners; + if (state.has_quad() && state.quad().vertices_size() == 8) { + for (int k = 0; k < 4; ++k) { + transformed_corners[k] = Vector2_f(state.quad().vertices(2 * k), + state.quad().vertices(2 * k + 1)) + .MulComponents(scaling); + } + } else { + transformed_corners = GetCornersOfRotatedRect(state, scaling); + } + + return transformed_corners; +} + +bool MotionBoxLines(const MotionBoxState& state, const Vector2_f& scaling, + std::array* box_lines) { + CHECK(box_lines); + std::array corners = MotionBoxCorners(state, scaling); + std::array lines; + for (int k = 0; k < 4; ++k) { + const Vector2_f diff = corners[(k + 1) % 4] - corners[k]; + const Vector2_f normal = diff.Ortho().Normalize(); + box_lines->at(k).Set(normal.x(), normal.y(), -normal.DotProd(corners[k])); + // Double check that second point is on the computed line. + if (box_lines->at(k).DotProd(Vector3_f(corners[(k + 1) % 4].x(), + corners[(k + 1) % 4].y(), 1.0f)) >= + 0.02f) { + LOG(ERROR) << "box is abnormal. Line equations don't satisfy constraint"; + return false; + } + } + return true; +} + +void MotionBoxBoundingBox(const MotionBoxState& state, Vector2_f* top_left, + Vector2_f* bottom_right) { + CHECK(top_left); + CHECK(bottom_right); + + std::array corners = MotionBoxCorners(state); + + // Determine min and max across dimension. 
+ *top_left = Vector2_f(std::numeric_limits::max(), + std::numeric_limits::max()); + + *bottom_right = Vector2_f(std::numeric_limits::min(), + std::numeric_limits::min()); + + for (const Vector2_f& c : corners) { + top_left->x(std::min(top_left->x(), c.x())); + top_left->y(std::min(top_left->y(), c.y())); + bottom_right->x(std::max(bottom_right->x(), c.x())); + bottom_right->y(std::max(bottom_right->y(), c.y())); + } +} + +void MotionBoxInlierLocations(const MotionBoxState& state, + std::vector* inlier_pos) { + CHECK(inlier_pos); + inlier_pos->clear(); + for (int k = 0; k < state.inlier_id_match_pos_size(); k += 2) { + inlier_pos->push_back( + Vector2_f(state.inlier_id_match_pos(k) * kInvShortScale, + state.inlier_id_match_pos(k + 1) * kInvShortScale)); + } +} + +void MotionBoxOutlierLocations(const MotionBoxState& state, + std::vector* outlier_pos) { + CHECK(outlier_pos); + outlier_pos->clear(); + for (int k = 0; k < state.outlier_id_match_pos_size(); k += 2) { + outlier_pos->push_back( + Vector2_f(state.outlier_id_match_pos(k) * kInvShortScale, + state.outlier_id_match_pos(k + 1) * kInvShortScale)); + } +} + +std::array GetCornersOfRotatedRect(const MotionBoxState& state, + const Vector2_f& scaling) { + std::array transformed_corners; + // Scale and rotate 4 corner w.r.t. center. + const Vector2_f center = MotionBoxCenter(state).MulComponents(scaling); + const Vector2_f top_left = MotionBoxPosition(state).MulComponents(scaling); + const std::array corners{{ + top_left, + top_left + Vector2_f(0, state.height() * scaling.y()), + top_left + + Vector2_f(state.width() * scaling.x(), state.height() * scaling.y()), + top_left + Vector2_f(state.width() * scaling.x(), 0), + }}; + + const float cos_a = std::cos(state.rotation()); + const float sin_a = std::sin(state.rotation()); + for (int k = 0; k < 4; ++k) { + // Scale and rotate w.r.t. center. 
+ const Vector2_f rad = corners[k] - center; + const Vector2_f rot_rad(cos_a * rad.x() - sin_a * rad.y(), + sin_a * rad.x() + cos_a * rad.y()); + const Vector2_f transformed_corner = center + rot_rad * state.scale(); + transformed_corners[k] = transformed_corner; + } + + return transformed_corners; +} + +void InitializeQuadInMotionBoxState(MotionBoxState* state) { + CHECK(state != nullptr); + // Every quad has 4 vertices. Each vertex has x and y 2 coordinates. So + // a total of 8 floating point values. + if (state->quad().vertices_size() != 8) { + MotionBoxState::Quad* quad_ptr = state->mutable_quad(); + quad_ptr->clear_vertices(); + std::array corners = + GetCornersOfRotatedRect(*state, Vector2_f(1.0f, 1.0f)); + for (const auto& vertex : corners) { + quad_ptr->add_vertices(vertex.x()); + quad_ptr->add_vertices(vertex.y()); + } + } +} + +void InitializeInliersOutliersInMotionBoxState(const TrackingData& tracking, + MotionBoxState* state) { + MotionVectorFrame mvf; // Holds motion from current to previous frame. + MotionVectorFrameFromTrackingData(tracking, &mvf); + + std::array box_lines; + if (!MotionBoxLines(*state, Vector2_f(1.0f, 1.0f), &box_lines)) { + LOG(ERROR) << "Error in computing MotionBoxLines."; + return; + } + + // scale for motion vectors. 
+ float scale_x, scale_y; + ScaleFromAspect(mvf.aspect_ratio, true, &scale_x, &scale_y); + + state->clear_inlier_ids(); + state->clear_inlier_length(); + state->clear_outlier_ids(); + + float inlier_center_x = 0.0f; + float inlier_center_y = 0.0f; + int cnt_inlier = 0; + + float min_x = std::numeric_limits::max(); + float max_x = -std::numeric_limits::max(); + float min_y = std::numeric_limits::max(); + float max_y = -std::numeric_limits::max(); + + for (const auto& motion_vec : mvf.motion_vectors) { + const float pos_x = motion_vec.pos.x() * scale_x; + const float pos_y = motion_vec.pos.y() * scale_y; + + bool insider = true; + + for (const Vector3_f& line : box_lines) { + if (line.DotProd(Vector3_f(pos_x, pos_y, 1.0f)) > 0.0f) { + insider = false; + break; + } + } + + if (insider) { + ++cnt_inlier; + inlier_center_x += pos_x; + inlier_center_y += pos_y; + + min_x = std::min(min_x, pos_x); + max_x = std::max(max_x, pos_x); + min_y = std::min(min_y, pos_y); + max_y = std::max(max_y, pos_y); + + state->add_inlier_ids(motion_vec.track_id); + state->add_inlier_length(1.0f); + } else { + state->add_outlier_ids(motion_vec.track_id); + } + } + + if (cnt_inlier) { + state->set_prior_weight(1.0f); + state->set_inlier_center_x(inlier_center_x / cnt_inlier); + state->set_inlier_center_y(inlier_center_y / cnt_inlier); + state->set_inlier_width(max_x - min_x); + state->set_inlier_height(max_y - min_y); + } +} + +void InitializePnpHomographyInMotionBoxState( + const TrackingData& tracking, const TrackStepOptions& track_step_options, + MotionBoxState* state) { + // Only happen when `quad` and `aspect_ratio` are all specified. 
+ if (!state->has_quad()) { + VLOG(1) << "Skip pnp tracking since box does not contain quad info."; + return; + } + + const int kQuadCornersSize = 4; + CHECK_EQ(state->quad().vertices_size(), kQuadCornersSize * 2); + float scale_x, scale_y; + ScaleFromAspect(tracking.frame_aspect(), false, &scale_x, &scale_y); + std::vector corners_2d(kQuadCornersSize); + if (track_step_options.has_camera_intrinsics()) { + const auto& camera = track_step_options.camera_intrinsics(); + for (int c = 0; c < kQuadCornersSize; ++c) { + corners_2d[c].x = state->quad().vertices(c * 2) * camera.w(); + corners_2d[c].y = state->quad().vertices(c * 2 + 1) * camera.h(); + } + + cv::Mat camera_mat, dist_coef; + ConvertCameraIntrinsicsToCvMat(camera, &camera_mat, &dist_coef); + cv::undistortPoints(corners_2d, corners_2d, camera_mat, dist_coef); + } else { + const float center_x = scale_x * 0.5f; + const float center_y = scale_y * 0.5f; + for (int c = 0; c < kQuadCornersSize; ++c) { + corners_2d[c].x = state->quad().vertices(c * 2) * scale_x - center_x; + corners_2d[c].y = state->quad().vertices(c * 2 + 1) * scale_y - center_y; + } + } + + if (!state->has_aspect_ratio()) { + if (!track_step_options.forced_pnp_tracking()) { + VLOG(1) << "Skip pnp tracking since aspect ratio is unknown and " + "estimation of it is not forced."; + return; + } + const float u2_u0 = corners_2d[2].x - corners_2d[0].x; + const float v2_v0 = corners_2d[2].y - corners_2d[0].y; + const float u3_u1 = corners_2d[3].x - corners_2d[1].x; + const float v3_v1 = corners_2d[3].y - corners_2d[1].y; + + constexpr float kEpsilon = 1e-6f; + const float denominator = u2_u0 * v3_v1 - v2_v0 * u3_u1; + if (std::abs(denominator) < kEpsilon) { + LOG(WARNING) << "Zero denominator. 
Failed calculating aspect ratio.";
+      return;
+    }
+
+    float s[kQuadCornersSize];
+    s[0] = ((corners_2d[2].x - corners_2d[3].x) * v3_v1 -
+            (corners_2d[2].y - corners_2d[3].y) * u3_u1) *
+           2.0f / denominator;
+    s[1] = -(u2_u0 * (corners_2d[2].y - corners_2d[3].y) -
+             v2_v0 * (corners_2d[2].x - corners_2d[3].x)) *
+           2.0f / denominator;
+    s[2] = 2.0f - s[0];
+    s[3] = 2.0f - s[1];
+
+    std::vector<Vector3_f> corners(kQuadCornersSize);
+    for (int i = 0; i < kQuadCornersSize; ++i) {
+      if (s[i] <= 0) {
+        LOG(WARNING) << "Negative scale. Failed calculating aspect ratio.";
+        return;
+      }
+      corners[i] =
+          Vector3_f(corners_2d[i].x * s[i], corners_2d[i].y * s[i], s[i]);
+    }
+
+    const Vector3_f width_edge = corners[2] - corners[1];
+    const Vector3_f height_edge = corners[0] - corners[1];
+    const float height_norm = height_edge.Norm();
+    const float width_norm = width_edge.Norm();
+    if (height_norm < kEpsilon || width_norm < kEpsilon) {
+      LOG(WARNING)
+          << "abnormal 3d quadrangle. Failed calculating aspect ratio.";
+      return;
+    }
+
+    constexpr float kMaxCosAngle = 0.258819;  // which is cos(75 deg)
+    if (width_edge.DotProd(height_edge) / height_norm / width_norm >
+        kMaxCosAngle) {
+      LOG(WARNING)
+          << "abnormal 3d quadrangle. 
Failed calculating aspect ratio.";
+      return;
+    }
+
+    state->set_aspect_ratio(width_norm / height_norm);
+  }
+
+  CHECK_GT(state->aspect_ratio(), 0.0f);
+
+  const float half_width = state->aspect_ratio();
+  const float half_height = 1.0f;
+  const std::vector<cv::Point3f> corners_3d{
+      cv::Point3f(-half_width, -half_height, 0.0f),
+      cv::Point3f(-half_width, half_height, 0.0f),
+      cv::Point3f(half_width, half_height, 0.0f),
+      cv::Point3f(half_width, -half_height, 0.0f),
+  };
+
+  std::vector<MotionVector> motion_vectors(kQuadCornersSize);
+  std::vector<const MotionVector*> motion_vector_pointers(kQuadCornersSize);
+
+  for (int c = 0; c < kQuadCornersSize; ++c) {
+    motion_vectors[c].pos = Vector2_f(corners_3d[c].x, corners_3d[c].y);
+    motion_vectors[c].object =
+        Vector2_f(corners_2d[c].x, corners_2d[c].y) - motion_vectors[c].pos;
+
+    motion_vector_pointers[c] = &motion_vectors[c];
+  }
+
+  const std::vector<float> weights(kQuadCornersSize, 1.0f);
+  HomographyL2Solve(motion_vector_pointers, weights,
+                    state->mutable_pnp_homography());
+}
+
+// Scales velocity and all other velocity dependent fields according to
+void ScaleStateTemporally(float temporal_scale, MotionBoxState* state) { + state->set_dx(state->dx() * temporal_scale); + state->set_dy(state->dy() * temporal_scale); + state->set_kinetic_energy(state->kinetic_energy() * temporal_scale); +} + +void ScaleStateAspect(float aspect, bool invert, MotionBoxState* state) { + float scale_x = 1.0f; + float scale_y = 1.0f; + ScaleFromAspect(aspect, invert, &scale_x, &scale_y); + + if (state->has_quad() && state->quad().vertices_size() == 8) { + for (int i = 0; i < 4; ++i) { + float curr_val = state->quad().vertices(i * 2); + state->mutable_quad()->set_vertices(i * 2, curr_val * scale_x); + curr_val = state->quad().vertices(i * 2 + 1); + state->mutable_quad()->set_vertices(i * 2 + 1, curr_val * scale_y); + } + } + + state->set_pos_x(state->pos_x() * scale_x); + state->set_pos_y(state->pos_y() * scale_y); + state->set_width(state->width() * scale_x); + state->set_height(state->height() * scale_y); + state->set_dx(state->dx() * scale_x); + state->set_dy(state->dy() * scale_y); + state->set_inlier_center_x(state->inlier_center_x() * scale_x); + state->set_inlier_center_y(state->inlier_center_y() * scale_y); + state->set_inlier_width(state->inlier_width() * scale_x); + state->set_inlier_height(state->inlier_height() * scale_y); +} + +MotionVector MotionVector::FromInternalState( + const MotionBoxInternalState& internal, int index) { + CHECK_LT(index, internal.pos_x_size()); + MotionVector v; + v.pos = Vector2_f(internal.pos_x(index), internal.pos_y(index)); + v.object = Vector2_f(internal.dx(index), internal.dy(index)); + v.background = + Vector2_f(internal.camera_dx(index), internal.camera_dy(index)); + v.track_id = internal.track_id(index); + return v; +} + +void MotionBox::ResetAtFrame(int frame, const MotionBoxState& state) { + states_.clear(); + queue_start_ = frame; + + states_.push_back(state); + states_.back().set_track_status(MotionBoxState::BOX_TRACKED); + // Init inlier dimensions from state if not set. 
+ if (states_.back().inlier_width() == 0 || + states_.back().inlier_height() == 0) { + states_.back().set_inlier_width(state.width()); + states_.back().set_inlier_height(state.height()); + } + + initial_state_ = state; +} + +bool MotionBox::TrackStep(int from_frame, + const MotionVectorFrame& motion_vectors, + bool forward) { + if (!TrackableFromFrame(from_frame)) { + LOG(WARNING) << "Tracking requested for initial position that is not " + << "trackable."; + return false; + } + const int queue_pos = from_frame - queue_start_; + + MotionBoxState new_state; + if (motion_vectors.is_duplicated) { + // Do not track or update the state, just copy. + new_state = states_[queue_pos]; + new_state.set_track_status(MotionBoxState::BOX_DUPLICATED); + } else { + // Compile history and perform tracking. + std::vector history; + constexpr int kHistorySize = 10; + if (forward) { + for (int k = queue_pos - 1; k >= std::max(0, queue_pos - kHistorySize); + --k) { + history.push_back(&states_[k]); + } + } else { + for (int k = queue_pos + 1; + k <= std::min(states_.size() - 1, queue_pos + kHistorySize); + ++k) { + history.push_back(&states_[k]); + } + } + + TrackStepImpl(from_frame, states_[queue_pos], motion_vectors, history, + &new_state); + } + + if (new_state.track_status() < MotionBoxState::BOX_TRACKED) { + new_state.set_tracking_confidence(0.0f); + } + if (!new_state.has_tracking_confidence()) { + // In this case, track status should be >= MotionBoxState::BOX_TRACKED + new_state.set_tracking_confidence(1.0f); + } + + VLOG(1) << "Track status from frame " << from_frame << ": " + << TrackStatusToString(new_state.track_status()) + << ". 
Has quad: " << new_state.has_quad(); + + constexpr float kFailureDisparity = 0.8f; + if (new_state.track_status() >= MotionBoxState::BOX_TRACKED) { + if (forward) { + const int new_pos = queue_pos + 1; + if (new_pos < states_.size()) { + states_[new_pos] = new_state; + } else { + states_.push_back(new_state); + } + + // Check for successive tracking failures of in bound boxes. + if (new_pos >= options_.max_track_failures()) { + int num_track_errors = 0; + // Cancel at the N + 1 tracking failure. + for (int f = new_pos - options_.max_track_failures(); f <= new_pos; + ++f) { + if (states_[f].track_status() != + MotionBoxState::BOX_TRACKED_OUT_OF_BOUND) { + num_track_errors += (fabs(states_[f].motion_disparity()) * + states_[f].prior_weight() > + kFailureDisparity); + } + } + + if (num_track_errors >= options_.max_track_failures()) { + LOG_IF(INFO, print_motion_box_warnings_) + << "Tracking failed during max track failure " + << "verification."; + states_[new_pos].set_track_status(MotionBoxState::BOX_UNTRACKED); + return false; + } + } + } else { + // Backward tracking. + int new_pos = queue_pos - 1; + if (new_pos >= 0) { + states_[new_pos] = new_state; + } else { + states_.push_front(new_state); + --queue_start_; + new_pos = 0; + } + + // Check for successive tracking failures. + if (new_pos + options_.max_track_failures() + 1 < states_.size()) { + int num_track_errors = 0; + // Cancel at the N + 1 tracking failure. 
+ for (int f = new_pos; f <= new_pos + options_.max_track_failures(); + ++f) { + if (states_[f].track_status() != + MotionBoxState::BOX_TRACKED_OUT_OF_BOUND) { + num_track_errors += (fabs(states_[f].motion_disparity()) * + states_[f].prior_weight() > + kFailureDisparity); + } + } + + if (num_track_errors >= options_.max_track_failures()) { + LOG_IF(INFO, print_motion_box_warnings_) + << "Tracking failed during max track failure " + << "verification."; + states_[new_pos].set_track_status(MotionBoxState::BOX_UNTRACKED); + return false; + } + } + } + + // Signal track success. + return true; + } else { + LOG_IF(WARNING, print_motion_box_warnings_) + << "Tracking error at " << from_frame + << " status : " << TrackStatusToString(new_state.track_status()); + return false; + } +} + +namespace { + +Vector2_f SpatialPriorPosition(const Vector2_f& location, + const MotionBoxState& state) { + const int grid_size = state.spatial_prior_grid_size(); + return Vector2_f( + Clamp((location.x() - state.pos_x()) / state.width(), 0, 1) * + (grid_size - 1), + Clamp((location.y() - state.pos_y()) / state.height(), 0, 1) * + (grid_size - 1)); +} + +// Creates spatial prior for current set of inlier vectors and blends +// it with previous prior (based on blend_prior). If interpolate is set to true, +// uses more accurate interpolation into bins, instead of nearest neighbor +// interpolation. If use_next_position is set to true, the position in +// the next/previous frame is used instead of the current one. 
+void ComputeSpatialPrior(bool interpolate, bool use_next_position,
+                         float blend_prior, MotionBoxState* update_pos) {
+  const int grid_size = update_pos->spatial_prior_grid_size();
+
+  std::vector<float> old_prior(update_pos->spatial_prior().begin(),
+                               update_pos->spatial_prior().end());
+  std::vector<float> old_confidence(update_pos->spatial_confidence().begin(),
+                                    update_pos->spatial_confidence().end());
+
+  CHECK_EQ(old_confidence.size(), old_prior.size());
+  CHECK(old_confidence.empty() ||
+        grid_size * grid_size == old_confidence.size())
+      << "Empty or priors of constant size expected";
+
+  update_pos->clear_spatial_prior();
+  update_pos->clear_spatial_confidence();
+  auto* spatial_prior = update_pos->mutable_spatial_prior();
+  auto* spatial_confidence = update_pos->mutable_spatial_confidence();
+  spatial_prior->Reserve(grid_size * grid_size);
+  spatial_confidence->Reserve(grid_size * grid_size);
+
+  for (int k = 0; k < grid_size * grid_size; ++k) {
+    spatial_prior->AddAlreadyReserved(0);
+    spatial_confidence->AddAlreadyReserved(0);
+  }
+
+  // Aggregate inlier weights (0 = outlier, 1 = total inlier) across grid.
+  const MotionBoxInternalState& internal = update_pos->internal();
+  const int num_elems = internal.pos_x_size();
+
+  for (int k = 0; k < num_elems; ++k) {
+    MotionVector vec = MotionVector::FromInternalState(internal, k);
+    const Vector2_f pos =
+        use_next_position ? vec.MatchLocation() : vec.Location();
+    float weight = internal.inlier_score(k);
+
+    const Vector2_f grid_pos = SpatialPriorPosition(pos, *update_pos);
+
+    if (use_next_position) {
+      // Check for out of bound and skip. 
+      if (grid_pos.x() < 0 || grid_pos.y() < 0 ||
+          grid_pos.x() > update_pos->spatial_prior_grid_size() - 1 ||
+          grid_pos.y() > update_pos->spatial_prior_grid_size() - 1) {
+        continue;
+      }
+    }
+
+    if (interpolate) {
+      const int int_x = static_cast<int>(grid_pos.x());
+      const int int_y = static_cast<int>(grid_pos.y());
+
+      CHECK_GE(grid_pos.x(), 0) << pos.x() << ", " << update_pos->pos_x();
+      CHECK_GE(grid_pos.y(), 0);
+      CHECK_LE(grid_pos.x(), grid_size - 1);
+      CHECK_LE(grid_pos.y(), grid_size - 1);
+
+      const float dx = grid_pos.x() - int_x;
+      const float dy = grid_pos.y() - int_y;
+      const float dx_1 = 1.0f - dx;
+      const float dy_1 = 1.0f - dy;
+      const int stride = static_cast<int>(dx != 0);
+
+      int grid_pos = int_y * grid_size + int_x;
+
+      // Bilinear interpolation. Total sum of weights across all 4 additions
+      // (for prior and confidence each), is one.
+      *spatial_prior->Mutable(grid_pos) += dx_1 * dy_1 * weight;
+      *spatial_confidence->Mutable(grid_pos) += dx_1 * dy_1;
+
+      *spatial_prior->Mutable(grid_pos + stride) += dx * dy_1 * weight;
+      *spatial_confidence->Mutable(grid_pos + stride) += dx * dy_1;
+
+      grid_pos += (dy != 0) * grid_size;
+      *spatial_prior->Mutable(grid_pos) += dx_1 * dy * weight;
+      *spatial_confidence->Mutable(grid_pos) += dx_1 * dy;
+
+      *spatial_prior->Mutable(grid_pos + stride) += dx * dy * weight;
+      *spatial_confidence->Mutable(grid_pos + stride) += dx * dy;
+    } else {
+      // Nearest neighbor.
+      const int grid_bin = static_cast<int>(grid_pos.y() + 0.5f) * grid_size +
+                           static_cast<int>(grid_pos.x() + 0.5f);
+      *spatial_prior->Mutable(grid_bin) += weight;
+      *spatial_confidence->Mutable(grid_bin) += 1;
+    }
+  }
+
+  // Normalize, i.e. max truncation.
+  float total_prior_difference = 0;
+  float weight_sum = 0;
+  for (int k = 0; k < grid_size * grid_size; ++k) {
+    // Convert aggregated inlier weights to grid cell prior.
+    // Here we consider a grid cell to be an inlier, if at least two
+    // 2 inliers within that cell where found. 
+ *spatial_prior->Mutable(k) = std::min(1.0f, spatial_prior->Get(k) * 0.5f); + *spatial_confidence->Mutable(k) = + std::min(1.0f, spatial_confidence->Get(k) * 0.5f); + + if (!old_prior.empty()) { + // Truncated error, consider a difference of 0.2 within normal + // update range. + const float difference = std::max( + 0.f, fabs(update_pos->spatial_prior(k) - old_prior[k]) - 0.2f); + // Weight error by confidence. + total_prior_difference += difference * update_pos->spatial_confidence(k); + weight_sum += update_pos->spatial_confidence(k); + + // Blend confidence with previous confidence. + const float curr_confidence = + update_pos->spatial_confidence(k) * (1.0f - blend_prior); + const float prev_confidence = old_confidence[k] * blend_prior; + + float summed_confidence = curr_confidence + prev_confidence; + const float denom = + summed_confidence > 0 ? 1.0f / summed_confidence : 1.0f; + + // Update prior and confidence as weighted linear combination between + // current and previous prior. + *spatial_prior->Mutable(k) = + (update_pos->spatial_prior(k) * curr_confidence + + old_prior[k] * prev_confidence) * + denom; + + *spatial_confidence->Mutable(k) = + (update_pos->spatial_confidence(k) * curr_confidence + + prev_confidence * prev_confidence) * + denom; + } + } + + update_pos->set_prior_diff(std::sqrt( + total_prior_difference * (weight_sum > 0 ? 1.0f / weight_sum : 1.0f))); +} + +} // namespace. + +void MotionBox::GetStartPosition(const MotionBoxState& curr_pos, + float aspect_ratio, float* expand_mag, + Vector2_f* top_left, + Vector2_f* bottom_right) const { + CHECK(top_left); + CHECK(bottom_right); + CHECK(expand_mag); + + MotionBoxBoundingBox(curr_pos, top_left, bottom_right); + + if (curr_pos.has_pnp_homography()) { + *expand_mag = 0.0f; + } else { + // Expaned box by the specified minimum expansion_size. For fast moving + // objects, we ensure that magnitude is twice the box velocity, but not more + // than 1/4 of the box diameter. 
+    *expand_mag = std::max(options_.expansion_size(),
+                           std::min(MotionBoxSize(curr_pos).Norm() * 0.25f,
+                                    MotionBoxVelocity(curr_pos).Norm() * 2.0f));
+  }
+
+  // Expansion magnitude is not non-uniformly scaled w.r.t. aspect ratio
+  // to ensure inclusion test in GetVectorsAndWeights can assume uniform
+  // explansion magnitude.
+  const Vector2_f expand = Vector2_f(*expand_mag, *expand_mag);
+  *top_left -= expand;
+  *bottom_right += expand;
+}
+
+void MotionBox::GetSpatialGaussWeights(const MotionBoxState& box_state,
+                                       const Vector2_f& inv_box_domain,
+                                       float* spatial_gauss_x,
+                                       float* spatial_gauss_y) const {
+  CHECK(spatial_gauss_x);
+  CHECK(spatial_gauss_y);
+
+  // Space sigma depends on how much the tracked object fills the rectangle.
+  // We get this information from the inlier extent of the previous
+  // estimation.
+  // Motivation: Choose sigma s such that the inlier domain equals 90% coverage.
+  // i.e. using z-score one sided of 95% = 1.65
+  // s * 1.65 = domain
+  // ==> s = domain / 1.65f
+  const float space_sigma_x = std::max(
+      options_.spatial_sigma(), box_state.inlier_width() * inv_box_domain.x() *
+                                    0.5f * box_state.prior_weight() / 1.65f);
+  const float space_sigma_y =
+      std::max(options_.spatial_sigma(), box_state.inlier_height() *
+                                             inv_box_domain.y() * 0.5f *
+                                             box_state.prior_weight() / 1.65f);
+
+  *spatial_gauss_x = -0.5f / (space_sigma_x * space_sigma_x);
+  *spatial_gauss_y = -0.5f / (space_sigma_y * space_sigma_y);
+}
+
+// Computes for each vector its 2D grid position for a grid spanning
+// the domain top_left to bottom_right.
+// Note: Passed vectors must lie within the domain or function will return false
+// for error.
+template <int kGridSize>
+bool ComputeGridPositions(const Vector2_f& top_left,
+                          const Vector2_f& bottom_right,
+                          const std::vector<const MotionVector*>& vectors,
+                          std::vector<Vector2_f>* grid_positions) {
+  CHECK(grid_positions);
+
+  // Slightly larger domain to avoid boundary issues. 
+  const Vector2_f inv_grid_domain(
+      (1.0f - 1e-3f) / (bottom_right.x() - top_left.x()),
+      (1.0f - 1e-3f) / (bottom_right.y() - top_left.y()));
+
+  grid_positions->clear();
+  grid_positions->reserve(vectors.size());
+  for (const MotionVector* vec : vectors) {
+    // Get grid position. Note grid is never rotated, but we only use it for
+    // density estimation.
+    const Vector2_f grid_pos =
+        (vec->pos - top_left).MulComponents(inv_grid_domain) * (kGridSize - 1);
+    if (grid_pos.x() < 0 || grid_pos.y() < 0 || grid_pos.x() > kGridSize ||
+        grid_pos.y() > kGridSize) {
+      return false;
+    }
+
+    grid_positions->push_back(grid_pos);
+  }
+
+  return true;
+}
+
+template <int kGridSize>
+void AddToGrid(const Vector2_f& grid_pos, std::vector<float>* grid) {
+  const float grid_x = grid_pos.x();
+  const float grid_y = grid_pos.y();
+
+  const int int_grid_x = grid_x;
+  const int int_grid_y = grid_y;
+
+  const float dx = grid_x - int_grid_x;
+  const float dy = grid_y - int_grid_y;
+  const float dxdy = dx * dy;
+  const float dx_plus_dy = dx + dy;
+
+  const int inc_x = dx != 0;
+  const int inc_y = dy != 0;
+
+  int bin_idx = int_grid_y * kGridSize + int_grid_x;
+  // (1 - dx)(1 - dy) = 1 - (dx + dy) + dx*dy
+  (*grid)[bin_idx] += 1 - dx_plus_dy + dxdy;
+  // dx * (1 - dy) = dx - dxdy
+  (*grid)[bin_idx + inc_x] += dx - dxdy;
+
+  bin_idx += kGridSize * inc_y;
+  // (1 - dx) * dy = dy - dxdy
+  (*grid)[bin_idx] += dy - dxdy;
+  (*grid)[bin_idx + inc_x] += dxdy;
+}
+
+template <int kGridSize>
+float SampleFromGrid(const Vector2_f& grid_pos,
+                     const std::vector<float>& grid) {
+  const float grid_x = grid_pos.x();
+  const float grid_y = grid_pos.y();
+
+  const int int_grid_x = grid_x;
+  const int int_grid_y = grid_y;
+
+  const float dx = grid_x - int_grid_x;
+  const float dy = grid_y - int_grid_y;
+  const float dxdy = dx * dy;
+  const float dx_plus_dy = dx + dy;
+  int inc_x = dx != 0;
+  int inc_y = dy != 0;
+
+  float normalizer = 0;
+  int bin_idx = int_grid_y * kGridSize + int_grid_x;
+
+  // See above. 
+ normalizer += grid[bin_idx] * (1 - dx_plus_dy + dxdy); + normalizer += grid[bin_idx + inc_x] * (dx - dxdy); + + bin_idx += kGridSize * inc_y; + normalizer += grid[bin_idx] * (dy - dxdy); + normalizer += grid[bin_idx + inc_x] * dxdy; + + const float inv_normalizer = normalizer > 0 ? 1.0f / normalizer : 0; + // Density should always decrease weight; never increase. + return std::min(1.0f, inv_normalizer); +} + +MotionBox::DistanceWeightsComputer::DistanceWeightsComputer( + const MotionBoxState& initial_state, const MotionBoxState& current_state, + const TrackStepOptions& options) { + tracking_degrees_ = options.tracking_degrees(); + const Vector2_f box_domain(current_state.width() * current_state.scale(), + current_state.height() * current_state.scale()); + CHECK_GT(box_domain.x(), 0.0f); + CHECK_GT(box_domain.y(), 0.0f); + inv_box_domain_ = Vector2_f(1.0f / box_domain.x(), 1.0f / box_domain.y()); + + // Space sigma depends on how much the tracked object fills the rectangle. + // We get this information from the inlier extent of the previous + // estimation. + // Motivation: Choose sigma s such that the inlier domain equals 90% + // coverage. + // i.e. 
using z-score one sided of 95% = 1.65 + // s * 1.65 = domain + // ==> s = domain / 1.65f + const float space_sigma_x = + std::max(options.spatial_sigma(), + current_state.inlier_width() * inv_box_domain_.x() * 0.5f * + current_state.prior_weight() / 1.65f); + + const float space_sigma_y = + std::max(options.spatial_sigma(), + current_state.inlier_height() * inv_box_domain_.y() * 0.5f * + current_state.prior_weight() / 1.65f); + + spatial_gauss_x_ = -0.5f / (space_sigma_x * space_sigma_x); + spatial_gauss_y_ = -0.5f / (space_sigma_y * space_sigma_y); + + if (tracking_degrees_ == TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION || + tracking_degrees_ == + TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE) { + cos_neg_a_ = std::cos(-current_state.rotation()); + sin_neg_a_ = std::sin(-current_state.rotation()); + if (std::abs(current_state.rotation()) > 0.01f) { + is_large_rotation_ = true; + } + } + + // Compute box center as blend between geometric center and inlier center. + constexpr float kMaxBoxCenterBlendWeight = 0.5f; + box_center_ = + Lerp(MotionBoxCenter(current_state), InlierCenter(current_state), + std::min(kMaxBoxCenterBlendWeight, current_state.prior_weight())); + if (tracking_degrees_ == + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE) { + CHECK(initial_state.has_quad()); + CHECK(current_state.has_quad()); + homography_ = + ComputeHomographyFromQuad(current_state.quad(), initial_state.quad()); + box_center_transformed_ = + HomographyAdapter::TransformPoint(homography_, box_center_); + } +} + +float MotionBox::DistanceWeightsComputer::ComputeDistanceWeight( + const MotionVector& test_vector) { + // Distance weighting. 
+  Vector2_f diff_center;
+  if (tracking_degrees_ ==
+      TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE) {
+    Vector2_f test_vector_transformed =
+        HomographyAdapter::TransformPoint(homography_, test_vector.pos);
+    diff_center = test_vector_transformed - box_center_transformed_;
+  } else {
+    diff_center = test_vector.pos - box_center_;
+    if (is_large_rotation_) {
+      // Rotate difference vector to normalized domain.
+      diff_center.Set(
+          cos_neg_a_ * diff_center.x() - sin_neg_a_ * diff_center.y(),
+          sin_neg_a_ * diff_center.x() + cos_neg_a_ * diff_center.y());
+    }
+  }
+
+  const Vector2_f diff = diff_center.MulComponents(inv_box_domain_);
+  // Regular gaussian with variance in each direction, assuming directions
+  // are indpendent.
+  float weight = std::exp(diff.x() * diff.x() * spatial_gauss_x_ +
+                          diff.y() * diff.y() * spatial_gauss_y_);
+  return weight;
+}
+
+Homography MotionBox::DistanceWeightsComputer::ComputeHomographyFromQuad(
+    const MotionBoxState::Quad& src_quad,
+    const MotionBoxState::Quad& dst_quad) {
+  std::vector<float> src_quad_vec(8);
+  std::vector<float> dst_quad_vec(8);
+  for (int i = 0; i < 8; ++i) {
+    src_quad_vec[i] = src_quad.vertices(i);
+    dst_quad_vec[i] = dst_quad.vertices(i);
+  }
+  // Construct the matrix
+  Eigen::Matrix<float, 8, 8> A = Eigen::Matrix<float, 8, 8>::Zero();
+  for (int i = 0; i < 4; ++i) {
+    const int r0 = 2 * i;
+    const int r1 = 2 * i + 1;
+    A(r0, 0) = src_quad_vec[r0];
+    A(r0, 1) = src_quad_vec[r1];
+    A(r0, 2) = 1.f;
+    A(r0, 6) = -src_quad_vec[r0] * dst_quad_vec[r0];
+    A(r0, 7) = -src_quad_vec[r1] * dst_quad_vec[r0];
+    A(r1, 3) = src_quad_vec[r0];
+    A(r1, 4) = src_quad_vec[r1];
+    A(r1, 5) = 1.f;
+    A(r1, 6) = -src_quad_vec[r0] * dst_quad_vec[r1];
+    A(r1, 7) = -src_quad_vec[r1] * dst_quad_vec[r1];
+  }
+
+  // Map arrays to Eigen vectors without memcpy
+  std::vector<float> homography_vector(8);
+  Eigen::Map<Eigen::Matrix<float, 8, 1> > x(homography_vector.data());
+  Eigen::Map<Eigen::Matrix<float, 8, 1> > b(dst_quad_vec.data());
+
+  x = A.fullPivLu().solve(b);
+  Homography homography;
+ 
homography.set_h_00(homography_vector[0]); + homography.set_h_01(homography_vector[1]); + homography.set_h_02(homography_vector[2]); + homography.set_h_10(homography_vector[3]); + homography.set_h_11(homography_vector[4]); + homography.set_h_12(homography_vector[5]); + homography.set_h_20(homography_vector[6]); + homography.set_h_21(homography_vector[7]); + return homography; +} + +bool MotionBox::GetVectorsAndWeights( + const std::vector& motion_vectors, int start_idx, int end_idx, + const Vector2_f& top_left, const Vector2_f& bottom_right, + const MotionBoxState& box_state, bool valid_background_model, + bool is_chunk_boundary, float temporal_scale, float expand_mag, + const std::vector& history, + std::vector* vectors, std::vector* weights, + int* number_of_good_prior, int* number_of_cont_inliers) const { + CHECK(weights); + CHECK(vectors); + CHECK(number_of_good_prior); + CHECK(number_of_cont_inliers); + + const int num_max_vectors = end_idx - start_idx; + weights->clear(); + vectors->clear(); + weights->reserve(num_max_vectors); + vectors->reserve(num_max_vectors); + + const Vector2_f box_domain(box_state.width() * box_state.scale(), + box_state.height() * box_state.scale()); + + CHECK_GT(box_domain.x(), 0.0f); + CHECK_GT(box_domain.y(), 0.0f); + const Vector2_f inv_box_domain(1.0f / box_domain.x(), 1.0f / box_domain.y()); + + // The four lines of the rotated and scaled box. + std::array box_lines; + if (!MotionBoxLines(box_state, Vector2_f(1.0f, 1.0f), &box_lines)) { + LOG(ERROR) << "Error in computing MotionBoxLines. Return 0 good inits and " + "continued inliers"; + return false; + } + + // Get list of previous tracking inliers and outliers. + // Ids are used for non-chunk boundaries (faster matching), locations + // for chunk boundaries. 
+ std::unordered_map inlier_ids; + std::unordered_set outlier_ids; + std::vector inlier_locations; + std::vector outlier_locations; + + if (!is_chunk_boundary) { + MotionBoxInliers(box_state, &inlier_ids); + MotionBoxOutliers(box_state, &outlier_ids); + + // Never map ids in history across a chunk boundary. + for (const auto* state_ptr : history) { + MotionBoxOutliers(*state_ptr, &outlier_ids); + } + // Why don't we build inlier map from a history of inliers? + // It is unlikely we skip a feature as an inlier, it is either + // consistently part of the motion model or it is not. + } else { + MotionBoxInlierLocations(box_state, &inlier_locations); + MotionBoxOutlierLocations(box_state, &outlier_locations); + } + + // Indicator for each vector, if inlier or outlier from prev. estimation. + std::vector is_inlier; + std::vector is_outlier; + is_inlier.reserve(num_max_vectors); + is_outlier.reserve(num_max_vectors); + int num_cont_inliers = 0; + + // Approx. 2 pix at SD resolution. + constexpr float kSqProximity = 2e-3 * 2e-3; + + for (int k = start_idx; k < end_idx; ++k) { + // x is within bound due to sorting. + const MotionVector& test_vector = motion_vectors[k]; + + if (test_vector.pos.y() < top_left.y() || + test_vector.pos.y() > bottom_right.y()) { + continue; + } + + if (std::abs(box_state.rotation()) > 0.01f || + options_.tracking_degrees() == + TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE) { + // Test also if vector is within transformed convex area. + bool accepted = true; + for (const Vector3_f& line : box_lines) { + if (line.DotProd(Vector3_f(test_vector.pos.x(), test_vector.pos.y(), + 1.0f)) > expand_mag) { + // Outside, reject. 
+ accepted = false; + break; + } + } + if (!accepted) { + continue; + } + } + + vectors->push_back(&motion_vectors[k]); + + auto is_close_to_test_vector = [test_vector](const Vector2_f v) -> bool { + return (v - test_vector.pos).Norm2() < kSqProximity; + }; + + const bool is_inlier_flag = + inlier_ids.find(test_vector.track_id) != inlier_ids.end() || + std::find_if(inlier_locations.begin(), inlier_locations.end(), + is_close_to_test_vector) != inlier_locations.end(); + num_cont_inliers += is_inlier_flag; + + const bool is_outlier_flag = + outlier_ids.find(test_vector.track_id) != outlier_ids.end() || + std::find_if(outlier_locations.begin(), outlier_locations.end(), + is_close_to_test_vector) != outlier_locations.end(); + + is_inlier.push_back(is_inlier_flag); + is_outlier.push_back(is_outlier_flag); + } + + CHECK_EQ(vectors->size(), is_inlier.size()); + CHECK_EQ(vectors->size(), is_outlier.size()); + + const float prev_motion_mag = MotionBoxVelocity(box_state).Norm(); + + // Try to lock on object again, if disparity is high. + constexpr float kMinPriorMotionWeight = 0.2f; + const float prior_motion_weight = + std::max(kMinPriorMotionWeight, std::abs(box_state.motion_disparity())) * + box_state.prior_weight(); + + const float motion_sigma = + std::max(options_.min_motion_sigma(), + prev_motion_mag * options_.relative_motion_sigma()); + const float motion_gaussian_scale = -0.5f / (motion_sigma * motion_sigma); + + // Maps current kinetic energy to [0, 1] quantifying static (0) vs. moving (1) + // object. + // Map normalized thresholds to current frame period. + const float low_kinetic_energy = + options_.low_kinetic_energy() * temporal_scale; + const float high_kinetic_energy = + options_.high_kinetic_energy() * temporal_scale; + const float kinetic_identity = LinearRamp( + box_state.kinetic_energy(), low_kinetic_energy, high_kinetic_energy); + int num_good_inits = 0; + + // Map number of continued inliers to score in [0, 1]. 
+ const float cont_inlier_score = LinearRamp(num_cont_inliers, 10, 30); + + VLOG(1) << "GetVectorsAndWeights, found cont. inliers: " << num_cont_inliers + << " score: " << cont_inlier_score; + + DistanceWeightsComputer distance_weights_computer(initial_state_, box_state, + options_); + for (int k = 0; k < vectors->size(); ++k) { + const MotionVector& test_vector = *(*vectors)[k]; + + float weight = distance_weights_computer.ComputeDistanceWeight(test_vector); + + if (valid_background_model) { + const float motion_diff = + fabs(prev_motion_mag - test_vector.object.Norm()); + const float motion_weight = + std::exp(motion_gaussian_scale * motion_diff * motion_diff); + + // Blend with spatial weight, that is the higher the disparity + // (i.e. we lost tracking, the more inclined we are to lock + // onto vectors of similar motion magnitude regardless of their + // position). + // Note: One might feel inclined to always bias towards the previous + // motion, by multiplying weight with motion_weight. This however fails + // when tracking objects that start at rest and start moving. + weight = Lerp(weight, motion_weight, prior_motion_weight); + } + + // There are two kinds of vectors we are trying to balance here: + // - inliers from previous estimation + // - similar vectors + // + // Current strategy: + // - For static objects: Boost inliers a lot, discount outliers a lot, + // do not care about similar vectors. + // - For moving objects: Boost inliers proportional to number of continued + // inliers, discount outliers a lot, boost similar vectors and + // actively downweight dis-similar objects. + // + // Motivation: Inliers are usually not very stable, so if not sufficient + // have been continued prefer velocity over inliers for moving objects. + + // NOTE: Regarding additive vs. multiplicative weights. We need to + // multiply the weight here. 
Adding the weight messes + // with the gaussian spatial weighting which in turn makes it hard to lock + // onto moving objects in the first place (as center is assumed to be + // placed over moving objects, this helps distinguish initial foreground + // and background). + + // Up-weighting of inlier vectors and vectors of similar motion. + float upweight = 1.0f; + if (is_inlier[k]) { + // Previous track, boost weight significantly. + // + // NOTE: Regarding the amount of up-weighting: Long features are not + // very stable on moving objects. Therefore only upweight strongly for + // static objects. + constexpr float kWeakMultiplier = 5.0f; + constexpr float kStrongMultiplier = 20.0f; + + // Map 0 -> 1 and values >= 0.5 -> 0, because long features are not + // very stable on moving objects. Therefore only upweight strongly for + // static objects. + const float kinetic_alpha = + std::max(0.0f, 1.0f - 2.0f * kinetic_identity); + + // Choose strong multiplier only when kinetic_alpha OR inlier score + // support it. + const float multiplier = Lerp(kWeakMultiplier, kStrongMultiplier, + std::max(cont_inlier_score, kinetic_alpha)); + upweight *= multiplier; + } + + // Scale weight boost for moving objects by prior, i.e. modulate + // strength of scale w.r.t. confidence. + const float kin_scale = Lerp(1, 10, box_state.prior_weight()); + // 80% moving object weighted by prior. This weighting is biasing towards + // moving object when the prior is low. + if (kinetic_identity >= 0.8f * box_state.prior_weight() && + test_vector.object.Norm() > high_kinetic_energy && !is_outlier[k]) { + // If we track a moving object, long tracks are less likely to be stable + // due to appearance variations. In that case boost similar vectors. + upweight *= 5.f * kin_scale; + } + + float downweight = 1.0f; + // Down-weighting of outlier vectors and vectors of different motion. + if (!is_inlier[k]) { + // Outlier. 
+ if (is_outlier[k]) { + // Note: Outlier ids might overlap with inliers as outliers are built + // from a history of frames. + // *Always favor inliers over outliers*! Important to keep!! + downweight *= 20.0f; + } + + // Vectors of different motion, for 100% moving object downweight + // vectors with small motion. + if (kinetic_identity >= 1.0f * box_state.prior_weight() && + test_vector.object.Norm() < low_kinetic_energy) { + downweight *= 2.f * kin_scale; + } + } + + // Cap any kind of up or down weighting so that no vector overwhelms + // all others. + const float kMaxWeight = 100.f; + upweight = std::min(kMaxWeight, upweight); + downweight = std::min(kMaxWeight, downweight); + weight *= upweight / downweight; + + num_good_inits += (weight >= 0.1f); + weights->push_back(weight); + } + + const int num_vectors = vectors->size(); + CHECK_EQ(num_vectors, weights->size()); + + const float weight_sum = + std::accumulate(weights->begin(), weights->end(), 0.0f); + + // Normalize weights. + if (weight_sum > 0) { + const float inv_weight_sum = 1.0f / weight_sum; + for (auto& weight : *weights) { + weight *= inv_weight_sum; + } + } + + *number_of_good_prior = num_good_inits; + *number_of_cont_inliers = num_cont_inliers; + + return true; +} + +void MotionBox::TranslationIrlsInitialization( + const std::vector& vectors, + const Vector2_f& irls_scale, std::vector* weights) const { + const int num_features = vectors.size(); + + const auto& irls_options = options_.irls_initialization(); + if (!irls_options.activated() || !num_features) { + return; + } + + // Bool indicator which features agree with model in each round. + // In case no RANSAC rounds are performed considered all features inliers. 
+ std::vector best_features(num_features, 1); + std::vector curr_features(num_features); + float best_sum = 0; + + unsigned int seed = 900913; + std::default_random_engine rand_gen(seed); + std::uniform_int_distribution<> distribution(0, num_features - 1); + + const float cutoff = irls_options.cutoff(); + const float sq_cutoff = cutoff * cutoff; + + for (int rounds = 0; rounds < irls_options.rounds(); ++rounds) { + float curr_sum = 0; + // Pick a random vector. + const int rand_idx = distribution(rand_gen); + const Vector2_f flow = vectors[rand_idx]->object; + const auto error_system = ComputeIrlsErrorSystem(irls_scale, flow); + + // curr_features gets set for every feature below; no need to reset. + for (int i = 0; i < num_features; ++i) { + const Vector2_f diff = vectors[i]->object - flow; + const float error = ErrorDiff(diff, error_system); + curr_features[i] = static_cast(error < sq_cutoff); + if (curr_features[i]) { + curr_sum += (*weights)[i]; + } + } + + if (curr_sum > best_sum) { + best_sum = curr_sum; + std::swap(best_features, curr_features); + } + } + + std::vector inlier_weights; + inlier_weights.reserve(num_features); + + // Score outliers low. + int num_inliers = 0; + for (int i = 0; i < num_features; ++i) { + if (best_features[i] == 0) { + (*weights)[i] = 1e-10f; + } else { + ++num_inliers; + inlier_weights.push_back((*weights)[i]); + } + } + + if (!inlier_weights.empty()) { + // Ensure that all selected inlier features have at least median weight. 
+ auto median = inlier_weights.begin() + inlier_weights.size() * 0.5f; + std::nth_element(inlier_weights.begin(), median, inlier_weights.end()); + + for (int i = 0; i < num_features; ++i) { + if (best_features[i] != 0) { + (*weights)[i] = std::max(*median, (*weights)[i]); + } + } + } +} + +void MotionBox::EstimateObjectMotion( + const std::vector& motion_vectors, + const std::vector& prior_weights, int num_continued_inliers, + const Vector2_f& irls_scale, std::vector* weights, + Vector2_f* object_translation, LinearSimilarityModel* object_similarity, + Homography* object_homography) const { + CHECK(object_translation); + CHECK(object_similarity); + CHECK(object_homography); + + const int num_vectors = motion_vectors.size(); + CHECK_EQ(num_vectors, prior_weights.size()); + CHECK_EQ(num_vectors, weights->size()); + + // Create backup of weights if needed. + std::vector similarity_weights; + + switch (options_.tracking_degrees()) { + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_SCALE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE: + similarity_weights = *weights; + break; + + default: + // Nothing to do here. + break; + } + + EstimateTranslation(motion_vectors, prior_weights, irls_scale, weights, + object_translation); + + TranslationModel translation_model = TranslationAdapter::FromArgs( + object_translation->x(), object_translation->y()); + + // For any additional degrees of freedom, require a good set of inliers. 
+ if (num_continued_inliers < options_.object_similarity_min_contd_inliers()) { + VLOG_IF(2, options_.tracking_degrees() != + TrackStepOptions::TRACKING_DEGREE_TRANSLATION) + << "Falling back to translation!!!"; + VLOG(1) << "num_continued_inliers: " << num_continued_inliers << " < " + << options_.object_similarity_min_contd_inliers() + << ", fall back to translation"; + *object_similarity = LinearSimilarityAdapter::Embed(translation_model); + *object_homography = HomographyAdapter::Embed(translation_model); + return; + } + + switch (options_.tracking_degrees()) { + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_SCALE: + case TrackStepOptions::TRACKING_DEGREE_OBJECT_ROTATION_SCALE: { + if (EstimateSimilarity(motion_vectors, prior_weights, irls_scale, + &similarity_weights, object_similarity)) { + if (!ObjectMotionValidator::IsValidSimilarity( + *object_similarity, options_.box_similarity_max_scale(), + options_.box_similarity_max_rotation())) { + LOG(WARNING) << "Unstable similarity model - falling back to " + << "translation."; + *object_similarity = + LinearSimilarityAdapter::Embed(translation_model); + } else { + // Good estimation, use weights as output. 
+ weights->swap(similarity_weights); + } + } else { + *object_similarity = LinearSimilarityAdapter::Embed(translation_model); + } + + break; + } + case TrackStepOptions::TRACKING_DEGREE_OBJECT_PERSPECTIVE: + if (EstimateHomography(motion_vectors, prior_weights, irls_scale, + &similarity_weights, object_homography)) { + if (!ObjectMotionValidator::IsValidHomography( + *object_homography, options_.quad_homography_max_scale(), + options_.quad_homography_max_rotation())) { + LOG(WARNING) << "Unstable homography model - falling back to " + << "translation."; + *object_homography = HomographyAdapter::Embed(translation_model); + } else { + weights->swap(similarity_weights); + } + } else { + *object_homography = HomographyAdapter::Embed(translation_model); + } + VLOG(1) << "Got homography: " + << HomographyAdapter::ToString(*object_homography); + break; + default: + // Plenty of CAMERA_ cases are not handled in this function. + break; + } +} + +void MotionBox::EstimateTranslation( + const std::vector& motion_vectors, + const std::vector& prior_weights, const Vector2_f& irls_scale, + std::vector* weights, Vector2_f* translation) const { + CHECK(weights); + CHECK(translation); + + const int iterations = options_.irls_iterations(); + + // NOTE: Floating point accuracy is totally sufficient here. + // We tried changing to double now 3 times and it just does + // not matter. Do not do it again. 
- Past self + Vector2_f object_translation; + const int num_vectors = motion_vectors.size(); + const float kEpsilon = 1e-8f; + + VLOG(1) << "Estimating translation for " << num_vectors << " vectors"; + + for (int i = 0; i < iterations; ++i) { + float flow_sum = 0; + object_translation = Vector2_f(0.0, 0.0); + for (int k = 0; k < num_vectors; ++k) { + const MotionVector& motion_vector = *motion_vectors[k]; + const Vector2_f flow = motion_vector.object; + object_translation += flow * (*weights)[k]; + flow_sum += (*weights)[k]; + } + + if (flow_sum > 0) { + object_translation *= (1.0 / flow_sum); + + const auto error_system = + ComputeIrlsErrorSystem(irls_scale, object_translation); + + // Update irls weights. + for (int k = 0; k < num_vectors; ++k) { + const MotionVector& motion_vector = *motion_vectors[k]; + Vector2_f diff(motion_vector.object - object_translation); + const float error = ErrorDiff(diff, error_system); + // In last iteration compute weights without any prior bias. + const float numerator = (i + 1 == iterations) ? 1.0f : prior_weights[k]; + (*weights)[k] = numerator / (error + kEpsilon); + } + } + } + *translation = object_translation; + + VLOG(1) << "Got translation: " << *translation; +} + +bool MotionBox::EstimateSimilarity( + const std::vector& motion_vectors, + const std::vector& prior_weights, const Vector2_f& irls_scale, + std::vector* weights, LinearSimilarityModel* lin_sim) const { + CHECK(weights); + CHECK(lin_sim); + + const int iterations = options_.irls_iterations(); + LinearSimilarityModel object_similarity; + const int num_vectors = motion_vectors.size(); + const float kEpsilon = 1e-8f; + + VLOG(1) << "Estimating similarity for " << num_vectors << " vectors"; + for (int i = 0; i < iterations; ++i) { + if (LinearSimilarityL2Solve(motion_vectors, *weights, &object_similarity)) { + // Update irls weights. 
+ for (int k = 0; k < num_vectors; ++k) { + const MotionVector& motion_vector = *motion_vectors[k]; + const Vector2_f model_vec = + TransformPoint(object_similarity, motion_vector.pos) - + motion_vector.pos; + const auto error_system = ComputeIrlsErrorSystem(irls_scale, model_vec); + + Vector2_f diff(motion_vector.object - model_vec); + const float error = ErrorDiff(diff, error_system); + // In last iteration compute weights without any prior bias. + const float numerator = (i + 1 == iterations) ? 1.0f : prior_weights[k]; + (*weights)[k] = numerator / (error + kEpsilon); + } + } else { + return false; + } + } + *lin_sim = object_similarity; + + VLOG(1) << "Got similarity: " + << LinearSimilarityAdapter::ToString(object_similarity); + return true; +} + +bool MotionBox::EstimateHomography( + const std::vector& motion_vectors, + const std::vector& prior_weights, const Vector2_f& irls_scale, + std::vector* weights, Homography* object_homography) const { + CHECK(weights); + + const int iterations = options_.irls_iterations(); + Homography homography; + const int num_vectors = motion_vectors.size(); + const float kEpsilon = 1e-8f; + + VLOG(1) << "Estimating homography for " << num_vectors << " vectors"; + for (int i = 0; i < iterations; ++i) { + if (HomographyL2Solve(motion_vectors, *weights, &homography)) { + // Update irls weights. + for (int k = 0; k < num_vectors; ++k) { + const MotionVector& motion_vector = *motion_vectors[k]; + const Vector2_f model_vec = + TransformPoint(homography, motion_vector.pos) - motion_vector.pos; + const auto error_system = ComputeIrlsErrorSystem(irls_scale, model_vec); + + Vector2_f diff(motion_vector.object - model_vec); + const float error = ErrorDiff(diff, error_system); + // In last iteration compute weights without any prior bias. + const float numerator = (i + 1 == iterations) ? 
1.0f : prior_weights[k]; + (*weights)[k] = numerator / (error + kEpsilon); + } + } else { + return false; + } + } + *object_homography = homography; + + return true; +} + +bool MotionBox::EstimatePnpHomography( + const MotionBoxState& curr_pos, + const std::vector& motion_vectors, + const std::vector& weights, float domain_x, float domain_y, + Homography* pnp_homography) const { + constexpr int kMinVectors = 4; + if (motion_vectors.size() < kMinVectors) return false; + + Homography inv_h = HomographyAdapter::Invert(curr_pos.pnp_homography()); + + std::vector vectors_3d; + vectors_3d.reserve(motion_vectors.size()); + std::vector vectors_2d; + vectors_2d.reserve(motion_vectors.size()); + if (options_.has_camera_intrinsics()) { + const auto& camera = options_.camera_intrinsics(); + cv::Mat camera_mat, dist_coef; + ConvertCameraIntrinsicsToCvMat(camera, &camera_mat, &dist_coef); + float scale = std::max(camera.w(), camera.h()); + + std::vector mv_p; + mv_p.reserve(motion_vectors.size()); + std::vector mv_q; + mv_q.reserve(motion_vectors.size()); + for (int j = 0; j < motion_vectors.size(); ++j) { + if (weights[j] < kMaxOutlierWeight) continue; + mv_p.emplace_back(motion_vectors[j]->pos.x() * scale, + motion_vectors[j]->pos.y() * scale); + + Vector2_f q = motion_vectors[j]->pos + motion_vectors[j]->object + + motion_vectors[j]->background; + mv_q.emplace_back(q.x() * scale, q.y() * scale); + } + + if (mv_p.size() < kMinVectors) return false; + + cv::undistortPoints(mv_p, mv_p, camera_mat, dist_coef); + cv::undistortPoints(mv_q, mv_q, camera_mat, dist_coef); + + vectors_3d.resize(mv_p.size()); + vectors_2d.resize(mv_q.size()); + for (int j = 0; j < mv_p.size(); ++j) { + Vector2_f p = TransformPoint(inv_h, Vector2_f(mv_p[j].x, mv_p[j].y)); + vectors_3d[j] = cv::Point3f(p.x(), p.y(), 0.0f); + vectors_2d[j] = cv::Point2f(mv_q[j].x, mv_q[j].y); + } + } else { + Vector2_f center(domain_x * 0.5f, domain_y * 0.5f); + for (int j = 0; j < motion_vectors.size(); ++j) { + if 
(weights[j] < kMaxOutlierWeight) continue; + Vector2_f p = TransformPoint(inv_h, motion_vectors[j]->pos - center); + vectors_3d.emplace_back(p.x(), p.y(), 0.0f); + + Vector2_f q = motion_vectors[j]->pos + motion_vectors[j]->object + + motion_vectors[j]->background - center; + vectors_2d.emplace_back(q.x(), q.y()); + } + + if (vectors_3d.size() < kMinVectors) return false; + } + + // TODO: use previous rvec and tvec to initilize the solver. + cv::Mat rvec, tvec; + cv::solvePnP(vectors_3d, vectors_2d, + cv::Mat::eye(3, 3, CV_64F) /* camera_mat */, + cv::Mat::zeros(1, 5, CV_64FC1) /* dist_coef */, rvec, tvec); + *pnp_homography = PnpHomographyFromRotationAndTranslation(rvec, tvec); + + return true; +} + +void MotionBox::ApplyObjectMotionPerspectively(const MotionBoxState& curr_pos, + const Homography& pnp_homography, + float domain_x, float domain_y, + MotionBoxState* next_pos) const { + const float half_width = curr_pos.aspect_ratio(); + const float half_height = 1.0f; + + constexpr int kQuadCornersSize = 4; + + // Omitting the 3rd dimension because they are all zeros. 
+ const std::vector corners_3d{ + Vector2_f(-half_width, -half_height), + Vector2_f(-half_width, half_height), + Vector2_f(half_width, half_height), + Vector2_f(half_width, -half_height), + }; + + std::vector corners_2d(kQuadCornersSize); + for (int c = 0; c < kQuadCornersSize; ++c) { + corners_2d[c] = + HomographyAdapter::TransformPoint(pnp_homography, corners_3d[c]); + } + + if (options_.has_camera_intrinsics()) { + std::vector cv_points(4); + for (int c = 0; c < kQuadCornersSize; ++c) { + cv_points[c] = cv::Point3f(corners_2d[c].x(), corners_2d[c].y(), 1.0); + } + + const auto& camera = options_.camera_intrinsics(); + cv::Mat camera_mat, dist_coef; + ConvertCameraIntrinsicsToCvMat(camera, &camera_mat, &dist_coef); + cv::Mat dummy_zeros = cv::Mat::zeros(1, 3, CV_64FC1); + std::vector cv_points_distorted; + cv::projectPoints(cv_points, dummy_zeros /* rvec */, dummy_zeros /* tvec */, + camera_mat, dist_coef, cv_points_distorted); + const float scale = 1.0f / std::max(camera.w(), camera.h()); + for (int c = 0; c < kQuadCornersSize; ++c) { + next_pos->mutable_quad()->set_vertices(c * 2, + cv_points_distorted[c].x * scale); + next_pos->mutable_quad()->set_vertices(c * 2 + 1, + cv_points_distorted[c].y * scale); + } + } else { + const float center_x = domain_x * 0.5f; + const float center_y = domain_y * 0.5f; + for (int c = 0; c < kQuadCornersSize; ++c) { + next_pos->mutable_quad()->set_vertices(c * 2, + corners_2d[c].x() + center_x); + next_pos->mutable_quad()->set_vertices(c * 2 + 1, + corners_2d[c].y() + center_y); + } + } + + *next_pos->mutable_pnp_homography() = pnp_homography; + UpdateStatePositionAndSizeFromStateQuad(next_pos); +} + +float MotionBox::ComputeMotionDisparity( + const MotionBoxState& curr_pos, const Vector2_f& irls_scale, + float continued_inliers, int num_inliers, + const Vector2_f& object_translation) const { + // Motion disparity does not take into account change of direction, + // only use parallel irls scale. 
+ const float curr_velocity = MotionBoxVelocity(curr_pos).Norm(); + const float sign = object_translation.Norm() < curr_velocity ? -1.0f : 1.0f; + const float motion_diff = fabs(object_translation.Norm() - curr_velocity); + + // Score difference. + const float measured_motion_disparity = LinearRamp( + motion_diff * irls_scale.x(), options_.motion_disparity_low_level(), + options_.motion_disparity_high_level()); + + // Cap disparity measurement by inlier ratio, to account for objects + // suddenly stopping/accelerating. In this case measured disparity might be + // high, whereas inliers continue to be tracked. + const float max_disparity = 1.0f - continued_inliers; + + const float capped_disparity = + std::min(max_disparity, measured_motion_disparity); + + // Take into account large disparity in previous frames. Score by prior of + // previous motion. + const float motion_disparity = + std::max(curr_pos.motion_disparity() * options_.disparity_decay(), + capped_disparity) * + curr_pos.prior_weight(); + + // Map number of inliers to score in [0, 1], assuming a lot of inliers + // indicate lock onto object. + const float inlier_score = LinearRamp(num_inliers, 20, 40); + + // Decaying motion disparity faster if number of inliers indicate lock onto + // tracking objects has occurred. 
+ return std::min(1.0f - inlier_score, motion_disparity) * sign; +} + +void MotionBox::ScoreAndRecordInliers( + const MotionBoxState& curr_pos, + const std::vector& vectors, + const std::vector& grid_positions, + const std::vector& pre_estimation_weights, + const std::vector& post_estimation_weights, + float background_discrimination, MotionBoxState* next_pos, + std::vector* inlier_weights, std::vector* inlier_density, + int* continued_inliers, int* swapped_inliers, float* motion_inliers_out, + float* kinetic_average_out) const { + CHECK(inlier_weights); + CHECK(inlier_density); + CHECK(continued_inliers); + CHECK(swapped_inliers); + CHECK(motion_inliers_out); + CHECK(kinetic_average_out); + + std::unordered_map prev_inliers; + MotionBoxInliers(curr_pos, &prev_inliers); + + std::unordered_set prev_outliers; + MotionBoxOutliers(curr_pos, &prev_outliers); + + ClearInlierState(next_pos); + + // Continued inlier fraction denotes amount of spatial occlusion. Very low + // values indicate that we are in very difficult tracking territory. + *continued_inliers = 0; + *swapped_inliers = 0; + float kinetic_average = 0; + float kinetic_average_sum = 0; + float motion_inliers = 0; + const int num_vectors = vectors.size(); + inlier_weights->resize(num_vectors); + inlier_density->resize(num_vectors); + + // Inliers normalization grid. + std::vector grid_count(kNormalizationGridSize * + kNormalizationGridSize); + const float prev_object_motion = MotionBoxVelocity(curr_pos).Norm(); + // Count number of similar moving inliers as previous object motion. + const float similar_motion_threshold = + std::max(2e-3f, prev_object_motion * 0.3f); + + // If background discrimination is low, inliers are ambiguous: Hard to + // distinguish from earlier outliers. In this case do not record inliers + // outside our current tracking extent, as everything will look like an + // inlier. + // + // TODO: Compute 2nd moment for inliers and describe as ellipse, + // improve shape here then. 
+ bool inlier_ambiguity = background_discrimination < 0.5f; + int rejected = 0; + int num_inliers = 0; + for (int k = 0; k < num_vectors; ++k) { + (*inlier_weights)[k] = + LinearRamp(post_estimation_weights[k], options_.inlier_low_weight(), + options_.inlier_high_weight()); + const int track_id = vectors[k]->track_id; + + bool is_prev_outlier = prev_outliers.find(track_id) != prev_outliers.end(); + + const Vector2_f match_loc = vectors[k]->MatchLocation(); + if ((*inlier_weights)[k] > kMinInlierWeight) { // Inlier. + if (is_prev_outlier) { + ++(*swapped_inliers); + } + + if (inlier_ambiguity && + !PointWithinInlierExtent(vectors[k]->Location(), curr_pos)) { + ++rejected; + continue; + } + + ++num_inliers; + + AddToGrid(grid_positions[k], &grid_count); + + if (track_id >= 0) { + next_pos->add_inlier_ids(track_id); + next_pos->add_inlier_id_match_pos(match_loc.x() * kShortScale); + next_pos->add_inlier_id_match_pos(match_loc.y() * kShortScale); + auto pos = prev_inliers.find(track_id); + if (pos != prev_inliers.end()) { + // Count length of observation. + next_pos->add_inlier_length(pos->second + 1.0f); + ++(*continued_inliers); + } else { + next_pos->add_inlier_length(1); + } + } + + // Note: This should be weighted by the pre estimation weights, simply + // adding a 1 for each inlier leads to lower irls averages. + kinetic_average += vectors[k]->object.Norm() * pre_estimation_weights[k]; + kinetic_average_sum += pre_estimation_weights[k]; + + // Count number of inliers that agree with previous kinetic energy + // estimate. + if (std::abs(vectors[k]->object.Norm() - prev_object_motion) * + curr_pos.prior_weight() < + similar_motion_threshold) { + motion_inliers += pre_estimation_weights[k]; + } + } else if ((*inlier_weights)[k] < kMaxOutlierWeight) { // Outlier. 
+ next_pos->add_outlier_ids(track_id); + next_pos->add_outlier_id_match_pos(match_loc.x() * kShortScale); + next_pos->add_outlier_id_match_pos(match_loc.y() * kShortScale); + } + } + + // Read out density of inliers. + for (int k = 0; k < num_vectors; ++k) { + if ((*inlier_weights)[k] > kMinInlierWeight) { + (*inlier_density)[k] = 2 * SampleFromGrid( + grid_positions[k], grid_count); + } else { + (*inlier_density)[k] = 0; + } + } + + if (kinetic_average_sum > 0) { + kinetic_average *= 1.0f / kinetic_average_sum; + } + + VLOG(1) << "num inliers: " << num_inliers << " rejected: " << rejected; + + *kinetic_average_out = kinetic_average; + *motion_inliers_out = motion_inliers; +} + +void MotionBox::ComputeInlierCenterAndExtent( + const std::vector& motion_vectors, + const std::vector& weights, const std::vector& density, + const MotionBoxState& box_state, float* min_inlier_sum, Vector2_f* center, + Vector2_f* extent) const { + CHECK(min_inlier_sum); + CHECK(center); + CHECK(extent); + + float weight_sum = 0; + float inlier_sum = 0; + const int num_vectors = motion_vectors.size(); + CHECK_EQ(num_vectors, weights.size()); + CHECK_EQ(num_vectors, density.size()); + + Vector2_f first_moment(0.0f, 0.0f); + Vector2_f second_moment(0.0f, 0.0f); + + Vector2_f top_left; + Vector2_f bottom_right; + MotionBoxBoundingBox(box_state, &top_left, &bottom_right); + + for (int k = 0; k < num_vectors; ++k) { + const MotionVector& motion_vector = *motion_vectors[k]; + const Vector2_f match = motion_vector.MatchLocation(); + float space_multiplier = 1.0f; + // Decrease contribution of out of bound inliers. + // Note: If all inliers are out of bound this down weighting will have no + // effect. It is designed to prevent skewing the inlier center towards + // similar moving inliers outside the tracked box. 
+ if (match.x() < top_left.x() || match.x() > bottom_right.x() || + match.y() < top_left.y() || match.y() > bottom_right.y()) { + space_multiplier = 0.25f; + } + const float w = weights[k] * density[k] * space_multiplier; + if (w > 0) { + first_moment += w * match; + second_moment += + w * Vector2_f(match.x() * match.x(), match.y() * match.y()); + weight_sum += w; + inlier_sum += weights[k]; + } + } + + // Update center if sufficient inliers present. + if (inlier_sum > *min_inlier_sum) { + const float inv_weight_sum = 1.0f / weight_sum; + first_moment *= inv_weight_sum; + second_moment *= inv_weight_sum; + + *center = first_moment; + *extent = second_moment - Vector2_f(first_moment.x() * first_moment.x(), + first_moment.y() * first_moment.y()); + + // 1.645 sigmas in each direction = 90% of the data captured. + *extent = + Vector2_f(std::sqrt(extent->x()) * 3.29, std::sqrt(extent->y()) * 3.29); + } else { + // Gravitate back to box center with inlier center. + *center = Lerp(MotionBoxCenter(box_state), InlierCenter(box_state), 0.5f); + } + + // Record number of inliers present. + *min_inlier_sum = weight_sum; +} + +float MotionBox::ScaleEstimate( + const std::vector& motion_vectors, + const std::vector& weights, float min_sum) const { + const int num_vectors = motion_vectors.size(); + CHECK_EQ(num_vectors, weights.size()); + + float scale_sum = 0; + + // First moments. + Vector2_d sum_coords(0, 0); + Vector2_d match_sum_coords(0, 0); + + // Second moments. 
+ Vector2_d sum_sq_coords(0, 0); + Vector2_d match_sum_sq_coords(0, 0); + + for (int k = 0; k < num_vectors; ++k) { + const MotionVector& motion_vector = *motion_vectors[k]; + + const Vector2_d pos(motion_vector.pos.x(), motion_vector.pos.y()); + double weight = weights[k]; + sum_coords += weight * pos; + sum_sq_coords += weight * Vector2_d(pos.x() * pos.x(), pos.y() * pos.y()); + + const Vector2_d match = + Vector2_d::Cast(motion_vector.MatchLocation()); + match_sum_coords += weight * match; + match_sum_sq_coords += + weight * Vector2_d(match.x() * match.x(), match.y() * match.y()); + scale_sum += weights[k]; + } + + if (scale_sum > min_sum) { + const double denom = 1.0f / scale_sum; + sum_coords *= denom; + match_sum_coords *= denom; + sum_sq_coords *= denom; + match_sum_sq_coords *= denom; + + const float curr_scale = + sqrt(sum_sq_coords.x() - sum_coords.x() * sum_coords.x() + + sum_sq_coords.y() - sum_coords.y() * sum_coords.y()); + const float next_scale = sqrt( + match_sum_sq_coords.x() - match_sum_coords.x() * match_sum_coords.x() + + match_sum_sq_coords.y() - match_sum_coords.y() * match_sum_coords.y()); + return next_scale / curr_scale; + } + + return 1.0f; +} + +void MotionBox::ApplySpringForce(const Vector2_f& center_of_interest, + const float rel_threshold, + const float spring_force, + MotionBoxState* box_state) const { + // Apply spring force towards center of interest. + const Vector2_f center = MotionBoxCenter(*box_state); + const float center_diff_x = center_of_interest.x() - center.x(); + const float center_diff_y = center_of_interest.y() - center.y(); + + const float diff_x = fabs(center_diff_x) - box_state->width() * rel_threshold; + const float diff_y = + fabs(center_diff_y) - box_state->height() * rel_threshold; + + if (diff_x > 0) { + const float correction_mag = diff_x * spring_force; + const float correction = + center_diff_x < 0 ? 
-correction_mag : correction_mag; + box_state->set_pos_x(box_state->pos_x() + correction); + } + + if (diff_y > 0) { + const float correction_mag = diff_y * spring_force; + const float correction = + center_diff_y < 0 ? -correction_mag : correction_mag; + box_state->set_pos_y(box_state->pos_y() + correction); + } +} + +void MotionBox::TrackStepImpl(int from_frame, const MotionBoxState& curr_pos, + const MotionVectorFrame& motion_frame, + const std::vector& history, + MotionBoxState* next_pos) const { + // Create new curr pos with velocity scaled to current duration. + constexpr float kDefaultPeriodMs = 1000.0f / kTrackingDefaultFps; + + // Scale to be applied to velocity related fields in MotionBoxState + // to transform state from standard frame period to current one. + float temporal_scale = (motion_frame.duration_ms == 0) + ? 1.0f + : motion_frame.duration_ms / kDefaultPeriodMs; + + MotionBoxState curr_pos_normalized = curr_pos; + ScaleStateTemporally(temporal_scale, &curr_pos_normalized); + ScaleStateAspect(motion_frame.aspect_ratio, false, &curr_pos_normalized); + + TrackStepImplDeNormalized(from_frame, curr_pos_normalized, motion_frame, + history, next_pos); + + // Scale back velocity and aspect to normalized domains. + ScaleStateTemporally(1.0f / temporal_scale, next_pos); + ScaleStateAspect(motion_frame.aspect_ratio, true, next_pos); + + // Test if out of bound, only for moving objects. + const float static_motion = + options_.static_motion_temporal_ratio() * temporal_scale; + if (MotionBoxVelocity(*next_pos).Norm() > static_motion) { + // Test if close to boundary and keep moving towards it. 
+ constexpr float kRatio = 0.3; + if ((next_pos->pos_x() < -next_pos->width() * kRatio && + next_pos->dx() < -static_motion / 2) || + (next_pos->pos_y() < -next_pos->height() * kRatio && + next_pos->dy() < -static_motion / 2) || + (next_pos->pos_x() > 1.0f - next_pos->width() * (1.0f - kRatio) && + next_pos->dx() > static_motion / 2) || + (next_pos->pos_y() > 1.0f - next_pos->height() * (1.0f - kRatio) && + next_pos->dy() > static_motion / 2)) { + VLOG(1) << "Tracked box went out of bound."; + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + return; + } + } +} + +float MotionBox::ComputeTrackingConfidence( + const MotionBoxState& motion_box_state) const { + const float inlier_num_lower_bound = 10.0f; + const float inlier_num_upper_bound = 30.0f; + const float confidence = + LinearRamp(motion_box_state.inlier_ids_size(), inlier_num_lower_bound, + inlier_num_upper_bound); + return confidence; +} + +// General tracking algorithm overview: +// Tracking algorithm consists of 6 main stages. +// 1.) Selecting features from passed MotionVectorFrame based on incidence with +// rectangle defined by curr_pos +// 2.) Assigning each vector a prior weight. Vector are mainly scored by a box +// centered gaussian, giving more weights to vectors within the center of +// the box. If current state is deemed unreliable, vectors with similar +// velocity to previously estimated velocity are favored. If current state +// indicates tracking of a moving object, high velocity vectors are favored. +// 3.) Estimating a translational model via IRLS enforcing the prior of step 2 +// in every iteration. +// 4.) Score how much the estimate model deviates from the previous motion +// (termed motion disparity) and how discriminative the motion is +// from the background motion (termed motion discrimination) +// 5.) Computing the inlier center (position of vectors used for the motion +// model in the next frame) and center of high velocity vectors. 
Apply a +// spring force towards each center based on the motion discrimination. +// 6.) Update velocity and kinetic energy by blending current measurment with +// previous one. +void MotionBox::TrackStepImplDeNormalized( + int from_frame, const MotionBoxState& curr_pos, + const MotionVectorFrame& motion_frame, + const std::vector& history, + MotionBoxState* next_pos) const { + CHECK(next_pos); + + constexpr float kDefaultPeriodMs = 1000.0f / kTrackingDefaultFps; + float temporal_scale = (motion_frame.duration_ms == 0) + ? 1.0f + : motion_frame.duration_ms / kDefaultPeriodMs; + + // Initialize to current position. + *next_pos = curr_pos; + + if (!IsBoxValid(curr_pos)) { + LOG(ERROR) << "curr_pos is not a valid box. Stop tracking!"; + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + return; + } + + Vector2_f top_left, bottom_right; + float expand_mag = 0.0f; + GetStartPosition(curr_pos, motion_frame.aspect_ratio, &expand_mag, &top_left, + &bottom_right); + + const float aspect_ratio = motion_frame.aspect_ratio; + float domain_x = 1.0f; + float domain_y = 1.0f; + ScaleFromAspect(aspect_ratio, false, &domain_x, &domain_y); + + // Binary search for start and end index (lexicographic search, i.e. + // x indices are guaranteed to be within bounds, but y coordinates could be + // outside and need to be checked against the domain of the box via + // GetVectorsAndWeights below). 
+ MotionVector search_start; + MotionVector search_end; + search_start.pos = top_left; + search_end.pos = bottom_right; + + int start_idx = std::lower_bound(motion_frame.motion_vectors.begin(), + motion_frame.motion_vectors.end(), + search_start, MotionVectorComparator()) - + motion_frame.motion_vectors.begin(); + + int end_idx = std::lower_bound(motion_frame.motion_vectors.begin(), + motion_frame.motion_vectors.end(), search_end, + MotionVectorComparator()) - + motion_frame.motion_vectors.begin(); + + const float static_motion = + options_.static_motion_temporal_ratio() * temporal_scale; + if (start_idx >= end_idx || top_left.x() >= domain_x - expand_mag || + top_left.y() >= domain_y - expand_mag || bottom_right.x() <= expand_mag || + bottom_right.y() <= expand_mag) { + // Empty box, no features found. It can happen if box is outside + // field of view, or there is no features in the box. + // Move box by background model if it has static motion, else move return + // tracking error. + if (MotionBoxVelocity(curr_pos).Norm() > static_motion || + (!motion_frame.valid_background_model && from_frame != queue_start_)) { + next_pos->set_track_status(MotionBoxState::BOX_NO_FEATURES); + } else { + // Static object, move by background model. + next_pos->set_track_status(MotionBoxState::BOX_TRACKED_OUT_OF_BOUND); + ApplyCameraTrackingDegrees(curr_pos, motion_frame.background_model, + options_, Vector2_f(domain_x, domain_y), + next_pos); + + // The further the quad is away from the FOV (range 0 to 1), the larger + // scale change will be applied to the quad by homography transform. To + // some certain extent, the position of vertices will be flipped from + // positive to negative, or vice versa. Here we reject all the quads with + // abnormal shape by convexity of the quad.. 
+ if (next_pos->has_quad() && + (ObjectMotionValidator::IsQuadOutOfFov( + next_pos->quad(), Vector2_f(domain_x, domain_y)) || + !ObjectMotionValidator::IsValidQuad(next_pos->quad()))) { + LOG(ERROR) << "Quad is out of fov or not convex. Cancel tracking."; + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + return; + } + } + return; + } + + float start_x = Clamp(top_left.x(), 0.0f, domain_x); + float start_y = Clamp(top_left.y(), 0.0f, domain_y); + float end_x = Clamp(bottom_right.x(), 0.0f, domain_x); + float end_y = Clamp(bottom_right.y(), 0.0f, domain_y); + + const Vector2_f curr_pos_size = MotionBoxSize(curr_pos); + constexpr float kMinSize = 1e-3f; // 1 pix for 1080p. + if (start_x >= end_x || start_y >= end_y || curr_pos_size.x() < kMinSize || + curr_pos_size.y() < kMinSize) { + next_pos->set_track_status(MotionBoxState::BOX_EMPTY); + return; + } + + top_left = Vector2_f(start_x, start_y); + bottom_right = Vector2_f(end_x, end_y); + + // Get indices of features within box, corresponding priors and position + // in feature grid. + std::vector vectors; + std::vector prior_weights; + bool valid_background_model = motion_frame.valid_background_model; + + int num_good_inits; + int num_cont_inliers; + const bool get_vec_weights_status = GetVectorsAndWeights( + motion_frame.motion_vectors, start_idx, end_idx, top_left, bottom_right, + curr_pos, valid_background_model, motion_frame.is_chunk_boundary, + temporal_scale, expand_mag, history, &vectors, &prior_weights, + &num_good_inits, &num_cont_inliers); + if (!get_vec_weights_status) { + LOG(ERROR) << "error in GetVectorsAndWeights. Terminate tracking."; + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + return; + } + + // `num_good_inits` comes from motion vector weights, but pnp solver currently + // does not depend on weights. So for pnp tracking mode, we don't fall back to + // camera motion tracking based on num_good_inits. 
+ if (!curr_pos.has_pnp_homography() && + (num_good_inits < 3 && + MotionBoxVelocity(curr_pos).Norm() <= static_motion)) { + // Static object, move by background model. + next_pos->set_track_status(MotionBoxState::BOX_TRACKED); + ApplyCameraTrackingDegrees(curr_pos, motion_frame.background_model, + options_, Vector2_f(domain_x, domain_y), + next_pos); + VLOG(1) << "No good inits; applying camera motion for static object"; + + if (next_pos->has_quad() && + !ObjectMotionValidator::IsValidQuad(next_pos->quad())) { + LOG(ERROR) << "Quad is not convex. Cancel tracking."; + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + return; + } + return; + } + + VLOG(1) << "Good inits: " << num_good_inits; + + const int num_vectors = vectors.size(); + CHECK_EQ(num_vectors, prior_weights.size()); + + Vector2_f object_translation; + + // Compute a rough estimate of the current motion. + for (int k = 0; k < num_vectors; ++k) { + object_translation += vectors[k]->Motion() * prior_weights[k]; + } + + Vector2_f prev_object_motion = MotionBoxVelocity(curr_pos); + + // Estimate expected motion magnitude. In case of low prior, skew more + // towards rough estimate instead of previous motion. + const float motion_mag_estimate = + std::max(object_translation.Norm(), + prev_object_motion.Norm() * curr_pos.prior_weight()); + + // For motivation about this: See MotionEstimation::GetIRLSResidualScale. + // Assume 1 pixel estimation error for tracked features at 360p video. + // This serves as absolute minimum of the estimation error, so we do not + // scale translation fractions below this threshold. + constexpr float kMinError = 1.25e-3f; + + // We use a combination of absolute and relative error. If a predefined + // fraction of the motion exceeds the minimum error, we scale the error + // such that the relative error equals the min error. + // We use different thresholds parallel and perpendicular to the estimation + // direction. 
+  // Motivation is that we allow for more error perpendicular to an estimation
+  // (angular difference) than in its direction (magnitude error).
+
+  // Scale in parallel, orthogonal direction.
+  Vector2_f irls_scale(1.0f, 1.0f);
+
+  const Vector2_f kMotionPercentage(0.1f, 0.25f);
+  const Vector2_f motion_mag_scaled = motion_mag_estimate * kMotionPercentage;
+
+  if (motion_mag_scaled.x() > kMinError) {
+    irls_scale.x(kMinError / motion_mag_scaled.x());
+  }
+  if (motion_mag_scaled.y() > kMinError) {
+    irls_scale.y(kMinError / motion_mag_scaled.y());
+  }
+
+  // Irls init for translation.
+  // TODO: Adjust to object tracking degrees.
+  TranslationIrlsInitialization(vectors, irls_scale, &prior_weights);
+
+  LinearSimilarityModel object_similarity;
+  Homography object_homography;
+  Homography pnp_homography;
+
+  std::vector weights = prior_weights;
+  if (num_good_inits > 0) {
+    MEASURE_TIME << "Estimate object motion.";
+    EstimateObjectMotion(vectors, prior_weights, num_cont_inliers, irls_scale,
+                         &weights, &object_translation, &object_similarity,
+                         &object_homography);
+  } else {
+    // There is no hope to estimate a model in a stable manner here.
+    object_translation = prev_object_motion;
+    VLOG(1) << "No good inits, reusing prev. motion instead of estimation";
+  }
+
+  // Multiplier to quantify how discriminative object motion is
+  // (larger motions are more discriminative).
+  // Note: Independent from temporal scale.
+  float background_discrimination = curr_pos.background_discrimination();
+  if (valid_background_model) {
+    background_discrimination =
+        LinearRamp(object_translation.Norm(),
+                   options_.background_discrimination_low_level(),
+                   options_.background_discrimination_high_level());
+  }
+
+  // Score weights from motion estimation to determine set of inliers.
+  std::vector inlier_weights;
+  std::vector inlier_density;
+
+  // Compute grid positions for each vector to determine density of inliers.
+  std::vector grid_positions;
+  ComputeGridPositions(top_left, bottom_right, vectors,
+                       &grid_positions);
+
+  // Continued inlier fraction denotes amount of spatial occlusion. Very low
+  // values indicate that we are in very difficult tracking territory.
+  int continued_inliers = 0;
+  int swapped_inliers = 0;
+  float kinetic_average = 0;
+  float motion_inliers = 0;
+  ScoreAndRecordInliers(curr_pos, vectors, grid_positions, prior_weights,
+                        weights, background_discrimination, next_pos,
+                        &inlier_weights, &inlier_density, &continued_inliers,
+                        &swapped_inliers, &motion_inliers, &kinetic_average);
+
+  const int num_prev_inliers = curr_pos.inlier_ids_size();
+  const int num_inliers = next_pos->inlier_ids_size();
+  // Must be in [0, 1].
+  const float continued_inlier_fraction =
+      num_prev_inliers == 0 ? 1.0f
+                            : continued_inliers * 1.0f / num_prev_inliers;
+  // Within [0, M], where M is maximum number of features. Values of > 1
+  // indicate a significant number of inliers were outliers in previous frame.
+  const float swapped_inlier_fraction =
+      num_prev_inliers == 0 ? 0.0f : swapped_inliers * 1.0f / num_prev_inliers;
+
+  if (curr_pos.has_pnp_homography()) {
+    MEASURE_TIME << "Estimate pnp homography.";
+
+    // Use IRLS homography `inlier_weights` to determine inliers / outliers.
+    // The rationale is: solving homography transform is 20X faster than solving
+    // perspective transform (0.05ms vs 1ms). So, we use 5 iterations of
+    // reweighted homography to filter out outliers first. And only use inliers
+    // to solve for perspective.
+    if (!EstimatePnpHomography(curr_pos, vectors, inlier_weights, domain_x,
+                               domain_y, &pnp_homography)) {
+      // Here, we can either cancel tracking or apply homography or even
+      // translation as our best guess. But since some specific use cases of
+      // pnp tracking (for example Augmented Images) prefer high precision
+      // over high recall, we choose to cancel tracking once and for all.
+      VLOG(1) << "Not enough motion vectors to solve pnp. 
Cancel tracking.";
+      next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED);
+      return;
+    }
+  }
+
+  // Compute disparity.
+  if (num_good_inits > 0) {
+    next_pos->set_motion_disparity(ComputeMotionDisparity(
+        curr_pos, irls_scale, continued_inliers, num_inliers,
+        valid_background_model ? object_translation : prev_object_motion));
+  } else {
+    // No good features, signal error.
+    next_pos->set_motion_disparity(1.0);
+  }
+
+  VLOG(1) << "Motion inliers: " << motion_inliers
+          << ", continued inliers: " << continued_inliers
+          << ", continued ratio: " << continued_inlier_fraction
+          << ", swapped fraction: " << swapped_inlier_fraction
+          << ", motion disparity: " << next_pos->motion_disparity();
+
+  if (options_.cancel_tracking_with_occlusion_options().activated() &&
+      curr_pos.track_status() != MotionBoxState::BOX_DUPLICATED &&
+      continued_inlier_fraction <
+          options_.cancel_tracking_with_occlusion_options()
+              .min_motion_continuity()) {
+    next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED);
+    LOG(INFO) << "Occlusion detected. continued_inlier_fraction: "
+              << continued_inlier_fraction << " too low. Stop tracking";
+    return;
+  }
+
+  // Force reset of state when inlier continuity is severely violated,
+  // disparity maxes out or a significant number of inliers were outliers in
+  // the previous frame.
+  if (std::max(continued_inlier_fraction, motion_inliers) < 0.15f ||
+      std::abs(next_pos->motion_disparity()) >= 1.0f ||
+      swapped_inlier_fraction >= 2.5) {
+    VLOG(1) << "Track error, state reset.";
+    // Bad tracking error occurred.
+    // Current set of inliers is not reliable.
+    ClearInlierState(next_pos);
+    next_pos->set_motion_disparity(1.0f);
+    inlier_weights.assign(inlier_weights.size(), 0);
+
+    // Reuse previous motion and discrimination.
+ object_translation = prev_object_motion; + background_discrimination = curr_pos.background_discrimination(); + } + + next_pos->set_inlier_sum( + std::accumulate(inlier_weights.begin(), inlier_weights.end(), 0.0f)); + if (history.empty()) { + // Assign full confidence on first frame, otherwise all other stats + // are zero and there is no way to compute. + next_pos->set_tracking_confidence(1.0f); + LOG(INFO) << "no history. confidence : 1.0"; + } else { + next_pos->set_tracking_confidence(ComputeTrackingConfidence(*next_pos)); + VLOG(1) << "confidence: " << next_pos->tracking_confidence(); + } + next_pos->set_background_discrimination(background_discrimination); + + // Slowly decay current kinetic energy. Blend with current measurement based + // on disparity (high = use previous value, low = use current one). + next_pos->set_kinetic_energy(std::max( + options_.kinetic_energy_decay() * curr_pos.kinetic_energy(), + kinetic_average * (1.0f - std::abs(next_pos->motion_disparity())))); + + float inlier_max = curr_pos.inlier_sum(); + int num_tracked_frames_in_history = 0; + for (auto entry : history) { + inlier_max = std::max(inlier_max, entry->inlier_sum()); + if (entry->track_status() == MotionBoxState::BOX_TRACKED) { + num_tracked_frames_in_history++; + } + } + + const float inlier_ratio = + inlier_max > 0 ? (next_pos->inlier_sum() / (inlier_max + 1e-3f)) : 0.0f; + + next_pos->set_inlier_ratio(inlier_ratio); + + const bool is_perfect_fit = inlier_ratio > 0.85f && inlier_ratio < 1.15f; + + // num_tracked_frames_in_history has to be greater than 1, since the first + // frame is marked as BOX_TRACKED in ResetAtFrame. 
+ if (options_.cancel_tracking_with_occlusion_options().activated() && + curr_pos.track_status() != MotionBoxState::BOX_DUPLICATED && + num_tracked_frames_in_history > 1 && + inlier_ratio < options_.cancel_tracking_with_occlusion_options() + .min_inlier_ratio()) { + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + LOG(INFO) << "inlier_ratio: " << inlier_ratio + << " too small. Stop tracking. inlier_max: " << inlier_max + << ". length in history: " << history.size(); + return; + } + + // Blend measured object motion based on motion disparity, i.e. the more the + // measured and previous motion agree, the less the smoothing. This is to + // propagate the box in the direction of the previous object motion + // in case tracking has been lost. + // Allow new measurements to propagate slowly. + if (valid_background_model && !is_perfect_fit) { + // Always move some fraction in the direction of the measured object + // even if deemed in disagreement with previous motion. + const float kMinimumBlend = 0.2f; + object_translation = Lerp( + object_translation, prev_object_motion, + std::min(1.0f - kMinimumBlend, std::abs(next_pos->motion_disparity()))); + } + + if (curr_pos.has_pnp_homography()) { + ApplyObjectMotionPerspectively(curr_pos, pnp_homography, domain_x, domain_y, + next_pos); + } else { + ApplyObjectMotion(curr_pos, object_translation, object_similarity, + object_homography, options_, next_pos); + + ApplyCameraTrackingDegrees(curr_pos, motion_frame.background_model, + options_, Vector2_f(domain_x, domain_y), + next_pos); + } + + if (next_pos->has_quad() && + !ObjectMotionValidator::IsValidQuad(next_pos->quad())) { + LOG(ERROR) << "Quad is not convex. Cancel tracking."; + next_pos->set_track_status(MotionBoxState::BOX_UNTRACKED); + return; + } + + // Storing pre-comp weights. + const std::vector& internal_weights = + options_.use_post_estimation_weights_for_state() ? 
inlier_weights + : prior_weights; + + StoreInternalState(vectors, internal_weights, aspect_ratio, + next_pos->mutable_internal()); + + // Compute center of inliers in next frame and change in scale for inliers. + Vector2_f inlier_center; + Vector2_f inlier_extent; + float min_inlier_weight = 2.0f; // Only update inlier_center if more + // inliers than specified are found. + ComputeInlierCenterAndExtent(vectors, inlier_weights, inlier_density, + *next_pos, &min_inlier_weight, &inlier_center, + &inlier_extent); + + // Determine difference to previous estimate. + Vector2_f prev_inlier_center(InlierCenter(curr_pos)); + const float rel_inlier_center_diff = + (inlier_center - prev_inlier_center).Norm() / + MotionBoxSize(curr_pos).Norm(); + + // Smooth with previous location, based on relative inlier difference. + // A difference of 1.0 is mapped to a weight of 1.0 (total outlier). + // Blend weight is maxed out at .6f to always allow measurements to propagate + // over time (assuming high motion discrimination). + const float center_blend = + std::min(Lerp(0.95f, 0.6f, background_discrimination), + rel_inlier_center_diff) * + curr_pos.prior_weight(); + inlier_center = Lerp(inlier_center, prev_inlier_center, center_blend); + + next_pos->set_inlier_center_x(inlier_center.x()); + next_pos->set_inlier_center_y(inlier_center.y()); + + // Update extent only when sufficient inliers are present. + // TODO: This is too hacky, evaluate. + if (min_inlier_weight > 30) { + Vector2_f prev_inlier_extent(curr_pos.inlier_width(), + curr_pos.inlier_height()); + // Blend with previous extent based on prior and discrimination. 
+ inlier_extent = Lerp( + inlier_extent, prev_inlier_extent, + curr_pos.prior_weight() * Lerp(1.0, 0.85, background_discrimination)); + next_pos->set_inlier_width(inlier_extent.x()); + next_pos->set_inlier_height(inlier_extent.y()); + } + + VLOG(1) << "Inlier extent " << next_pos->inlier_width() << " , " + << next_pos->inlier_height(); + + // Spring force applied to the inlier_center is modulated by the background + // discrimination. Motivation: Low background discrimination leads to inlier + // center more biased towards previous result due to update weight being + // tampered down. Always apply a minimum force. + // TODO: During challenging (low inlier) situations this can + // save the lock onto objects. Cook up a condition to set min spring force to + // 0.25 or so. + constexpr float kMinSpringForceFraction = 0.0; + ApplySpringForce(inlier_center, options_.inlier_center_relative_distance(), + std::min(1.0f, options_.inlier_spring_force() * + std::max(kMinSpringForceFraction, + background_discrimination)), + next_pos); + + if (options_.compute_spatial_prior()) { + // Blend based on object multiplier using high prior weight for low + // multipliers. + // Magic update numbers, prior is not important for tracking, only for + // visualization purposes. + const float prior_weight = Lerp(0.98, 0.85, background_discrimination); + ComputeSpatialPrior(true, true, prior_weight, next_pos); + } + + // Update velocity. + float velocity_update_weight = + is_perfect_fit + ? 0.0f + : (options_.velocity_update_weight() * curr_pos.prior_weight()); + // Computed object motion is completely random when background model is + // invalid. Use previous motion in this case. + if (!valid_background_model) { + velocity_update_weight = 1.0f; + } + + next_pos->set_dx( + Lerp(object_translation.x(), curr_pos.dx(), velocity_update_weight)); + next_pos->set_dy( + Lerp(object_translation.y(), curr_pos.dy(), velocity_update_weight)); + + // Update prior. 
+ if (valid_background_model) { + next_pos->set_prior_weight(std::min( + 1.0f, curr_pos.prior_weight() + options_.prior_weight_increase())); + } else { + next_pos->set_prior_weight(std::max( + 0.0f, curr_pos.prior_weight() - options_.prior_weight_increase())); + } + + next_pos->set_track_status(MotionBoxState::BOX_TRACKED); +} + +void MotionVectorFrameFromTrackingData(const TrackingData& tracking_data, + MotionVectorFrame* motion_vector_frame) { + CHECK(motion_vector_frame != nullptr); + + const auto& motion_data = tracking_data.motion_data(); + float aspect_ratio = tracking_data.frame_aspect(); + if (aspect_ratio < 0.1 || aspect_ratio > 10.0f) { + LOG(ERROR) << "Aspect ratio : " << aspect_ratio << " is out of bounds. " + << "Resetting to 1.0."; + aspect_ratio = 1.0f; + } + + float scale_x, scale_y; + // Normalize longest dimension to 1 under aspect ratio preserving scaling. + ScaleFromAspect(aspect_ratio, false, &scale_x, &scale_y); + + scale_x /= tracking_data.domain_width(); + scale_y /= tracking_data.domain_height(); + + const bool use_background_model = + !(tracking_data.frame_flags() & TrackingData::FLAG_BACKGROUND_UNSTABLE); + + Homography homog_scale = HomographyAdapter::Embed( + AffineAdapter::FromArgs(0, 0, scale_x, 0, 0, scale_y)); + + Homography inv_homog_scale = HomographyAdapter::Embed( + AffineAdapter::FromArgs(0, 0, 1.0f / scale_x, 0, 0, 1.0f / scale_y)); + + // Might be just the identity if not set. 
+ const Homography background_model = tracking_data.background_model(); + const Homography background_model_scaled = + ModelCompose3(homog_scale, background_model, inv_homog_scale); + + motion_vector_frame->background_model.CopyFrom(background_model_scaled); + motion_vector_frame->valid_background_model = use_background_model; + motion_vector_frame->is_duplicated = + tracking_data.frame_flags() & TrackingData::FLAG_DUPLICATED; + motion_vector_frame->is_chunk_boundary = + tracking_data.frame_flags() & TrackingData::FLAG_CHUNK_BOUNDARY; + motion_vector_frame->aspect_ratio = tracking_data.frame_aspect(); + motion_vector_frame->motion_vectors.reserve(motion_data.row_indices_size()); + + motion_vector_frame->motion_vectors.clear(); + const bool long_tracks = motion_data.track_id_size() > 0; + + for (int c = 0; c < motion_data.col_starts_size() - 1; ++c) { + const float x = c; + const float scaled_x = x * scale_x; + + for (int r = motion_data.col_starts(c), + r_end = motion_data.col_starts(c + 1); + r < r_end; ++r) { + MotionVector motion_vector; + + const float y = motion_data.row_indices(r); + const float scaled_y = y * scale_y; + + const float dx = motion_data.vector_data(2 * r); + const float dy = motion_data.vector_data(2 * r + 1); + + if (use_background_model) { + Vector2_f loc(x, y); + Vector2_f background_motion = + HomographyAdapter::TransformPoint(background_model, loc) - loc; + motion_vector.background = Vector2_f(background_motion.x() * scale_x, + background_motion.y() * scale_y); + } + + motion_vector.pos = Vector2_f(scaled_x, scaled_y); + motion_vector.object = Vector2_f(dx * scale_x, dy * scale_y); + + if (long_tracks) { + motion_vector.track_id = motion_data.track_id(r); + } + motion_vector_frame->motion_vectors.push_back(motion_vector); + } + } +} + +void FeatureAndDescriptorFromTrackingData( + const TrackingData& tracking_data, std::vector* features, + std::vector* descriptors) { + const auto& motion_data = tracking_data.motion_data(); + float 
aspect_ratio = tracking_data.frame_aspect(); + if (aspect_ratio < 0.1 || aspect_ratio > 10.0f) { + LOG(ERROR) << "Aspect ratio : " << aspect_ratio << " is out of bounds. " + << "Resetting to 1.0."; + aspect_ratio = 1.0f; + } + + if (motion_data.feature_descriptors_size() == 0) { + LOG(WARNING) << "Feature descriptors not exist"; + return; + } + + float scale_x, scale_y; + // Normalize longest dimension to 1 under aspect ratio preserving scaling. + ScaleFromAspect(aspect_ratio, false, &scale_x, &scale_y); + scale_x /= tracking_data.domain_width(); + scale_y /= tracking_data.domain_height(); + + features->clear(); + descriptors->clear(); + + for (int c = 0; c < motion_data.col_starts_size() - 1; ++c) { + const float x = c; + const float scaled_x = x * scale_x; + + for (int r = motion_data.col_starts(c), + r_end = motion_data.col_starts(c + 1); + r < r_end; ++r) { + const std::string& descriptor = motion_data.feature_descriptors(r).data(); + + if (absl::c_all_of(descriptor, [](char c) { return c == 0; })) { + continue; + } + + const float y = motion_data.row_indices(r); + const float scaled_y = y * scale_y; + + features->emplace_back(scaled_x, scaled_y); + descriptors->emplace_back(descriptor); + } + } +} + +void InvertMotionVectorFrame(const MotionVectorFrame& input, + MotionVectorFrame* output) { + CHECK(output != nullptr); + + output->background_model.CopyFrom(ModelInvert(input.background_model)); + output->valid_background_model = input.valid_background_model; + output->is_duplicated = input.is_duplicated; + output->is_chunk_boundary = input.is_chunk_boundary; + output->duration_ms = input.duration_ms; + output->aspect_ratio = input.aspect_ratio; + output->motion_vectors.clear(); + output->motion_vectors.reserve(input.motion_vectors.size()); + + const float aspect_ratio = input.aspect_ratio; + float domain_x = 1.0f; + float domain_y = 1.0f; + ScaleFromAspect(aspect_ratio, false, &domain_x, &domain_y); + + // Explicit copy. 
+ for (auto motion_vec : input.motion_vectors) { + motion_vec.background *= -1.0f; + motion_vec.object *= -1.0f; + + motion_vec.pos -= motion_vec.background + motion_vec.object; + + // Inverted vector might be out of bound. + if (motion_vec.pos.x() < 0.0f || motion_vec.pos.x() > domain_x || + motion_vec.pos.y() < 0.0f || motion_vec.pos.y() > domain_y) { + continue; + } + + // Approximately 40 - 60% of all inserts happen to be at the end. + if (output->motion_vectors.empty() || + MotionVectorComparator()(output->motion_vectors.back(), motion_vec)) { + output->motion_vectors.push_back(motion_vec); + } else { + output->motion_vectors.insert( + std::lower_bound(output->motion_vectors.begin(), + output->motion_vectors.end(), motion_vec, + MotionVectorComparator()), + motion_vec); + } + } +} + +float TrackingDataDurationMs(const TrackingDataChunk::Item& item) { + return (item.timestamp_usec() - item.prev_timestamp_usec()) * 1e-3f; +} + +void GetFeatureIndicesWithinBox(const std::vector& features, + const MotionBoxState& box_state, + const Vector2_f& box_scaling, + float max_enlarge_size, int min_num_features, + std::vector* inlier_indices) { + CHECK(inlier_indices); + inlier_indices->clear(); + + if (features.empty()) return; + std::array box_lines; + if (!MotionBoxLines(box_state, box_scaling, &box_lines)) { + LOG(ERROR) << "Error in computing MotionBoxLines."; + return; + } + + // If the box size isn't big enough to cover sufficient features to + // reacquire the box, the following code will try to iteratively enlarge the + // box size by half of 'max_enlarge_size' to include more features, but + // maximimum twice. 
+ float distance_threshold = 0.0f; + int inliers_count = 0; + std::vector chosen(features.size(), false); + std::vector signed_distance(features.size()); + + for (int j = 0; j < features.size(); ++j) { + float max_dist = std::numeric_limits::lowest(); + for (const Vector3_f& line : box_lines) { + float dist = + line.DotProd(Vector3_f(features[j].x(), features[j].y(), 1.0f)); + if (dist > max_enlarge_size) { + max_dist = dist; + break; + } + max_dist = std::max(dist, max_dist); + } + + signed_distance[j] = max_dist; + if (signed_distance[j] < distance_threshold) { + ++inliers_count; + chosen[j] = true; + inlier_indices->push_back(j); + } + } + + const float box_enlarge_step = max_enlarge_size * 0.5f; + while (inliers_count < min_num_features) { + distance_threshold += box_enlarge_step; + if (distance_threshold > max_enlarge_size) break; + for (int j = 0; j < features.size(); ++j) { + if (chosen[j]) continue; + if (signed_distance[j] < distance_threshold) { + ++inliers_count; + chosen[j] = true; + inlier_indices->push_back(j); + } + } + } +} + +} // namespace mediapipe. diff --git a/mediapipe/util/tracking/tracking.h b/mediapipe/util/tracking/tracking.h new file mode 100644 index 000000000..782eae47f --- /dev/null +++ b/mediapipe/util/tracking/tracking.h @@ -0,0 +1,643 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MEDIAPIPE_UTIL_TRACKING_TRACKING_H_ +#define MEDIAPIPE_UTIL_TRACKING_TRACKING_H_ + +// Performs tracking via rectangular regions (MotionBoxes) from pre-initialized +// positions, using metadata from tracked features (TrackingData converted to +// MotionVectorFrames), forward and backward in time. + +#include +#include +#include +#include +#include + +#include "mediapipe/framework/port/vector.h" +#include "mediapipe/util/tracking/flow_packager.pb.h" +#include "mediapipe/util/tracking/motion_models.h" +#include "mediapipe/util/tracking/motion_models.pb.h" +#include "mediapipe/util/tracking/tracking.pb.h" + +namespace mediapipe { + +// Useful helper functions. +// +// Clamps values to be within interval [left, right]. +inline float Clamp(float value, float left, float right) { + return value < left ? left : (value > right ? right : value); +} + +// Standard linear interpolation function. +template +T Lerp(T a, T b, float alpha) { + return static_cast(a * (1.0f - alpha) + b * alpha); +} + +// Approximates sigmoid function with a linear ramp, mapping +// x <= lhs to 0, x >= rhs to 1 (for lhs < rhs) linear in between interval +// [lhs, rhs]. If lhs > rhs, roles are reversed. 
+inline float LinearRamp(float value, float lhs, float rhs) { + return Clamp((value - lhs) / (rhs - lhs), 0, 1); +} + +inline Vector2_f MotionBoxPosition(const MotionBoxState& state) { + return Vector2_f(state.pos_x(), state.pos_y()); +} + +inline void SetMotionBoxPosition(const Vector2_f& pos, MotionBoxState* state) { + state->set_pos_x(pos.x()); + state->set_pos_y(pos.y()); +} + +// TODO: this needs to be changed for quad +inline Vector2_f MotionBoxSize(const MotionBoxState& state) { + return Vector2_f(state.width(), state.height()); +} + +inline void SetMotionBoxSize(const Vector2_f& size, MotionBoxState* state) { + state->set_width(size.x()); + state->set_height(size.y()); +} + +inline Vector2_f MotionBoxCenter(const MotionBoxState& state) { + return MotionBoxPosition(state) + 0.5f * MotionBoxSize(state); +} + +inline Vector2_f InlierCenter(const MotionBoxState& state) { + return Vector2_f(state.inlier_center_x(), state.inlier_center_y()); +} + +inline Vector2_f MotionBoxVelocity(const MotionBoxState& state) { + return Vector2_f(state.dx(), state.dy()); +} + +inline void SetMotionBoxVelocity(const Vector2_f& velo, MotionBoxState* state) { + state->set_dx(velo.x()); + state->set_dy(velo.y()); +} + +// Derive normalization factor from image aspect ratio so that the scale for the +// longer edge is 1. scale will be reversed if `invert` is true. +void ScaleFromAspect(float aspect, bool invert, float* scale_x, float* scale_y); + +// Returns 4 corners of the MotionBox as top_left, bottom_left, bottom_right +// and top_right. Applies 2D scaling prior to rotation, which is necessary to +// preserve orthogonality of the rotation if the scaling is not isotropic. +std::array MotionBoxCorners( + const MotionBoxState& state, + const Vector2_f& scaling = Vector2_f(1.0f, 1.0f)); + +// Computes corresponding line equations for MotionBoxCorners. +// Output line equations on 4 sides. 
+// Returns true if box is normal, false if we encounter abnormal box which +// leads to numerical problems. +// Applies 2D scaling prior to rotation, which is necessary to +// preserve orthogonality of the rotation if the scaling is not isotropic. +bool MotionBoxLines(const MotionBoxState& state, const Vector2_f& scaling, + std::array* box_lines); + +// Returns top-left and bottom right corner of the bounding box +// of the MotionBoxState. +void MotionBoxBoundingBox(const MotionBoxState& state, Vector2_f* top_left, + Vector2_f* bottom_right); + +// Adds all inliers from state to the inlier map (as id, score) tuple. +// If id already exist, score is updated to be the maximum of current and +// existing score. +inline void MotionBoxInliers(const MotionBoxState& state, + std::unordered_map* inliers) { + CHECK(inliers); + const int num_inliers = state.inlier_ids_size(); + DCHECK_EQ(num_inliers, state.inlier_length_size()); + + for (int k = 0; k < num_inliers; ++k) { + (*inliers)[state.inlier_ids(k)] = + std::max((*inliers)[state.inlier_ids(k)], state.inlier_length(k)); + } +} + +// Adds all outliers from state to the outlier map. +inline void MotionBoxOutliers(const MotionBoxState& state, + std::unordered_set* outliers) { + for (int id : state.outlier_ids()) { + outliers->insert(id); + } +} + +// Returns inlier locations from state (normalized in [0, 1] domain). +void MotionBoxInlierLocations(const MotionBoxState& state, + std::vector* inlier_pos); +// Same for outlier positions. +void MotionBoxOutlierLocations(const MotionBoxState& state, + std::vector* outlier_pos); + +// Get corners of rotated rectangle. Note that the quad component in +// MotionBoxState is not used in this function. Only the rotated rectangle +// is used. 
+// Inputs: +// -- state: the MotionBoxState where we extract the rotated rectangle +// -- scaling: additional scaling we apply on x and y axis +// Output: +// corners in counter-clockwise order +std::array GetCornersOfRotatedRect(const MotionBoxState& state, + const Vector2_f& scaling); + +// Use the position, width, and height in MotionBoxState to initialize +// the quad. Only use it when you want to get homography for tracking. +void InitializeQuadInMotionBoxState(MotionBoxState* state); + +// Initializes inliers and outliers related fields in MotionBoxState from +// TrackingData. The box or quad position will be read from `state` so they need +// to be set beforehand. +void InitializeInliersOutliersInMotionBoxState(const TrackingData& tracking, + MotionBoxState* state); + +// Initializes pnp_homography field in MotionBoxState using perspective +// transform between a physical rectangle with specified aspect ratio and a +// screen quad. +void InitializePnpHomographyInMotionBoxState( + const TrackingData& tracking, const TrackStepOptions& track_step_options, + MotionBoxState* state); + +// Represents the motion of a feature at pos between frames, differentiating +// object from background motion (supplied via a MotionVectorFrame). +struct MotionVector { + MotionVector() : pos(0, 0), background(0, 0), object(0, 0) {} + + MotionVector(const Vector2_f& pos_, const Vector2_f& background_, + const Vector2_f& object_) + : pos(pos_), background(background_), object(object_) {} + + Vector2_f Location() const { return pos; } + Vector2_f MatchLocation() const { return pos + background + object; } + Vector2_f Motion() const { return background + object; } + + // Position of the feature in normalized domain [0, 1]. + Vector2_f pos; + // Motion due to background (i.e. camera motion). + Vector2_f background; + // Motion due to foreground (i.e. object motion in addition to background). + // If feature belong to background, object motion is nearly zero. 
+ Vector2_f object; + + int track_id = -1; + + // Returns the MotionVector stored in the internal state at index. + static MotionVector FromInternalState(const MotionBoxInternalState& internal, + int index); +}; + +constexpr float kTrackingDefaultFps = 30.0; + +// Holds motion vectors and background model for each frame. +// Note: Specified in the aspect preserving domain under uniform scaling, +// longest dimension normalized to 1, i.e. if aspect_ratio >= 1, width is +// normalized to 1 otherwise height is normalized to 1. +struct MotionVectorFrame { + std::vector motion_vectors; + + Homography background_model; + bool valid_background_model = true; + bool is_duplicated = false; // Set if frame is duplicated w.r.t. + // previous one. + bool is_chunk_boundary = false; // Set if this is the first frame in a chunk. + + float duration_ms = 1000.0f / kTrackingDefaultFps; + + // Aspect ratio (w/h) of the original frame. + float aspect_ratio = 1.0f; +}; + +// Transforms TrackingData to MotionVectorFrame, ready to be used by tracking +// algorithm (so the MotionVectorFrame data is denormalized). +void MotionVectorFrameFromTrackingData(const TrackingData& tracking_data, + MotionVectorFrame* motion_vector_frame); + +// Transform TrackingData to feature positions and descriptors, ready to be used +// by detection (re-acquisition) algorithm (so the "features" is denomalized). +// Descriptors with all 0s will be discarded. +void FeatureAndDescriptorFromTrackingData( + const TrackingData& tracking_data, std::vector* features, + std::vector* descriptors); + +// Inverts MotionVectorFrame (by default defined as motion from current to +// previous frame) to hold motion from previous to current frame. +void InvertMotionVectorFrame(const MotionVectorFrame& input, + MotionVectorFrame* output); + +// Returns duration in ms for this chunk item. +float TrackingDataDurationMs(const TrackingDataChunk::Item& item); + +// Returns feature indices that are within the given box. 
If the box size isn't +// big enough to cover sufficient features (i.e., min_num_features), this will +// iteratively enlarge the box size (up to max_enlarge_size) to include more +// features. The argument box_scaling is used in MotionBoxLines() to get +// properly scaled box corners. Note: box_scaling and max_enlarge_size need to +// be in normalized image space. +// TODO: Add unit test. +void GetFeatureIndicesWithinBox(const std::vector& features, + const MotionBoxState& box_state, + const Vector2_f& box_scaling, + float max_enlarge_size, int min_num_features, + std::vector* inlier_indices); + +// Represents a moving box over time. Initial position is supplied via +// ResetAtFrame, and subsequent positions for previous and next frames are +// determined via tracking by TrackStep method. +// Example usage: +// // Assuming metadata is available: vector mvf; +// MotionBoxState box_state; +// // Set to center 20%. +// box_state.set_pos_x(0.4); +// box_state.set_pos_y(0.4); +// box_state.set_width(0.2); +// box_state.set_height(0.2); +// +// // Initialize first position at frame 5. +// MotionBox motion_box(TrackStepOptions()); +// motion_box.ResetAtFrame(4, box_state); +// // Track 4 frames backward and forward in time. +// for (int i = 0; i < 4; ++i) { +// // Tracking steps need to be called contiguously, as otherwise no +// // prior location for the track is present and TrackStep will fail. +// // Backward. +// motion_box.TrackStep(4 - i, mvf[4 -i], false, nullptr); +// // Get position -> consume for display, etc. +// motion_box.StateAtFrame(4 - i); +// +// // Forward. +// motion_box.TrackStep(4 + i, mvf[4 -i], true, nullptr); +// // Get position -> consume. +// motion_box.StateAtFrame(4 + i); +// } +class MotionBox { + public: + explicit MotionBox(const TrackStepOptions& track_step_options) + : options_(track_step_options) {} + + MotionBox() = default; + + // Sets and overwrites MotionBoxState at specified frame. Use to supply + // initial position. 
+ void ResetAtFrame(int frame, const MotionBoxState& state); + + // Tracks MotionBox from state at from_frame either forward or backward in + // time, based on the passed MotionVectorFrame. (MotionVectorFrame has to + // correspond to requested tracking direction, this is not checked against). + // Returns true if tracking was successful. + // Note: It is assumed that from_frame already has a valid location, either + // via ResetAtFrame or previous successful execution of TrackStep. That is + // TrackStep needs to be called contiguously from a initialized position + // via ResetFrame. Otherwise no prior location for the track is present (at + // from_frame) and TrackStep will fail (return false). + bool TrackStep(int from_frame, const MotionVectorFrame& motion_vectors, + bool forward); + + MotionBoxState StateAtFrame(int frame) const { + if (frame < queue_start_ || + frame >= queue_start_ + static_cast(states_.size())) { + LOG(ERROR) << "Requesting state at unknown frame " << frame + << ". 
Returning UNTRACKED."; + MotionBoxState invalid; + invalid.set_track_status(MotionBoxState::BOX_UNTRACKED); + return invalid; + } else { + MotionBoxState result = states_[frame - queue_start_]; + if (!options_.return_internal_state()) { + result.clear_internal(); + } + return result; + } + } + + MotionBoxState* MutableStateAtFrame(int frame) { + if (frame < queue_start_ || frame >= queue_start_ + states_.size()) { + return NULL; + } else { + return &states_[frame - queue_start_]; + } + } + + bool TrackableFromFrame(int frame) const { + return StateAtFrame(frame).track_status() >= MotionBoxState::BOX_TRACKED; + } + + void set_start_track(int frame) { start_track_ = frame; } + int start_track() const { return start_track_; } + void set_end_track(int frame) { end_track_ = frame; } + int end_track() const { return end_track_; } + + void TrimFront(const int cache_size) { + int trim_count = states_.size() - cache_size; + if (trim_count > 0) { + queue_start_ += trim_count; + while (trim_count-- > 0) { + states_.pop_front(); + } + } + } + + void TrimBack(const int cache_size) { + int trim_count = states_.size() - cache_size; + if (trim_count > 0) { + while (trim_count-- > 0) { + states_.pop_back(); + } + } + } + + // If this variable is set to true, then TrackStep would print warning + // messages when tracking is failed. + // Default value is true and is set in tracking.cc. + static bool print_motion_box_warnings_; + + private: + // Determines next position from curr_pos based on tracking data in + // motion_vectors. Also receives history of the last N positions. + void TrackStepImplDeNormalized( + int frome_frame, const MotionBoxState& curr_pos, + const MotionVectorFrame& motion_vectors, + const std::vector& history, + MotionBoxState* next_pos) const; + + // Pre-normalization wrapper for above function. De-normalizes domain + // to aspect preserving domain and velocity to current frame period. 
+ void TrackStepImpl(int from_frame, const MotionBoxState& curr_pos, + const MotionVectorFrame& motion_frame, + const std::vector& history, + MotionBoxState* next_pos) const; + + // Implementation functions for above TrackStepImpl. + // Returns bounding box for start position and the expansion magnitude + // (normalized) that was applied. + void GetStartPosition(const MotionBoxState& curr_pos, float aspect_ratio, + float* expand_mag, Vector2_f* top_left, + Vector2_f* bottom_right) const; + + // Outputs spatial sigma in x and y for spatial weighting. + // Pass current box_state and inverse box domain size. + void GetSpatialGaussWeights(const MotionBoxState& box_state, + const Vector2_f& inv_box_domain, + float* spatial_gauss_x, + float* spatial_gauss_y) const; + + // Outputs subset of motion_vectors that are within the specified domain + // (top_left to bottom_right). Only searches over the range specified via + // start and end idx. + // Each vector is weighted based on gaussian proximity, similar motion, + // track continuity, etc. which forms the prior weight of each feature. + // Features are binned into a grid of fixed dimension for density analysis. + // Also output number of vectors with good prior weights (> 0.1), and number + // of continued inliers. + // Returns true on success, false on failure. When it returns false, the + // output values are not reliable. + bool GetVectorsAndWeights( + const std::vector& motion_vectors, int start_idx, + int end_idx, const Vector2_f& top_left, const Vector2_f& bottom_right, + const MotionBoxState& box_state, bool valid_background_model, + bool is_chunk_boundary, + float temporal_scale, // Scale for velocity from standard frame period. + float expand_mag, const std::vector& history, + std::vector* vectors, + std::vector* prior_weights, int* number_of_good_prior, + int* number_of_cont_inliers) const; + + // Initializes weights by performing multiple ransac rounds from vectors. 
+ // Error is scaled by irls scale along parallel and orthogonal direction. + void TranslationIrlsInitialization( + const std::vector& vectors, + const Vector2_f& irls_scale, std::vector* weights) const; + + // Wrapper function, estimating object motion w.r.t. various degrees + // of freedom. + void EstimateObjectMotion( + const std::vector& motion_vectors, + const std::vector& prior_weights, int num_continued_inliers, + const Vector2_f& irls_scale, std::vector* weights, + Vector2_f* object_translation, LinearSimilarityModel* object_similarity, + Homography* object_homography) const; + + // Perform IRLS estimation of the passed motion_vector's object motion. + // Each vector is weighted by original_weight / estimation_error, where + // estimation_error is refined in each estimation round. + // Outputs final translation and resulting irls weights (1.0 / + // estimation_error, i.e. with prior bias). + void EstimateTranslation( + const std::vector& motion_vectors, + const std::vector& orig_weights, const Vector2_f& irls_scale, + std::vector* weights, Vector2_f* translation) const; + + // Same as above for similarity. Returns false on failure (numerical + // instability is most common case here). + bool EstimateSimilarity( + const std::vector& motion_vectors, + const std::vector& orig_weights, const Vector2_f& irls_scale, + std::vector* weights, LinearSimilarityModel* lin_sim) const; + + // Same as above for homograph. + bool EstimateHomography( + const std::vector& motion_vectors, + const std::vector& prior_weights, const Vector2_f& irls_scale, + std::vector* weights, Homography* object_homography) const; + + // Perform 6DoF perspective transform based homography estimation using + // motion_vector's object + background motion. + // weights are used to determine whether a vector is inlier or outliers. + // The perspective solver will exclude those vectors with weights smaller than + // kMaxOutlierWeight (0.1). 
+ bool EstimatePnpHomography( + const MotionBoxState& curr_pos, + const std::vector& motion_vectors, + const std::vector& weights, float domain_x, float domain_y, + Homography* pnp_homography) const; + + // Apply pre-computed perspective transform based homography to the next pos. + void ApplyObjectMotionPerspectively(const MotionBoxState& curr_pos, + const Homography& pnp_homography, + float domain_x, float domain_y, + MotionBoxState* next_pos) const; + + // Scores every vector after translation estimation into inliers and outliers + // (based on post_estimation_weights, inlierness is a measure in [0, 1]), + // and records result in next_pos as well as returning inlierness per vector + // in inlier_weights. + // Also computes the following statistics: + // - inlier_density: Local density for each inlier, i.e. measure of how many + // other inliers are close to that point. In [0, 1]. + // - continued_inliers: Number of inliers that continue to be present already + // in curr_pos (same track id) + // - swapped_inliers: Number of inliers that are outliers in curr_pos (same + // track id). + // - motion_inliers: Number of inliers of similar motion as previous state. + // This measure is complementary to above continued inliers + // in case object is moving significantly, in which case + // tracks tend to be short lived. + // - kinetic_average: Average object norm of all inliers weighted by + // pre_estimation_weights. 
+ void ScoreAndRecordInliers(const MotionBoxState& curr_pos, + const std::vector& vectors, + const std::vector& grid_positions, + const std::vector& pre_estimation_weights, + const std::vector& post_estimation_weights, + float background_discrimination, + MotionBoxState* next_pos, + std::vector* inlier_weights, + std::vector* inlier_density, + int* continued_inliers, int* swapped_inliers, + float* motion_inliers, + float* kinetic_average) const; + + // Computes motion disparity (in [0, 1]), that is how well does the current + // object motion agree with the previous object motion. + // 0 indicates perfect match, 1 indicates signicant difference. + float ComputeMotionDisparity(const MotionBoxState& curr_pos, + const Vector2_f& irls_scale, + float continued_inliers, int num_inliers, + const Vector2_f& object_motion) const; + + // Computes inlier center and extent (vector positions weighted by + // weights and density). + // Sets center inlier center, if inlier_weight is above min_inlier_sum, + // else to the Motion box center. + void ComputeInlierCenterAndExtent( + const std::vector& motion_vectors, + const std::vector& weights, const std::vector& density, + const MotionBoxState& state, float* min_inlier_sum, Vector2_f* center, + Vector2_f* extent) const; + + float ScaleEstimate(const std::vector& motion_vectors, + const std::vector& weights, float min_sum) const; + + // Applies spring force from box_state's position to center_of_interest, if + // difference is above rel_threshold. Correcting force equals difference + // above threshold times the spring_force coefficient. + void ApplySpringForce(const Vector2_f& center_of_interest, + const float rel_threshold, const float spring_force, + MotionBoxState* box_state) const; + + // Compute the tracking confidence and return the value. + // The confidence is a float value in [0, 1], with 0 being least confident, + // and 1 being most confident. 
+ float ComputeTrackingConfidence(const MotionBoxState& motion_box_state) const; + + private: + class ObjectMotionValidator { + public: + static bool IsValidSimilarity( + const LinearSimilarityModel& linear_similarity_model, float max_scale, + float max_rotation) { + SimilarityModel similarity_model = + LinearSimilarityAdapter::ToSimilarity(linear_similarity_model); + + if (similarity_model.scale() < 1.0f / max_scale || + similarity_model.scale() > max_scale || + std::abs(similarity_model.rotation()) > max_rotation) { + return false; + } + return true; + } + + static bool IsValidHomography(const Homography& homography, float max_scale, + float max_rotation) { + // Filter out abnormal homography. Otherwise the determinant of + // projected affine matrix will be negative. + if (!IsInverseStable(homography)) { + LOG(WARNING) << "Homography matrix is not stable."; + return false; + } + + LinearSimilarityModel similarity_model = + LinearSimilarityAdapter::ProjectFrom(homography, 1.0f, 1.0f); + return IsValidSimilarity(similarity_model, max_scale, max_rotation); + } + + // Check if it is a convex quad. + static bool IsValidQuad(const MotionBoxState::Quad& quad) { + const int kQuadVerticesSize = 8; + CHECK_EQ(quad.vertices_size(), kQuadVerticesSize); + for (int a = 0; a < kQuadVerticesSize; a += 2) { + int b = (a + 2) % kQuadVerticesSize; + int c = (a - 2 + kQuadVerticesSize) % kQuadVerticesSize; + Vector2_f ab(quad.vertices(b) - quad.vertices(a), + quad.vertices(b + 1) - quad.vertices(a + 1)); + Vector2_f ac(quad.vertices(c) - quad.vertices(a), + quad.vertices(c + 1) - quad.vertices(a + 1)); + + // Since quad's vertices is defined in counter-clockwise manner, we only + // accept negative cross product. + if (ab.CrossProd(ac) >= 0) { + return false; + } + } + + return true; + } + + // Check if all the 4 corners of the quad are out of FOV. 
+ static bool IsQuadOutOfFov(const MotionBoxState::Quad& quad, + const Vector2_f& fov) { + const int kQuadVerticesSize = 8; + CHECK_EQ(quad.vertices_size(), kQuadVerticesSize); + bool too_far = true; + for (int j = 0; j < kQuadVerticesSize; j += 2) { + if (quad.vertices(j) < fov.x() && quad.vertices(j) > 0.0f && + quad.vertices(j + 1) < fov.y() && quad.vertices(j + 1) > 0.0f) { + too_far = false; + break; + } + } + return too_far; + } + }; + + class DistanceWeightsComputer { + public: + DistanceWeightsComputer(const MotionBoxState& initial_state, + const MotionBoxState& current_state, + const TrackStepOptions& options); + + // Compute distance weight based on input motion vector position. + float ComputeDistanceWeight(const MotionVector& test_vector); + + private: + Homography ComputeHomographyFromQuad(const MotionBoxState::Quad& src_quad, + const MotionBoxState::Quad& dst_quad); + + float cos_neg_a_; + float sin_neg_a_; + float spatial_gauss_x_; + float spatial_gauss_y_; + Vector2_f inv_box_domain_; + Vector2_f box_center_; + Vector2_f box_center_transformed_; + bool is_large_rotation_ = false; + Homography homography_; // homography from current box to initial box + TrackStepOptions::TrackingDegrees tracking_degrees_; + }; + + TrackStepOptions options_; + std::deque states_; + int queue_start_; + + int start_track_; + int end_track_; + + MotionBoxState initial_state_; +}; + +} // namespace mediapipe. + +#endif // MEDIAPIPE_UTIL_TRACKING_TRACKING_H_ diff --git a/mediapipe/util/tracking/tracking.proto b/mediapipe/util/tracking/tracking.proto new file mode 100644 index 000000000..ff003801f --- /dev/null +++ b/mediapipe/util/tracking/tracking.proto @@ -0,0 +1,415 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/util/tracking/motion_models.proto"; + +// Next tag: 38 +message MotionBoxState { + // Position (top-left corner) and fixed size of the current MotionBox, + // specified w.r.t. normalized domain (in [0, 1] along both dimensions). + optional float pos_x = 1; + optional float pos_y = 2; + optional float width = 3; + optional float height = 4; + + // Optional degrees of freedom; scale and rotation w.r.t. center of the box, + // i.e. [pos_x, pos_y] + 0.5 * [width, height]. + // To activate see TrackStepOptions::TrackingDegrees. + optional float scale = 5 [default = 1.0]; + optional float rotation = 30 [default = 0.0]; // in radians. + + message Quad { + // Vertex 0 is according to x_0 = vertices(0), y_0 = vertices(1) + // Vertex 1 is according to x_1 = vertices(2), y_1 = vertices(3) + // Vertex 2 is according to x_2 = vertices(4), y_2 = vertices(5) + // Vertex 3 is according to x_3 = vertices(6), y_3 = vertices(7) + // Order of vertices should be aligned in counter-clockwise manner + // 0---------3 + // | | + // | | + // 1---------2 + repeated float vertices = 1; + } + + // This field is only used when we try to track under + // TRACKING_DEGREE_OBJECT_PERSPECTIVE. + optional Quad quad = 34; + + // Aspect ratio (width / height) for the tracked rectangle in physical space. + optional float aspect_ratio = 35; + + // Whether we want this box to be potentially grouped with other boxes + // to track together. This is useful for tracking small boxes that lie + // on a plane. 
For example, when we detect a plane, + // track the plane, then all boxes within the plane can share the same + // homography transform. + optional bool request_grouping = 37 [default = false]; + + // For quad tracking using pnp solver, + // Whether we use perspective-n-points to track quad between frames. + // That mode requires: + // 1. The quad which is being tracked is an rectangle in the physical world. + // 2. The `asepct_ratio` field has to be set in MotionBoxState. + optional Homography pnp_homography = 36; + + // Object velocity in x and y, specified as normalized spatial unit per + // standard frame period (here calibrated w.r.t. kTrackingDefaultFps = 30 + // FPS), that is 33.3 ms. Object velocity refers to velocity after + // subtracting camera motion. + // If current frame period is 66.67 ms (i.e. 15 fps); actual velocity is + // obtained by multipling with a factor of 2. Similar for 60 fps factor + // is 0.5f. + // Standard frame period is chosen for legacy reasons to keep TrackStepOptions + // defaults. + optional float dx = 7; + optional float dy = 8; + + // Weighted average of object velocity magnitude of inlier points (expressed + // in normalized spatial units per standard frame period). + optional float kinetic_energy = 17; + + // Specifies how valid the prior was in the last step. + optional float prior_weight = 9; + + // Tracking status indicating result of tracking: + // UNTRACKED: Box can not be tracked + // (either out of bound or too many tracking failures). + // EMPTY: Box has size of <= 0 along at least on of its dimensions + // (collapsed). + // NO_FEATURES: No features found within the box, tracking is not possible. + // TRACKED: Successful tracking. + // DUPLICATED: Successful tracked, but duplicated from previous result as + // frame was duplicated. + // BOX_TRACKED_OUT_OF_BOUND: Successful tracked, out of bound from screen + // area. Will advance by camera motion. Only used for static objects. 
+ enum TrackStatus { + BOX_UNTRACKED = 0; + BOX_EMPTY = 1; + BOX_NO_FEATURES = 2; + BOX_TRACKED = 3; + BOX_DUPLICATED = 4; + BOX_TRACKED_OUT_OF_BOUND = 5; + } + + optional TrackStatus track_status = 10 [default = BOX_UNTRACKED]; + + // Spatial prior (presence of inliers, i.e. where is the object located within + // the box that is currently being tracked) as a pair of + // a) prior (in [0, 1]) and + // b) confidence (number of features converted to score within + // [0, 1]). + // Prior is defined over a grid of size spatial_prior_grid_size x + // spatial_prior_grid_size. + optional int32 spatial_prior_grid_size = 11 [default = 10]; + repeated float spatial_prior = 12 [packed = true]; + repeated float spatial_confidence = 13 [packed = true]; + + // Difference score between previous prior and current prior (in [0, 1]). + // Currently not used. + optional float prior_diff = 14; + + // Score determining how much predicted motion disagrees with measured motion. + // If measured motion deviates strongly from predicted motion, disparity is + // +/-1, if motion agrees with predicted motion, disparity is 0. + // Sign indicates measured motion is accelerating (> 0) + // or de-accelerating (< 0) w.r.t. predicted motion. + optional float motion_disparity = 15; + + // Score determining how discriminative estimated motion model is. + // In [0, 1] where 0 no discrimination w.r.t. background and 1 + // high discrimination. + optional float background_discrimination = 16; + + // Center of mass for inliers after tracking (center of feature that were used + // for motion estimation) + optional float inlier_center_x = 18; + optional float inlier_center_y = 19; + + // Approximate number of inliers (each features scores a zero [outlier] + // or one [inlier]). + optional float inlier_sum = 24; + + // Ratio of above inlier_sum to average inlier_sum across last states. + optional float inlier_ratio = 25; + + // Extent (width and height of inliers). 
+ optional float inlier_width = 22; + optional float inlier_height = 23; + + // Set of current inlier tracking ids. + repeated uint32 inlier_ids = 26 [packed = true]; + // Corresponding x,y coordinates for each inlier. + repeated uint32 inlier_id_match_pos = 31 [packed = true]; + // Corresponding inlier score (currently: length of inlier observed). + repeated uint32 inlier_length = 27 [packed = true]; + + // Set of outlier ids. + repeated uint32 outlier_ids = 28 [packed = true]; + // Corresponding x,y coordinates for each outlier. + repeated uint32 outlier_id_match_pos = 32 [packed = true]; + + // Confidence of box tracked in the range [0, 1], with 0 being least + // confident, and 1 being most confident. A reasonable threshold is 0.5 + // to filter out unconfident boxes. + optional float tracking_confidence = 33; + + // Additional internal state. + optional MotionBoxInternalState internal = 29; + + reserved 20, 21; +} + +// Captures additional internal state info about the tracking. +message MotionBoxInternalState { + // Stores all motion vectors that were used for tracking + // as packed arrays, capturing position, object motion, camera motion, + // tracking id and corresponding inlier weight. + repeated float pos_x = 1 [packed = true]; + repeated float pos_y = 2 [packed = true]; + repeated float dx = 3 [packed = true]; + repeated float dy = 4 [packed = true]; + repeated float camera_dx = 5 [packed = true]; + repeated float camera_dy = 6 [packed = true]; + repeated int32 track_id = 7 [packed = true]; + + // Within [0, 1]. 0 = outlier; 1 = inlier. + repeated float inlier_score = 8 [packed = true]; +} + +// Next tag: 42 +message TrackStepOptions { + // Degrees of freedom being used for tracking. By default tracker only uses + // translation. Additionally scale and rotation from the camera motion + // and / or object motion can be taken into account. 
+ enum TrackingDegrees { + TRACKING_DEGREE_TRANSLATION = 0; + + // Additional tracking degrees according to camera motion. + TRACKING_DEGREE_CAMERA_SCALE = 1; + TRACKING_DEGREE_CAMERA_ROTATION = 2; + TRACKING_DEGREE_CAMERA_ROTATION_SCALE = 3; + // TODO: Implement! + TRACKING_DEGREE_CAMERA_PERSPECTIVE = 4; + + // Tracking degrees modeling object motion. Note that additional + // object degrees of freedom are only applied when estimation is deemed + // stable, in particular sufficient inliers are present. + // By default, does NOT apply camera motion. If that is desired set + // the flag: track_object_and_camera to true. + TRACKING_DEGREE_OBJECT_SCALE = 5; + TRACKING_DEGREE_OBJECT_ROTATION = 6; + TRACKING_DEGREE_OBJECT_ROTATION_SCALE = 7; + TRACKING_DEGREE_OBJECT_PERSPECTIVE = 8; + } + + optional TrackingDegrees tracking_degrees = 28 + [default = TRACKING_DEGREE_TRANSLATION]; + + // If set and one of the TRACKING_DEGREE_OBJECT degrees are set also applies + // camera motion in addition to the object motion. + optional bool track_object_and_camera = 32 [default = false]; + + // Number of iterations to iteratively estimate model and re-estimate + // influence of each vector. + optional int32 irls_iterations = 1 [default = 5]; + + // Gaussian spatial prior sigma relative to box size. + // For motivation, see this plot: http://goo.gl/BCfcy. + optional float spatial_sigma = 2 [default = 0.15]; + + // Gaussian velocity prior sigma. It is computed as the maximum of the + // absolute minimum sigma (in normalized domain) and the relative sigma + // w.r.t. previous motion. + optional float min_motion_sigma = 3 [default = 0.002]; + optional float relative_motion_sigma = 4 [default = 0.3]; + + // Settings for motion disparity. Difference between previous and current + // motion magnitude is scored linearly, from motion_disparity_low_level to + // motion_disparity_high_level (mapped to score of 0 and 1 respectively). 
+ // Motivation is to ensure acceleration between frames are within reasonable + // bounds. + // Represents a maximum acceleration of around 4 - 5 pixels per frame in 360p + // video to be unpenalized, with accelerations of around >= 10 pixels being + // considered inconsitent with prediction. + optional float motion_disparity_low_level = 6 [default = 8e-3]; + optional float motion_disparity_high_level = 7 [default = 1.6e-2]; + + // Motion disparity decays across frames. Disparity of previous frame decays + // over time. If disparity in current frame is not higher, i.e. the larger + // of the current and decayed disparity is taken. + // Motivation is, that if acceleration was unreasonable high (and we likely + // lost tracking) we enter a stage of trying to regain tracking by looking for + // vectors that agree with the previous prediction. + optional float disparity_decay = 8 [default = 0.8]; + + // Object motion is given as linear combination of previous and measured + // motion depending on the motion_disparity (a high disparity is giving high + // weight to the previous motion). + // We enforce at least a minimum of the below motion_prior_weight regardless + // of the motion disparity. + optional float motion_prior_weight = 9 [default = 0.2]; + + // Settings for motion discrimination. + // + // Current motion magnitude is scored linearly, + // from background_discrimination_low_level to + // background_discrimination_high_level (mapped to score of 0 and 1 + // respectively). + // Motivation is that high object motions are easy to discriminate from the + // background, whereas small object motions are virtually indistinguishable. + // Represents a range of 2 - 4 pixels for 360p video. + optional float background_discrimination_low_level = 10 [default = 4e-3]; + optional float background_discrimination_high_level = 11 [default = 8e-3]; + + // Spring force settings. 
If difference between predicted center of the box in + // the next frame and the predicted center of the inliers deviates by more + // than inlier_center_relative_distance times the box [width|height] + // a spring force is applied to the box. The amount of force is spring_force + // times the difference. + optional float inlier_center_relative_distance = 12 [default = 0.1]; + optional float inlier_spring_force = 13 [default = 0.3]; + + // Same as above, but for the center of large motion magnitudes. + optional float kinetic_center_relative_distance = 14 [default = 0.4]; + optional float kinetic_spring_force = 15 [default = 0.5]; + + // Spring force towards large motions is only applied when kinetic energy is + // above the specified threshold. + optional float kinetic_spring_force_min_kinetic_energy = 21 [default = 3e-3]; + + // Bias of old velocity during update step. + optional float velocity_update_weight = 16 [default = 0.7]; + + // Maximum number of frames considered to be tracking failures -> + // If over threshold, box is considered untrackable. + optional int32 max_track_failures = 17 [default = 10]; + + // Domain used for tracking is always larger than the current box. + // If current motion is not negligible, box is expanded in the direction the + // motion, otherwise expanded in all directions by the amount specified below + // (w.r.t. normalized domain). + optional float expansion_size = 18 [default = 0.05]; + + // Features are scored based on the magnitude of their irls weights, mapped to + // [0, 1] using the following range. The range represents roughly 3 - 1.5 + // pixels error for 360p video. + optional float inlier_low_weight = 19 [default = 250]; + optional float inlier_high_weight = 20 [default = 500]; + + // Kinetic energy decays over time by the specified rate. + optional float kinetic_energy_decay = 22 [default = 0.98]; + + // Amount by which prior is increased/decreased in case of valid/invalid + // measurements. 
+ optional float prior_weight_increase = 23 [default = 0.2]; + + // We map the amount of present kinetic energy linearly to the domain [0, 1] + // describing if an object is static (0) or moving (1). + optional float low_kinetic_energy = 24 [default = 1e-3]; // ~0.4 pix + optional float high_kinetic_energy = 25 [default = 4e-3]; // ~3 pix + + // Outputs internal state to MotionBoxState. + optional bool return_internal_state = 26 [default = false]; + + // Specifies which weights are stored in the internal state. By default + // post-estimation weights are stored, otherwise pre-estimation weights + // are stored. + optional bool use_post_estimation_weights_for_state = 29 [default = true]; + + // Computes spatial grid of inliers and stores it in the MotionBoxState. + optional bool compute_spatial_prior = 27 [default = false]; + + // Irls initialization by performing several rounds of RANSAC to preselect + // features for motion estimation scoring outliers low and inliers to be at + // least of median inlier weight. + message IrlsInitialization { + optional bool activated = 1 [default = false]; + + // Rounds of RANSAC. + optional int32 rounds = 2 [default = 50]; + + // Normalized cutoff threshold for a vector to be considered an inlier. + optional float cutoff = 3 [default = 0.005]; + } + optional IrlsInitialization irls_initialization = 30; + + // Ratio between static motion and temporal scale. This is actually + // the threshold on speed, under which we consider static (non-moving object). + optional float static_motion_temporal_ratio = 33 [default = 3e-3]; + + // Different control parameters to terminate tracking when + // occlusion occurs. 
+ message CancelTrackingWithOcclusionOptions { + optional bool activated = 1 [default = false]; + optional float min_motion_continuity = 2 [default = 0.4]; + optional float min_inlier_ratio = 3 [default = 0.1]; + } + optional CancelTrackingWithOcclusionOptions + cancel_tracking_with_occlusion_options = 34; + + // If number of continued inliers is less than this number, then the object + // motion model will fall back to translation model. + // Set this min_continued_inliers threshold to a low number to make sure + // they follow local object rotation and scale, but it may result in un-robust + // rotation and scale estimation if the threshold is too low. Recommend that + // you don't set a number < 4. + optional int32 object_similarity_min_contd_inliers = 35 [default = 30]; + + // Maximum acceptable scale component of object similarity transform. + // Minimum scale is computed as 1.0 / max_scale. + // Exclusive for tracking a box with similarity. + optional float box_similarity_max_scale = 36 [default = 1.05]; + + // Maximum acceptable object similarity rotation in radians. + optional float box_similarity_max_rotation = 37 [default = 0.2]; + + // Homography transform will first be projected to similarity, and the scale + // component of the similarity transform should be within the range of + // [1.0 / max_scale, max_scale]. + optional float quad_homography_max_scale = 38 [default = 1.2]; + + // The rotation component of the projected similarity should be smaller than + // this maximum rotation threshold. + optional float quad_homography_max_rotation = 39 [default = 0.3]; + + // Pre-calibrated camera intrinsics parameters, including focal length, center + // point, distortion coefficients (only 3 radial factors) and image width / + // height. The image formation model is described here: + // https://docs.opencv.org/2.4/doc/tutorials/calib3d/camera_calibration/camera_calibration.html + // Only used for quad tracking mode. Leave it empty if unknown. 
+ message CameraIntrinsics { + optional float fx = 1; + optional float fy = 2; + optional float cx = 3; + optional float cy = 4; + optional float k0 = 5; + optional float k1 = 6; + optional float k2 = 7; + optional int32 w = 8; + optional int32 h = 9; + } + optional CameraIntrinsics camera_intrinsics = 40; + + // Specifically for quad tracking (aka TRACKING_DEGREE_OBJECT_PERSPECTIVE + // mode), if aspect_ratio field is set in start pos, pnp tracking will be + // deployed. If aspect_ratio is unknown (not set), but forced_pnp_tracking is + // true, we will first estimate the aspect ratio for the 3D quadrangle, then + // perform pnp tracking. If aspect_ratio is unknown and pnp tracking is not + // forced, general homography tracking will be deployed. + optional bool forced_pnp_tracking = 41 [default = false]; +} diff --git a/mediapipe/util/tracking/tracking_visualization_utilities.cc b/mediapipe/util/tracking/tracking_visualization_utilities.cc new file mode 100644 index 000000000..c3ec91563 --- /dev/null +++ b/mediapipe/util/tracking/tracking_visualization_utilities.cc @@ -0,0 +1,224 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/util/tracking/tracking_visualization_utilities.h" + +#include "absl/strings/str_format.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/util/tracking/box_tracker.h" +#include "mediapipe/util/tracking/tracking.h" + +namespace mediapipe { + +void RenderState(const MotionBoxState& box_state, bool print_stats, + cv::Mat* frame) { +#ifndef NO_RENDERING + CHECK(frame != nullptr); + + const int frame_width = frame->cols; + const int frame_height = frame->rows; + + const Vector2_f top_left(box_state.pos_x() * frame_width, + box_state.pos_y() * frame_height); + + cv::rectangle(*frame, + cv::Point(box_state.inlier_center_x() * frame_width - 2, + box_state.inlier_center_y() * frame_height - 2), + cv::Point(box_state.inlier_center_x() * frame_width + 2, + box_state.inlier_center_y() * frame_height + 2), + cv::Scalar(255, 255, 0, 255), 2); + + cv::rectangle( + *frame, + cv::Point( + (box_state.inlier_center_x() - box_state.inlier_width() * 0.5) * + frame_width, + (box_state.inlier_center_y() - box_state.inlier_height() * 0.5) * + frame_height), + cv::Point( + (box_state.inlier_center_x() + box_state.inlier_width() * 0.5) * + frame_width, + (box_state.inlier_center_y() + box_state.inlier_height() * 0.5) * + frame_height), + cv::Scalar(0, 0, 255, 255), 1); + + std::vector inlier_locs; + std::vector outlier_locs; + MotionBoxInlierLocations(box_state, &inlier_locs); + MotionBoxOutlierLocations(box_state, &outlier_locs); + float scale_x = 1.0f; + float scale_y = 1.0f; + ScaleFromAspect(frame_width * 1.0f / frame_height, true, &scale_x, &scale_y); + + for (const Vector2_f& loc : inlier_locs) { + cv::circle(*frame, + cv::Point(loc.x() * scale_x * frame_width, + loc.y() * scale_y * frame_height), + 4.0, cv::Scalar(0, 255, 0, 128), 1); + } + for (const Vector2_f& loc : outlier_locs) { + cv::circle(*frame, + cv::Point(loc.x() * scale_x * frame_width, + loc.y() * scale_y * frame_height), + 4.0, cv::Scalar(255, 0, 0, 128), 1); + } + + 
if (!print_stats) { + return; + } + + std::vector stats; + stats.push_back( + absl::StrFormat("Motion: %.4f, %.4f", box_state.dx(), box_state.dy())); + stats.push_back( + absl::StrFormat("KinEnergy: %.4f", box_state.kinetic_energy())); + stats.push_back( + absl::StrFormat("Disparity: %.2f", box_state.motion_disparity())); + stats.push_back(absl::StrFormat("Discrimination: %.2f", + box_state.background_discrimination())); + stats.push_back( + absl::StrFormat("InlierRatio: %2.2f", box_state.inlier_ratio())); + stats.push_back( + absl::StrFormat("InlierNum: %3d", box_state.inlier_ids_size())); + + stats.push_back(absl::StrFormat("Prior: %.2f", box_state.prior_weight())); + stats.push_back(absl::StrFormat("TrackingConfidence: %.2f", + box_state.tracking_confidence())); + + for (int k = 0; k < stats.size(); ++k) { + // Display some stats below the box. + cv::putText(*frame, stats[k], + cv::Point(top_left.x(), top_left.y() + (k + 1) * 12), + cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(0, 0, 0, 255)); + + cv::putText(*frame, stats[k], + cv::Point(top_left.x() + 1, top_left.y() + (k + 1) * 12 + 1), + cv::FONT_HERSHEY_PLAIN, 1.0, cv::Scalar(255, 255, 255, 255)); + } + + // Visualize locking state via heuristically chosen thresholds. Locking + // state is purely for visualization/illustration and has no further + // meaning. 
+ std::string lock_text; + cv::Scalar lock_color; + if (box_state.motion_disparity() > 0.8) { + lock_text = "Lock lost"; + lock_color = cv::Scalar(255, 0, 0, 255); + } else if (box_state.motion_disparity() > 0.4) { + lock_text = "Aquiring lock"; + lock_color = cv::Scalar(255, 255, 0, 255); + } else if (box_state.motion_disparity() > 0.1) { + lock_text = "Locked"; + lock_color = cv::Scalar(0, 255, 0, 255); + } + + cv::putText(*frame, lock_text, cv::Point(top_left.x() + 1, top_left.y() - 4), + cv::FONT_HERSHEY_PLAIN, 0.8, cv::Scalar(255, 255, 255, 255)); + + cv::putText(*frame, lock_text, cv::Point(top_left.x(), top_left.y() - 5), + cv::FONT_HERSHEY_PLAIN, 0.8, lock_color); +#else + LOG(FATAL) << "Code stripped out because of NO_RENDERING"; +#endif +} + +void RenderInternalState(const MotionBoxInternalState& internal, + cv::Mat* frame) { +#ifndef NO_RENDERING + CHECK(frame != nullptr); + + const int num_vectors = internal.pos_x_size(); + + // Determine max inlier score for visualization. + float max_score = 0; + for (int k = 0; k < num_vectors; ++k) { + max_score = std::max(max_score, internal.inlier_score(k)); + } + + float alpha_scale = 1.0f; + if (max_score > 0) { + alpha_scale = 1.0f / max_score; + } + + const int frame_width = frame->cols; + const int frame_height = frame->rows; + for (int k = 0; k < num_vectors; ++k) { + const MotionVector v = MotionVector::FromInternalState(internal, k); + cv::Point p1(v.pos.x() * frame_width, v.pos.y() * frame_height); + Vector2_f match = v.pos + v.object; + + cv::Point p2(match.x() * frame_width, match.y() * frame_height); + const float alpha = internal.inlier_score(k) * alpha_scale; + + cv::Scalar color_scaled(0 * alpha + (1.0f - alpha) * 255, + 255 * alpha + (1.0f - alpha) * 0, + 0 * alpha + (1.0f - alpha) * 0); + + cv::line(*frame, p1, p2, color_scaled, 1, CV_AA); + cv::circle(*frame, p1, 2.0, color_scaled, 1); + } +#else + LOG(FATAL) << "Code stripped out because of NO_RENDERING"; +#endif +} + +void 
RenderTrackingData(const TrackingData& data, cv::Mat* mat, + bool antialiasing) { +#ifndef NO_RENDERING + CHECK(mat != nullptr); + + MotionVectorFrame mvf; + MotionVectorFrameFromTrackingData(data, &mvf); + const float aspect_ratio = mvf.aspect_ratio; + float scale_x, scale_y; + ScaleFromAspect(aspect_ratio, true, &scale_x, &scale_y); + scale_x *= mat->cols; + scale_y *= mat->rows; + + for (const auto& motion_vector : mvf.motion_vectors) { + Vector2_f pos = motion_vector.Location(); + Vector2_f match = motion_vector.MatchLocation(); + + cv::Point p1(pos.x() * scale_x, pos.y() * scale_y); + cv::Point p2(match.x() * scale_x, match.y() * scale_y); + + // iOS cannot display a width 1 antialiased line, so we provide + // an option for 8-connected drawing instead. + cv::line(*mat, p1, p2, cv::Scalar(0, 255, 0, 255), 1, + antialiasing ? CV_AA : 8); + } +#else + LOG(FATAL) << "Code stripped out because of NO_RENDERING"; +#endif +} + +void RenderBox(const TimedBoxProto& box_proto, cv::Mat* mat) { +#ifndef NO_RENDERING + CHECK(mat != nullptr); + + TimedBox box = TimedBox::FromProto(box_proto); + std::array corners = box.Corners(mat->cols, mat->rows); + + for (int k = 0; k < 4; ++k) { + cv::line(*mat, cv::Point2f(corners[k].x(), corners[k].y()), + cv::Point2f(corners[(k + 1) % 4].x(), corners[(k + 1) % 4].y()), + cv::Scalar(255, 0, 0, 255), // Red. + 4); + } +#else + LOG(FATAL) << "Code stripped out because of NO_RENDERING"; +#endif +} + +} // namespace mediapipe diff --git a/mediapipe/util/tracking/tracking_visualization_utilities.h b/mediapipe/util/tracking/tracking_visualization_utilities.h new file mode 100644 index 000000000..ede86a033 --- /dev/null +++ b/mediapipe/util/tracking/tracking_visualization_utilities.h @@ -0,0 +1,43 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + // + // http://www.apache.org/licenses/LICENSE-2.0 + // + // Unless required by applicable law or agreed to in writing, software + // distributed under the License is distributed on an "AS IS" BASIS, + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + // See the License for the specific language governing permissions and + // limitations under the License. + +#ifndef MEDIAPIPE_UTIL_TRACKING_TRACKING_VISUALIZATION_UTILITIES_H_ +#define MEDIAPIPE_UTIL_TRACKING_TRACKING_VISUALIZATION_UTILITIES_H_ + +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/util/tracking/box_tracker.pb.h" +#include "mediapipe/util/tracking/flow_packager.pb.h" +#include "mediapipe/util/tracking/tracking.pb.h" + +namespace mediapipe { + +// Visualizes state to frame. Also overlays statistics if print_stats is set. +void RenderState(const MotionBoxState& box_state, bool print_stats, + cv::Mat* frame); + +// Visualizes internal tracking state to frame. +void RenderInternalState(const MotionBoxInternalState& internal, + cv::Mat* frame); + +// Visualizes tracking data (specifically, the motion_vectors) to frame. +// Optional third parameter can be used to disable antialiasing. +void RenderTrackingData(const TrackingData& data, cv::Mat* mat, + bool antialiasing = false); + +// Visualizes TimedBoxProto onto image. 
+void RenderBox(const TimedBoxProto& box_proto, cv::Mat* mat); + +} // namespace mediapipe + +#endif // MEDIAPIPE_UTIL_TRACKING_TRACKING_VISUALIZATION_UTILITIES_H_ diff --git a/third_party/ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff b/third_party/ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff new file mode 100644 index 000000000..5b2661516 --- /dev/null +++ b/third_party/ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff @@ -0,0 +1,14 @@ +diff --git a/bazel/ceres.bzl b/bazel/ceres.bzl +index d90e5a3..4420565 100644 +--- a/bazel/ceres.bzl ++++ b/bazel/ceres.bzl +@@ -202,6 +203,6 @@ def ceres_library(name, + ], + visibility = ["//visibility:public"], + deps = [ +- "@com_github_eigen_eigen//:eigen", +- "@com_github_google_glog//:glog", ++ "@eigen_archive//:eigen", ++ "@com_github_glog_glog//:glog", + ], + ) \ No newline at end of file diff --git a/third_party/opencv_linux.BUILD b/third_party/opencv_linux.BUILD index ef2aeb49c..4d2eb254e 100644 --- a/third_party/opencv_linux.BUILD +++ b/third_party/opencv_linux.BUILD @@ -7,6 +7,7 @@ exports_files(["LICENSE"]) # The following build rule assumes that OpenCV is installed by # 'apt-get install libopencv-core-dev libopencv-highgui-dev \' +# ' libopencv-calib3d-dev libopencv-features2d-dev \' # ' libopencv-imgproc-dev libopencv-video-dev' on Debian/Ubuntu. # If you install OpenCV separately, please modify the build rule accordingly. 
cc_library( diff --git a/third_party/org_tensorflow_e3a7bdbebb99352351a19e2e403136166aa52934.diff b/third_party/org_tensorflow_e3a7bdbebb99352351a19e2e403136166aa52934.diff new file mode 100644 index 000000000..4f8a10f77 --- /dev/null +++ b/third_party/org_tensorflow_e3a7bdbebb99352351a19e2e403136166aa52934.diff @@ -0,0 +1,19 @@ +diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl +index a0546b5f83..c7a9f39ded 100755 +--- a/tensorflow/workspace.bzl ++++ b/tensorflow/workspace.bzl +@@ -172,9 +172,9 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): + name = "eigen_archive", + build_file = clean_dep("//third_party:eigen.BUILD"), + patch_file = clean_dep("//third_party/eigen3:gpu_packet_math.patch"), +- sha256 = "6d8ed482addd14892d7b0bd98fec2c02f18fdab97775bda68c3f2a99ffb190fb", +- strip_prefix = "eigen-eigen-66be6c76fc01", ++ sha256 = "add24720f99ab4f3222f4c8a887f2609554cf9187d4f7d24a777a151a0ee2548", ++ strip_prefix = "eigen-git-mirror-4898dcdb06f1b1b0441b8e15119764793f8997e2", + urls = [ +- "https://storage.googleapis.com/mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/66be6c76fc01.tar.gz", +- "https://bitbucket.org/eigen/eigen/get/66be6c76fc01.tar.gz", ++ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/eigenteam/eigen-git-mirror/archive/4898dcdb06f1b1b0441b8e15119764793f8997e2.tar.gz", ++ "https://github.com/eigenteam/eigen-git-mirror/archive/4898dcdb06f1b1b0441b8e15119764793f8997e2.tar.gz", + ], + ) \ No newline at end of file