Project import generated by Copybara.

GitOrigin-RevId: 4419aaa472eeb91123d1f8576188166ee0e5ea69
This commit is contained in:
MediaPipe Team 2020-03-10 18:07:12 -07:00 committed by jqtang
parent 252a5713c7
commit 3b6d3c4058
104 changed files with 7441 additions and 88 deletions

View File

@ -15,6 +15,7 @@
* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html)
* [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md)
* [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md)
* [Objectron: 3D Object Detection and Tracking](mediapipe/docs/objectron_mobile_gpu.md)
* [AutoFlip](mediapipe/docs/autoflip.md)
![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif)
@ -43,6 +44,8 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de
* [YouTube Channel](https://www.youtube.com/channel/UCObqmpuSMx-usADtL_qdMAw)
## Publications
* [MediaPipe Objectron: Real-time 3D Object Detection on Mobile Devices](https://mediapipe.page.link/objectron-aiblog)
* [AutoFlip: An Open Source Framework for Intelligent Video Reframing](https://mediapipe.page.link/autoflip)
* [Google Developer Blog: MediaPipe on the Web](https://mediapipe.page.link/webdevblog)
* [Google Developer Blog: Object Detection and Tracking using MediaPipe](https://mediapipe.page.link/objecttrackingblog)
* [On-Device, Real-Time Hand Tracking with MediaPipe](https://ai.googleblog.com/2019/08/on-device-real-time-hand-tracking-with.html)
@ -63,7 +66,7 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de
* [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe
## Alpha Disclaimer
MediaPipe is currently in alpha for v0.6. We are still making breaking API changes and expect to get to stable API by v1.0.
MediaPipe is currently in alpha for v0.7. We are still making breaking API changes and expect to get to stable API by v1.0.
## Contributing
We welcome contributions. Please follow these [guidelines](./CONTRIBUTING.md).

View File

@ -75,11 +75,28 @@ REGISTER_CALCULATOR(ImageCroppingCalculator);
}
#endif // !MEDIAPIPE_DISABLE_GPU
RET_CHECK(cc->Inputs().HasTag(kRectTag) ^ cc->Inputs().HasTag(kNormRectTag) ^
(cc->Options<mediapipe::ImageCroppingCalculatorOptions>()
.has_norm_width() &&
cc->Options<mediapipe::ImageCroppingCalculatorOptions>()
.has_norm_height()));
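// Exactly one source must supply the crop region: the RECT input stream, the
// WIDTH+HEIGHT input streams, the NORM_RECT input stream, the
// norm_width/norm_height options, or the width/height options. The count
// below enforces this.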
int flags = 0;
if (cc->Inputs().HasTag(kRectTag)) {
++flags;
}
if (cc->Inputs().HasTag(kWidthTag) && cc->Inputs().HasTag(kHeightTag)) {
++flags;
}
if (cc->Inputs().HasTag(kNormRectTag)) {
++flags;
}
if (cc->Options<mediapipe::ImageCroppingCalculatorOptions>()
.has_norm_width() &&
cc->Options<mediapipe::ImageCroppingCalculatorOptions>()
.has_norm_height()) {
++flags;
}
if (cc->Options<mediapipe::ImageCroppingCalculatorOptions>().has_width() &&
cc->Options<mediapipe::ImageCroppingCalculatorOptions>().has_height()) {
++flags;
}
RET_CHECK(flags == 1) << "Illegal combination of input streams/options.";
if (cc->Inputs().HasTag(kRectTag)) {
cc->Inputs().Tag(kRectTag).Set<Rect>();
}

View File

@ -39,6 +39,15 @@ proto_library(
],
)
proto_library(
name = "timed_box_list_id_to_label_calculator_proto",
srcs = ["timed_box_list_id_to_label_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "latency_proto",
srcs = ["latency.proto"],
@ -113,6 +122,18 @@ mediapipe_cc_proto_library(
],
)
mediapipe_cc_proto_library(
name = "timed_box_list_id_to_label_calculator_cc_proto",
srcs = ["timed_box_list_id_to_label_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [
":timed_box_list_id_to_label_calculator_proto",
],
)
mediapipe_cc_proto_library(
name = "latency_cc_proto",
srcs = ["latency.proto"],
@ -313,6 +334,34 @@ cc_library(
alwayslink = 1,
)
cc_library(
name = "timed_box_list_id_to_label_calculator",
srcs = ["timed_box_list_id_to_label_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":timed_box_list_id_to_label_calculator_cc_proto",
"//mediapipe/framework/port:status",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:packet",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"//mediapipe/util:resource_util",
] + select({
"//mediapipe:android": [
"//mediapipe/util/android/file/base",
],
"//mediapipe:apple": [
"//mediapipe/util/android/file/base",
],
"//mediapipe:macos": [
"//mediapipe/framework/port:file_helpers",
],
"//conditions:default": [
"//mediapipe/framework/port:file_helpers",
],
}),
alwayslink = 1,
)
cc_library(
name = "non_max_suppression_calculator",
srcs = ["non_max_suppression_calculator.cc"],

View File

@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe//framework/packet.h"
#include "mediapipe/calculators/util/detection_label_id_to_text_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/resource_util.h"

View File

@ -0,0 +1,105 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/calculators/util/timed_box_list_id_to_label_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/packet.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/resource_util.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
#if defined(MEDIAPIPE_MOBILE)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/helpers.h"
#else
#include "mediapipe/framework/port/file_helpers.h"
#endif
namespace mediapipe {
using mediapipe::TimedBoxProto;
using mediapipe::TimedBoxProtoList;
// Takes a label map (from label IDs to names) and populates the label field in
// TimedBoxProto according to its ID.
//
// Example usage:
// node {
// calculator: "TimedBoxListIdToLabelCalculator"
// input_stream: "input_timed_box_list"
// output_stream: "output_timed_box_list"
// node_options: {
// [mediapipe.TimedBoxListIdToLabelCalculatorOptions] {
// label_map_path: "labelmap.txt"
// }
// }
// }
class TimedBoxListIdToLabelCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
std::unordered_map<int, std::string> label_map_;
};
REGISTER_CALCULATOR(TimedBoxListIdToLabelCalculator);
::mediapipe::Status TimedBoxListIdToLabelCalculator::GetContract(
CalculatorContract* cc) {
cc->Inputs().Index(0).Set<TimedBoxProtoList>();
cc->Outputs().Index(0).Set<TimedBoxProtoList>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status TimedBoxListIdToLabelCalculator::Open(
CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
const auto& options =
cc->Options<::mediapipe::TimedBoxListIdToLabelCalculatorOptions>();
std::string string_path;
ASSIGN_OR_RETURN(string_path, PathToResourceAsFile(options.label_map_path()));
std::string label_map_string;
MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string));
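// The label map file is expected to contain one label per line; the
// zero-based line index becomes the ID used for lookup in Process().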
std::istringstream stream(label_map_string);
std::string line;
int i = 0;
while (std::getline(stream, line)) {
label_map_[i++] = line;
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TimedBoxListIdToLabelCalculator::Process(
CalculatorContext* cc) {
const auto& input_list = cc->Inputs().Index(0).Get<TimedBoxProtoList>();
auto output_list = absl::make_unique<TimedBoxProtoList>();
for (const auto& input_box : input_list.box()) {
TimedBoxProto* box_ptr = output_list->add_box();
*box_ptr = input_box;
if (label_map_.find(input_box.id()) != label_map_.end()) {
box_ptr->set_label(label_map_[input_box.id()]);
}
}
cc->Outputs().Index(0).Add(output_list.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,28 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message TimedBoxListIdToLabelCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional TimedBoxListIdToLabelCalculatorOptions ext = 297701606;
}
// Path to a label map file for getting the actual name of detected classes.
optional string label_map_path = 1;
}

View File

@ -66,6 +66,25 @@ void AddTimedBoxProtoToRenderData(
rect->set_bottom(box_proto.bottom());
rect->set_rotation(box_proto.rotation());
}
if (box_proto.has_label()) {
auto* label_annotation = render_data->add_render_annotations();
label_annotation->mutable_color()->set_r(options.box_color().r());
label_annotation->mutable_color()->set_g(options.box_color().g());
label_annotation->mutable_color()->set_b(options.box_color().b());
label_annotation->set_thickness(options.thickness());
RenderAnnotation::Text* text = label_annotation->mutable_text();
text->set_display_text(box_proto.label());
text->set_normalized(true);
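// The constants below place the label text 30% of the way across the box
// horizontally, set its baseline 60% of the way down the box, and scale the
// font height to 20% of the box height.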
constexpr float text_left_start = 0.3f;
text->set_left((1.0f - text_left_start) * box_proto.left() +
text_left_start * box_proto.right());
constexpr float text_baseline = 0.6f;
text->set_baseline(text_baseline * box_proto.bottom() +
(1.0f - text_baseline) * box_proto.top());
constexpr float text_height = 0.2f;
text->set_font_height((box_proto.bottom() - box_proto.top()) * text_height);
}
}
} // namespace

View File

@ -15,6 +15,9 @@ For overall context on AutoFlip, please read this
Run the following command to build the AutoFlip pipeline:
Note: AutoFlip currently only works with OpenCV 3. Please verify your OpenCV
version beforehand.
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 mediapipe/examples/desktop/autoflip:run_autoflip
```

View File

@ -44,6 +44,14 @@ graphs can be easily adapted to run on CPU v.s. GPU.
[Object Detection and Tracking with GPU](./object_tracking_mobile_gpu.md) illustrates how to
use MediaPipe for object detection and tracking.
### Objectron: 3D Object Detection and Tracking with GPU
[MediaPipe Objectron: 3D Object Detection with GPU](./objectron_mobile_gpu.md)
illustrates a mobile real-time 3D object detection and tracking pipeline for
everyday objects like shoes and chairs.
* [Android](./objectron_mobile_gpu.md)
### Face Detection with GPU
[Face Detection with GPU](./face_detection_mobile_gpu.md) illustrates how to use

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

View File

@ -364,8 +364,10 @@ To build and run iOS apps:
### Installing on Windows Subsystem for Linux (WSL)
Note: WSL has historically not provided access to USB cameras. Mediapipe can use
a video file as input.
Note: The pre-built OpenCV packages don't support cameras in WSL. Unless you
[compile](https://funvision.blogspot.com/2019/12/opencv-web-camera-and-video-streams-in.html)
OpenCV with FFMPEG and GStreamer in WSL, the live demos won't work with any
cameras. Alternatively, you can use a video file as input.
1. Follow the
[instruction](https://docs.microsoft.com/en-us/windows/wsl/install-win10) to
@ -373,7 +375,7 @@ a video file as input.
2. Install Windows ADB and start the ADB server in Windows.
Note: Windows and WSLs adb versions must be the same version, e.g., if WSL
Note: Windows' and WSL's adb versions must be the same version, e.g., if WSL
has ADB 1.0.39, you need to download the corresponding Windows ADB from
[here](https://dl.google.com/android/repository/platform-tools_r26.0.1-windows.zip).

View File

@ -26,6 +26,7 @@ To build and run the TensorFlow example on desktop, run:
$ bazel build -c opt \
--define MEDIAPIPE_DISABLE_GPU=1 \
--define no_aws_support=true \
--linkopt=-s \
mediapipe/examples/desktop/object_detection:object_detection_tensorflow
# It should print:

View File

@ -0,0 +1,489 @@
# MediaPipe Objectron (GPU)
This doc focuses on the
[below example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
that performs 3D object detection and tracking with TensorFlow Lite on GPU.
Objectron for shoes | Objectron for chairs
:-----------------------------------------------------------------------------: | :------------------:
![objectron_shoe_android_gpu_gif](images/mobile/objectron_shoe_android_gpu.gif) | ![objectron_chair_android_gpu_gif](images/mobile/objectron_chair_android_gpu.gif)
For overall context on MediaPipe Objectron, please read the
[Google AI Blog](https://mediapipe.page.link/objectron-aiblog). The Objectron's
ML model (see also the [model card](https://mediapipe.page.link/objectron-mc))
estimates a 3D bounding box for the detected object.
## Android
[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d)
An arm64 build of Objectron for shoes can be
[downloaded here](https://drive.google.com/open?id=1S0K4hbWt3o31FfQ4QU3Rz7IHrvOUMx1d),
and for chairs can be
[downloaded here](https://drive.google.com/open?id=1MM8K-13bXLCVS1EHQ-KgkVyEahEPrKej).
To build and install the Objectron for shoes:
```bash
bazel build -c opt --config android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Similarly, to build and install Objectron for chairs, add the **--define
chair=true** flag to the build command.
```bash
bazel build -c opt --define chair=true --config android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d:objectdetection3d
```
Once the app is built, install it on an Android device with:
```bash
adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/objectdetection3d.apk
```
## Graph
The Objectron main graph internally utilizes the Objectron detection subgraph
and the Objectron tracking subgraph. To visualize the graph as shown above, copy
the text specification of the graph below and paste it into
[MediaPipe Visualizer](https://viz.mediapipe.dev/).
### Main Graph
This is the main graph for the shoe detector. This graph runs detection and
tracking and renders the output to the display.
![object_detection_mobile_gpu_graph](images/mobile/object_detection_3d_android_gpu.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/shoe_classic_occlusion_tracking.pbtxt)
```bash
# MediaPipe object detection 3D with tracking graph.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
output_stream: "output_video"
# Creates a copy of the input_video stream. At the end of the graph, the
# GlAnimationOverlayCalculator will consume the input_video texture and draw
# on top of it.
node: {
calculator: "GlScalerCalculator"
input_stream: "VIDEO:input_video"
output_stream: "VIDEO:input_video_copy"
}
# Resamples the images to a specific frame rate. This calculator is used to
# control the frequency of subsequent calculators/subgraphs, e.g., to reduce
# power consumption for expensive processing.
node {
calculator: "PacketResamplerCalculator"
input_stream: "DATA:input_video_copy"
output_stream: "DATA:sampled_input_video"
node_options: {
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
frame_rate: 5
}
}
}
node {
calculator: "ObjectronDetectionSubgraphGpu"
input_stream: "IMAGE_GPU:sampled_input_video"
output_stream: "ANNOTATIONS:objects"
}
node {
calculator: "ObjectronTrackingSubgraphGpu"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "IMAGE_GPU:input_video_copy"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
}
# The rendering nodes:
# We are rendering two meshes: 1) a 3D bounding box, which we overlay directly
# on the texture, and 2) a shoe CAD model, which we use as an occlusion mask.
# These models are designed using different tools, so we supply a transformation
# to bring both of them to the Objectron's coordinate system.
# Creates model matrices for the tracked object given the lifted 3D points.
# This calculator does two things: 1) estimates the object's pose (orientation,
# translation, and scale) from the 3D vertices, and 2) brings the object from
# the Objectron coordinate system to the renderer (OpenGL) coordinate system.
# Since the final goal is to render a mesh file on top of the object, we also
# supply a transformation to bring the mesh to the Objectron coordinate system
# and rescale the mesh to unit size.
node {
calculator: "AnnotationsToModelMatricesCalculator"
input_stream: "ANNOTATIONS:lifted_tracked_objects"
output_stream: "MODEL_MATRICES:model_matrices"
node_options: {
[type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
# Re-scale the CAD model to the size of a unit box
model_scale: 0.05
model_scale: 0.05
model_scale: 0.05
# Bring the box CAD model to the Objectron coordinate system. This is
# equivalent to a -pi/2 rotation about the y-axis (right-hand rule):
# Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitY())
model_transformation: 0.0
model_transformation: 0.0
model_transformation: -1.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 1.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 1.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 1.0
}
}
}
# Compute the model matrices for the CAD model of the shoe, to be used as an
# occlusion mask. The model will be rendered at the exact same location as the
# bounding box.
node {
calculator: "AnnotationsToModelMatricesCalculator"
input_stream: "ANNOTATIONS:lifted_tracked_objects"
output_stream: "MODEL_MATRICES:mask_model_matrices"
#input_side_packet: "MODEL_SCALE:model_scale"
node_options: {
[type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
# Re-scale the CAD model to the size of a unit box
model_scale: 0.45
model_scale: 0.25
model_scale: 0.15
# Bring the shoe CAD model to Deep Pursuit 3D's coordinate system. This is
# equivalent to a -pi/2 rotation about the x-axis (right-hand rule):
# Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitX())
model_transformation: 1.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 1.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: -1.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 0.0
model_transformation: 1.0
}
}
}
# Render everything together. First we render the 3D bounding box animation,
# then we render the occlusion mask.
node: {
calculator: "GlAnimationOverlayCalculator"
input_stream: "VIDEO:input_video"
input_stream: "MODEL_MATRICES:model_matrices"
input_stream: "MASK_MODEL_MATRICES:mask_model_matrices"
output_stream: "output_video"
input_side_packet: "TEXTURE:box_texture"
input_side_packet: "ANIMATION_ASSET:box_asset_name"
input_side_packet: "MASK_TEXTURE:obj_texture"
input_side_packet: "MASK_ASSET:obj_asset_name"
node_options: {
[type.googleapis.com/mediapipe.GlAnimationOverlayCalculatorOptions] {
# Output resolution is 480x640 with the aspect ratio of 0.75
aspect_ratio: 0.75
vertical_fov_degrees: 70.
animation_speed_fps: 25
}
}
}
```
### Objectron Detection Subgraph
The Objectron detection subgraph uses the *TfLiteInferenceCalculator* to run
inference and decodes the output tensors into a *FrameAnnotation* protobuf. The
*FrameAnnotation* contains nine keypoints: the bounding box's center as well as
its eight vertices. The boxes are then passed to the Objectron tracking subgraph.
![object_detection_subgraph](images/mobile/objectron_detection_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/subgraphs/objectron_detection_gpu.pbtxt)
```bash
# MediaPipe Objectron detection gpu subgraph
type: "ObjectronDetectionSubgraphGpu"
input_stream: "IMAGE_GPU:input_video"
output_stream: "ANNOTATIONS:objects"
# Transforms the input image on GPU to a 480x640 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio,
# resulting in potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 480
output_height: 640
scale_mode: FIT
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "opresolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
use_gpu: true
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "object_detection_3d.tflite"
}
}
}
# Decodes the model's output tensor (the heatmap and the distance fields) to 2D
# keypoints. There are nine 2D keypoints: one center keypoint and eight vertices
# for the 3D bounding box. The calculator parameters determine the decoder's
# sensitivity.
node {
calculator: "TfLiteTensorsToObjectsCalculator"
input_stream: "TENSORS:detection_tensors"
output_stream: "ANNOTATIONS:objects"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToObjectsCalculatorOptions] {
num_classes: 1
num_keypoints: 9
decoder_config {
heatmap_threshold: 0.6
local_max_distance: 2
offset_scale_coef: 1.0
voting_radius: 2
voting_allowance: 1
voting_threshold: 0.2
}
}
}
}
```
### Object Tracking Subgraph
The object tracking subgraph uses the *BoxTracker* calculator, a generic
tracking library also used in
[MediaPipe's 2D Object Detection and Tracking](https://github.com/google/mediapipe/tree/master/mediapipe/g3doc/object_tracking_mobile_gpu.md).
Tracking runs every frame, and when a new detection is available, it
consolidates the detection and tracking results. The tracker tracks the box via
its 2D keypoints, so at the end we lift the 2D keypoints to 3D using the EPnP
algorithm in the *Lift2DFrameAnnotationTo3D* calculator.
![object_tracking_subgraph](images/mobile/objectron_tracking_subgraph.png)
[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/object_detection_3d/subgraphs/objectron_tracking_gpu.pbtxt)
```bash
# MediaPipe Objectron tracking gpu subgraph
type: "ObjectronTrackingSubgraphGpu"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "IMAGE_GPU:input_video"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
# Converts the detected keypoints to Boxes, used by the tracking subgraph.
node {
calculator: "FrameAnnotationToTimedBoxListCalculator"
input_stream: "FRAME_ANNOTATION:objects"
output_stream: "BOXES:start_pos"
}
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:downscaled_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 240
output_height: 320
}
}
}
# Converts the GPU buffer to an ImageFrame for tracking processing.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "downscaled_input_video"
output_stream: "downscaled_input_video_cpu"
}
# Performs motion analysis on an incoming video stream.
node: {
calculator: "MotionAnalysisCalculator"
input_stream: "VIDEO:downscaled_input_video_cpu"
output_stream: "CAMERA:camera_motion"
output_stream: "FLOW:region_flow"
node_options: {
[type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: {
analysis_options {
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
flow_options {
fast_estimation_min_block_size: 100
top_inlier_sets: 1
frac_inlier_error_threshold: 3e-3
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
verification_distance: 5.0
verify_long_feature_acceleration: true
verify_long_feature_trigger_ratio: 0.1
tracking_options {
max_features: 500
adaptive_extraction_levels: 2
min_eig_val_settings {
adaptive_lowest_quality_level: 2e-4
}
klt_tracker_implementation: KLT_OPENCV
}
}
}
}
}
}
# Reads optical flow fields defined in
# mediapipe/framework/formats/motion/optical_flow_field.h,
# returns a VideoFrame with 2 channels (v_x and v_y), each channel is quantized
# to 0-255.
node: {
calculator: "FlowPackagerCalculator"
input_stream: "FLOW:region_flow"
input_stream: "CAMERA:camera_motion"
output_stream: "TRACKING:tracking_data"
node_options: {
[type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: {
flow_packager_options: {
binary_tracking_data_support: false
}
}
}
}
# Tracks box positions over time.
node: {
calculator: "BoxTrackerCalculator"
input_stream: "TRACKING:tracking_data"
input_stream: "TRACK_TIME:input_video"
input_stream: "START_POS:start_pos"
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
input_stream_info: {
tag_index: "CANCEL_OBJECT_ID"
back_edge: true
}
output_stream: "BOXES:boxes"
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "TRACKING"
tag_index: "TRACK_TIME"
}
sync_set {
tag_index: "START_POS"
}
sync_set {
tag_index: "CANCEL_OBJECT_ID"
}
}
}
}
node_options: {
[type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: {
tracker_options: {
track_step_options {
track_object_and_camera: true
tracking_degrees: TRACKING_DEGREE_OBJECT_ROTATION_SCALE
inlier_spring_force: 0.0
static_motion_temporal_ratio: 3e-2
}
}
visualize_tracking_data: false
streaming_track_data_cache_size: 100
}
}
}
# Consolidates tracking and detection results.
node {
calculator: "FrameAnnotationTrackerCalculator"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "TRACKED_BOXES:boxes"
output_stream: "TRACKED_FRAME_ANNOTATION:tracked_objects"
output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
node_options: {
[type.googleapis.com/mediapipe.FrameAnnotationTrackerCalculatorOptions] {
img_width: 240
img_height: 320
}
}
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "FRAME_ANNOTATION"
}
sync_set {
tag_index: "TRACKED_BOXES"
}
}
}
}
}
# Lifts the tracked 2D keypoints to 3D using the EPnP algorithm.
node {
calculator: "Lift2DFrameAnnotationTo3DCalculator"
input_stream: "FRAME_ANNOTATION:tracked_objects"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
}
```

View File

@ -61,6 +61,8 @@ videos.
```bash
# cd to the root directory of the MediaPipe repo
cd -
pip3 install tf_slim
python -m mediapipe.examples.desktop.youtube8m.generate_vggish_frozen_graph
```
@ -78,7 +80,7 @@ videos.
5. Run the MediaPipe binary to extract the features.
```bash
bazel build -c opt \
bazel build -c opt --linkopt=-s \
--define MEDIAPIPE_DISABLE_GPU=1 --define no_aws_support=true \
mediapipe/examples/desktop/youtube8m:extract_yt8m_features
@ -126,13 +128,13 @@ the inference for both local videos and the dataset
2. Build the inference binary.
```bash
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' --linkopt=-s \
mediapipe/examples/desktop/youtube8m:model_inference
```
3. Run the python web server.
Note: pip install absl-py
Note: pip3 install absl-py
```bash
python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd`
@ -162,7 +164,7 @@ the inference for both local videos and the dataset
3. Build and run the inference binary.
```bash
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' --linkopt=-s \
mediapipe/examples/desktop/youtube8m:model_inference
# segment_size is the number of seconds window of frames.

View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.google.mediapipe.apps.objectdetection3d">
<uses-sdk
android:minSdkVersion="21"
android:targetSdkVersion="27" />
<!-- For using the camera -->
<uses-permission android:name="android.permission.CAMERA" />
<uses-feature android:name="android.hardware.camera" />
<uses-feature android:name="android.hardware.camera.autofocus" />
<!-- For MediaPipe -->
<uses-feature android:glEsVersion="0x00020000" android:required="true" />
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true"
android:theme="@style/AppTheme">
<activity
android:name=".MainActivity"
android:exported="true"
android:screenOrientation="portrait">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@ -0,0 +1,115 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
cc_binary(
name = "libmediapipe_jni.so",
linkshared = 1,
linkstatic = 1,
deps = [
"//mediapipe/graphs/object_detection_3d:mobile_calculators",
"//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
],
)
cc_library(
name = "mediapipe_jni_lib",
srcs = [":libmediapipe_jni.so"],
alwayslink = 1,
)
# To use the "chair" model instead of the default "shoes" model,
# add "--define chair=true" to the bazel build command.
config_setting(
name = "use_chair_model",
define_values = {
"chair": "true",
},
)
# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
# easily incorporated into the app via, for example,
# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
genrule(
name = "binary_graph",
srcs = select({
"//conditions:default": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph_shoe"],
":use_chair_model": ["//mediapipe/graphs/object_detection_3d:mobile_gpu_binary_graph_chair"],
}),
outs = ["objectdetection3d.binarypb"],
cmd = "cp $< $@",
)
genrule(
name = "model",
srcs = select({
"//conditions:default": ["//mediapipe/models:object_detection_3d_sneakers.tflite"],
":use_chair_model": ["//mediapipe/models:object_detection_3d_chair.tflite"],
}),
outs = ["object_detection_3d.tflite"],
cmd = "cp $< $@",
)
android_library(
name = "mediapipe_lib",
srcs = glob(["*.java"]),
assets = [
":binary_graph",
":model",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets:box.obj.uuu",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets:classic_colors.png",
] + select({
"//conditions:default": [
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:model.obj.uuu",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/sneaker:texture.bmp",
],
":use_chair_model": [
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:model.obj.uuu",
"//mediapipe/examples/android/src/java/com/google/mediapipe/apps/objectdetection3d/assets/chair:texture.bmp",
],
}),
assets_dir = "",
manifest = "AndroidManifest.xml",
resource_files = glob(["res/**"]),
deps = [
":mediapipe_jni_lib",
"//mediapipe/framework/formats:landmark_java_proto_lite",
"//mediapipe/java/com/google/mediapipe/components:android_camerax_helper",
"//mediapipe/java/com/google/mediapipe/components:android_components",
"//mediapipe/java/com/google/mediapipe/framework:android_framework",
"//mediapipe/java/com/google/mediapipe/glutil",
"//third_party:androidx_appcompat",
"//third_party:androidx_constraint_layout",
"//third_party:androidx_legacy_support_v4",
"//third_party:androidx_recyclerview",
"//third_party:opencv",
"@androidx_concurrent_futures//jar",
"@androidx_lifecycle//jar",
"@com_google_code_findbugs//jar",
"@com_google_guava_android//jar",
],
)
android_binary(
name = "objectdetection3d",
manifest = "AndroidManifest.xml",
manifest_values = {"applicationId": "com.google.mediapipe.apps.objectdetection3d"},
multidex = "native",
deps = [
":mediapipe_lib",
],
)

View File

@ -0,0 +1,280 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.mediapipe.apps.objectdetection3d;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.SurfaceTexture;
import android.os.Bundle;
import androidx.appcompat.app.AppCompatActivity;
import android.util.Log;
import android.util.Size;
import android.view.SurfaceHolder;
import android.view.SurfaceView;
import android.view.View;
import android.view.ViewGroup;
import com.google.mediapipe.components.CameraHelper;
import com.google.mediapipe.components.CameraXPreviewHelper;
import com.google.mediapipe.components.ExternalTextureConverter;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.components.PermissionHelper;
import com.google.mediapipe.framework.AndroidAssetUtil;
import com.google.mediapipe.framework.AndroidPacketCreator;
import com.google.mediapipe.framework.Packet;
import com.google.mediapipe.glutil.EglManager;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
/** Main activity of MediaPipe example apps. */
public class MainActivity extends AppCompatActivity {
private static final String TAG = "MainActivity";
private static final String BINARY_GRAPH_NAME = "objectdetection3d.binarypb";
private static final String INPUT_VIDEO_STREAM_NAME = "input_video";
private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video";
private static final String OBJ_TEXTURE = "texture.bmp";
private static final String OBJ_FILE = "model.obj.uuu";
private static final String BOX_TEXTURE = "classic_colors.png";
private static final String BOX_FILE = "box.obj.uuu";
private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.BACK;
// Flips the camera-preview frames vertically before sending them into FrameProcessor to be
// processed in a MediaPipe graph, and flips the processed frames back when they are displayed.
// This is needed because OpenGL represents images assuming the image origin is at the bottom-left
// corner, whereas MediaPipe in general assumes the image origin is at top-left.
private static final boolean FLIP_FRAMES_VERTICALLY = true;
// Target resolution should be 4:3 for this application, as expected by the model and tracker.
private static final Size TARGET_RESOLUTION = new Size(1280, 960);
static {
// Load all native libraries needed by the app.
System.loadLibrary("mediapipe_jni");
System.loadLibrary("opencv_java3");
}
// {@link SurfaceTexture} where the camera-preview frames can be accessed.
private SurfaceTexture previewFrameTexture;
// {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph.
private SurfaceView previewDisplayView;
// Creates and manages an {@link EGLContext}.
private EglManager eglManager;
// Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed
// frames onto a {@link Surface}.
private FrameProcessor processor;
// Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be
// consumed by {@link FrameProcessor} and the underlying MediaPipe graph.
private ExternalTextureConverter converter;
// Handles camera access via the {@link CameraX} Jetpack support library.
private CameraXPreviewHelper cameraHelper;
// Assets.
private Bitmap objTexture = null;
private Bitmap boxTexture = null;
Size cameraImageSize;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
previewDisplayView = new SurfaceView(this);
setupPreviewDisplayView();
// Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g.,
// binary graphs.
AndroidAssetUtil.initializeNativeAssetManager(this);
eglManager = new EglManager(null);
processor =
new FrameProcessor(
this,
eglManager.getNativeContext(),
BINARY_GRAPH_NAME,
INPUT_VIDEO_STREAM_NAME,
OUTPUT_VIDEO_STREAM_NAME);
processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY);
prepareDemoAssets();
AndroidPacketCreator packetCreator = processor.getPacketCreator();
Map<String, Packet> inputSidePackets = new HashMap<>();
inputSidePackets.put("obj_asset_name", packetCreator.createString(OBJ_FILE));
inputSidePackets.put("box_asset_name", packetCreator.createString(BOX_FILE));
inputSidePackets.put("obj_texture", packetCreator.createRgbaImageFrame(objTexture));
inputSidePackets.put("box_texture", packetCreator.createRgbaImageFrame(boxTexture));
processor.setInputSidePackets(inputSidePackets);
PermissionHelper.checkAndRequestCameraPermissions(this);
}
@Override
protected void onResume() {
super.onResume();
converter = new ExternalTextureConverter(eglManager.getContext());
converter.setFlipY(FLIP_FRAMES_VERTICALLY);
converter.setConsumer(processor);
if (PermissionHelper.cameraPermissionsGranted(this)) {
startCamera();
}
}
@Override
protected void onPause() {
super.onPause();
converter.close();
}
@Override
public void onRequestPermissionsResult(
int requestCode, String[] permissions, int[] grantResults) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults);
PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults);
}
private void setupPreviewDisplayView() {
previewDisplayView.setVisibility(View.GONE);
ViewGroup viewGroup = findViewById(R.id.preview_display_layout);
viewGroup.addView(previewDisplayView);
previewDisplayView
.getHolder()
.addCallback(
new SurfaceHolder.Callback() {
@Override
public void surfaceCreated(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(holder.getSurface());
}
@Override
public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
// (Re-)Compute the ideal size of the camera-preview display (the area that the
// camera-preview frames get rendered onto, potentially with scaling and rotation)
// based on the size of the SurfaceView that contains the display.
Size viewSize = new Size(height, height * 3 / 4); // Prefer 3:4 aspect ratio.
Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize);
boolean isCameraRotated = cameraHelper.isCameraRotated();
cameraImageSize = cameraHelper.getFrameSize();
// Connect the converter to the camera-preview frames as its input (via
// previewFrameTexture), and configure the output width and height as the computed
// display size.
converter.setSurfaceTextureAndAttachToGLContext(
previewFrameTexture,
isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(),
isCameraRotated ? displaySize.getWidth() : displaySize.getHeight());
processor.setOnWillAddFrameListener(
(timestamp) -> {
try {
int cameraTextureWidth =
isCameraRotated
? cameraImageSize.getHeight()
: cameraImageSize.getWidth();
int cameraTextureHeight =
isCameraRotated
? cameraImageSize.getWidth()
: cameraImageSize.getHeight();
// Find limiting side and scale to 3:4 aspect ratio
float aspectRatio =
(float) cameraTextureWidth / (float) cameraTextureHeight;
if (aspectRatio > 3.0 / 4.0) {
// width too big
cameraTextureWidth = (int) ((float) cameraTextureHeight * 3.0 / 4.0);
} else {
// height too big
cameraTextureHeight = (int) ((float) cameraTextureWidth * 4.0 / 3.0);
}
Packet widthPacket =
processor.getPacketCreator().createInt32(cameraTextureWidth);
Packet heightPacket =
processor.getPacketCreator().createInt32(cameraTextureHeight);
try {
processor
.getGraph()
.addPacketToInputStream("input_width", widthPacket, timestamp);
processor
.getGraph()
.addPacketToInputStream("input_height", heightPacket, timestamp);
} catch (Exception e) {
Log.e(
TAG,
"MediaPipeException encountered adding packets to width and height"
+ " input streams.");
}
widthPacket.release();
heightPacket.release();
} catch (IllegalStateException ise) {
Log.e(
TAG,
"Exception while adding packets to width and height input streams.");
}
});
}
@Override
public void surfaceDestroyed(SurfaceHolder holder) {
processor.getVideoSurfaceOutput().setSurface(null);
}
});
}
private void startCamera() {
cameraHelper = new CameraXPreviewHelper();
cameraHelper.setOnCameraStartedListener(
surfaceTexture -> {
previewFrameTexture = surfaceTexture;
// Make the display view visible to start showing the preview. This triggers the
// SurfaceHolder.Callback added to (the holder of) previewDisplayView.
previewDisplayView.setVisibility(View.VISIBLE);
});
cameraHelper.startCamera(
this, CAMERA_FACING, /*surfaceTexture=*/ null, /*targetSize=*/ TARGET_RESOLUTION);
cameraImageSize = cameraHelper.getFrameSize();
}
private void prepareDemoAssets() {
AndroidAssetUtil.initializeNativeAssetManager(this);
// We render from raw data with OpenGL, so disable decoding preprocessing.
BitmapFactory.Options decodeOptions = new BitmapFactory.Options();
decodeOptions.inScaled = false;
decodeOptions.inDither = false;
decodeOptions.inPremultiplied = false;
try {
InputStream inputStream = getAssets().open(OBJ_TEXTURE);
objTexture = BitmapFactory.decodeStream(inputStream, null /*outPadding*/, decodeOptions);
inputStream.close();
} catch (Exception e) {
Log.e(TAG, "Error parsing object texture; error: " + e);
throw new IllegalStateException(e);
}
try {
InputStream inputStream = getAssets().open(BOX_TEXTURE);
boxTexture = BitmapFactory.decodeStream(inputStream, null /*outPadding*/, decodeOptions);
inputStream.close();
} catch (Exception e) {
Log.e(TAG, "Error parsing box texture; error: " + e);
throw new RuntimeException(e);
}
}
}

View File

@ -0,0 +1,21 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
exports_files(
srcs = glob(["**"]),
)

View File

@ -0,0 +1,21 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
exports_files(
srcs = glob(["**"]),
)

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 410 KiB

View File

@ -0,0 +1,21 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
exports_files(
srcs = glob(["**"]),
)

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 MiB

View File

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent">
<FrameLayout
android:id="@+id/preview_display_layout"
android:layout_width="fill_parent"
android:layout_height="fill_parent"
android:layout_weight="1">
<TextView
android:id="@+id/no_camera_access_view"
android:layout_height="fill_parent"
android:layout_width="fill_parent"
android:gravity="center"
android:text="@string/no_camera_access" />
</FrameLayout>
</androidx.constraintlayout.widget.ConstraintLayout>

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#008577</color>
<color name="colorPrimaryDark">#00574B</color>
<color name="colorAccent">#D81B60</color>
</resources>

View File

@ -0,0 +1,4 @@
<resources>
<string name="app_name" translatable="false">Object Detection 3D</string>
<string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
</resources>

View File

@ -0,0 +1,11 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
</resources>

View File

@ -63,7 +63,7 @@ COPY . /mediapipe/
# Install bazel
ARG BAZEL_VERSION=0.29.1
ARG BAZEL_VERSION=1.1.0
RUN mkdir /bazel && \
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
wget --no-check-certificate -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \

View File

@ -1,9 +1,11 @@
# Coral Dev Board Setup (experimental)
**Dislaimer**: Running MediaPipe on Coral is experimental, and this process may
**Disclaimer**: Running MediaPipe on Coral is experimental, and this process may
not be exact and is subject to change. These instructions have only been tested
on the [Coral Dev Board](https://coral.ai/products/dev-board/) with Mendel 4.0,
and may vary for different devices and workstations.
on the [Coral Dev Board](https://coral.ai/products/dev-board/)
running [Mendel Enterprise Day 13](https://coral.ai/software/) OS and
using [Diploria2](https://github.com/google-coral/edgetpu/tree/diploria2)
edgetpu libs, and may vary for different devices and workstations.
This file describes how to prepare a Coral Dev Board and setup a Linux
Docker container for building MediaPipe applications that run on Edge TPU.
@ -16,10 +18,12 @@ Docker container for building MediaPipe applications that run on Edge TPU.
* Setup the coral device via [here](https://coral.withgoogle.com/docs/dev-board/get-started/), and ensure the _mdt_ command works
Note: alias mdt="python3 -m mdt.main" may be needed on some systems
* (on coral device) prepare MediaPipe
cd ~
sudo apt-get install -y git
sudo apt-get update && sudo apt-get install -y git
git clone https://github.com/google/mediapipe.git
mkdir mediapipe/bazel-bin

View File

@ -10,19 +10,25 @@ http_archive(
sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e",
)
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "0.24.1")
versions.check(minimum_bazel_version = "1.0.0",
maximum_bazel_version = "1.2.1")
# ABSL cpp library.
# ABSL cpp library lts_2020_02_25
http_archive(
name = "com_google_absl",
# Head commit on 2019-04-12.
# TODO: Switch to the latest absl version when the problem gets
# fixed.
urls = [
"https://github.com/abseil/abseil-cpp/archive/a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/20200225.tar.gz",
],
sha256 = "d437920d1434c766d22e85773b899c77c672b8b4865d5dc2cd61a29fdff3cf03",
strip_prefix = "abseil-cpp-a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a",
# Remove after https://github.com/abseil/abseil-cpp/issues/326 is solved.
patches = [
"@//third_party:com_google_absl_f863b622fe13612433fdf43f76547d5edda0c93001.diff"
],
patch_args = [
"-p1",
],
strip_prefix = "abseil-cpp-20200225",
sha256 = "728a813291bdec2aa46eab8356ace9f75ac2ed9dfe2df5ab603c4e6c09f1c353"
)
http_archive(
@ -72,6 +78,14 @@ http_archive(
],
)
# easyexif
http_archive(
name = "easyexif",
url = "https://github.com/mayanklahiri/easyexif/archive/master.zip",
strip_prefix = "easyexif-master",
build_file = "@//third_party:easyexif.BUILD",
)
# libyuv
http_archive(
name = "libyuv",
@ -103,15 +117,23 @@ http_archive(
],
)
# 2019-11-12
_TENSORFLOW_GIT_COMMIT = "a5f9bcd64453ff3d1f64cb4da4786db3d2da7f82"
_TENSORFLOW_SHA256= "f2b6f2ab2ffe63e86eccd3ce4bea6b7197383d726638dfeeebcdc1e7de73f075"
# 2020-02-12
# The last commit before TensorFlow switched to Bazel 2.0
_TENSORFLOW_GIT_COMMIT = "77e9ffb9b2bfb1a4f7056e62d84039626923e328"
_TENSORFLOW_SHA256= "176ccd82f7dd17c5e117b50d353603b129c7a6ccbfebd522ca47cc2a40f33f13"
http_archive(
name = "org_tensorflow",
urls = [
"https://mirror.bazel.build/github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
"https://github.com/tensorflow/tensorflow/archive/%s.tar.gz" % _TENSORFLOW_GIT_COMMIT,
],
# A compatibility patch
patches = [
"@//third_party:org_tensorflow_528e22eae8bf3206189a066032c66e9e5c9b4a61.diff"
],
patch_args = [
"-p1",
],
strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT,
sha256 = _TENSORFLOW_SHA256,
)
@ -119,8 +141,22 @@ http_archive(
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(tf_repo_name = "org_tensorflow")
http_archive(
name = "ceres_solver",
url = "https://github.com/ceres-solver/ceres-solver/archive/1.14.0.zip",
patches = [
"@//third_party:ceres_solver_9bf9588988236279e1262f75d7f4d85711dfa172.diff"
],
patch_args = [
"-p1",
],
strip_prefix = "ceres-solver-1.14.0",
sha256 = "5ba6d0db4e784621fda44a50c58bb23b0892684692f0c623e2063f9c19f192f1"
)
# Please run
# $ sudo apt-get install libopencv-core-dev libopencv-highgui-dev \
# libopencv-calib3d-dev libopencv-features2d-dev \
# libopencv-imgproc-dev libopencv-video-dev
new_local_repository(
name = "linux_opencv",
@ -149,11 +185,10 @@ new_local_repository(
http_archive(
name = "android_opencv",
sha256 = "056b849842e4fa8751d09edbb64530cfa7a63c84ccd232d0ace330e27ba55d0b",
build_file = "@//third_party:opencv_android.BUILD",
strip_prefix = "OpenCV-android-sdk",
type = "zip",
url = "https://github.com/opencv/opencv/releases/download/4.1.0/opencv-4.1.0-android-sdk.zip",
url = "https://github.com/opencv/opencv/releases/download/3.4.3/opencv-3.4.3-android-sdk.zip",
)
# After OpenCV 3.2.0, the pre-compiled opencv2.framework has google protobuf symbols, which will
@ -184,13 +219,18 @@ maven_install(
artifacts = [
"androidx.annotation:annotation:aar:1.1.0",
"androidx.appcompat:appcompat:aar:1.1.0-rc01",
"androidx.camera:camera-core:aar:1.0.0-alpha06",
"androidx.camera:camera-camera2:aar:1.0.0-alpha06",
"androidx.constraintlayout:constraintlayout:aar:1.1.3",
"androidx.core:core:aar:1.1.0-rc03",
"androidx.legacy:legacy-support-v4:aar:1.0.0",
"androidx.recyclerview:recyclerview:aar:1.1.0-beta02",
"com.google.android.material:material:aar:1.0.0-rc01",
],
repositories = ["https://dl.google.com/dl/android/maven2"],
repositories = [
"https://dl.google.com/dl/android/maven2",
"https://repo1.maven.org/maven2",
],
)
maven_server(
@ -206,10 +246,10 @@ maven_jar(
)
maven_jar(
name = "androidx_concurrent_futures",
artifact = "androidx.concurrent:concurrent-futures:1.0.0-alpha03",
sha1 = "b528df95c7e2fefa2210c0c742bf3e491c1818ae",
server = "google_server",
name = "androidx_concurrent_futures",
artifact = "androidx.concurrent:concurrent-futures:1.0.0-alpha03",
sha1 = "b528df95c7e2fefa2210c0c742bf3e491c1818ae",
server = "google_server",
)
maven_jar(
@ -285,10 +325,13 @@ http_archive(
build_file = "@//third_party:google_toolbox_for_mac.BUILD",
)
### Coral ###
# Coral
#COMMIT=$(git ls-remote https://github.com/google-coral/crosstool master | awk '{print $1}')
#SHA256=$(curl -L "https://github.com/google-coral/crosstool/archive/${COMMIT}.tar.gz" | sha256sum | awk '{print $1}')
# Oct 2019
#COMMIT=9e00d5be43bf001f883b5700f5d04882fea00229
#SHA256=cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb
http_archive(
name = "coral_crosstool",
sha256 = "cb31b1417ccdcf7dd9fca5ec63e1571672372c30427730255997a547569d2feb",

View File

@ -8,7 +8,7 @@ echo ' sh mediapipe/examples/coral/setup.sh '
sleep 3
mkdir opencv32_arm64_libs
mkdir -p opencv32_arm64_libs
cp mediapipe/examples/coral/update_sources.sh update_sources.sh
chmod +x update_sources.sh

View File

@ -11,6 +11,8 @@
2. Build and run the run_autoflip binary to process a local video.
Note: AutoFlip currently only works with OpenCV 3. Please verify your OpenCV version beforehand.
```bash
bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
mediapipe/examples/desktop/autoflip:run_autoflip

View File

@ -63,12 +63,15 @@ import random
import subprocess
import sys
import tempfile
import urllib
import zipfile
from absl import app
from absl import flags
from absl import logging
from six.moves import range
from six.moves import urllib
import tensorflow.compat.v1 as tf
from mediapipe.util.sequence import media_sequence as ms
@ -218,7 +221,7 @@ class Charades(object):
return output_dict
if split not in SPLITS:
raise ValueError("Split %s not in %s" % split, str(SPLITS.keys()))
raise ValueError("Split %s not in %s" % split, str(list(SPLITS.keys())))
all_shards = tf.io.gfile.glob(
os.path.join(self.path_to_data, SPLITS[split][0] + "-*-of-*"))
random.shuffle(all_shards)
@ -329,7 +332,7 @@ class Charades(object):
if sys.version_info >= (3, 0):
urlretrieve = urllib.request.urlretrieve
else:
urlretrieve = urllib.urlretrieve
urlretrieve = urllib.request.urlretrieve
logging.info("Creating data directory.")
tf.io.gfile.makedirs(self.path_to_data)
logging.info("Downloading license.")

View File

@ -57,11 +57,12 @@ import random
import subprocess
import sys
import tempfile
import urllib
from absl import app
from absl import flags
from absl import logging
from six.moves import range
from six.moves import urllib
import tensorflow.compat.v1 as tf
from mediapipe.util.sequence import media_sequence as ms
@ -198,7 +199,7 @@ class DemoDataset(object):
if sys.version_info >= (3, 0):
urlretrieve = urllib.request.urlretrieve
else:
urlretrieve = urllib.urlretrieve
urlretrieve = urllib.request.urlretrieve
for split in SPLITS:
reader = csv.DictReader(SPLITS[split].split("\n"))
all_metadata = []

View File

@ -73,11 +73,13 @@ import subprocess
import sys
import tarfile
import tempfile
import urllib
from absl import app
from absl import flags
from absl import logging
from six.moves import range
from six.moves import urllib
from six.moves import zip
import tensorflow.compat.v1 as tf
from mediapipe.util.sequence import media_sequence as ms
@ -96,15 +98,15 @@ FILEPATTERN = "kinetics_700_%s_25fps_rgb_flow"
SPLITS = {
"train": {
"shards": 1000,
"examples": 540247
"examples": 538779
},
"validate": {
"shards": 100,
"examples": 34610
"examples": 34499
},
"test": {
"shards": 100,
"examples": 69103
"examples": 68847
},
"custom": {
"csv": None, # Add a CSV for your own data here.
@ -198,7 +200,7 @@ class Kinetics(object):
return output_dict
if split not in SPLITS:
raise ValueError("Split %s not in %s" % split, str(SPLITS.keys()))
raise ValueError("Split %s not in %s" % split, str(list(SPLITS.keys())))
all_shards = tf.io.gfile.glob(
os.path.join(self.path_to_data, FILEPATTERN % split + "-*-of-*"))
random.shuffle(all_shards)
@ -302,11 +304,12 @@ class Kinetics(object):
continue
# rename the row with a consistent set of names.
if len(csv_row) == 5:
row = dict(zip(["label_name", "video", "start", "end", "split"],
csv_row))
row = dict(
list(
zip(["label_name", "video", "start", "end", "split"],
csv_row)))
else:
row = dict(zip(["video", "start", "end", "split"],
csv_row))
row = dict(list(zip(["video", "start", "end", "split"], csv_row)))
metadata = tf.train.SequenceExample()
ms.set_example_id(bytes23(row["video"] + "_" + row["start"]),
metadata)
@ -328,7 +331,7 @@ class Kinetics(object):
if sys.version_info >= (3, 0):
urlretrieve = urllib.request.urlretrieve
else:
urlretrieve = urllib.urlretrieve
urlretrieve = urllib.request.urlretrieve
logging.info("Creating data directory.")
tf.io.gfile.makedirs(self.path_to_data)
logging.info("Downloading annotations.")
@ -404,7 +407,7 @@ class Kinetics(object):
assert NUM_CLASSES == num_keys, (
"Found %d labels for split: %s, should be %d" % (
num_keys, name, NUM_CLASSES))
label_map = dict(zip(classes, range(len(classes))))
label_map = dict(list(zip(classes, list(range(len(classes))))))
if SPLITS[name]["examples"] > 0:
assert SPLITS[name]["examples"] == num_examples, (
"Found %d examples for split: %s, should be %d" % (

View File

@ -30,6 +30,8 @@
```bash
# cd to the root directory of the MediaPipe repo
cd -
pip3 install tf_slim
python -m mediapipe.examples.desktop.youtube8m.generate_vggish_frozen_graph
```
@ -47,7 +49,7 @@
5. Run the MediaPipe binary to extract the features.
```bash
bazel build -c opt \
bazel build -c opt --linkopt=-s \
--define MEDIAPIPE_DISABLE_GPU=1 --define no_aws_support=true \
mediapipe/examples/desktop/youtube8m:extract_yt8m_features
@ -87,7 +89,7 @@
3. Build and run the inference binary.
```bash
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' --linkopt=-s \
mediapipe/examples/desktop/youtube8m:model_inference
GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \
@ -113,13 +115,13 @@
2. Build the inference binary.
```bash
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' --linkopt=-s \
mediapipe/examples/desktop/youtube8m:model_inference
```
3. Run the python web server.
Note: pip install absl-py
Note: pip3 install absl-py
```bash
python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd`
@ -142,7 +144,7 @@
3. Build and run the inference binary.
```bash
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' --linkopt=-s \
mediapipe/examples/desktop/youtube8m:model_inference
# segment_size is the length, in seconds, of the window of frames.

View File

@ -25,7 +25,7 @@ import sys
from absl import app
import tensorflow.compat.v1 as tf
from tensorflow.compat.v1.python.tools import freeze_graph
from tensorflow.python.tools import freeze_graph
BASE_DIR = '/tmp/mediapipe/'

View File

@ -1078,10 +1078,16 @@ cc_library(
cc_library(
name = "port",
hdrs = ["port.h"],
defines = select({
"//conditions:default": [],
}) + select({
"//conditions:default": [],
"//mediapipe/gpu:disable_gpu": ["MEDIAPIPE_DISABLE_GPU"],
}),
visibility = [
"//mediapipe/framework:__subpackages__",
"//mediapipe/framework/port:__pkg__",
"//mediapipe/util:__pkg__",
"//mediapipe/util:__subpackages__",
],
)

View File

@ -134,17 +134,21 @@
// Example: Logging the error on failure.
// ASSIGN_OR_RETURN(ValueType value, MaybeGetValue(query), _.LogError());
//
#define ASSIGN_OR_RETURN(...) \
STATUS_MACROS_IMPL_GET_VARIADIC_(__VA_ARGS__, \
STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_3_, \
STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_2_) \
#define ASSIGN_OR_RETURN(...) \
STATUS_MACROS_IMPL_GET_VARIADIC_((__VA_ARGS__, \
STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_3_, \
STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_2_)) \
(__VA_ARGS__)
// =================================================================
// == Implementation details, do not rely on anything below here. ==
// =================================================================
#define STATUS_MACROS_IMPL_GET_VARIADIC_(_1, _2, _3, NAME, ...) NAME
// MSVC incorrectly expands variadic macros; splice together a macro call to
// work around the bug.
#define STATUS_MACROS_IMPL_GET_VARIADIC_HELPER_(_1, _2, _3, NAME, ...) NAME
#define STATUS_MACROS_IMPL_GET_VARIADIC_(args) \
STATUS_MACROS_IMPL_GET_VARIADIC_HELPER_ args
#define STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_2_(lhs, rexpr) \
STATUS_MACROS_IMPL_ASSIGN_OR_RETURN_3_(lhs, rexpr, std::move(_))
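
For readers unfamiliar with this pattern, here is a minimal standalone sketch (illustrative macro names, not MediaPipe's) of the same arity-dispatch trick: the argument pack is first parenthesized and then spliced into a helper macro, which is what makes MSVC expand `__VA_ARGS__` into separate arguments.

```cpp
#include <iostream>

// Helper picks the macro name based on how many arguments were passed.
#define PICK_IMPL_(_1, _2, NAME, ...) NAME
// Splice a parenthesized argument pack back into a macro call (MSVC-safe).
#define PICK_(args) PICK_IMPL_ args
// Dispatch to LOG_ONE_ or LOG_TWO_ depending on arity; the trailing ~ keeps
// the variadic part of PICK_IMPL_ non-empty.
#define LOG_ARGS(...) PICK_((__VA_ARGS__, LOG_TWO_, LOG_ONE_, ~))(__VA_ARGS__)

#define LOG_ONE_(a) std::cout << (a) << "\n"
#define LOG_TWO_(a, b) std::cout << (a) << " " << (b) << "\n"

int main() {
  LOG_ARGS("hello");           // expands to LOG_ONE_("hello")
  LOG_ARGS("hello", "world");  // expands to LOG_TWO_("hello", "world")
}
```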

View File

@ -99,7 +99,12 @@ def _encode_binary_proto_impl(ctx):
),
mnemonic = "EncodeProto",
)
return struct(files = depset([binarypb]))
output_depset = depset([binarypb])
return [DefaultInfo(
files = output_depset,
data_runfiles = ctx.runfiles(transitive_files = output_depset),
)]
encode_binary_proto = rule(
implementation = _encode_binary_proto_impl,

View File

@ -131,7 +131,7 @@ class ShardedMap {
return *this;
}
inline bool operator==(const Iterator& other) const {
return iter_ == other.iter_;
return shard_ == other.shard_ && iter_ == other.iter_;
}
inline bool operator!=(const Iterator& other) const {
return !operator==(other);
@ -154,7 +154,10 @@ class ShardedMap {
: shard_(shard), iter_(iter), map_(map) {}
// Releases all resources.
inline void Clear() ABSL_NO_THREAD_SAFETY_ANALYSIS {
if (map_ && iter_ != map_->maps_.back().end()) {
if (!map_) return;
bool is_end = (shard_ == map_->maps_.size() - 1 &&
iter_ == map_->maps_[shard_].end());
if (!is_end) {
map_->mutexes_[shard_].Unlock();
}
map_ = nullptr;
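
A hedged, standalone sketch (hypothetical types, not the real ShardedMap) of why the equality check above must also compare the shard index: inner iterators that belong to different shard containers cannot be compared meaningfully on their own.

```cpp
#include <cassert>
#include <vector>

// Toy iterator over a sharded container: a shard index plus an iterator into
// that shard. Comparing only `it` across different shards would be undefined;
// checking `shard` first keeps equality well defined (as in the fix above).
struct ShardIter {
  int shard;
  std::vector<int>::iterator it;
  bool operator==(const ShardIter& other) const {
    return shard == other.shard && it == other.it;
  }
  bool operator!=(const ShardIter& other) const { return !(*this == other); }
};

int main() {
  std::vector<std::vector<int>> shards(2, std::vector<int>{1});
  ShardIter a{0, shards[0].begin()};
  ShardIter b{1, shards[1].begin()};
  assert(a != b);  // different shards, even though both are begin() iterators
  assert(a == a);
}
```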

View File

@ -100,7 +100,6 @@ class Timestamp {
}
// Special values.
static Timestamp Unset();
static Timestamp Unstarted();
static Timestamp PreStream();

View File

@ -264,6 +264,10 @@ static ::mediapipe::Status PrefixNames(std::string prefix,
generator.mutable_input_side_packet(), replace_names));
MP_RETURN_IF_ERROR(TransformStreamNames(
generator.mutable_output_side_packet(), replace_names));
// Remove input side packets ignored by the subgraph-node.
MP_RETURN_IF_ERROR(RemoveIgnoredStreams(
generator.mutable_input_side_packet(), ignored_input_side_packets));
}
return ::mediapipe::OkStatus();
}

View File

@ -105,17 +105,27 @@ GpuBuffer GpuBufferMultiPool::GetBuffer(int width, int height,
BufferSpec key(width, height, format);
auto pool_it = pools_.find(key);
if (pool_it == pools_.end()) {
// Discard the oldest pool in order of creation.
// TODO: implement a better policy.
// Discard the least recently used pool from the LRU cache.
if (pools_.size() >= kMaxPoolCount) {
auto old_spec = buffer_specs_.front();
buffer_specs_.pop();
auto old_spec = buffer_specs_.front(); // Front is the least recently used.
buffer_specs_.pop_front();
pools_.erase(old_spec);
}
buffer_specs_.push(key);
buffer_specs_.push_back(key); // Push new spec to back.
std::tie(pool_it, std::ignore) =
pools_.emplace(std::piecewise_construct, std::forward_as_tuple(key),
std::forward_as_tuple(MakeSimplePool(key)));
} else {
// Find and move current 'key' spec to back, keeping others in same order.
auto specs_it = buffer_specs_.begin();
while (specs_it != buffer_specs_.end()) {
if (*specs_it == key) {
buffer_specs_.erase(specs_it);
break;
}
++specs_it;
}
buffer_specs_.push_back(key);
}
return GetBufferFromSimplePool(pool_it->first, pool_it->second);
}
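
Below is a minimal sketch, with illustrative names, of the eviction policy implemented above: a std::deque ordered from least to most recently used, where a cache hit moves the key to the back and eviction pops the front.

```cpp
#include <algorithm>
#include <cstddef>
#include <deque>
#include <iostream>
#include <string>
#include <unordered_map>

constexpr std::size_t kMaxPoolCount = 2;
std::deque<std::string> lru_keys;            // front = least recently used
std::unordered_map<std::string, int> pools;  // int stands in for a pool

int& GetPool(const std::string& key) {
  auto it = pools.find(key);
  if (it == pools.end()) {
    if (pools.size() >= kMaxPoolCount) {
      pools.erase(lru_keys.front());  // evict the least recently used pool
      lru_keys.pop_front();
    }
    lru_keys.push_back(key);
    it = pools.emplace(key, 0).first;
  } else {
    // Move the hit key to the back, keeping the others in the same order.
    lru_keys.erase(std::find(lru_keys.begin(), lru_keys.end(), key));
    lru_keys.push_back(key);
  }
  return it->second;
}

int main() {
  GetPool("a");
  GetPool("b");
  GetPool("a");  // "a" becomes most recently used
  GetPool("c");  // capacity reached: "b" is evicted, not "a"
  std::cout << (pools.count("b") ? "b kept" : "b evicted") << "\n";
}
```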

View File

@ -22,8 +22,8 @@
#ifndef MEDIAPIPE_GPU_GPU_BUFFER_MULTI_POOL_H_
#define MEDIAPIPE_GPU_GPU_BUFFER_MULTI_POOL_H_
#include <deque>
#include <limits>
#include <queue>
#include <unordered_map>
#include "absl/synchronization/mutex.h"
@ -110,7 +110,7 @@ class GpuBufferMultiPool {
ABSL_GUARDED_BY(mutex_);
// A deque of BufferSpecs to keep track of how recently each BufferSpec in the
// pool was used; the least recently used spec is at the front.
std::queue<BufferSpec> buffer_specs_;
std::deque<BufferSpec> buffer_specs_;
#ifdef __APPLE__
// Texture caches used with this pool.

View File

@ -73,13 +73,15 @@ def _metal_compiler_args(ctx, src, obj, minimum_os_version, copts, diagnostics,
def _metal_compiler_inputs(srcs, hdrs, deps = []):
"""Determines the list of inputs required for a compile action."""
objc_providers = [x.objc for x in deps if hasattr(x, "objc")]
objc_files = depset()
for objc in objc_providers:
objc_files += objc.header
cc_infos = [dep[CcInfo] for dep in deps if CcInfo in dep]
return srcs + hdrs + objc_files.to_list()
dep_headers = depset(transitive = [
cc_info.compilation_context.headers
for cc_info in cc_infos
])
return depset(srcs + hdrs, transitive = [dep_headers])
def _metal_library_impl(ctx):
"""Implementation for metal_library Skylark rule."""
@ -144,11 +146,22 @@ def _metal_library_impl(ctx):
**additional_params
)
cc_infos = [dep[CcInfo] for dep in ctx.attr.deps if CcInfo in dep]
if ctx.files.hdrs:
cc_infos.append(
CcInfo(
compilation_context = cc_common.create_compilation_context(
headers = depset([f for f in ctx.files.hdrs]),
),
),
)
return [
DefaultInfo(
files = depset([output_lib]),
),
objc_provider,
cc_common.merge_cc_infos(cc_infos = cc_infos),
# Return the provider for the new bundling logic of rules_apple.
resources.bucketize_typed([output_lib], "unprocessed"),
]
@ -156,7 +169,7 @@ def _metal_library_impl(ctx):
METAL_LIBRARY_ATTRS = dicts.add(apple_support.action_required_attrs(), {
"srcs": attr.label_list(allow_files = [".metal"], allow_empty = False),
"hdrs": attr.label_list(allow_files = [".h"]),
"deps": attr.label_list(providers = [["objc"]]),
"deps": attr.label_list(providers = [["objc", CcInfo]]),
"copts": attr.string_list(),
"minimum_os_version": attr.string(),
})

View File

@ -0,0 +1,56 @@
# Copyright 2019 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
exports_files(glob([
"*.pbtxt",
]))
cc_library(
name = "mobile_calculators",
visibility = ["//visibility:public"],
deps = [
"//mediapipe/calculators/core:packet_resampler_calculator",
"//mediapipe/calculators/image:image_cropping_calculator",
"//mediapipe/gpu:gl_scaler_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:annotations_to_model_matrices_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:gl_animation_overlay_calculator",
"//mediapipe/graphs/object_detection_3d/subgraphs:objectron_detection_gpu",
"//mediapipe/graphs/object_detection_3d/subgraphs:objectron_tracking_gpu",
],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph_shoe",
graph = "shoe_classic_occlusion_tracking.pbtxt",
output_name = "mobile_gpu_shoe.binarypb",
visibility = ["//visibility:public"],
deps = [":mobile_calculators"],
)
mediapipe_binary_graph(
name = "mobile_gpu_binary_graph_chair",
graph = "chair_classic_occlusion_tracking.pbtxt",
output_name = "mobile_gpu_chair.binarypb",
visibility = ["//visibility:public"],
deps = [":mobile_calculators"],
)

View File

@ -0,0 +1,476 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:private"])
proto_library(
name = "object_proto",
srcs = [
"object.proto",
],
)
proto_library(
name = "a_r_capture_metadata_proto",
srcs = [
"a_r_capture_metadata.proto",
],
)
proto_library(
name = "annotation_proto",
srcs = [
"annotation_data.proto",
],
deps = [
":a_r_capture_metadata_proto",
":object_proto",
],
)
proto_library(
name = "belief_decoder_config_proto",
srcs = [
"belief_decoder_config.proto",
],
)
proto_library(
name = "camera_parameters_proto",
srcs = [
"camera_parameters.proto",
],
)
proto_library(
name = "frame_annotation_tracker_calculator_proto",
srcs = ["frame_annotation_tracker_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "gl_animation_overlay_calculator_proto",
srcs = ["gl_animation_overlay_calculator.proto"],
visibility = ["//visibility:public"],
deps = ["//mediapipe/framework:calculator_proto"],
)
proto_library(
name = "tflite_tensors_to_objects_calculator_proto",
srcs = ["tflite_tensors_to_objects_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
":belief_decoder_config_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "lift_2d_frame_annotation_to_3d_calculator_proto",
srcs = ["lift_2d_frame_annotation_to_3d_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
":belief_decoder_config_proto",
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "annotations_to_model_matrices_calculator_proto",
srcs = ["annotations_to_model_matrices_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "model_matrix_proto",
srcs = ["model_matrix.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
],
)
proto_library(
name = "annotations_to_render_data_calculator_proto",
srcs = ["annotations_to_render_data_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
"//mediapipe/framework:calculator_proto",
"//mediapipe/util:color_proto",
],
)
mediapipe_cc_proto_library(
name = "object_cc_proto",
srcs = ["object.proto"],
visibility = ["//visibility:public"],
deps = [":object_proto"],
)
mediapipe_cc_proto_library(
name = "a_r_capture_metadata_cc_proto",
srcs = ["a_r_capture_metadata.proto"],
visibility = ["//visibility:public"],
deps = [":a_r_capture_metadata_proto"],
)
mediapipe_cc_proto_library(
name = "annotation_cc_proto",
srcs = ["annotation_data.proto"],
cc_deps = [
":a_r_capture_metadata_cc_proto",
":object_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":annotation_proto"],
)
mediapipe_cc_proto_library(
name = "camera_parameters_cc_proto",
srcs = ["camera_parameters.proto"],
visibility = ["//visibility:public"],
deps = [":camera_parameters_proto"],
)
mediapipe_cc_proto_library(
name = "frame_annotation_tracker_calculator_cc_proto",
srcs = ["frame_annotation_tracker_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":frame_annotation_tracker_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "gl_animation_overlay_calculator_cc_proto",
srcs = ["gl_animation_overlay_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":gl_animation_overlay_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "belief_decoder_config_cc_proto",
srcs = ["belief_decoder_config.proto"],
visibility = ["//visibility:public"],
deps = [":belief_decoder_config_proto"],
)
mediapipe_cc_proto_library(
name = "tflite_tensors_to_objects_calculator_cc_proto",
srcs = ["tflite_tensors_to_objects_calculator.proto"],
cc_deps = [
":belief_decoder_config_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":tflite_tensors_to_objects_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "lift_2d_frame_annotation_to_3d_calculator_cc_proto",
srcs = ["lift_2d_frame_annotation_to_3d_calculator.proto"],
cc_deps = [
":belief_decoder_config_cc_proto",
"//mediapipe/framework:calculator_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":lift_2d_frame_annotation_to_3d_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "annotations_to_model_matrices_calculator_cc_proto",
srcs = ["annotations_to_model_matrices_calculator.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":annotations_to_model_matrices_calculator_proto"],
)
mediapipe_cc_proto_library(
name = "model_matrix_cc_proto",
srcs = ["model_matrix.proto"],
cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
visibility = ["//visibility:public"],
deps = [":model_matrix_proto"],
)
mediapipe_cc_proto_library(
name = "annotations_to_render_data_calculator_cc_proto",
srcs = ["annotations_to_render_data_calculator.proto"],
cc_deps = [
"//mediapipe/framework:calculator_cc_proto",
"//mediapipe/util:color_cc_proto",
],
visibility = ["//visibility:public"],
deps = [":annotations_to_render_data_calculator_proto"],
)
cc_library(
name = "box_util",
srcs = ["box_util.cc"],
hdrs = ["box_util.h"],
deps = [
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/util/tracking:box_tracker_cc_proto",
],
)
cc_library(
name = "frame_annotation_tracker",
srcs = ["frame_annotation_tracker.cc"],
hdrs = ["frame_annotation_tracker.h"],
deps = [
":annotation_cc_proto",
":box_util",
"//mediapipe/framework/port:integral_types",
"//mediapipe/framework/port:logging",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"@com_google_absl//absl/container:btree",
"@com_google_absl//absl/container:flat_hash_set",
],
)
cc_library(
name = "gl_animation_overlay_calculator",
srcs = ["gl_animation_overlay_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":camera_parameters_cc_proto",
":gl_animation_overlay_calculator_cc_proto",
":model_matrix_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/gpu:gl_calculator_helper",
"//mediapipe/gpu:shader_util",
"//mediapipe/util/android:asset_manager_util",
],
alwayslink = 1,
)
cc_library(
name = "decoder",
srcs = [
"decoder.cc",
],
hdrs = [
"decoder.h",
],
deps = [
":annotation_cc_proto",
":belief_decoder_config_cc_proto",
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
"@eigen_archive//:eigen",
],
)
cc_library(
name = "tensor_util",
srcs = [
"tensor_util.cc",
],
hdrs = [
"tensor_util.h",
],
deps = [
"//mediapipe/framework/port:logging",
"//mediapipe/framework/port:opencv_core",
"@org_tensorflow//tensorflow/lite:framework",
],
)
cc_library(
name = "box",
srcs = [
"box.cc",
"model.cc",
],
hdrs = [
"box.h",
"model.h",
"types.h",
],
deps = [
":annotation_cc_proto",
":object_cc_proto",
"//mediapipe/framework/port:logging",
"@eigen_archive//:eigen",
],
)
cc_library(
name = "frame_annotation_to_timed_box_list_calculator",
srcs = ["frame_annotation_to_timed_box_list_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotation_cc_proto",
":box_util",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:opencv_imgproc",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)
cc_library(
name = "frame_annotation_tracker_calculator",
srcs = ["frame_annotation_tracker_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotation_cc_proto",
":frame_annotation_tracker",
":frame_annotation_tracker_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/memory",
],
alwayslink = 1,
)
cc_library(
name = "tflite_tensors_to_objects_calculator",
srcs = ["tflite_tensors_to_objects_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotation_cc_proto",
":belief_decoder_config_cc_proto",
":decoder",
":tensor_util",
":tflite_tensors_to_objects_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
"@eigen_archive//:eigen",
"@org_tensorflow//tensorflow/lite:framework",
],
alwayslink = 1,
)
cc_library(
name = "lift_2d_frame_annotation_to_3d_calculator",
srcs = ["lift_2d_frame_annotation_to_3d_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotation_cc_proto",
":belief_decoder_config_cc_proto",
":decoder",
":lift_2d_frame_annotation_to_3d_calculator_cc_proto",
":tensor_util",
":tflite_tensors_to_objects_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/deps:file_path",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/framework/port:ret_check",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/types:span",
"@eigen_archive//:eigen",
"@org_tensorflow//tensorflow/lite:framework",
],
alwayslink = 1,
)
cc_library(
name = "annotations_to_model_matrices_calculator",
srcs = ["annotations_to_model_matrices_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotation_cc_proto",
":annotations_to_model_matrices_calculator_cc_proto",
":box",
":model_matrix_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//mediapipe/util:color_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
"@eigen_archive//:eigen",
],
alwayslink = 1,
)
cc_library(
name = "annotations_to_render_data_calculator",
srcs = ["annotations_to_render_data_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
":annotation_cc_proto",
":annotations_to_render_data_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_options_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/util:color_cc_proto",
"//mediapipe/util:render_data_cc_proto",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
cc_test(
name = "box_util_test",
srcs = ["box_util_test.cc"],
deps = [
":box_util",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:opencv_core",
"//mediapipe/util/tracking:box_tracker_cc_proto",
],
)
cc_test(
name = "frame_annotation_tracker_test",
srcs = ["frame_annotation_tracker_test.cc"],
deps = [
":annotation_cc_proto",
":frame_annotation_tracker",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:logging",
"//mediapipe/util/tracking:box_tracker_cc_proto",
"@com_google_absl//absl/container:flat_hash_set",
],
)

View File

@ -0,0 +1,551 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
// Info about the camera characteristics used to capture images and depth data.
// See developer.apple.com/documentation/avfoundation/avcameracalibrationdata
// for more information.
message AVCameraCalibrationData {
// 3x3 row-major matrix relating a camera's internal properties to an ideal
// pinhole-camera model.
// See
// developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881135-intrinsicmatrix
// for detailed usage information.
repeated float intrinsic_matrix = 1 [packed = true];
// The image dimensions to which the intrinsic_matrix values are relative.
optional float intrinsic_matrix_reference_dimension_width = 2;
optional float intrinsic_matrix_reference_dimension_height = 3;
// 3x4 row-major matrix relating a camera's position and orientation to a
// world or scene coordinate system. Consists of a unitless 3x3 rotation
// matrix (R) on the left and a translation (t) 3x1 vector on the right. The
// translation vector's units are millimeters. For example:
//
// |r1,1 r2,1 r3,1 | t1|
// [R | t] = |r1,2 r2,2 r3,2 | t2|
// |r1,3 r2,3 r3,3 | t3|
//
// is stored as [r11, r21, r31, t1, r12, r22, r32, t2, ...]
//
// See
// developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881130-extrinsicmatrix?language=objc
// for more information.
repeated float extrinsic_matrix = 4 [packed = true];
// The size, in millimeters, of one image pixel.
optional float pixel_size = 5;
// A list of floating-point values describing radial distortions imparted by
// the camera lens, for use in rectifying camera images.
// See
// developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881129-lensdistortionlookuptable?language=objc
// for more information.
repeated float lens_distortion_lookup_values = 6 [packed = true];
// A list of floating-point values describing radial distortions for use in
// reapplying camera geometry to a rectified image.
// See
// developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881132-inverselensdistortionlookuptable?language=objc
// for more information.
repeated float inverse_lens_distortion_lookup_values = 7 [packed = true];
// The offset of the distortion center of the camera lens from the top-left
// corner of the image.
// See
// developer.apple.com/documentation/avfoundation/avcameracalibrationdata/2881131-lensdistortioncenter?language=objc
// for more information.
optional float lens_distortion_center_x = 8;
optional float lens_distortion_center_y = 9;
}
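
As a hedged sketch of how a consumer might interpret the row-major intrinsic_matrix documented above (the numeric values here are made up for illustration), an Eigen row-major map recovers the focal length and principal point:

```cpp
#include <iostream>
#include "Eigen/Dense"

int main() {
  // Stand-in for AVCameraCalibrationData.intrinsic_matrix: 9 floats, row-major.
  const float intrinsic[9] = {500.f,   0.f, 320.f,
                                0.f, 500.f, 240.f,
                                0.f,   0.f,   1.f};
  Eigen::Map<const Eigen::Matrix<float, 3, 3, Eigen::RowMajor>> k(intrinsic);
  std::cout << "focal length: " << k(0, 0)
            << ", principal point: (" << k(0, 2) << ", " << k(1, 2) << ")\n";
}
```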
// Container for depth data information.
// See developer.apple.com/documentation/avfoundation/avdepthdata for more info.
message AVDepthData {
// PNG representation of the grayscale depth data map. See discussion about
// depth_data_map_original_minimum_value, below, for information about how
// to interpret the pixel values.
optional bytes depth_data_map = 1;
// Pixel format type of the original captured depth data.
// See
// developer.apple.com/documentation/corevideo/1563591-pixel_format_identifiers?language=objc
// for the complete list of possible pixel format types. This value represents
// a string for the associated OSType/FourCharCode.
optional string depth_data_type = 2;
// Indicates the general accuracy of the depth_data_map.
// See developer.apple.com/documentation/avfoundation/avdepthdataaccuracy for
// more information.
enum Accuracy {
UNDEFINED_ACCURACY = 0;
// Values in the depth map are usable for foreground/background separation
// but are not absolutely accurate in the physical world.
RELATIVE = 1;
// Values in the depth map are absolutely accurate in the physical world.
ABSOLUTE = 2;
}
optional Accuracy depth_data_accuracy = 3 [default = RELATIVE];
// Indicates whether the depth_data_map contains temporally smoothed data.
optional bool depth_data_filtered = 4;
// Quality of the depth_data_map.
enum Quality {
UNDEFINED_QUALITY = 0;
HIGH = 1;
LOW = 2;
}
optional Quality depth_data_quality = 5;
// Associated calibration data for the depth_data_map.
optional AVCameraCalibrationData camera_calibration_data = 6;
// The original range of values expressed by the depth_data_map, before
// grayscale normalization. For example, if the minimum and maximum values
// indicate a range of [0.5, 2.2], and the depth_data_type value indicates
// it was a depth map, then white pixels (255, 255, 255) will map to 0.5 and
// black pixels (0, 0, 0) will map to 2.2 with the grayscale range linearly
// interpolated in between. Conversely, if the depth_data_type value indicates
// it was a disparity map, then white pixels will map to 2.2 and black pixels
// will map to 0.5.
optional float depth_data_map_original_minimum_value = 7;
optional float depth_data_map_original_maximum_value = 8;
// The width of the depth buffer map.
optional int32 depth_data_map_width = 9;
// The height of the depth buffer map.
optional int32 depth_data_map_height = 10;
// The row-major flattened array of the depth buffer map pixels. This will be
// either a float32 or float16 byte array, depending on 'depth_data_type'.
optional bytes depth_data_map_raw_values = 11;
}
// Estimated scene lighting information associated with a captured video frame.
// See developer.apple.com/documentation/arkit/arlightestimate for more info.
message ARLightEstimate {
// The estimated intensity, in lumens, of ambient light throughout the scene.
optional double ambient_intensity = 1;
// The estimated color temperature, in degrees Kelvin, of ambient light
// throughout the scene.
optional double ambient_color_temperature = 2;
// Data describing the estimated lighting environment in all directions.
// Second-level spherical harmonics in separate red, green, and blue data
// planes. Thus, this buffer contains 3 sets of 9 coefficients, or a total of
// 27 values.
// See
// https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928222-sphericalharmonicscoefficients?language=objc
// for more information.
repeated float spherical_harmonics_coefficients = 3 [packed = true];
message DirectionVector {
optional float x = 1;
optional float y = 2;
optional float z = 3;
}
// A vector indicating the orientation of the strongest directional light
// source, normalized in the world-coordinate space.
// See
// https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928221-primarylightdirection?language=objc
// for more information.
optional DirectionVector primary_light_direction = 4;
// The estimated intensity, in lumens, of the strongest directional light
// source in the scene.
// See
// https://developer.apple.com/documentation/arkit/ardirectionallightestimate/2928219-primarylightintensity?language=objc
// for more information.
optional float primary_light_intensity = 5;
}
// Information about the camera position and imaging characteristics for a
// captured video frame.
// See developer.apple.com/documentation/arkit/arcamera for more information.
message ARCamera {
// The general quality of position tracking available when the camera captured
// a frame.
enum TrackingState {
UNDEFINED_TRACKING_STATE = 0;
// Camera position tracking is not available.
UNAVAILABLE = 1;
// Tracking is available, but the quality of results is questionable.
LIMITED = 2;
// Camera position tracking is providing optimal results.
NORMAL = 3;
}
optional TrackingState tracking_state = 1 [default = UNAVAILABLE];
// A possible diagnosis for limited position tracking quality as of when the
// frame was captured.
enum TrackingStateReason {
UNDEFINED_TRACKING_STATE_REASON = 0;
// The current tracking state is not limited.
NONE = 1;
// Not yet enough camera or motion data to provide tracking information.
INITIALIZING = 2;
// The device is moving too fast for accurate image-based position tracking.
EXCESSIVE_MOTION = 3;
// Not enough distinguishable features for image-based position tracking.
INSUFFICIENT_FEATURES = 4;
// Tracking is limited due to a relocalization in progress.
RELOCALIZING = 5;
}
optional TrackingStateReason tracking_state_reason = 2 [default = NONE];
// 4x4 row-major matrix expressing position and orientation of the camera in
// world coordinate space.
// See developer.apple.com/documentation/arkit/arcamera/2866108-transform for
// more information.
repeated float transform = 3 [packed = true];
// The orientation of the camera, expressed as roll, pitch, and yaw values.
message EulerAngles {
optional float roll = 1;
optional float pitch = 2;
optional float yaw = 3;
}
optional EulerAngles euler_angles = 4;
// The width and height, in pixels, of the captured camera image.
optional int32 image_resolution_width = 5;
optional int32 image_resolution_height = 6;
// 3x3 row-major matrix that converts between the 2D camera plane and 3D world
// coordinate space.
// See developer.apple.com/documentation/arkit/arcamera/2875730-intrinsics for
// usage information.
repeated float intrinsics = 7 [packed = true];
// 4x4 row-major transform matrix appropriate for rendering 3D content to
// match the image captured by the camera.
// See
// developer.apple.com/documentation/arkit/arcamera/2887458-projectionmatrix
// for usage information.
repeated float projection_matrix = 8 [packed = true];
// 4x4 row-major transform matrix appropriate for converting from world-space
// to camera space. Relativized for the captured_image orientation (i.e.
// UILandscapeOrientationRight).
// See
// https://developer.apple.com/documentation/arkit/arcamera/2921672-viewmatrixfororientation?language=objc
// for more information.
repeated float view_matrix = 9 [packed = true];
}
// Container for a 3D mesh describing face topology.
message ARFaceGeometry {
// Each vertex represents a 3D point in the face mesh, in the face coordinate
// space.
// See developer.apple.com/documentation/arkit/arfacegeometry/2928201-vertices
// for more information.
message Vertex {
optional float x = 1;
optional float y = 2;
optional float z = 3;
}
repeated Vertex vertices = 1;
// The number of elements in the vertices list.
optional int32 vertex_count = 2;
// Each texture coordinate represents UV texture coordinates for the vertex at
// the corresponding index in the vertices buffer.
// See
// developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
// for more information.
message TextureCoordinate {
optional float u = 1;
optional float v = 2;
}
repeated TextureCoordinate texture_coordinates = 3;
// The number of elements in the texture_coordinates list.
optional int32 texture_coordinate_count = 4;
// Each integer value in this ordered list represents an index into the
// vertices and texture_coordinates lists. Each set of three indices
// identifies the vertices comprising a single triangle in the mesh. Each set
// of three indices forms a triangle, so the number of indices in the
// triangle_indices buffer is three times the triangle_count value.
// See
// developer.apple.com/documentation/arkit/arfacegeometry/2928199-triangleindices
// for more information.
repeated int32 triangle_indices = 5 [packed = true];
// The number of triangles described by the triangle_indices buffer.
// See
// developer.apple.com/documentation/arkit/arfacegeometry/2928207-trianglecount
// for more information.
optional int32 triangle_count = 6;
}
// Contains a list of blend shape entries wherein each item maps a specific
// blend shape location to its associated coefficient.
message ARBlendShapeMap {
message MapEntry {
// Identifier for the specific facial feature.
// See developer.apple.com/documentation/arkit/arblendshapelocation for a
// complete list of identifiers.
optional string blend_shape_location = 1;
// Indicates the current position of the feature relative to its neutral
// configuration, ranging from 0.0 (neutral) to 1.0 (maximum movement).
optional float blend_shape_coefficient = 2;
}
repeated MapEntry entries = 1;
}
// Information about the pose, topology, and expression of a detected face.
// See developer.apple.com/documentation/arkit/arfaceanchor for more info.
message ARFaceAnchor {
// A coarse triangle mesh representing the topology of the detected face.
optional ARFaceGeometry geometry = 1;
// A map of named coefficients representing the detected facial expression in
// terms of the movement of specific facial features.
optional ARBlendShapeMap blend_shapes = 2;
// 4x4 row-major matrix encoding the position, orientation, and scale of the
// anchor relative to the world coordinate space.
// See
// https://developer.apple.com/documentation/arkit/aranchor/2867981-transform?language=objc
// for more information.
repeated float transform = 3;
// Indicates whether the anchor's transform is valid. Frames that have a face
// anchor with this value set to NO should probably be ignored.
optional bool is_tracked = 4;
}
// Container for a 3D mesh.
message ARPlaneGeometry {
message Vertex {
optional float x = 1;
optional float y = 2;
optional float z = 3;
}
// Each texture coordinate represents UV texture coordinates for the vertex at
// the corresponding index in the vertices buffer.
// See
// https://developer.apple.com/documentation/arkit/arfacegeometry/2928203-texturecoordinates
// for more information.
message TextureCoordinate {
optional float u = 1;
optional float v = 2;
}
// A buffer of vertex positions for each point in the plane mesh.
repeated Vertex vertices = 1;
// The number of elements in the vertices buffer.
optional int32 vertex_count = 2;
// A buffer of texture coordinate values for each point in the plane mesh.
repeated TextureCoordinate texture_coordinates = 3;
// The number of elements in the texture_coordinates buffer.
optional int32 texture_coordinate_count = 4;
// Each integer value in this ordered list represents an index into the
// vertices and texture_coordinates lists. Each set of three indices
// identifies the vertices comprising a single triangle in the mesh. Each set
// of three indices forms a triangle, so the number of indices in the
// triangle_indices buffer is three times the triangle_count value.
// See
// https://developer.apple.com/documentation/arkit/arplanegeometry/2941051-triangleindices
// for more information.
repeated int32 triangle_indices = 5 [packed = true];
// Each set of three indices forms a triangle, so the number of indices in the
// triangle_indices buffer is three times the triangle_count value.
// See
// https://developer.apple.com/documentation/arkit/arplanegeometry/2941058-trianglecount
// for more information.
optional int32 triangle_count = 6;
// Each value in this buffer represents the position of a vertex along the
// boundary polygon of the estimated plane. The owning plane anchor's
// transform matrix defines the coordinate system for these points.
// See
// https://developer.apple.com/documentation/arkit/arplanegeometry/2941052-boundaryvertices
// for more information.
repeated Vertex boundary_vertices = 7;
// The number of elements in the boundary_vertices buffer.
optional int32 boundary_vertex_count = 8;
}
// Information about the position and orientation of a real-world flat surface.
// See https://developer.apple.com/documentation/arkit/arplaneanchor for more
// information.
message ARPlaneAnchor {
enum Alignment {
UNDEFINED = 0;
// The plane is perpendicular to gravity.
HORIZONTAL = 1;
// The plane is parallel to gravity.
VERTICAL = 2;
}
// Wrapper for a 3D point / vector within the plane. See extent and center
// values for more information.
message PlaneVector {
optional float x = 1;
optional float y = 2;
optional float z = 3;
}
enum PlaneClassification {
NONE = 0;
WALL = 1;
FLOOR = 2;
CEILING = 3;
TABLE = 4;
SEAT = 5;
}
// The classification status for the plane.
enum PlaneClassificationStatus {
// The classification process for the plane anchor has completed but the
// result is inconclusive.
UNKNOWN = 0;
// No classification information can be provided (set on error or if the
// device does not support plane classification).
UNAVAILABLE = 1;
// The classification process has not completed.
UNDETERMINED = 2;
// The classification process for the plane anchor has completed.
KNOWN = 3;
}
// The ID of the plane.
optional string identifier = 1;
// 4x4 row-major matrix encoding the position, orientation, and scale of the
// anchor relative to the world coordinate space.
// See
// https://developer.apple.com/documentation/arkit/aranchor/2867981-transform
// for more information.
repeated float transform = 2;
// The general orientation of the detected plane with respect to gravity.
optional Alignment alignment = 3;
// A coarse triangle mesh representing the general shape of the detected
// plane.
optional ARPlaneGeometry geometry = 4;
// The center point of the plane relative to its anchor position.
// Although the type of this property is a 3D vector, a plane anchor is always
// two-dimensional, and is always positioned in only the x and z directions
// relative to its transform position. (That is, the y-component of this
// vector is always zero.)
// See
// https://developer.apple.com/documentation/arkit/arplaneanchor/2882056-center
// for more information.
optional PlaneVector center = 5;
// The estimated width and length of the detected plane.
// See
// https://developer.apple.com/documentation/arkit/arplaneanchor/2882055-extent
// for more information.
optional PlaneVector extent = 6;
// A Boolean value that indicates whether plane classification is available on
// the current device. On devices without plane classification support, all
// plane anchors report a classification value of NONE
// and a classification_status value of UNAVAILABLE.
optional bool classification_supported = 7;
// A general characterization of what kind of real-world surface the plane
// anchor represents.
// See
// https://developer.apple.com/documentation/arkit/arplaneanchor/2990936-classification
// for more information.
optional PlaneClassification classification = 8;
// The current state of ARKit's process for classifying the plane anchor.
// When this property's value is KNOWN, the classification property represents
// ARKit's characterization of the real-world surface corresponding to the
// plane anchor.
// See
// https://developer.apple.com/documentation/arkit/arplaneanchor/2990937-classificationstatus
// for more information.
optional PlaneClassificationStatus classification_status = 9;
}
// A collection of points in the world coordinate space.
// See https://developer.apple.com/documentation/arkit/arpointcloud for more
// information.
message ARPointCloud {
message Point {
optional float x = 1;
optional float y = 2;
optional float z = 3;
}
// The number of points in the cloud.
optional int32 count = 1;
// The list of detected points.
repeated Point point = 2;
// A list of unique identifiers corresponding to detected feature points.
// Each identifier in this list corresponds to the point at the same index
// in the points array.
repeated int64 identifier = 3 [packed = true];
}
// Video image and face position tracking information.
// See developer.apple.com/documentation/arkit/arframe for more information.
message ARFrame {
// The timestamp for the frame.
optional double timestamp = 1;
// The depth data associated with the frame. Not all frames have depth data.
optional AVDepthData depth_data = 2;
// The depth data object timestamp associated with the frame. May differ from
// the frame timestamp value. Is only set when the frame has depth_data.
optional double depth_data_timestamp = 3;
// Camera information associated with the frame.
optional ARCamera camera = 4;
// Light information associated with the frame.
optional ARLightEstimate light_estimate = 5;
// Face anchor information associated with the frame. Not all frames have an
// active face anchor.
optional ARFaceAnchor face_anchor = 6;
// Plane anchors associated with the frame. Not all frames have a plane
// anchor. Plane anchors and face anchors are mutually exclusive.
repeated ARPlaneAnchor plane_anchor = 7;
// The current intermediate results of the scene analysis used to perform
// world tracking.
// See
// https://developer.apple.com/documentation/arkit/arframe/2887449-rawfeaturepoints
// for more information.
optional ARPointCloud raw_feature_points = 8;
}

View File

@ -0,0 +1,92 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package mediapipe;
import "mediapipe/graphs/object_detection_3d/calculators/a_r_capture_metadata.proto";
import "mediapipe/graphs/object_detection_3d/calculators/object.proto";
// Projection of a 3D point on an image, and its metric depth.
message NormalizedPoint2D {
// x-y position of the 2d keypoint in the image coordinate system.
// x, y \in [0, 1], where the top-left corner is (0, 0) and the bottom-right
// corner is (1, 1).
float x = 1;
float y = 2;
// The depth of the point in the camera coordinate system (in meters).
float depth = 3;
}
// The 3D point in the camera coordinate system, the scales are in meters.
message Point3D {
float x = 1;
float y = 2;
float z = 3;
}
message AnnotatedKeyPoint {
int32 id = 1;
Point3D point_3d = 2;
NormalizedPoint2D point_2d = 3;
}
message ObjectAnnotation {
// Reference to the object identifier in ObjectInstance.
int32 object_id = 1;
// For each object, list all the annotated keypoints here.
// E.g., for bounding boxes we have 8 keypoints, for hands 21 keypoints, etc.
// These normalized points are the projections of the object's 3D keypoints
// onto the current frame, given that frame's camera pose.
repeated AnnotatedKeyPoint keypoints = 2;
// Visibility of this annotation in a frame.
float visibility = 3;
}
message FrameAnnotation {
// Unique frame id, corresponds to images.
int32 frame_id = 1;
// List of the annotated objects in this frame. Depending on how many objects
// are observable in this frame, we might have none or as many as
// sequence.objects_size() annotations.
repeated ObjectAnnotation annotations = 2;
// Information about the camera transformation (in the world coordinate) and
// imaging characteristics for a captured video frame.
ARCamera camera = 3;
// The timestamp for the frame.
double timestamp = 4;
// Plane center and normal in camera frame.
repeated float plane_center = 5;
repeated float plane_normal = 6;
}
// The sequence protocol contains the annotation data for the entire video clip.
message Sequence {
// List of all the annotated 3D objects in this sequence in the world
// coordinate system. Given the camera poses of each frame (also in the
// world coordinate system), these objects' bounding boxes can be projected to
// each frame to get the per-frame annotation (i.e. frame_annotations below).
repeated Object objects = 1;
// List of annotated data per each frame in sequence + frame information.
repeated FrameAnnotation frame_annotations = 2;
}
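
As a hedged usage sketch for the messages above (relying only on the standard protoc-generated C++ accessors; the include path matches the annotation_cc_proto target earlier in this commit, and all values are illustrative), a FrameAnnotation could be populated like this:

```cpp
#include <iostream>
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"

int main() {
  mediapipe::FrameAnnotation frame;
  frame.set_frame_id(1);
  frame.set_timestamp(0.033);  // seconds, example value

  mediapipe::ObjectAnnotation* object = frame.add_annotations();
  object->set_object_id(7);
  object->set_visibility(1.0f);

  mediapipe::AnnotatedKeyPoint* keypoint = object->add_keypoints();
  keypoint->set_id(0);
  keypoint->mutable_point_2d()->set_x(0.4f);      // normalized image x
  keypoint->mutable_point_2d()->set_y(0.6f);      // normalized image y
  keypoint->mutable_point_2d()->set_depth(1.2f);  // meters
  keypoint->mutable_point_3d()->set_z(1.2f);      // camera-frame z, meters

  std::cout << frame.DebugString();
  return 0;
}
```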

View File

@ -0,0 +1,209 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "Eigen/Dense"
#include "Eigen/src/Core/util/Constants.h"
#include "Eigen/src/Geometry/Quaternion.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_model_matrices_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/box.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
#include "mediapipe/util/color.pb.h"
namespace mediapipe {
namespace {
constexpr char kAnnotationTag[] = "ANNOTATIONS";
constexpr char kModelMatricesTag[] = "MODEL_MATRICES";
using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
} // namespace
// Converts the box prediction from Objectron Model to the Model matrices
// to be rendered.
//
// Input:
// ANNOTATIONS - Frame annotations with lifted 3D points; the points are in
// the Objectron coordinate system.
// Output:
// MODEL_MATRICES - Result ModelMatrices, in OpenGL coordinate system.
//
// Usage example:
// node {
// calculator: "AnnotationsToModelMatricesCalculator"
// input_stream: "ANNOTATIONS:objects"
// output_stream: "MODEL_MATRICES:model_matrices"
//}
class AnnotationsToModelMatricesCalculator : public CalculatorBase {
public:
AnnotationsToModelMatricesCalculator() {}
~AnnotationsToModelMatricesCalculator() override {}
AnnotationsToModelMatricesCalculator(
const AnnotationsToModelMatricesCalculator&) = delete;
AnnotationsToModelMatricesCalculator& operator=(
const AnnotationsToModelMatricesCalculator&) = delete;
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
::mediapipe::Status GetModelMatricesForAnnotations(
const FrameAnnotation& annotations,
TimedModelMatrixProtoList* model_matrix_list);
AnnotationsToModelMatricesCalculatorOptions options_;
Eigen::Vector3f model_scale_;
Matrix4fRM model_transformation_;
};
REGISTER_CALCULATOR(AnnotationsToModelMatricesCalculator);
::mediapipe::Status AnnotationsToModelMatricesCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
if (cc->Inputs().HasTag(kAnnotationTag)) {
cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
}
if (cc->Outputs().HasTag(kModelMatricesTag)) {
cc->Outputs().Tag(kModelMatricesTag).Set<TimedModelMatrixProtoList>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status AnnotationsToModelMatricesCalculator::Open(
CalculatorContext* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag));
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<AnnotationsToModelMatricesCalculatorOptions>();
if (options_.model_scale_size() == 3) {
model_scale_ =
Eigen::Map<const Eigen::Vector3f>(options_.model_scale().data());
} else {
model_scale_.setOnes();
}
if (options_.model_transformation_size() == 16) {
model_transformation_ =
Eigen::Map<const Matrix4fRM>(options_.model_transformation().data());
} else {
model_transformation_.setIdentity();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status AnnotationsToModelMatricesCalculator::Process(
CalculatorContext* cc) {
auto model_matrices = std::make_unique<TimedModelMatrixProtoList>();
const FrameAnnotation& annotations =
cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
if (!GetModelMatricesForAnnotations(annotations, model_matrices.get()).ok()) {
return ::mediapipe::InvalidArgumentError(
"Error in GetModelMatricesForBoxes");
}
cc->Outputs()
.Tag(kModelMatricesTag)
.Add(model_matrices.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
::mediapipe::Status
AnnotationsToModelMatricesCalculator::GetModelMatricesForAnnotations(
const FrameAnnotation& annotations,
TimedModelMatrixProtoList* model_matrix_list) {
if (model_matrix_list == nullptr) {
return ::mediapipe::InvalidArgumentError("model_matrix_list is nullptr");
}
model_matrix_list->clear_model_matrix();
Box box("category");
for (const auto& object : annotations.annotations()) {
TimedModelMatrixProto* model_matrix = model_matrix_list->add_model_matrix();
model_matrix->set_id(object.object_id());
// Fit a box to the original vertices to estimate the scale of the box
std::vector<Eigen::Vector3f> vertices;
for (const auto& keypoint : object.keypoints()) {
const auto& point = keypoint.point_3d();
Eigen::Vector3f p(point.x(), point.y(), point.z());
vertices.emplace_back(p);
}
box.Fit(vertices);
// Re-scale the box if necessary
Eigen::Vector3f estimated_scale = box.GetScale();
vertices.clear();
for (const auto& keypoint : object.keypoints()) {
const auto& point = keypoint.point_3d();
Eigen::Vector3f p(point.x(), point.y(), point.z());
vertices.emplace_back(p);
}
box.Fit(vertices);
Matrix4fRM object_transformation = box.GetTransformation();
Matrix4fRM model_view;
Matrix4fRM pursuit_model;
// The reference view is
//
// ref << 0., 0., 1., 0.,
// -1., 0., 0., 0.,
// 0., -1., 0., 0.,
// 0., 0., 0., 1.;
// We have pursuit_model * model = model_view, to get pursuit_model:
// pursuit_model = model_view * model^-1
// clang-format off
pursuit_model << 0.0, 1.0, 0.0, 0.0,
1.0, 0.0, 0.0, 0.0,
0.0, 0.0, 1.0, 0.0,
0.0, 0.0, 0.0, 1.0;
// clang-format on
// Re-scale the CAD model to the scale of the estimated bounding box.
const Eigen::Vector3f scale = model_scale_.cwiseProduct(estimated_scale);
const Matrix4fRM model =
model_transformation_.array().colwise() * scale.homogeneous().array();
// Finally compute the model_view matrix.
model_view = pursuit_model * object_transformation * model;
for (int i = 0; i < model_view.rows(); ++i) {
for (int j = 0; j < model_view.cols(); ++j) {
model_matrix->add_matrix_entries(model_view(i, j));
}
}
}
return ::mediapipe::OkStatus();
}
} // namespace mediapipe
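
The column-wise scaling above is compact but easy to misread; the standalone Eigen sketch below (illustrative values only) shows that broadcasting scale.homogeneous() over the columns scales the x/y/z rows of the 4x4 transform while leaving the homogeneous row untouched:

```cpp
#include <iostream>
#include "Eigen/Dense"

int main() {
  Eigen::Matrix4f m = Eigen::Matrix4f::Identity();
  Eigen::Vector3f scale(2.f, 3.f, 4.f);
  // Element (i, j) of m is multiplied by scale.homogeneous()(i), so the
  // x/y/z rows get scaled and the bottom row stays (0, 0, 0, 1).
  Eigen::Matrix4f scaled =
      (m.array().colwise() * scale.homogeneous().array()).matrix();
  std::cout << scaled << "\n";  // prints a diagonal of 2, 3, 4, 1
}
```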

View File

@ -0,0 +1,33 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message AnnotationsToModelMatricesCalculatorOptions {
extend CalculatorOptions {
optional AnnotationsToModelMatricesCalculatorOptions ext = 290166283;
}
// Vector of size 3 indicating the scale vector [x, y, z]. We will re-scale
// the model size with this vector. (Defaults to [1., 1., 1.])
repeated float model_scale = 1;
// 4x4 Row major matrix denoting the transformation from the model to the
// Deep Pursuit 3D coordinate system (where front is +z, and up is +y).
repeated float model_transformation = 2;
}
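
Open() in the calculator above (only partially shown) maps these repeated fields into Eigen types. A hedged sketch of that mapping as a free function; the helper name and the generated-header path are assumptions, and the fallback defaults simply follow the field comments:

// Sketch only (hypothetical helper, not the shipped Open() body): read the
// repeated option fields into Eigen types, falling back to the documented
// defaults when they are unset.
#include "Eigen/Dense"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_model_matrices_calculator.pb.h"

using Matrix4fRM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;

void ReadModelOptions(
    const mediapipe::AnnotationsToModelMatricesCalculatorOptions& options,
    Eigen::Vector3f* model_scale, Matrix4fRM* model_transformation) {
  if (options.model_scale_size() == 3) {
    *model_scale =
        Eigen::Map<const Eigen::Vector3f>(options.model_scale().data());
  } else {
    model_scale->setOnes();  // Documented default of [1., 1., 1.].
  }
  if (options.model_transformation_size() == 16) {
    *model_transformation =
        Eigen::Map<const Matrix4fRM>(options.model_transformation().data());
  } else {
    model_transformation->setIdentity();
  }
}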

View File

@ -0,0 +1,273 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_options.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotations_to_render_data_calculator.pb.h"
#include "mediapipe/util/color.pb.h"
#include "mediapipe/util/render_data.pb.h"
namespace mediapipe {
namespace {
constexpr char kAnnotationTag[] = "ANNOTATIONS";
constexpr char kRenderDataTag[] = "RENDER_DATA";
constexpr char kKeypointLabel[] = "KEYPOINT";
constexpr int kMaxLandmarkThickness = 18;
inline void SetColor(RenderAnnotation* annotation, const Color& color) {
annotation->mutable_color()->set_r(color.r());
annotation->mutable_color()->set_g(color.g());
annotation->mutable_color()->set_b(color.b());
}
// Remap x from range [lo hi] to range [0 1] then multiply by scale.
inline float Remap(float x, float lo, float hi, float scale) {
return (x - lo) / (hi - lo + 1e-6) * scale;
}
inline void GetMinMaxZ(const FrameAnnotation& annotations, float* z_min,
float* z_max) {
*z_min = std::numeric_limits<float>::max();
  *z_max = std::numeric_limits<float>::lowest();  // Most negative float, not min().
// Use a global depth scale for all the objects in the scene
for (const auto& object : annotations.annotations()) {
for (const auto& keypoint : object.keypoints()) {
*z_min = std::min(keypoint.point_2d().depth(), *z_min);
*z_max = std::max(keypoint.point_2d().depth(), *z_max);
}
}
}
void SetColorSizeValueFromZ(float z, float z_min, float z_max,
RenderAnnotation* render_annotation) {
const int color_value = 255 - static_cast<int>(Remap(z, z_min, z_max, 255));
::mediapipe::Color color;
color.set_r(color_value);
color.set_g(color_value);
color.set_b(color_value);
SetColor(render_annotation, color);
const int thickness = static_cast<int>((1.f - Remap(z, z_min, z_max, 1)) *
kMaxLandmarkThickness);
render_annotation->set_thickness(thickness);
}
} // namespace
// A calculator that converts FrameAnnotation proto to RenderData proto for
// visualization. The input should be the FrameAnnotation proto buffer. It is
// also possible to specify the connections between landmarks.
//
// Example config:
// node {
// calculator: "AnnotationsToRenderDataCalculator"
// input_stream: "ANNOTATIONS:annotations"
// output_stream: "RENDER_DATA:render_data"
// options {
// [AnnotationsToRenderDataCalculator.ext] {
// landmark_connections: [0, 1, 1, 2]
// landmark_color { r: 0 g: 255 b: 0 }
// connection_color { r: 0 g: 255 b: 0 }
// thickness: 4.0
// }
// }
// }
class AnnotationsToRenderDataCalculator : public CalculatorBase {
public:
AnnotationsToRenderDataCalculator() {}
~AnnotationsToRenderDataCalculator() override {}
AnnotationsToRenderDataCalculator(const AnnotationsToRenderDataCalculator&) =
delete;
AnnotationsToRenderDataCalculator& operator=(
const AnnotationsToRenderDataCalculator&) = delete;
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
static void SetRenderAnnotationColorThickness(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderAnnotation* render_annotation);
static RenderAnnotation* AddPointRenderData(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderData* render_data);
// Add a command to draw a line in the rendering queue. The line is drawn from
// (start_x, start_y) to (end_x, end_y). The input x,y can either be in pixel
// or normalized coordinate [0, 1] as indicated by the normalized flag.
static void AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
RenderData* render_data);
  // Same as the function above, except that instead of using the configured
  // connection color it re-colors the line according to the two depth values:
  // gray_val1 is the color of the starting point and gray_val2 is the color
  // of the ending point. The line is drawn with a gradient from gray_val1 to
  // gray_val2, where each gray value ranges from 0 (black) to 255 (white).
static void AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
int gray_val1, int gray_val2, RenderData* render_data);
AnnotationsToRenderDataCalculatorOptions options_;
};
REGISTER_CALCULATOR(AnnotationsToRenderDataCalculator);
::mediapipe::Status AnnotationsToRenderDataCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kAnnotationTag)) << "No input stream found.";
if (cc->Inputs().HasTag(kAnnotationTag)) {
cc->Inputs().Tag(kAnnotationTag).Set<FrameAnnotation>();
}
cc->Outputs().Tag(kRenderDataTag).Set<RenderData>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status AnnotationsToRenderDataCalculator::Open(
CalculatorContext* cc) {
cc->SetOffset(TimestampDiff(0));
options_ = cc->Options<AnnotationsToRenderDataCalculatorOptions>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status AnnotationsToRenderDataCalculator::Process(
CalculatorContext* cc) {
auto render_data = absl::make_unique<RenderData>();
bool visualize_depth = options_.visualize_landmark_depth();
float z_min = 0.f;
float z_max = 0.f;
if (cc->Inputs().HasTag(kAnnotationTag)) {
const auto& annotations =
cc->Inputs().Tag(kAnnotationTag).Get<FrameAnnotation>();
RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0)
<< "Number of entries in landmark connections must be a multiple of 2";
if (visualize_depth) {
GetMinMaxZ(annotations, &z_min, &z_max);
// Only change rendering if there are actually z values other than 0.
visualize_depth &= ((z_max - z_min) > 1e-3);
}
for (const auto& object : annotations.annotations()) {
for (const auto& keypoint : object.keypoints()) {
auto* keypoint_data_render =
AddPointRenderData(options_, render_data.get());
auto* point = keypoint_data_render->mutable_point();
if (visualize_depth) {
SetColorSizeValueFromZ(keypoint.point_2d().depth(), z_min, z_max,
keypoint_data_render);
}
point->set_normalized(true);
point->set_x(keypoint.point_2d().x());
point->set_y(keypoint.point_2d().y());
}
// Add edges
for (int i = 0; i < options_.landmark_connections_size(); i += 2) {
const auto& ld0 =
object.keypoints(options_.landmark_connections(i)).point_2d();
const auto& ld1 =
object.keypoints(options_.landmark_connections(i + 1)).point_2d();
const bool normalized = true;
if (visualize_depth) {
const int gray_val1 =
255 - static_cast<int>(Remap(ld0.depth(), z_min, z_max, 255));
const int gray_val2 =
255 - static_cast<int>(Remap(ld1.depth(), z_min, z_max, 255));
AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
options_, normalized, gray_val1, gray_val2,
render_data.get());
} else {
AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(),
options_, normalized, render_data.get());
}
}
}
}
cc->Outputs()
.Tag(kRenderDataTag)
.Add(render_data.release(), cc->InputTimestamp());
return ::mediapipe::OkStatus();
}
void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
int gray_val1, int gray_val2, RenderData* render_data) {
auto* connection_annotation = render_data->add_render_annotations();
RenderAnnotation::GradientLine* line =
connection_annotation->mutable_gradient_line();
line->set_x_start(start_x);
line->set_y_start(start_y);
line->set_x_end(end_x);
line->set_y_end(end_y);
line->set_normalized(normalized);
line->mutable_color1()->set_r(gray_val1);
line->mutable_color1()->set_g(gray_val1);
line->mutable_color1()->set_b(gray_val1);
line->mutable_color2()->set_r(gray_val2);
line->mutable_color2()->set_g(gray_val2);
line->mutable_color2()->set_b(gray_val2);
connection_annotation->set_thickness(options.thickness());
}
void AnnotationsToRenderDataCalculator::AddConnectionToRenderData(
float start_x, float start_y, float end_x, float end_y,
const AnnotationsToRenderDataCalculatorOptions& options, bool normalized,
RenderData* render_data) {
auto* connection_annotation = render_data->add_render_annotations();
RenderAnnotation::Line* line = connection_annotation->mutable_line();
line->set_x_start(start_x);
line->set_y_start(start_y);
line->set_x_end(end_x);
line->set_y_end(end_y);
line->set_normalized(normalized);
SetColor(connection_annotation, options.connection_color());
connection_annotation->set_thickness(options.thickness());
}
RenderAnnotation* AnnotationsToRenderDataCalculator::AddPointRenderData(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderData* render_data) {
auto* landmark_data_annotation = render_data->add_render_annotations();
landmark_data_annotation->set_scene_tag(kKeypointLabel);
SetRenderAnnotationColorThickness(options, landmark_data_annotation);
return landmark_data_annotation;
}
void AnnotationsToRenderDataCalculator::SetRenderAnnotationColorThickness(
const AnnotationsToRenderDataCalculatorOptions& options,
RenderAnnotation* render_annotation) {
SetColor(render_annotation, options.landmark_color());
render_annotation->set_thickness(options.thickness());
}
} // namespace mediapipe
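
SetColorSizeValueFromZ() above drives both the landmark color and the thickness from the normalized depth. A small standalone sketch of that mapping (the helper names below are illustrative, not part of the calculator):

// Standalone sketch of the depth-to-color/thickness mapping used above.
#include <algorithm>

inline float Remap(float x, float lo, float hi, float scale) {
  return (x - lo) / (hi - lo + 1e-6f) * scale;
}

struct DepthStyle {
  int gray;       // 255 at z_min, 0 at z_max (nearer keypoints are brighter,
                  // assuming smaller depth values are nearer).
  int thickness;  // Thicker for nearer keypoints, up to kMaxLandmarkThickness.
};

inline DepthStyle StyleFromDepth(float z, float z_min, float z_max,
                                 int max_thickness) {
  DepthStyle style;
  style.gray = 255 - static_cast<int>(Remap(z, z_min, z_max, 255));
  style.thickness =
      static_cast<int>((1.f - Remap(z, z_min, z_max, 1)) * max_thickness);
  return style;
}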

View File

@ -0,0 +1,43 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/util/color.proto";
message AnnotationsToRenderDataCalculatorOptions {
extend CalculatorOptions {
optional AnnotationsToRenderDataCalculatorOptions ext = 267644238;
}
// Specifies the landmarks to be connected in the drawing. For example, the
// landmark_connections value of [0, 1, 1, 2] specifies two connections: one
// that connects landmarks with index 0 and 1, and another that connects
// landmarks with index 1 and 2.
repeated int32 landmark_connections = 1;
// Color of the landmarks.
optional Color landmark_color = 2;
// Color of the connections.
optional Color connection_color = 3;
// Thickness of the drawing of landmarks and connections.
optional double thickness = 4 [default = 1.0];
  // Change the color and size of rendered landmarks based on their z values.
optional bool visualize_landmark_depth = 5 [default = true];
}

View File

@ -0,0 +1,38 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
message BeliefDecoderConfig {
optional float heatmap_threshold = 1 [default = 0.9];
// Maximum distance in pixels between two local max heatmap values.
optional float local_max_distance = 2 [default = 10.0];
// Coefficient of offset_scale.
// offset_scale = offset_scale_coef * min(rows, cols).
// offset_scale is used to multiply the offset predictions from the network.
optional float offset_scale_coef = 3 [default = 0.5, deprecated = true];
  // The radius for vertex voting. Use no voting if the radius is less than or
  // equal to 1. Example: 10.
optional int32 voting_radius = 4;
// The number of pixels to determine whether two points are the same.
// Example: 5 (voting_radius / 2).
optional int32 voting_allowance = 5;
// The threshold of beliefs, with which the points can vote. Example: 0.2.
optional float voting_threshold = 6;
}
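
As a hedged usage sketch, this config feeds the Decoder class added later in this commit; the helper name is hypothetical and the values below simply echo the examples given in the field comments:

// Illustrative only: construct a decoder config using the example values
// suggested in the field comments above.
#include "mediapipe/graphs/object_detection_3d/calculators/belief_decoder_config.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/decoder.h"

mediapipe::Decoder MakeExampleDecoder() {
  mediapipe::BeliefDecoderConfig config;
  config.set_heatmap_threshold(0.9f);
  config.set_local_max_distance(10.0f);
  config.set_voting_radius(10);       // Example from the comment: 10.
  config.set_voting_allowance(5);     // Example: voting_radius / 2.
  config.set_voting_threshold(0.2f);  // Example: 0.2.
  return mediapipe::Decoder(config);
}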

View File

@ -0,0 +1,255 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/box.h"
#include "Eigen/src/Core/util/Constants.h"
#include "mediapipe/framework/port/logging.h"
namespace mediapipe {
namespace {
constexpr int kFrontFaceId = 4;
constexpr int kTopFaceId = 2;
constexpr int kNumKeypoints = 8 + 1;
constexpr int kNumberOfAxis = 3;
constexpr int kEdgesPerAxis = 4;
} // namespace
Box::Box(const std::string& category)
: Model(kBoundingBox, kNumKeypoints, category),
bounding_box_(kNumKeypoints) {
transformation_.setIdentity();
scale_ << 0.1, 0.1, 0.1;
  // The vertices are ordered according to the left-hand rule, so the normal
  // vector of each face points inward, into the box.
faces_.push_back({5, 6, 8, 7}); // +x on yz plane
faces_.push_back({1, 3, 4, 2}); // -x on yz plane
faces_.push_back({3, 7, 8, 4}); // +y on xz plane = top
faces_.push_back({1, 2, 6, 5}); // -y on xz plane
faces_.push_back({2, 4, 8, 6}); // +z on xy plane = front
faces_.push_back({1, 5, 7, 3}); // -z on xy plane
  // Add the edges of the cube; they are sorted by axis (x, then y, then z).
edges_.push_back({1, 5});
edges_.push_back({2, 6});
edges_.push_back({3, 7});
edges_.push_back({4, 8});
edges_.push_back({1, 3});
edges_.push_back({5, 7});
edges_.push_back({2, 4});
edges_.push_back({6, 8});
edges_.push_back({1, 2});
edges_.push_back({3, 4});
edges_.push_back({5, 6});
edges_.push_back({7, 8});
Update();
}
void Box::Update() {
  // Compute the center and the eight vertices of the bounding box from the
  // Box's parameters.
auto w = scale_[0] / 2.f;
auto h = scale_[1] / 2.f;
auto d = scale_[2] / 2.f;
  // Define the local coordinate system, w.r.t. the center of the box.
bounding_box_[0] << 0., 0., 0.;
bounding_box_[1] << -w, -h, -d;
bounding_box_[2] << -w, -h, +d;
bounding_box_[3] << -w, +h, -d;
bounding_box_[4] << -w, +h, +d;
bounding_box_[5] << +w, -h, -d;
bounding_box_[6] << +w, -h, +d;
bounding_box_[7] << +w, +h, -d;
bounding_box_[8] << +w, +h, +d;
// Convert to world coordinate system
for (int i = 0; i < kNumKeypoints; ++i) {
bounding_box_[i] =
transformation_.topLeftCorner<3, 3>() * bounding_box_[i] +
transformation_.col(3).head<3>();
}
}
void Box::Adjust(const std::vector<float>& variables) {
Eigen::Vector3f translation;
translation << variables[0], variables[1], variables[2];
SetTranslation(translation);
const float roll = variables[3];
const float pitch = variables[4];
const float yaw = variables[5];
SetRotation(roll, pitch, yaw);
Eigen::Vector3f scale;
scale << variables[6], variables[7], variables[8];
SetScale(scale);
Update();
}
float* Box::GetVertex(size_t vertex_id) {
CHECK_LT(vertex_id, kNumKeypoints);
return bounding_box_[vertex_id].data();
}
const float* Box::GetVertex(size_t vertex_id) const {
CHECK_LT(vertex_id, kNumKeypoints);
return bounding_box_[vertex_id].data();
}
bool Box::InsideTest(const Eigen::Vector3f& point, int check_axis) const {
const float* v0 = GetVertex(1);
const float* v1 = GetVertex(2);
const float* v2 = GetVertex(3);
const float* v4 = GetVertex(5);
switch (check_axis) {
case 1:
return (v0[0] <= point[0] && point[0] <= v1[0]); // X-axis
case 2:
return (v0[1] <= point[1] && point[1] <= v2[1]); // Y-axis
case 3:
return (v0[2] <= point[2] && point[2] <= v4[2]); // Z-axis
default:
return false;
}
}
void Box::Deserialize(const Object& obj) {
CHECK_EQ(obj.keypoints_size(), kNumKeypoints);
Model::Deserialize(obj);
}
void Box::Serialize(Object* obj) {
Model::Serialize(obj);
obj->set_type(Object::BOUNDING_BOX);
std::vector<Vector3f> local_bounding_box(9);
  // Define the local coordinate system, w.r.t. the center of the box.
local_bounding_box[0] << 0., 0., 0.;
local_bounding_box[1] << -0.5, -0.5, -0.5;
local_bounding_box[2] << -0.5, -0.5, +0.5;
local_bounding_box[3] << -0.5, +0.5, -0.5;
local_bounding_box[4] << -0.5, +0.5, +0.5;
local_bounding_box[5] << +0.5, -0.5, -0.5;
local_bounding_box[6] << +0.5, -0.5, +0.5;
local_bounding_box[7] << +0.5, +0.5, -0.5;
local_bounding_box[8] << +0.5, +0.5, +0.5;
for (int i = 0; i < kNumKeypoints; ++i) {
KeyPoint* keypoint = obj->add_keypoints();
keypoint->set_x(local_bounding_box[i][0]);
keypoint->set_y(local_bounding_box[i][1]);
keypoint->set_z(local_bounding_box[i][2]);
keypoint->set_confidence_radius(0.);
}
}
const Face& Box::GetFrontFace() const { return faces_[kFrontFaceId]; }
const Face& Box::GetTopFace() const { return faces_[kTopFaceId]; }
std::pair<Vector3f, Vector3f> Box::GetGroundPlane() const {
const Vector3f gravity = Vector3f(0., 1., 0.);
int ground_plane_id = 0;
float ground_plane_error = 10.0;
auto get_face_center = [&](const Face& face) {
Vector3f center = Vector3f::Zero();
for (const int vertex_id : face) {
center += Map<const Vector3f>(GetVertex(vertex_id));
}
center /= face.size();
return center;
};
auto get_face_normal = [&](const Face& face, const Vector3f& center) {
Vector3f v1 = Map<const Vector3f>(GetVertex(face[0])) - center;
Vector3f v2 = Map<const Vector3f>(GetVertex(face[1])) - center;
Vector3f normal = v1.cross(v2);
return normal;
};
// The ground plane is defined as a plane aligned with gravity.
// gravity is the (0, 1, 0) vector in the world coordinate system.
const auto& faces = GetFaces();
for (int face_id = 0; face_id < faces.size(); face_id += 2) {
const auto& face = faces[face_id];
Vector3f center = get_face_center(face);
Vector3f normal = get_face_normal(face, center);
Vector3f w = gravity.cross(normal);
const float w_sq_norm = w.squaredNorm();
if (w_sq_norm < ground_plane_error) {
ground_plane_error = w_sq_norm;
ground_plane_id = face_id;
}
}
Vector3f center = get_face_center(faces[ground_plane_id]);
Vector3f normal = get_face_normal(faces[ground_plane_id], center);
  // Each face has a parallel face whose normal is also aligned with the
  // gravity vector. We pick the face with the lower height (y-value).
  // The face parallel to face 0 is face 1, to face 2 is face 3, and to
  // face 4 is face 5.
int parallel_face_id = ground_plane_id + 1;
const auto& parallel_face = faces[parallel_face_id];
Vector3f parallel_face_center = get_face_center(parallel_face);
Vector3f parallel_face_normal =
get_face_normal(parallel_face, parallel_face_center);
if (parallel_face_center[1] < center[1]) {
center = parallel_face_center;
normal = parallel_face_normal;
}
return {center, normal};
}
template <typename T>
void Box::Fit(const std::vector<T>& vertices) {
CHECK_EQ(vertices.size(), kNumKeypoints);
scale_.setZero();
// The scale would remain invariant under rotation and translation.
// We can safely estimate the scale from the oriented box.
for (int axis = 0; axis < kNumberOfAxis; ++axis) {
for (int edge_id = 0; edge_id < kEdgesPerAxis; ++edge_id) {
// The edges are stored in quadruples according to each axis
const std::array<int, 2>& edge = edges_[axis * kEdgesPerAxis + edge_id];
scale_[axis] += (vertices[edge[0]] - vertices[edge[1]]).norm();
}
scale_[axis] /= kEdgesPerAxis;
}
// Create a scaled axis-aligned box
transformation_.setIdentity();
Update();
using MatrixN3_RM = Eigen::Matrix<float, kNumKeypoints, 3, Eigen::RowMajor>;
Eigen::Map<const MatrixN3_RM> v(vertices[0].data());
Eigen::Map<const MatrixN3_RM> system(bounding_box_[0].data());
auto system_h = system.rowwise().homogeneous().eval();
auto system_g = system_h.colPivHouseholderQr();
auto solution = system_g.solve(v).eval();
transformation_.topLeftCorner<3, 4>() = solution.transpose();
Update();
}
template void Box::Fit<Vector3f>(const std::vector<Vector3f>&);
template void Box::Fit<Map<Vector3f>>(const std::vector<Map<Vector3f>>&);
template void Box::Fit<Map<const Vector3f>>(
const std::vector<Map<const Vector3f>>&);
} // namespace mediapipe
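
A hedged standalone sketch of Box::Fit on a synthetic, axis-aligned box. The vertex ordering follows the constructor above (center first, then the 8 corners from -x/-y/-z to +x/+y/+z), and the expected results in the comments are what the edge-length averaging and least-squares pose solve above should produce:

// Sketch only, not shipped code: fit a Box to the 1 + 8 keypoints of an
// axis-aligned box of size (0.2, 0.4, 0.6) centered at the origin, then
// read back the estimated scale and pose via the Model accessors used by
// the calculators in this commit.
#include <vector>
#include "Eigen/Dense"
#include "mediapipe/graphs/object_detection_3d/calculators/box.h"

void FitExample() {
  std::vector<Eigen::Vector3f> vertices;
  vertices.emplace_back(0.f, 0.f, 0.f);       // Center keypoint first.
  const float w = 0.1f, h = 0.2f, d = 0.3f;   // Half extents.
  for (int x = -1; x <= 1; x += 2) {
    for (int y = -1; y <= 1; y += 2) {
      for (int z = -1; z <= 1; z += 2) {
        vertices.emplace_back(x * w, y * h, z * d);
      }
    }
  }
  mediapipe::Box box("example");
  box.Fit(vertices);
  // box.GetScale() should now be close to (0.2, 0.4, 0.6) and
  // box.GetTransformation() close to the identity.
}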

View File

@ -0,0 +1,132 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_BOX_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_BOX_H_
#include <vector>
#include "mediapipe/graphs/object_detection_3d/calculators/model.h"
namespace mediapipe {
// Model for the bounding box in 3D
// The box has 9 degrees of freedom, which uniquely define the 8 box vertices
// in the fixed world coordinate system.
//
// The 8 keypoints are defined as follows
//
// kp-id axis
// 0 000 ---
// 1 001 --+
// 2 010 -+-
// 3 011 -++
// 4 100 +--
// 5 101 +-+
// 6 110 ++-
// 7 111 +++
//
// where each digit of the binary code indicates the negative (-) or positive
// (+) direction along the corresponding axis (x, y, z), with the center of
// the box as the origin. The resulting bounding box is
//
// x x
// 0 + + + + + + + + 4 .-------
// +\ +\ |\
// + \ y + \ z | \ y
// + \ + \ | \
// + 2 + + + + + + + + 6
// z + + + +
// + + + +
// + + C + +
// + + + +
// 1 + + + + + + + + 5 +
// \ + \ +
// \ + \ +
// \+ \+
// 3 + + + + + + + + 7
//
// World coordinate system: +y is up (aligned with gravity),
// +z is toward the user, +x follows right hand rule.
// The front face is defined as +z axis on xy plane.
// The top face is defined as +y axis on xz plane.
//
class Box : public Model {
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
explicit Box(const std::string& category);
~Box() override = default;
bool InsideTest(const Vector3f& point, int check_axis) const;
const std::vector<Face>& GetFaces() const { return faces_; }
const Face& GetFace(size_t face_id) const { return faces_[face_id]; }
const std::vector<std::array<int, 2>>& GetEdges() const { return edges_; }
const std::array<int, 2>& GetEdge(size_t edge_id) const {
return edges_[edge_id];
}
  // Returns the keypoints for the front face of the box.
  // The front face is defined as the face with +z normal vector on the xy
  // plane. In Box's constructor, the front face is set to {2, 4, 8, 6}.
  const Face& GetFrontFace() const;
  // Returns the keypoints for the top face of the box.
  // The top face is defined as the face with +y normal vector on the xz
  // plane. In Box's constructor, the top face is set to {3, 7, 8, 4}.
const Face& GetTopFace() const;
void Update() override;
void Adjust(const std::vector<float>& variables) override;
float* GetVertex(size_t vertex_id) override;
const float* GetVertex(size_t vertex_id) const override;
void Deserialize(const Object& obj) override;
void Serialize(Object* obj) override;
  // Computes the plane center and the normal vector for the plane the object
  // is sitting on in the world coordinate system. The normal vector is
  // roughly aligned with gravity.
std::pair<Vector3f, Vector3f> GetGroundPlane() const;
  // Estimates the box's 9-dof parameters from the given vertices. Directly
  // computes the scale of the box, then solves for orientation and
  // translation.
  // Expects a std::vector of size 9 of Eigen::Vector3f or mapped Vector3f.
  // If mapping proto messages, we recommend using Map<const Vector3f>.
// For example:
//
// using T = Map<const Vector3f>;
// std::vector<T> vertices;
// for (const auto& point : message) { // point is a repeated float message.
// T p(point.data());
// vertices.emplace_back(p);
// }
// box.Fit<T>(vertices);
//
  // The points must be arranged as a 1 + 8 vector (the center keypoint
  // followed by the 8 box vertices). This function overwrites the scale and
  // transformation properties of the class.
template <typename T = Eigen::Map<const Vector3f>>
void Fit(const std::vector<T>& vertices);
private:
std::vector<Face> faces_;
std::vector<std::array<int, 2>> edges_;
std::vector<Vector3f> bounding_box_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_BOX_H_

View File

@ -0,0 +1,153 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/box_util.h"
#include <math.h>
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
void ComputeBoundingRect(const std::vector<cv::Point2f>& points,
mediapipe::TimedBoxProto* box) {
CHECK(box != nullptr);
float top = 1.0f;
float bottom = 0.0f;
float left = 1.0f;
float right = 0.0f;
for (const auto& point : points) {
top = std::min(top, point.y);
bottom = std::max(bottom, point.y);
left = std::min(left, point.x);
right = std::max(right, point.x);
}
box->set_top(top);
box->set_bottom(bottom);
box->set_left(left);
box->set_right(right);
  // We currently only compute an axis-aligned bounding box. To compute a
  // rotated bounding box instead, we would need the original image aspect
  // ratio, map the points back to the original image space, compute
  // cv::convexHull, and then, for each edge of the hull, rotate according to
  // the edge orientation and find the tightest box.
box->set_rotation(0.0f);
}
float ComputeBoxIoU(const TimedBoxProto& box1, const TimedBoxProto& box2) {
cv::Point2f box1_center((box1.left() + box1.right()) * 0.5f,
(box1.top() + box1.bottom()) * 0.5f);
cv::Size2f box1_size(box1.right() - box1.left(), box1.bottom() - box1.top());
cv::RotatedRect rect1(box1_center, box1_size,
-box1.rotation() * 180.0f / M_PI);
cv::Point2f box2_center((box2.left() + box2.right()) * 0.5f,
(box2.top() + box2.bottom()) * 0.5f);
cv::Size2f box2_size(box2.right() - box2.left(), box2.bottom() - box2.top());
cv::RotatedRect rect2(box2_center, box2_size,
-box2.rotation() * 180.0f / M_PI);
std::vector<cv::Point2f> intersections_unsorted;
std::vector<cv::Point2f> intersections;
cv::rotatedRectangleIntersection(rect1, rect2, intersections_unsorted);
if (intersections_unsorted.size() < 3) {
return 0.0f;
}
cv::convexHull(intersections_unsorted, intersections);
  // We use the Shoelace formula to compute the area of the intersection
  // polygon.
float intersection_area = 0.0f;
for (int i = 0; i < intersections.size(); ++i) {
const auto& curr_pt = intersections[i];
const int i_next = (i + 1) == intersections.size() ? 0 : (i + 1);
const auto& next_pt = intersections[i_next];
intersection_area += (curr_pt.x * next_pt.y - next_pt.x * curr_pt.y);
}
intersection_area = std::abs(intersection_area) * 0.5f;
// Compute union area
const float union_area =
rect1.size.area() + rect2.size.area() - intersection_area + 1e-5f;
const float iou = intersection_area / union_area;
return iou;
}
std::vector<cv::Point2f> ComputeBoxCorners(const TimedBoxProto& box,
float width, float height) {
  // Rotate the 4 corners w.r.t. the center.
const cv::Point2f center(0.5f * (box.left() + box.right()) * width,
0.5f * (box.top() + box.bottom()) * height);
const std::vector<cv::Point2f> corners{
cv::Point2f(box.left() * width, box.top() * height),
cv::Point2f(box.left() * width, box.bottom() * height),
cv::Point2f(box.right() * width, box.bottom() * height),
cv::Point2f(box.right() * width, box.top() * height)};
const float cos_a = std::cos(box.rotation());
const float sin_a = std::sin(box.rotation());
std::vector<cv::Point2f> transformed_corners(4);
for (int k = 0; k < 4; ++k) {
// Scale and rotate w.r.t. center.
const cv::Point2f rad = corners[k] - center;
const cv::Point2f rot_rad(cos_a * rad.x - sin_a * rad.y,
sin_a * rad.x + cos_a * rad.y);
transformed_corners[k] = center + rot_rad;
transformed_corners[k].x /= width;
transformed_corners[k].y /= height;
}
return transformed_corners;
}
cv::Mat PerspectiveTransformBetweenBoxes(const TimedBoxProto& src_box,
const TimedBoxProto& dst_box,
const float aspect_ratio) {
std::vector<cv::Point2f> box1_corners =
ComputeBoxCorners(src_box, /*width*/ aspect_ratio, /*height*/ 1.0f);
std::vector<cv::Point2f> box2_corners =
ComputeBoxCorners(dst_box, /*width*/ aspect_ratio, /*height*/ 1.0f);
cv::Mat affine_transform = cv::getPerspectiveTransform(
/*src*/ box1_corners, /*dst*/ box2_corners);
cv::Mat output_affine;
affine_transform.convertTo(output_affine, CV_32FC1);
return output_affine;
}
cv::Point2f MapPoint(const TimedBoxProto& src_box, const TimedBoxProto& dst_box,
const cv::Point2f& src_point, float width, float height) {
const cv::Point2f src_center(
0.5f * (src_box.left() + src_box.right()) * width,
0.5f * (src_box.top() + src_box.bottom()) * height);
const cv::Point2f dst_center(
0.5f * (dst_box.left() + dst_box.right()) * width,
0.5f * (dst_box.top() + dst_box.bottom()) * height);
const float scale_x =
(dst_box.right() - dst_box.left()) / (src_box.right() - src_box.left());
const float scale_y =
(dst_box.bottom() - dst_box.top()) / (src_box.bottom() - src_box.top());
const float rotation = dst_box.rotation() - src_box.rotation();
const cv::Point2f rad =
cv::Point2f(src_point.x * width, src_point.y * height) - src_center;
const float rad_x = rad.x * scale_x;
const float rad_y = rad.y * scale_y;
const float cos_a = std::cos(rotation);
const float sin_a = std::sin(rotation);
const cv::Point2f rot_rad(cos_a * rad_x - sin_a * rad_y,
sin_a * rad_x + cos_a * rad_y);
const cv::Point2f dst_point_image = dst_center + rot_rad;
const cv::Point2f dst_point(dst_point_image.x / width,
dst_point_image.y / height);
return dst_point;
}
} // namespace mediapipe
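
A hedged usage sketch tying these helpers together; the function and the IoU threshold below are purely illustrative and not part of this commit:

// Hypothetical usage of the box_util helpers; not part of this commit.
#include <vector>
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/graphs/object_detection_3d/calculators/box_util.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"

void BoxUtilExample(const std::vector<cv::Point2f>& projected_keypoints,
                    const mediapipe::TimedBoxProto& tracked_box,
                    float image_width, float image_height) {
  // Axis-aligned 2D box enclosing the projected keypoints.
  mediapipe::TimedBoxProto detected_box;
  mediapipe::ComputeBoundingRect(projected_keypoints, &detected_box);
  // Overlap with a box reported by the 2D tracker.
  const float iou = mediapipe::ComputeBoxIoU(detected_box, tracked_box);
  if (iou > 0.5f) {  // Threshold chosen for illustration only.
    // Map the first keypoint from the detected box into the tracked box.
    const cv::Point2f mapped =
        mediapipe::MapPoint(detected_box, tracked_box,
                            projected_keypoints.front(), image_width,
                            image_height);
    (void)mapped;
  }
}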

View File

@ -0,0 +1,50 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_BOX_UTIL_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_BOX_UTIL_H_
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
// This function fills the geometry of the TimedBoxProto. Id, timestamp etc.
// need to be set outside this function.
void ComputeBoundingRect(const std::vector<cv::Point2f>& points,
mediapipe::TimedBoxProto* box);
// This function computes the intersection over union between two boxes.
float ComputeBoxIoU(const TimedBoxProto& box1, const TimedBoxProto& box2);
// Computes the corners of the box.
// width and height are the image width and height, which are typically
// needed since the box is in normalized coordinates.
std::vector<cv::Point2f> ComputeBoxCorners(const TimedBoxProto& box,
float width, float height);
// Computes the perspective transform from box1 to box2.
// The input argument aspect_ratio is width / height of the image.
// The returned matrix should be a 3x3 matrix.
cv::Mat PerspectiveTransformBetweenBoxes(const TimedBoxProto& src_box,
const TimedBoxProto& dst_box,
const float aspect_ratio);
// Maps a point according to the source and destination box locations.
cv::Point2f MapPoint(const TimedBoxProto& src_box, const TimedBoxProto& dst_box,
const cv::Point2f& src_point, float width, float height);
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_BOX_UTIL_H_

View File

@ -0,0 +1,123 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/box_util.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
namespace {
TEST(BoxUtilTest, TestComputeBoundingRect) {
std::vector<cv::Point2f> points{
cv::Point2f(0.35f, 0.25f), cv::Point2f(0.3f, 0.3f),
cv::Point2f(0.2f, 0.4f), cv::Point2f(0.3f, 0.1f),
cv::Point2f(0.2f, 0.2f), cv::Point2f(0.5f, 0.3f),
cv::Point2f(0.4f, 0.4f), cv::Point2f(0.5f, 0.1f),
cv::Point2f(0.4f, 0.2f)};
TimedBoxProto box;
ComputeBoundingRect(points, &box);
EXPECT_FLOAT_EQ(0.1f, box.top());
EXPECT_FLOAT_EQ(0.4f, box.bottom());
EXPECT_FLOAT_EQ(0.2f, box.left());
EXPECT_FLOAT_EQ(0.5f, box.right());
}
TEST(BoxUtilTest, TestComputeBoxIoU) {
TimedBoxProto box1;
box1.set_top(0.2f);
box1.set_bottom(0.6f);
box1.set_left(0.1f);
box1.set_right(0.3f);
box1.set_rotation(0.0f);
TimedBoxProto box2 = box1;
box2.set_rotation(/*pi/2*/ 1.570796f);
const float box_area =
(box1.bottom() - box1.top()) * (box1.right() - box1.left());
const float box_intersection =
(box1.right() - box1.left()) * (box1.right() - box1.left());
const float expected_iou =
box_intersection / (box_area * 2 - box_intersection);
EXPECT_NEAR(expected_iou, ComputeBoxIoU(box1, box2), 3e-5f);
TimedBoxProto box3;
box3.set_top(0.2f);
box3.set_bottom(0.6f);
box3.set_left(0.5f);
box3.set_right(0.7f);
EXPECT_NEAR(0.0f, ComputeBoxIoU(box1, box3), 3e-5f);
}
TEST(BoxUtilTest, TestPerspectiveTransformBetweenBoxes) {
TimedBoxProto box1;
const float height = 4.0f;
const float width = 3.0f;
box1.set_top(1.0f / height);
box1.set_bottom(2.0f / height);
box1.set_left(1.0f / width);
box1.set_right(2.0f / width);
TimedBoxProto box2;
box2.set_top(1.0f / height);
box2.set_bottom(2.0f / height);
box2.set_left(1.0f / width);
box2.set_right(2.0f / width);
box2.set_rotation(/*pi/4*/ -0.785398f);
cv::Mat transform =
PerspectiveTransformBetweenBoxes(box1, box2, width / height);
const float kTolerence = 1e-5f;
const cv::Vec3f original_position(1.5f / width, 1.0f / height, 1.0f);
const cv::Mat transformed_position = transform * cv::Mat(original_position);
EXPECT_NEAR(
(1.5f - 0.5f * std::sqrt(2) / 2.0f) / width,
transformed_position.at<float>(0) / transformed_position.at<float>(2),
kTolerence);
EXPECT_NEAR(
(1.5f - 0.5f * std::sqrt(2) / 2.0f) / height,
transformed_position.at<float>(1) / transformed_position.at<float>(2),
kTolerence);
}
TEST(BoxUtilTest, TestMapPoint) {
const float height = 4.0f;
const float width = 3.0f;
TimedBoxProto box1;
box1.set_top(1.0f / height);
box1.set_bottom(2.0f / height);
box1.set_left(1.0f / width);
box1.set_right(2.0f / width);
TimedBoxProto box2;
box2.set_top(1.0f / height);
box2.set_bottom(2.0f / height);
box2.set_left(1.0f / width);
box2.set_right(2.0f / width);
box2.set_rotation(/*pi/4*/ -0.785398f);
cv::Point2f src_point1(1.2f / width, 1.4f / height);
cv::Point2f src_point2(1.3f / width, 1.8f / height);
const float distance1 = std::sqrt(0.1 * 0.1 + 0.4 * 0.4);
cv::Point2f dst_point1 = MapPoint(box1, box2, src_point1, width, height);
cv::Point2f dst_point2 = MapPoint(box1, box2, src_point2, width, height);
const float distance2 =
std::sqrt((dst_point1.x * width - dst_point2.x * width) *
(dst_point1.x * width - dst_point2.x * width) +
(dst_point1.y * height - dst_point2.y * height) *
(dst_point1.y * height - dst_point2.y * height));
EXPECT_NEAR(distance1, distance2, 1e-5f);
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,47 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
message CameraParametersProto {
  // This number is non-negative; it represents the camera height above the
  // ground, normalized by focal length.
optional float height_above_ground = 1 [default = 100.0];
  // Width of the image in portrait orientation, normalized by focal length.
optional float portrait_width = 2 [default = 1.0103];
  // Height of the image in portrait orientation, normalized by focal length.
optional float portrait_height = 3 [default = 1.3435];
enum ImageOrientation {
PORTRAIT_ORIENTATION = 0;
LANDSCAPE_ORIENTATION = 1;
}
// The input image orientation
optional ImageOrientation image_orientation = 4
[default = PORTRAIT_ORIENTATION];
// This defines the projection method from 2D screen to 3D.
enum ProjectionMode {
UNSPECIFIED = 0;
// Projects 2D point to ground plane (horizontal plane).
GROUND_PLANE = 1;
// Projects 2D point to sphere.
SPHERE = 2;
}
optional ProjectionMode projection_mode = 5 [default = GROUND_PLANE];
// Radius of sphere when using the SPHERE projection mode above.
// The value is normalized by focal length.
optional float projection_sphere_radius = 6 [default = 100.0];
}
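
A small, hedged sketch of populating this message from C++; the generated-header name camera_parameters.pb.h is an assumption based on the usual proto naming, and the values simply echo the defaults except for the orientation and projection mode:

// Illustrative only: camera parameters for a landscape device using the
// sphere projection mode. Header path assumes the proto file is named
// camera_parameters.proto.
#include "mediapipe/graphs/object_detection_3d/calculators/camera_parameters.pb.h"

mediapipe::CameraParametersProto MakeExampleCameraParameters() {
  mediapipe::CameraParametersProto params;
  params.set_height_above_ground(100.0f);
  params.set_image_orientation(
      mediapipe::CameraParametersProto::LANDSCAPE_ORIENTATION);
  params.set_projection_mode(mediapipe::CameraParametersProto::SPHERE);
  params.set_projection_sphere_radius(100.0f);
  return params;
}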

View File

@ -0,0 +1,257 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/decoder.h"
#include <limits>
#include "Eigen/Dense"
#include "mediapipe/framework/port/canonical_errors.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
namespace mediapipe {
constexpr int Decoder::kNumOffsetmaps = 16;
namespace {
void SetPoint3d(float x, float y, float z, Point3D* point_3d) {
point_3d->set_x(x);
point_3d->set_y(y);
point_3d->set_z(z);
}
} // namespace
FrameAnnotation Decoder::DecodeBoundingBoxKeypoints(
const cv::Mat& heatmap, const cv::Mat& offsetmap) const {
CHECK_EQ(1, heatmap.channels());
CHECK_EQ(kNumOffsetmaps, offsetmap.channels());
CHECK_EQ(heatmap.cols, offsetmap.cols);
CHECK_EQ(heatmap.rows, offsetmap.rows);
const float offset_scale = std::min(offsetmap.cols, offsetmap.rows);
const std::vector<cv::Point> center_points = ExtractCenterKeypoints(heatmap);
std::vector<BeliefBox> boxes;
for (const auto& center_point : center_points) {
BeliefBox box;
box.box_2d.emplace_back(center_point.x, center_point.y);
const int center_x = static_cast<int>(std::round(center_point.x));
const int center_y = static_cast<int>(std::round(center_point.y));
box.belief = heatmap.at<float>(center_y, center_x);
if (config_.voting_radius() > 1) {
DecodeByVoting(heatmap, offsetmap, center_x, center_y, offset_scale,
offset_scale, &box);
} else {
DecodeByPeak(offsetmap, center_x, center_y, offset_scale, offset_scale,
&box);
}
if (IsNewBox(&boxes, &box)) {
boxes.push_back(std::move(box));
}
}
const float x_scale = 1.0f / offsetmap.cols;
const float y_scale = 1.0f / offsetmap.rows;
FrameAnnotation frame_annotations;
for (const auto& box : boxes) {
auto* object = frame_annotations.add_annotations();
for (const auto& point : box.box_2d) {
auto* point2d = object->add_keypoints()->mutable_point_2d();
point2d->set_x(point.first * x_scale);
point2d->set_y(point.second * y_scale);
}
}
return frame_annotations;
}
void Decoder::DecodeByPeak(const cv::Mat& offsetmap, int center_x, int center_y,
float offset_scale_x, float offset_scale_y,
BeliefBox* box) const {
const auto& offset = offsetmap.at<cv::Vec<float, kNumOffsetmaps>>(
/*row*/ center_y, /*col*/ center_x);
for (int i = 0; i < kNumOffsetmaps / 2; ++i) {
const float x_offset = offset[2 * i] * offset_scale_x;
const float y_offset = offset[2 * i + 1] * offset_scale_y;
box->box_2d.emplace_back(center_x + x_offset, center_y + y_offset);
}
}
void Decoder::DecodeByVoting(const cv::Mat& heatmap, const cv::Mat& offsetmap,
int center_x, int center_y, float offset_scale_x,
float offset_scale_y, BeliefBox* box) const {
// Votes at the center.
const auto& center_offset = offsetmap.at<cv::Vec<float, kNumOffsetmaps>>(
/*row*/ center_y, /*col*/ center_x);
std::vector<float> center_votes(kNumOffsetmaps, 0.f);
for (int i = 0; i < kNumOffsetmaps / 2; ++i) {
center_votes[2 * i] = center_x + center_offset[2 * i] * offset_scale_x;
center_votes[2 * i + 1] =
center_y + center_offset[2 * i + 1] * offset_scale_y;
}
// Find voting window.
int x_min = std::max(0, center_x - config_.voting_radius());
int y_min = std::max(0, center_y - config_.voting_radius());
int width = std::min(heatmap.cols - x_min, config_.voting_radius() * 2 + 1);
int height = std::min(heatmap.rows - y_min, config_.voting_radius() * 2 + 1);
cv::Rect rect(x_min, y_min, width, height);
cv::Mat heat = heatmap(rect);
cv::Mat offset = offsetmap(rect);
for (int i = 0; i < kNumOffsetmaps / 2; ++i) {
float x_sum = 0.f;
float y_sum = 0.f;
float votes = 0.f;
for (int r = 0; r < heat.rows; ++r) {
for (int c = 0; c < heat.cols; ++c) {
const float belief = heat.at<float>(r, c);
if (belief < config_.voting_threshold()) {
continue;
}
float offset_x =
offset.at<cv::Vec<float, kNumOffsetmaps>>(r, c)[2 * i] *
offset_scale_x;
float offset_y =
offset.at<cv::Vec<float, kNumOffsetmaps>>(r, c)[2 * i + 1] *
offset_scale_y;
float vote_x = c + rect.x + offset_x;
float vote_y = r + rect.y + offset_y;
float x_diff = std::abs(vote_x - center_votes[2 * i]);
float y_diff = std::abs(vote_y - center_votes[2 * i + 1]);
if (x_diff > config_.voting_allowance() ||
y_diff > config_.voting_allowance()) {
continue;
}
x_sum += vote_x * belief;
y_sum += vote_y * belief;
votes += belief;
}
}
box->box_2d.emplace_back(x_sum / votes, y_sum / votes);
}
}
bool Decoder::IsNewBox(std::vector<BeliefBox>* boxes, BeliefBox* box) const {
for (auto& b : *boxes) {
if (IsIdentical(b, *box)) {
if (b.belief < box->belief) {
std::swap(b, *box);
}
return false;
}
}
return true;
}
bool Decoder::IsIdentical(const BeliefBox& box_1,
const BeliefBox& box_2) const {
// Skip the center point.
for (int i = 1; i < box_1.box_2d.size(); ++i) {
const float x_diff =
std::abs(box_1.box_2d[i].first - box_2.box_2d[i].first);
const float y_diff =
std::abs(box_1.box_2d[i].second - box_2.box_2d[i].second);
if (x_diff > config_.voting_allowance() ||
y_diff > config_.voting_allowance()) {
return false;
}
}
return true;
}
std::vector<cv::Point> Decoder::ExtractCenterKeypoints(
const cv::Mat& center_heatmap) const {
cv::Mat max_filtered_heatmap(center_heatmap.rows, center_heatmap.cols,
center_heatmap.type());
const int kernel_size =
static_cast<int>(config_.local_max_distance() * 2 + 1 + 0.5f);
const cv::Size morph_size(kernel_size, kernel_size);
cv::dilate(center_heatmap, max_filtered_heatmap,
cv::getStructuringElement(cv::MORPH_RECT, morph_size));
cv::Mat peak_map;
cv::bitwise_and((center_heatmap >= max_filtered_heatmap),
(center_heatmap >= config_.heatmap_threshold()), peak_map);
std::vector<cv::Point> locations; // output, locations of non-zero pixels
cv::findNonZero(peak_map, locations);
return locations;
}
absl::Status Decoder::Lift2DTo3D(
const Eigen::Matrix<float, 4, 4, Eigen::RowMajor>& projection_matrix,
bool portrait, FrameAnnotation* estimated_box) const {
CHECK(estimated_box != nullptr);
const float fx = projection_matrix(0, 0);
const float fy = projection_matrix(1, 1);
const float cx = projection_matrix(0, 2);
const float cy = projection_matrix(1, 2);
for (auto& annotation : *estimated_box->mutable_annotations()) {
Eigen::Matrix<float, 16, 12, Eigen::RowMajor> m =
Eigen::Matrix<float, 16, 12, Eigen::RowMajor>::Zero(16, 12);
CHECK_EQ(9, annotation.keypoints_size());
float u, v;
for (int i = 0; i < 8; ++i) {
const auto& keypoint2d = annotation.keypoints(i + 1).point_2d();
if (portrait) {
// swap x and y given that our image is in portrait orientation
u = keypoint2d.y() * 2 - 1;
v = keypoint2d.x() * 2 - 1;
} else {
u = keypoint2d.x() * 2 - 1;
v = 1 - keypoint2d.y() * 2; // (1 - keypoint2d.y()) * 2 - 1
}
for (int j = 0; j < 4; ++j) {
// For each of the 4 control points, formulate two rows of the
// m matrix (two equations).
const float control_alpha = epnp_alpha_(i, j);
m(i * 2, j * 3) = fx * control_alpha;
m(i * 2, j * 3 + 2) = (cx + u) * control_alpha;
m(i * 2 + 1, j * 3 + 1) = fy * control_alpha;
m(i * 2 + 1, j * 3 + 2) = (cy + v) * control_alpha;
}
}
    // This is a self-adjoint matrix. Use SelfAdjointEigenSolver for a fast
    // and stable solution.
Eigen::Matrix<float, 12, 12, Eigen::RowMajor> mt_m = m.transpose() * m;
Eigen::SelfAdjointEigenSolver<Eigen::Matrix<float, 12, 12, Eigen::RowMajor>>
eigen_solver(mt_m);
if (eigen_solver.info() != Eigen::Success) {
return absl::AbortedError("Eigen decomposition failed.");
}
CHECK_EQ(12, eigen_solver.eigenvalues().size());
    // Eigenvalues are sorted in increasing order for SelfAdjointEigenSolver
    // only! Other Eigen solvers do not guarantee increasing order. Here we
    // take the eigenvector corresponding to the first/smallest eigenvalue,
    // since we used SelfAdjointEigenSolver.
Eigen::VectorXf eigen_vec = eigen_solver.eigenvectors().col(0);
Eigen::Map<Eigen::Matrix<float, 4, 3, Eigen::RowMajor>> control_matrix(
eigen_vec.data());
if (control_matrix(0, 2) > 0) {
control_matrix = -control_matrix;
}
// First set the center keypoint.
SetPoint3d(control_matrix(0, 0), control_matrix(0, 1), control_matrix(0, 2),
annotation.mutable_keypoints(0)->mutable_point_3d());
// Then set the 8 vertices.
Eigen::Matrix<float, 8, 3, Eigen::RowMajor> vertices =
epnp_alpha_ * control_matrix;
for (int i = 0; i < 8; ++i) {
SetPoint3d(vertices(i, 0), vertices(i, 1), vertices(i, 2),
annotation.mutable_keypoints(i + 1)->mutable_point_3d());
}
}
return absl::OkStatus();
}
} // namespace mediapipe
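
Lift2DTo3D() only reads the (0, 0), (1, 1), (0, 2) and (1, 2) entries of the projection matrix, i.e. the focal lengths and principal point of an NDC-style projection. A hedged sketch of assembling such a matrix by hand; the remaining entries follow an assumed OpenGL-like convention and are not read by the solver, and in practice the matrix is expected to come from the camera or AR framework rather than being built like this:

// Hedged sketch: only (0,0)=fx, (1,1)=fy, (0,2)=cx and (1,2)=cy are read by
// Lift2DTo3D(); the perspective rows below are assumptions.
#include "Eigen/Dense"

Eigen::Matrix<float, 4, 4, Eigen::RowMajor> MakeNdcProjection(float fx,
                                                              float fy,
                                                              float cx,
                                                              float cy) {
  Eigen::Matrix<float, 4, 4, Eigen::RowMajor> projection;
  projection.setZero();
  projection(0, 0) = fx;
  projection(1, 1) = fy;
  projection(0, 2) = cx;
  projection(1, 2) = cy;
  projection(2, 2) = -1.0f;  // Assumed OpenGL-style convention; unused here.
  projection(3, 2) = -1.0f;  // Assumed perspective-divide row; unused here.
  return projection;
}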

View File

@ -0,0 +1,109 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_DECODER_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_DECODER_H_
#include <vector>
#include "Eigen/Dense"
#include "absl/status/status.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/belief_decoder_config.pb.h"
namespace mediapipe {
// Decodes a 3D bounding box from heatmaps and offset maps. In the future,
// if we want to develop a decoder for generic skeletons, we will need to
// generalize this class and add a few child classes.
class Decoder {
public:
static const int kNumOffsetmaps;
explicit Decoder(const BeliefDecoderConfig& config) : config_(config) {
epnp_alpha_ << 4.0f, -1.0f, -1.0f, -1.0f, 2.0f, -1.0f, -1.0f, 1.0f, 2.0f,
-1.0f, 1.0f, -1.0f, 0.0f, -1.0f, 1.0f, 1.0f, 2.0f, 1.0f, -1.0f, -1.0f,
0.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f, 1.0f, -1.0f, -2.0f, 1.0f, 1.0f,
1.0f;
}
// Decodes bounding boxes from predicted heatmap and offset maps.
// Input:
// heatmap: a single channel cv::Mat representing center point heatmap
// offsetmap: a 16 channel cv::Mat representing the 16 offset maps
// (2 for each of the 8 vertices)
// Output:
// Outputs 3D bounding boxes 2D vertices, represented by 'point_2d' field
// in each 'keypoints' field of object annotations.
FrameAnnotation DecodeBoundingBoxKeypoints(const cv::Mat& heatmap,
const cv::Mat& offsetmap) const;
// Lifts the estimated 2D projections of bounding box vertices to 3D.
// This function uses the EPnP approach described in this paper:
// https://icwww.epfl.ch/~lepetit/papers/lepetit_ijcv08.pdf .
// Input:
// projection_matrix: the projection matrix from 3D coordinate
// to screen coordinate.
// The 2D screen coordinate is defined as: u is along the long
// edge of the device, pointing down; v is along the short edge
// of the device, pointing right.
  //   portrait: a boolean variable indicating whether our images are
  //     obtained in portrait orientation or not.
// estimated_box: annotation with point_2d field populated with
// 2d vertices.
// Output:
// estimated_box: annotation with point_3d field populated with
// 3d vertices.
absl::Status Lift2DTo3D(
const Eigen::Matrix<float, 4, 4, Eigen::RowMajor>& projection_matrix,
bool portrait, FrameAnnotation* estimated_box) const;
private:
struct BeliefBox {
float belief;
std::vector<std::pair<float, float>> box_2d;
};
std::vector<cv::Point> ExtractCenterKeypoints(
const cv::Mat& center_heatmap) const;
// Decodes 2D keypoints at the peak point.
void DecodeByPeak(const cv::Mat& offsetmap, int center_x, int center_y,
float offset_scale_x, float offset_scale_y,
BeliefBox* box) const;
// Decodes 2D keypoints by voting around the peak.
void DecodeByVoting(const cv::Mat& heatmap, const cv::Mat& offsetmap,
int center_x, int center_y, float offset_scale_x,
float offset_scale_y, BeliefBox* box) const;
// Returns true if it is a new box. Otherwise, it may replace an existing box
// if the new box's belief is higher.
bool IsNewBox(std::vector<BeliefBox>* boxes, BeliefBox* box) const;
// Returns true if the two boxes are identical.
bool IsIdentical(const BeliefBox& box_1, const BeliefBox& box_2) const;
BeliefDecoderConfig config_;
// Following equation (1) in this paper
// https://icwww.epfl.ch/~lepetit/papers/lepetit_ijcv08.pdf,
// this variable denotes the coefficients for the 4 control points
// for each of the 8 3D box vertices.
Eigen::Matrix<float, 8, 4, Eigen::RowMajor> epnp_alpha_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_DECODER_H_
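
A hedged end-to-end usage sketch of this class. The heatmap/offsetmap extraction from the network output and the projection matrix are assumed to be provided by the surrounding calculator (not part of this excerpt), and the helper name DecodeAndLift is hypothetical:

// Hypothetical helper, not part of this commit. heatmap must be a
// single-channel float cv::Mat and offsetmap must have
// Decoder::kNumOffsetmaps (16) float channels, as CHECKed in
// DecodeBoundingBoxKeypoints().
#include "Eigen/Dense"
#include "absl/status/status.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/decoder.h"

absl::Status DecodeAndLift(
    const mediapipe::Decoder& decoder, const cv::Mat& heatmap,
    const cv::Mat& offsetmap,
    const Eigen::Matrix<float, 4, 4, Eigen::RowMajor>& projection_matrix,
    mediapipe::FrameAnnotation* annotation) {
  *annotation = decoder.DecodeBoundingBoxKeypoints(heatmap, offsetmap);
  // portrait=true matches the PORTRAIT_ORIENTATION default in
  // CameraParametersProto; pass false for landscape input.
  return decoder.Lift2DTo3D(projection_matrix, /*portrait=*/true, annotation);
}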

View File

@ -0,0 +1,115 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <utility>
#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/box_util.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace {
constexpr char kInputStreamTag[] = "FRAME_ANNOTATION";
constexpr char kOutputStreamTag[] = "BOXES";
} // namespace
namespace mediapipe {
// Converts FrameAnnotation 3d bounding box detections to TimedBoxProtoList
// 2d bounding boxes.
//
// Input:
// FRAME_ANNOTATION - 3d bounding box annotation.
// Output:
// BOXES - 2d bounding box enclosing the projection of 3d box.
//
// Usage example:
// node {
// calculator: "FrameAnnotationToTimedBoxListCalculator"
// input_stream: "FRAME_ANNOTATION:frame_annotation"
// output_stream: "BOXES:boxes"
// }
class FrameAnnotationToTimedBoxListCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
};
REGISTER_CALCULATOR(FrameAnnotationToTimedBoxListCalculator);
::mediapipe::Status FrameAnnotationToTimedBoxListCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(!cc->Inputs().GetTags().empty());
RET_CHECK(!cc->Outputs().GetTags().empty());
if (cc->Inputs().HasTag(kInputStreamTag)) {
cc->Inputs().Tag(kInputStreamTag).Set<FrameAnnotation>();
}
if (cc->Outputs().HasTag(kOutputStreamTag)) {
cc->Outputs().Tag(kOutputStreamTag).Set<TimedBoxProtoList>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameAnnotationToTimedBoxListCalculator::Open(
CalculatorContext* cc) {
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameAnnotationToTimedBoxListCalculator::Process(
CalculatorContext* cc) {
if (cc->Inputs().HasTag(kInputStreamTag) &&
!cc->Inputs().Tag(kInputStreamTag).IsEmpty()) {
const auto& frame_annotation =
cc->Inputs().Tag(kInputStreamTag).Get<FrameAnnotation>();
auto output_objects = absl::make_unique<TimedBoxProtoList>();
for (const auto& annotation : frame_annotation.annotations()) {
std::vector<cv::Point2f> key_points;
for (const auto& keypoint : annotation.keypoints()) {
key_points.push_back(
cv::Point2f(keypoint.point_2d().x(), keypoint.point_2d().y()));
}
TimedBoxProto* added_box = output_objects->add_box();
ComputeBoundingRect(key_points, added_box);
added_box->set_id(annotation.object_id());
const int64 time_msec =
static_cast<int64>(std::round(frame_annotation.timestamp() / 1000));
added_box->set_time_msec(time_msec);
}
// Output
if (cc->Outputs().HasTag(kOutputStreamTag)) {
cc->Outputs()
.Tag(kOutputStreamTag)
.Add(output_objects.release(), cc->InputTimestamp());
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameAnnotationToTimedBoxListCalculator::Close(
CalculatorContext* cc) {
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,102 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/frame_annotation_tracker.h"
#include "absl/container/flat_hash_set.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/box_util.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
void FrameAnnotationTracker::AddDetectionResult(
const FrameAnnotation& frame_annotation) {
const int64 time_us =
static_cast<int64>(std::round(frame_annotation.timestamp()));
for (const auto& object_annotation : frame_annotation.annotations()) {
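// Cache the detection, keyed by timestamp_us + object_id (see the class
// comment in frame_annotation_tracker.h).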
detected_objects_[time_us + object_annotation.object_id()] =
object_annotation;
}
}
FrameAnnotation FrameAnnotationTracker::ConsolidateTrackingResult(
const TimedBoxProtoList& tracked_boxes,
absl::flat_hash_set<int>* cancel_object_ids) {
CHECK(cancel_object_ids != nullptr);
FrameAnnotation frame_annotation;
std::vector<int64> keys_to_be_deleted;
for (const auto& detected_obj : detected_objects_) {
const int object_id = detected_obj.second.object_id();
if (cancel_object_ids->contains(object_id)) {
// Remember duplicated detections' keys.
keys_to_be_deleted.push_back(detected_obj.first);
continue;
}
TimedBoxProto ref_box;
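// Find the tracked box with a matching object id.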
for (const auto& box : tracked_boxes.box()) {
if (box.id() == object_id) {
ref_box = box;
break;
}
}
if (!ref_box.has_id() || ref_box.id() < 0) {
LOG(ERROR) << "Can't find matching tracked box for object id: "
<< object_id << ". Likely lost tracking of it.";
keys_to_be_deleted.push_back(detected_obj.first);
continue;
}
// Find duplicated boxes
for (const auto& box : tracked_boxes.box()) {
if (box.id() != object_id) {
if (ComputeBoxIoU(ref_box, box) > iou_threshold_) {
cancel_object_ids->insert(box.id());
}
}
}
// Map ObjectAnnotation from detection to tracked time.
// First, gather all keypoints from source detection.
std::vector<cv::Point2f> key_points;
for (const auto& keypoint : detected_obj.second.keypoints()) {
key_points.push_back(
cv::Point2f(keypoint.point_2d().x(), keypoint.point_2d().y()));
}
// Second, find source box.
TimedBoxProto src_box;
ComputeBoundingRect(key_points, &src_box);
ObjectAnnotation* tracked_obj = frame_annotation.add_annotations();
tracked_obj->set_object_id(ref_box.id());
// Finally, map all keypoints in the source detection to tracked location.
for (const auto& keypoint : detected_obj.second.keypoints()) {
cv::Point2f dst = MapPoint(
src_box, ref_box,
cv::Point2f(keypoint.point_2d().x(), keypoint.point_2d().y()),
img_width_, img_height_);
auto* dst_point = tracked_obj->add_keypoints()->mutable_point_2d();
dst_point->set_x(dst.x);
dst_point->set_y(dst.y);
}
}
for (const auto& key : keys_to_be_deleted) {
detected_objects_.erase(key);
}
return frame_annotation;
}
} // namespace mediapipe

View File

@ -0,0 +1,62 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_FRAME_ANNOTATION_TRACKER_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_FRAME_ANNOTATION_TRACKER_H_
#include <functional>
#include "absl/container/btree_map.h"
#include "absl/container/flat_hash_set.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
class FrameAnnotationTracker {
public:
// If two bounding boxes have an IoU over iou_threshold, then we consider them
// to describe the same object.
FrameAnnotationTracker(float iou_threshold, float img_width, float img_height)
: iou_threshold_(iou_threshold),
img_width_(img_width),
img_height_(img_height) {}
// Adds detection results from an external detector.
void AddDetectionResult(const FrameAnnotation& frame_annotation);
// Consolidates the tracking result from an external tracker, associates it
// with the detection result by object id, and produces the corresponding
// result as a FrameAnnotation. When there are duplicates, the ids that need
// to be cancelled are output in cancel_object_ids.
// Note that the returned FrameAnnotation has no timestamp; the caller needs
// to fill in that field.
FrameAnnotation ConsolidateTrackingResult(
const TimedBoxProtoList& tracked_boxes,
absl::flat_hash_set<int>* cancel_object_ids);
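// A minimal usage sketch (variable names here are illustrative):
//   FrameAnnotationTracker tracker(/*iou_threshold=*/0.5f, img_width,
//                                  img_height);
//   tracker.AddDetectionResult(detections);
//   absl::flat_hash_set<int> cancel_ids;
//   FrameAnnotation tracked =
//       tracker.ConsolidateTrackingResult(tracked_boxes, &cancel_ids);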
private:
float iou_threshold_;
float img_width_;
float img_height_;
// Cached detection results over time.
// Key is timestamp_us + object_id.
absl::btree_map<int64, ObjectAnnotation, std::greater<int64>>
detected_objects_;
};
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_FRAME_ANNOTATION_TRACKER_H_

View File

@ -0,0 +1,137 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/container/flat_hash_set.h"
#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/frame_annotation_tracker.h"
#include "mediapipe/graphs/object_detection_3d/calculators/frame_annotation_tracker_calculator.pb.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace {
constexpr char kInputFrameAnnotationTag[] = "FRAME_ANNOTATION";
constexpr char kInputTrackedBoxesTag[] = "TRACKED_BOXES";
constexpr char kOutputTrackedFrameAnnotationTag[] = "TRACKED_FRAME_ANNOTATION";
constexpr char kOutputCancelObjectIdTag[] = "CANCEL_OBJECT_ID";
} // namespace
namespace mediapipe {
// Tracks frame annotations seeded/updated by the FRAME_ANNOTATION input
// stream. When using this calculator, make sure FRAME_ANNOTATION and
// TRACKED_BOXES are in different sync sets.
//
// Input:
// FRAME_ANNOTATION - frame annotation.
// TRACKED_BOXES - 2d box tracking result
// Output:
// TRACKED_FRAME_ANNOTATION - annotation inferred from 2d tracking result.
// CANCEL_OBJECT_ID - object id that needs to be cancelled from the tracker.
//
// Usage example:
// node {
// calculator: "FrameAnnotationTrackerCalculator"
// input_stream: "FRAME_ANNOTATION:frame_annotation"
// input_stream: "TRACKED_BOXES:tracked_boxes"
// output_stream: "TRACKED_FRAME_ANNOTATION:tracked_frame_annotation"
// output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
// }
class FrameAnnotationTrackerCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
private:
std::unique_ptr<FrameAnnotationTracker> frame_annotation_tracker_;
};
REGISTER_CALCULATOR(FrameAnnotationTrackerCalculator);
::mediapipe::Status FrameAnnotationTrackerCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(!cc->Inputs().GetTags().empty());
RET_CHECK(!cc->Outputs().GetTags().empty());
if (cc->Inputs().HasTag(kInputFrameAnnotationTag)) {
cc->Inputs().Tag(kInputFrameAnnotationTag).Set<FrameAnnotation>();
}
if (cc->Inputs().HasTag(kInputTrackedBoxesTag)) {
cc->Inputs().Tag(kInputTrackedBoxesTag).Set<TimedBoxProtoList>();
}
if (cc->Outputs().HasTag(kOutputTrackedFrameAnnotationTag)) {
cc->Outputs().Tag(kOutputTrackedFrameAnnotationTag).Set<FrameAnnotation>();
}
if (cc->Outputs().HasTag(kOutputCancelObjectIdTag)) {
cc->Outputs().Tag(kOutputCancelObjectIdTag).Set<int>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameAnnotationTrackerCalculator::Open(
CalculatorContext* cc) {
const auto& options = cc->Options<FrameAnnotationTrackerCalculatorOptions>();
frame_annotation_tracker_ = absl::make_unique<FrameAnnotationTracker>(
options.iou_threshold(), options.img_width(), options.img_height());
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameAnnotationTrackerCalculator::Process(
CalculatorContext* cc) {
if (cc->Inputs().HasTag(kInputFrameAnnotationTag) &&
!cc->Inputs().Tag(kInputFrameAnnotationTag).IsEmpty()) {
frame_annotation_tracker_->AddDetectionResult(
cc->Inputs().Tag(kInputFrameAnnotationTag).Get<FrameAnnotation>());
}
if (cc->Inputs().HasTag(kInputTrackedBoxesTag) &&
!cc->Inputs().Tag(kInputTrackedBoxesTag).IsEmpty() &&
cc->Outputs().HasTag(kOutputTrackedFrameAnnotationTag)) {
absl::flat_hash_set<int> cancel_object_ids;
auto output_frame_annotation = absl::make_unique<FrameAnnotation>();
*output_frame_annotation =
frame_annotation_tracker_->ConsolidateTrackingResult(
cc->Inputs().Tag(kInputTrackedBoxesTag).Get<TimedBoxProtoList>(),
&cancel_object_ids);
output_frame_annotation->set_timestamp(cc->InputTimestamp().Microseconds());
cc->Outputs()
.Tag(kOutputTrackedFrameAnnotationTag)
.Add(output_frame_annotation.release(), cc->InputTimestamp());
if (cc->Outputs().HasTag(kOutputCancelObjectIdTag)) {
auto packet_timestamp = cc->InputTimestamp();
for (const auto& id : cancel_object_ids) {
// The timestamp is incremented (by 1 us) because currently the box
// tracker calculator only accepts one cancel object ID for any given
// timestamp.
cc->Outputs()
.Tag(kOutputCancelObjectIdTag)
.AddPacket(mediapipe::MakePacket<int>(id).At(packet_timestamp++));
}
}
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status FrameAnnotationTrackerCalculator::Close(
CalculatorContext* cc) {
return ::mediapipe::OkStatus();
}
} // namespace mediapipe

View File

@ -0,0 +1,36 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the FrameAnnotationTrackerCalculatorOptions.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message FrameAnnotationTrackerCalculatorOptions {
extend CalculatorOptions {
optional FrameAnnotationTrackerCalculatorOptions ext = 291291253;
}
// The threshold on intersection-over-union (IoU). We consider boxes with an
// IoU larger than this threshold to be duplicates.
optional float iou_threshold = 1 [default = 0.5];
// We need the image dimensions to properly compute annotation locations.
optional float img_width = 2;
optional float img_height = 3;
}

View File

@ -0,0 +1,143 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/frame_annotation_tracker.h"
#include "absl/container/flat_hash_set.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/util/tracking/box_tracker.pb.h"
namespace mediapipe {
namespace {
// Create a new object annotation by shifting a reference
// object annotation.
ObjectAnnotation ShiftObject2d(const ObjectAnnotation& ref_obj, float dx,
float dy) {
ObjectAnnotation obj = ref_obj;
for (auto& keypoint : *(obj.mutable_keypoints())) {
const float ref_x = keypoint.point_2d().x();
const float ref_y = keypoint.point_2d().y();
keypoint.mutable_point_2d()->set_x(ref_x + dx);
keypoint.mutable_point_2d()->set_y(ref_y + dy);
}
return obj;
}
TimedBoxProto ShiftBox(const TimedBoxProto& ref_box, float dx, float dy) {
TimedBoxProto box = ref_box;
box.set_top(ref_box.top() + dy);
box.set_bottom(ref_box.bottom() + dy);
box.set_left(ref_box.left() + dx);
box.set_right(ref_box.right() + dx);
return box;
}
// Constructs a fixed ObjectAnnotation.
ObjectAnnotation ConstructFixedObject(
const std::vector<std::vector<float>>& points) {
ObjectAnnotation obj;
for (const auto& point : points) {
auto* keypoint = obj.add_keypoints();
CHECK_EQ(2, point.size());
keypoint->mutable_point_2d()->set_x(point[0]);
keypoint->mutable_point_2d()->set_y(point[1]);
}
return obj;
}
TEST(FrameAnnotationTrackerTest, TestConsolidation) {
// Add 4 detections represented by FrameAnnotation, of which 3 correspond
// to the same object.
ObjectAnnotation object1, object2, object3, object4;
// The bounding rectangle for these object keypoints is:
// x: [0.2, 0.5], y: [0.1, 0.4]
object3 = ConstructFixedObject({{0.35f, 0.25f},
{0.3f, 0.3f},
{0.2f, 0.4f},
{0.3f, 0.1f},
{0.2f, 0.2f},
{0.5f, 0.3f},
{0.4f, 0.4f},
{0.5f, 0.1f},
{0.4f, 0.2f}});
object3.set_object_id(3);
object1 = ShiftObject2d(object3, -0.05f, -0.05f);
object1.set_object_id(1);
object2 = ShiftObject2d(object3, 0.05f, 0.05f);
object2.set_object_id(2);
object4 = ShiftObject2d(object3, 0.2f, 0.2f);
object4.set_object_id(4);
FrameAnnotation frame_annotation_1;
frame_annotation_1.set_timestamp(30 * 1000); // 30ms
*(frame_annotation_1.add_annotations()) = object1;
*(frame_annotation_1.add_annotations()) = object4;
FrameAnnotation frame_annotation_2;
frame_annotation_2.set_timestamp(60 * 1000); // 60ms
*(frame_annotation_2.add_annotations()) = object2;
FrameAnnotation frame_annotation_3;
frame_annotation_3.set_timestamp(90 * 1000); // 90ms
*(frame_annotation_3.add_annotations()) = object3;
FrameAnnotationTracker frame_annotation_tracker(/*iou_threshold*/ 0.5f, 1.0f,
1.0f);
frame_annotation_tracker.AddDetectionResult(frame_annotation_1);
frame_annotation_tracker.AddDetectionResult(frame_annotation_2);
frame_annotation_tracker.AddDetectionResult(frame_annotation_3);
TimedBoxProtoList timed_box_proto_list;
TimedBoxProto* timed_box_proto = timed_box_proto_list.add_box();
timed_box_proto->set_top(0.4f);
timed_box_proto->set_bottom(0.7f);
timed_box_proto->set_left(0.6f);
timed_box_proto->set_right(0.9f);
timed_box_proto->set_id(3);
timed_box_proto->set_time_msec(150);
timed_box_proto = timed_box_proto_list.add_box();
*timed_box_proto = ShiftBox(timed_box_proto_list.box(0), 0.01f, 0.01f);
timed_box_proto->set_id(1);
timed_box_proto->set_time_msec(150);
timed_box_proto = timed_box_proto_list.add_box();
*timed_box_proto = ShiftBox(timed_box_proto_list.box(0), -0.01f, -0.01f);
timed_box_proto->set_id(2);
timed_box_proto->set_time_msec(150);
absl::flat_hash_set<int> cancel_object_ids;
FrameAnnotation tracked_detection =
frame_annotation_tracker.ConsolidateTrackingResult(timed_box_proto_list,
&cancel_object_ids);
EXPECT_EQ(2, cancel_object_ids.size());
EXPECT_EQ(1, cancel_object_ids.count(1));
EXPECT_EQ(1, cancel_object_ids.count(2));
EXPECT_EQ(1, tracked_detection.annotations_size());
EXPECT_EQ(3, tracked_detection.annotations(0).object_id());
EXPECT_EQ(object3.keypoints_size(),
tracked_detection.annotations(0).keypoints_size());
const float x_offset = 0.4f;
const float y_offset = 0.3f;
const float tolerance = 1e-5f;
for (int i = 0; i < object3.keypoints_size(); ++i) {
const auto& point_2d =
tracked_detection.annotations(0).keypoints(i).point_2d();
EXPECT_NEAR(point_2d.x(), object3.keypoints(i).point_2d().x() + x_offset,
tolerance);
EXPECT_NEAR(point_2d.y(), object3.keypoints(i).point_2d().y() + y_offset,
tolerance);
}
}
} // namespace
} // namespace mediapipe

View File

@ -0,0 +1,760 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(__ANDROID__)
#include "mediapipe/util/android/asset_manager_util.h"
#else
#include <fstream>
#include <iostream>
#endif
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/gpu/gl_calculator_helper.h"
#include "mediapipe/gpu/shader_util.h"
#include "mediapipe/graphs/object_detection_3d/calculators/camera_parameters.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/gl_animation_overlay_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"
namespace mediapipe {
namespace {
#if defined(GL_DEBUG)
#define GLCHECK(command) \
command; \
if (int err = glGetError()) LOG(ERROR) << "GL error detected: " << err;
#else
#define GLCHECK(command) command
#endif
// For ease of use, we prefer ImageFrame on Android and GpuBuffer otherwise.
#if defined(__ANDROID__)
typedef ImageFrame AssetTextureFormat;
#else
typedef GpuBuffer AssetTextureFormat;
#endif
enum { ATTRIB_VERTEX, ATTRIB_TEXTURE_POSITION, NUM_ATTRIBUTES };
static const int kNumMatrixEntries = 16;
// Hard-coded model matrix for testing.
static const float kModelMatrix[] = {0.83704215, -0.36174262, 0.41049102, 0.0,
0.06146407, 0.8076706, 0.5864218, 0.0,
-0.54367524, -0.4656292, 0.69828844, 0.0,
0.0, 0.0, -98.64117, 1.0};
// Loads a texture from an input side packet, streams in an animation file
// whose filename is given in another input side packet, and renders the
// animation over the screen according to the input timestamp and the desired
// animation FPS.
//
// Inputs:
// VIDEO (GpuBuffer, optional):
// If provided, the input buffer will be assumed to be unique, and will be
// consumed by this calculator and rendered to directly. The output video
// buffer will then be the released reference to the input video buffer.
// MODEL_MATRICES (TimedModelMatrixProtoList, optional):
// If provided, will set the model matrices for the objects to be rendered
// during future rendering calls.
//
// Input side packets:
// TEXTURE (ImageFrame on Android / GpuBuffer on iOS, required):
// Texture to use with animation file.
// ANIMATION_ASSET (String, required):
// Path of animation file to load and render. Should be generated by
// //java/com/google/android/apps/motionstills/SimpleObjEncryptor with
// --compressed_mode=true. See comments and documentation there for more
// information on custom .obj.uuu file format.
// CAMERA_PARAMETERS_PROTO_STRING (String, optional):
// Serialized proto std::string of CameraParametersProto. We need this to
// get the right aspect ratio and field of view.
// Options:
// aspect_ratio: the ratio between the rendered image width and height.
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
// is provided.
// vertical_fov_degrees: vertical field of view in degrees.
// It will be ignored if CAMERA_PARAMETERS_PROTO_STRING input side packet
// is provided.
// z_clipping_plane_near: near plane value for z-clipping.
// z_clipping_plane_far: far plane value for z-clipping.
// animation_speed_fps: speed at which to cycle through animation frames (in
// frames per second).
//
// Outputs:
// OUTPUT, or index 0 (GpuBuffer):
// Frames filled with the given texture.
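//
// Example graph node (a sketch; the stream and side-packet names here are
// illustrative, only the tags are defined by this calculator):
// node {
//   calculator: "GlAnimationOverlayCalculator"
//   input_stream: "VIDEO:input_video"
//   input_stream: "MODEL_MATRICES:model_matrices"
//   input_side_packet: "TEXTURE:box_texture"
//   input_side_packet: "ANIMATION_ASSET:box_asset_name"
//   output_stream: "OUTPUT:output_video"
// }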
// Simple helper-struct for containing the parsed geometry data from a 3D
// animation frame for rendering.
struct TriangleMesh {
int index_count = 0; // Needed for glDrawElements rendering call
std::unique_ptr<float[]> vertices = nullptr;
std::unique_ptr<float[]> texture_coords = nullptr;
std::unique_ptr<int16[]> triangle_indices = nullptr;
};
typedef std::unique_ptr<float[]> ModelMatrix;
} // namespace
class GlAnimationOverlayCalculator : public CalculatorBase {
public:
GlAnimationOverlayCalculator() {}
~GlAnimationOverlayCalculator();
static ::mediapipe::Status GetContract(CalculatorContract *cc);
::mediapipe::Status Open(CalculatorContext *cc) override;
::mediapipe::Status Process(CalculatorContext *cc) override;
private:
bool has_video_stream_ = false;
bool has_model_matrix_stream_ = false;
bool has_mask_model_matrix_stream_ = false;
bool has_occlusion_mask_ = false;
GlCalculatorHelper helper_;
bool initialized_ = false;
GlTexture texture_;
GlTexture mask_texture_;
GLuint renderbuffer_ = 0;
bool depth_buffer_created_ = false;
GLuint program_ = 0;
GLint texture_uniform_ = -1;
GLint perspective_matrix_uniform_ = -1;
GLint model_matrix_uniform_ = -1;
std::vector<TriangleMesh> triangle_meshes_;
std::vector<TriangleMesh> mask_meshes_;
Timestamp animation_start_time_;
int frame_count_ = 0;
float animation_speed_fps_;
std::vector<ModelMatrix> current_model_matrices_;
std::vector<ModelMatrix> current_mask_model_matrices_;
// Perspective matrix for rendering, to be applied to all model matrices
// prior to passing through to the shader as a MVP matrix. Initialized during
// first image packet read.
float perspective_matrix_[kNumMatrixEntries];
void ComputeAspectRatioAndFovFromCameraParameters(
const CameraParametersProto &camera_parameters, float *aspect_ratio,
float *vertical_fov_degrees);
int GetAnimationFrameIndex(Timestamp timestamp);
::mediapipe::Status GlSetup();
::mediapipe::Status GlBind(const TriangleMesh &triangle_mesh,
const GlTexture &texture);
::mediapipe::Status GlRender(const TriangleMesh &triangle_mesh,
const float *model_matrix);
void InitializePerspectiveMatrix(float aspect_ratio,
float vertical_fov_degrees, float z_near,
float z_far);
void LoadModelMatrices(const TimedModelMatrixProtoList &model_matrices,
std::vector<ModelMatrix> *current_model_matrices);
#if !defined(__ANDROID__)
// Asset loading routine for all non-Android platforms.
bool LoadAnimation(const std::string &filename);
#else
// Asset loading for all Android platforms.
bool LoadAnimationAndroid(const std::string &filename,
std::vector<TriangleMesh> *meshes);
bool ReadBytesFromAsset(AAsset *asset, void *buffer, int num_bytes_to_read);
#endif
};
REGISTER_CALCULATOR(GlAnimationOverlayCalculator);
// static
::mediapipe::Status GlAnimationOverlayCalculator::GetContract(
CalculatorContract *cc) {
MP_RETURN_IF_ERROR(
GlCalculatorHelper::SetupInputSidePackets(&(cc->InputSidePackets())));
if (cc->Inputs().HasTag("VIDEO")) {
// Currently used only for size and timestamp.
cc->Inputs().Tag("VIDEO").Set<GpuBuffer>();
}
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0).Set<GpuBuffer>();
if (cc->Inputs().HasTag("MODEL_MATRICES")) {
cc->Inputs().Tag("MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
}
if (cc->Inputs().HasTag("MASK_MODEL_MATRICES")) {
cc->Inputs().Tag("MASK_MODEL_MATRICES").Set<TimedModelMatrixProtoList>();
}
cc->InputSidePackets().Tag("TEXTURE").Set<AssetTextureFormat>();
cc->InputSidePackets().Tag("ANIMATION_ASSET").Set<std::string>();
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
cc->InputSidePackets()
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
.Set<std::string>();
}
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
cc->InputSidePackets().Tag("MASK_TEXTURE").Set<AssetTextureFormat>();
}
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
cc->InputSidePackets().Tag("MASK_ASSET").Set<std::string>();
}
return ::mediapipe::OkStatus();
}
// Helper function for initializing our perspective matrix.
void GlAnimationOverlayCalculator::InitializePerspectiveMatrix(
float aspect_ratio, float fov_degrees, float z_near, float z_far) {
// Standard perspective projection matrix calculations.
const float f = 1.0f / std::tan(fov_degrees * M_PI / 360.0f);
for (int i = 0; i < kNumMatrixEntries; i++) {
perspective_matrix_[i] = 0;
}
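// With denom = 1 / (near - far), this fills the standard OpenGL perspective
// projection matrix (stored column-major, matching the GL_FALSE transpose
// flag passed to glUniformMatrix4fv in GlBind):
//   [ f/aspect  0    0                       0                      ]
//   [ 0         f    0                       0                      ]
//   [ 0         0    (near+far)/(near-far)   2*far*near/(near-far)  ]
//   [ 0         0   -1                       0                      ]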
const float denom = 1.0f / (z_near - z_far);
perspective_matrix_[0] = f / aspect_ratio;
perspective_matrix_[5] = f;
perspective_matrix_[10] = (z_near + z_far) * denom;
perspective_matrix_[11] = -1.0f;
perspective_matrix_[14] = 2.0f * z_far * z_near * denom;
}
#if defined(__ANDROID__)
// Helper function for reading in a specified number of bytes from an Android
// asset. Returns true if successfully reads in all bytes into buffer.
bool GlAnimationOverlayCalculator::ReadBytesFromAsset(AAsset *asset,
void *buffer,
int num_bytes_to_read) {
// Most file systems use block sizes of 4KB or 8KB; ideally we'd choose a
// small multiple of the block size for best input streaming performance, so
// we go for a reasonably safe buffer size of 8KB = 8*1024 bytes.
static const int kMaxChunkSize = 8192;
int bytes_left = num_bytes_to_read;
int bytes_read = 1; // any value > 0 here just to start looping.
// Treat as uint8_t array so we can deal in single byte arithmetic easily.
uint8_t *currBufferIndex = reinterpret_cast<uint8_t *>(buffer);
while (bytes_read > 0 && bytes_left > 0) {
bytes_read = AAsset_read(asset, (void *)currBufferIndex,
std::min(bytes_left, kMaxChunkSize));
bytes_left -= bytes_read;
currBufferIndex += bytes_read;
}
// At least log any I/O errors encountered.
if (bytes_read < 0) {
LOG(ERROR) << "Error reading from AAsset: " << bytes_read;
return false;
}
if (bytes_left > 0) {
// Reached EOF before reading in specified number of bytes.
LOG(WARNING) << "Reached EOF before reading in specified number of bytes.";
return false;
}
return true;
}
// The below asset streaming code is Android-only, making use of the Android
// NDK asset classes AAssetManager and AAsset.
bool GlAnimationOverlayCalculator::LoadAnimationAndroid(
const std::string &filename, std::vector<TriangleMesh> *meshes) {
mediapipe::AssetManager *mediapipe_asset_manager =
Singleton<mediapipe::AssetManager>::get();
AAssetManager *asset_manager = mediapipe_asset_manager->GetAssetManager();
if (!asset_manager) {
LOG(ERROR) << "Failed to access Android asset manager.";
return false;
}
// First, open the asset file for streaming.
AAsset *asset = AAssetManager_open(asset_manager, filename.c_str(),
AASSET_MODE_STREAMING);
if (!asset) {
LOG(ERROR) << "Failed to open animation asset: " << filename;
return false;
}
// Stream in animation frames while more are available.
frame_count_ = 0;
int32 lengths[3];
while (ReadBytesFromAsset(asset, (void *)lengths, sizeof(lengths[0]) * 3)) {
// About to start reading the next animation frame. Stream it in here.
// Each frame stores first the object counts of its three arrays
// (vertices, texture coordinates, triangle indices; respectively), and
// then stores each of those arrays as a byte dump, in order.
meshes->emplace_back();
TriangleMesh &triangle_mesh = meshes->back();
// Try to read in vertices (4-byte floats)
triangle_mesh.vertices.reset(new float[lengths[0]]);
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.vertices.get(),
sizeof(float) * lengths[0])) {
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
return false;
}
// Try to read in texture coordinates (4-byte floats)
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.texture_coords.get(),
sizeof(float) * lengths[1])) {
LOG(ERROR) << "Failed to read tex-coords for frame " << frame_count_;
return false;
}
// Try to read in indices (2-byte shorts)
triangle_mesh.index_count = lengths[2];
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
if (!ReadBytesFromAsset(asset, (void *)triangle_mesh.triangle_indices.get(),
sizeof(int16) * lengths[2])) {
LOG(ERROR) << "Failed to read indices for frame " << frame_count_;
return false;
}
frame_count_++;
}
AAsset_close(asset);
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
if (meshes->empty()) {
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
return false;
}
return true;
}
#else // defined(__ANDROID__)
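// Non-Android asset loading: reads the same per-frame format as the Android
// path above (three array lengths, then vertices, texture coordinates, and
// triangle indices), but from a regular file stream.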
bool GlAnimationOverlayCalculator::LoadAnimation(const std::string &filename) {
std::ifstream infile(filename.c_str(), std::ifstream::binary);
if (!infile) {
LOG(ERROR) << "Error opening asset with filename: " << filename;
return false;
}
frame_count_ = 0;
int32 lengths[3];
while (true) {
// See if we have more initial size counts to read in.
infile.read((char *)(lengths), sizeof(lengths[0]) * 3);
if (!infile) {
// No more frames to read. Close out.
infile.close();
break;
}
triangle_meshes_.emplace_back();
TriangleMesh &triangle_mesh = triangle_meshes_.back();
// Try to read in vertices (4-byte floats).
triangle_mesh.vertices.reset(new float[lengths[0]]);
infile.read((char *)(triangle_mesh.vertices.get()),
sizeof(float) * lengths[0]);
if (!infile) {
LOG(ERROR) << "Failed to read vertices for frame " << frame_count_;
return false;
}
// Try to read in texture coordinates (4-byte floats)
triangle_mesh.texture_coords.reset(new float[lengths[1]]);
infile.read((char *)(triangle_mesh.texture_coords.get()),
sizeof(float) * lengths[1]);
if (!infile) {
LOG(ERROR) << "Failed to read texture coordinates for frame "
<< frame_count_;
return false;
}
// Try to read in the triangle indices (2-byte shorts)
triangle_mesh.index_count = lengths[2];
triangle_mesh.triangle_indices.reset(new int16[lengths[2]]);
infile.read((char *)(triangle_mesh.triangle_indices.get()),
sizeof(int16) * lengths[2]);
if (!infile) {
LOG(ERROR) << "Failed to read triangle indices for frame "
<< frame_count_;
return false;
}
frame_count_++;
}
LOG(INFO) << "Finished parsing " << frame_count_ << " animation frames.";
if (triangle_meshes_.empty()) {
LOG(ERROR) << "No animation frames were parsed! Erroring out calculator.";
return false;
}
return true;
}
#endif
void GlAnimationOverlayCalculator::ComputeAspectRatioAndFovFromCameraParameters(
const CameraParametersProto &camera_parameters, float *aspect_ratio,
float *vertical_fov_degrees) {
CHECK(aspect_ratio != nullptr);
CHECK(vertical_fov_degrees != nullptr);
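// The full vertical FOV is computed as 2 * atan(portrait_height / 2), i.e.
// the portrait dimensions are treated as already normalized by the focal
// length.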
*aspect_ratio =
camera_parameters.portrait_width() / camera_parameters.portrait_height();
*vertical_fov_degrees =
std::atan(camera_parameters.portrait_height() * 0.5f) * 2 * 180 / M_PI;
}
::mediapipe::Status GlAnimationOverlayCalculator::Open(CalculatorContext *cc) {
cc->SetOffset(TimestampDiff(0));
MP_RETURN_IF_ERROR(helper_.Open(cc));
const auto &options = cc->Options<GlAnimationOverlayCalculatorOptions>();
animation_speed_fps_ = options.animation_speed_fps();
// Construct the projection matrix from input side packets or calculator
// options.
float aspect_ratio;
float vertical_fov_degrees;
if (cc->InputSidePackets().HasTag("CAMERA_PARAMETERS_PROTO_STRING")) {
const std::string &camera_parameters_proto_string =
cc->InputSidePackets()
.Tag("CAMERA_PARAMETERS_PROTO_STRING")
.Get<std::string>();
CameraParametersProto camera_parameters_proto;
camera_parameters_proto.ParseFromString(camera_parameters_proto_string);
ComputeAspectRatioAndFovFromCameraParameters(
camera_parameters_proto, &aspect_ratio, &vertical_fov_degrees);
} else {
aspect_ratio = options.aspect_ratio();
vertical_fov_degrees = options.vertical_fov_degrees();
}
// The near/far z-clipping planes from the options are used when constructing
// the projection matrix.
InitializePerspectiveMatrix(aspect_ratio, vertical_fov_degrees,
options.z_clipping_plane_near(),
options.z_clipping_plane_far());
// See what streams we have.
has_video_stream_ = cc->Inputs().HasTag("VIDEO");
has_model_matrix_stream_ = cc->Inputs().HasTag("MODEL_MATRICES");
has_mask_model_matrix_stream_ = cc->Inputs().HasTag("MASK_MODEL_MATRICES");
// Try to load in the animation asset in a platform-specific manner.
const std::string &asset_name =
cc->InputSidePackets().Tag("ANIMATION_ASSET").Get<std::string>();
bool loaded_animation = false;
#if defined(__ANDROID__)
if (cc->InputSidePackets().HasTag("MASK_ASSET")) {
has_occlusion_mask_ = true;
const std::string &mask_asset_name =
cc->InputSidePackets().Tag("MASK_ASSET").Get<std::string>();
loaded_animation = LoadAnimationAndroid(mask_asset_name, &mask_meshes_);
if (!loaded_animation) {
LOG(ERROR) << "Failed to load mask asset.";
return ::mediapipe::UnknownError("Failed to load mask asset.");
}
}
loaded_animation = LoadAnimationAndroid(asset_name, &triangle_meshes_);
#else
loaded_animation = LoadAnimation(asset_name);
#endif
if (!loaded_animation) {
LOG(ERROR) << "Failed to load animation asset.";
return ::mediapipe::UnknownError("Failed to load animation asset.");
}
return helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
if (cc->InputSidePackets().HasTag("MASK_TEXTURE")) {
const auto &mask_texture =
cc->InputSidePackets().Tag("MASK_TEXTURE").Get<AssetTextureFormat>();
mask_texture_ = helper_.CreateSourceTexture(mask_texture);
}
// Load in our asset's texture data
const auto &input_texture =
cc->InputSidePackets().Tag("TEXTURE").Get<AssetTextureFormat>();
texture_ = helper_.CreateSourceTexture(input_texture);
VLOG(2) << "Input texture size: " << texture_.width() << ", "
<< texture_.height() << std::endl;
return ::mediapipe::OkStatus();
});
}
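// Returns the index of the animation frame to render at the given timestamp,
// advancing at animation_speed_fps_ and wrapping around so that the animation
// loops.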
int GlAnimationOverlayCalculator::GetAnimationFrameIndex(Timestamp timestamp) {
double seconds_delta = timestamp.Seconds() - animation_start_time_.Seconds();
int64_t frame_index =
static_cast<int64_t>(seconds_delta * animation_speed_fps_);
frame_index %= frame_count_;
return static_cast<int>(frame_index);
}
void GlAnimationOverlayCalculator::LoadModelMatrices(
const TimedModelMatrixProtoList &model_matrices,
std::vector<ModelMatrix> *current_model_matrices) {
current_model_matrices->clear();
for (int i = 0; i < model_matrices.model_matrix_size(); ++i) {
const auto &model_matrix = model_matrices.model_matrix(i);
CHECK(model_matrix.matrix_entries_size() == kNumMatrixEntries)
<< "Invalid Model Matrix";
current_model_matrices->emplace_back();
ModelMatrix &new_matrix = current_model_matrices->back();
new_matrix.reset(new float[kNumMatrixEntries]);
for (int j = 0; j < kNumMatrixEntries; j++) {
// Model matrices streamed in using ROW-MAJOR format, but we want
// COLUMN-MAJOR for rendering, so we transpose here.
int col = j % 4;
int row = j / 4;
new_matrix[row + col * 4] = model_matrix.matrix_entries(j);
}
}
}
::mediapipe::Status GlAnimationOverlayCalculator::Process(
CalculatorContext *cc) {
return helper_.RunInGlContext([this, &cc]() -> ::mediapipe::Status {
if (!initialized_) {
MP_RETURN_IF_ERROR(GlSetup());
initialized_ = true;
animation_start_time_ = cc->InputTimestamp();
}
// Process model matrices, if any are being streamed in, and update our
// list.
if (has_model_matrix_stream_ &&
!cc->Inputs().Tag("MODEL_MATRICES").IsEmpty()) {
const TimedModelMatrixProtoList &model_matrices =
cc->Inputs().Tag("MODEL_MATRICES").Get<TimedModelMatrixProtoList>();
LoadModelMatrices(model_matrices, &current_model_matrices_);
}
if (has_mask_model_matrix_stream_ &&
!cc->Inputs().Tag("MASK_MODEL_MATRICES").IsEmpty()) {
const TimedModelMatrixProtoList &model_matrices =
cc->Inputs()
.Tag("MASK_MODEL_MATRICES")
.Get<TimedModelMatrixProtoList>();
LoadModelMatrices(model_matrices, &current_mask_model_matrices_);
}
// Arbitrary default width and height for output destination texture, in the
// event that we don't have a valid and unique input buffer to overlay.
int width = 640;
int height = 480;
GlTexture dst;
std::unique_ptr<GpuBuffer> input_frame(nullptr);
if (has_video_stream_ && !(cc->Inputs().Tag("VIDEO").IsEmpty())) {
auto result = cc->Inputs().Tag("VIDEO").Value().Consume<GpuBuffer>();
if (result.ok()) {
input_frame = std::move(result).ValueOrDie();
#if !MEDIAPIPE_GPU_BUFFER_USE_CV_PIXEL_BUFFER
input_frame->GetGlTextureBufferSharedPtr()->Reuse();
#endif
width = input_frame->width();
height = input_frame->height();
dst = helper_.CreateSourceTexture(*input_frame);
} else {
LOG(ERROR) << "Unable to consume input video frame for overlay!";
LOG(ERROR) << "Status returned was: " << result.status();
dst = helper_.CreateDestinationTexture(width, height);
}
} else if (!has_video_stream_) {
dst = helper_.CreateDestinationTexture(width, height);
} else {
// We have an input video stream, but not for this frame. Don't render!
return ::mediapipe::OkStatus();
}
helper_.BindFramebuffer(dst);
if (!depth_buffer_created_) {
// Create our private depth buffer.
GLCHECK(glGenRenderbuffers(1, &renderbuffer_));
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT16,
width, height));
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
depth_buffer_created_ = true;
}
// Re-bind our depth renderbuffer to our FBO depth attachment here.
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer_));
GLCHECK(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
GL_RENDERBUFFER, renderbuffer_));
GLenum status = GLCHECK(glCheckFramebufferStatus(GL_FRAMEBUFFER));
if (status != GL_FRAMEBUFFER_COMPLETE) {
LOG(ERROR) << "Incomplete framebuffer with status: " << status;
}
GLCHECK(glClear(GL_DEPTH_BUFFER_BIT));
if (has_occlusion_mask_) {
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
const TriangleMesh &mask_frame = mask_meshes_.front();
MP_RETURN_IF_ERROR(GlBind(mask_frame, mask_texture_));
// Draw objects using our latest model matrix stream packet.
for (const ModelMatrix &model_matrix : current_mask_model_matrices_) {
MP_RETURN_IF_ERROR(GlRender(mask_frame, model_matrix.get()));
}
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
int frame_index = GetAnimationFrameIndex(cc->InputTimestamp());
const TriangleMesh &current_frame = triangle_meshes_[frame_index];
MP_RETURN_IF_ERROR(GlBind(current_frame, texture_));
if (has_model_matrix_stream_) {
// Draw objects using our latest model matrix stream packet.
for (const ModelMatrix &model_matrix : current_model_matrices_) {
MP_RETURN_IF_ERROR(GlRender(current_frame, model_matrix.get()));
}
} else {
// Just draw one object to a static model matrix.
MP_RETURN_IF_ERROR(GlRender(current_frame, kModelMatrix));
}
// Disable vertex attributes
GLCHECK(glDisableVertexAttribArray(ATTRIB_VERTEX));
GLCHECK(glDisableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
// Disable depth test
GLCHECK(glDisable(GL_DEPTH_TEST));
// Unbind texture
GLCHECK(glActiveTexture(GL_TEXTURE1));
GLCHECK(glBindTexture(texture_.target(), 0));
// Unbind depth buffer
GLCHECK(glBindRenderbuffer(GL_RENDERBUFFER, 0));
GLCHECK(glFlush());
auto output = dst.GetFrame<GpuBuffer>();
dst.Release();
TagOrIndex(&(cc->Outputs()), "OUTPUT", 0)
.Add(output.release(), cc->InputTimestamp());
GLCHECK(glFrontFace(GL_CCW));
return ::mediapipe::OkStatus();
});
}
::mediapipe::Status GlAnimationOverlayCalculator::GlSetup() {
// Load vertex and fragment shaders
const GLint attr_location[NUM_ATTRIBUTES] = {
ATTRIB_VERTEX,
ATTRIB_TEXTURE_POSITION,
};
const GLchar *attr_name[NUM_ATTRIBUTES] = {
"position",
"texture_coordinate",
};
const GLchar *vert_src = R"(
// Perspective projection matrix for rendering / clipping
uniform mat4 perspectiveMatrix;
// Matrix defining the currently rendered object model
uniform mat4 modelMatrix;
// vertex position in threespace
attribute vec4 position;
// texture coordinate for each vertex in normalized texture space (0..1)
attribute mediump vec4 texture_coordinate;
// texture coordinate for fragment shader (will be interpolated)
varying mediump vec2 sample_coordinate;
void main() {
sample_coordinate = texture_coordinate.xy;
mat4 mvpMatrix = perspectiveMatrix * modelMatrix;
gl_Position = mvpMatrix * position;
}
)";
const GLchar *frag_src = R"(
precision mediump float;
varying vec2 sample_coordinate; // texture coordinate (0..1)
uniform sampler2D texture; // texture to shade with
void main() {
gl_FragColor = texture2D(texture, sample_coordinate);
}
)";
// Shader program
GLCHECK(GlhCreateProgram(vert_src, frag_src, NUM_ATTRIBUTES,
(const GLchar **)&attr_name[0], attr_location,
&program_));
RET_CHECK(program_) << "Problem initializing the program.";
texture_uniform_ = GLCHECK(glGetUniformLocation(program_, "texture"));
perspective_matrix_uniform_ =
GLCHECK(glGetUniformLocation(program_, "perspectiveMatrix"));
model_matrix_uniform_ =
GLCHECK(glGetUniformLocation(program_, "modelMatrix"));
return ::mediapipe::OkStatus();
}
::mediapipe::Status GlAnimationOverlayCalculator::GlBind(
const TriangleMesh &triangle_mesh, const GlTexture &texture) {
GLCHECK(glUseProgram(program_));
// Disable backface culling to allow occlusion effects.
// Some options for solid arbitrary 3D geometry rendering
GLCHECK(glEnable(GL_BLEND));
GLCHECK(glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA));
GLCHECK(glEnable(GL_DEPTH_TEST));
GLCHECK(glFrontFace(GL_CW));
GLCHECK(glDepthMask(GL_TRUE));
GLCHECK(glDepthFunc(GL_LESS));
// Set up per-vertex position and texture-coordinate attribute arrays.
GLCHECK(glVertexAttribPointer(ATTRIB_VERTEX, 3, GL_FLOAT, 0, 0,
triangle_mesh.vertices.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_VERTEX));
GLCHECK(glVertexAttribPointer(ATTRIB_TEXTURE_POSITION, 2, GL_FLOAT, 0, 0,
triangle_mesh.texture_coords.get()));
GLCHECK(glEnableVertexAttribArray(ATTRIB_TEXTURE_POSITION));
GLCHECK(glActiveTexture(GL_TEXTURE1));
GLCHECK(glBindTexture(texture.target(), texture.name()));
// We previously bound it to GL_TEXTURE1
GLCHECK(glUniform1i(texture_uniform_, 1));
GLCHECK(glUniformMatrix4fv(perspective_matrix_uniform_, 1, GL_FALSE,
perspective_matrix_));
return ::mediapipe::OkStatus();
}
::mediapipe::Status GlAnimationOverlayCalculator::GlRender(
const TriangleMesh &triangle_mesh, const float *model_matrix) {
GLCHECK(glUniformMatrix4fv(model_matrix_uniform_, 1, GL_FALSE, model_matrix));
GLCHECK(glDrawElements(GL_TRIANGLES, triangle_mesh.index_count,
GL_UNSIGNED_SHORT,
triangle_mesh.triangle_indices.get()));
return ::mediapipe::OkStatus();
}
GlAnimationOverlayCalculator::~GlAnimationOverlayCalculator() {
helper_.RunInGlContext([this] {
if (program_) {
GLCHECK(glDeleteProgram(program_));
program_ = 0;
}
if (depth_buffer_created_) {
GLCHECK(glDeleteRenderbuffers(1, &renderbuffer_));
renderbuffer_ = 0;
}
if (texture_.width() > 0) {
texture_.Release();
}
if (mask_texture_.width() > 0) {
mask_texture_.Release();
}
});
}
} // namespace mediapipe

View File

@ -0,0 +1,41 @@
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
message GlAnimationOverlayCalculatorOptions {
extend CalculatorOptions {
optional GlAnimationOverlayCalculatorOptions ext = 174760573;
}
// Default aspect ratio of rendering target width over height.
// This specific value is for 3:4 view. Do not change this default value.
optional float aspect_ratio = 1 [default = 0.75];
// Default vertical field of view in degrees. This specific default value
// is arbitrary. Do not change this default value. If you want to use
// a different vertical_fov_degrees, set it in the options.
optional float vertical_fov_degrees = 2 [default = 70.0];
// Perspective projection matrix z-clipping near plane value.
optional float z_clipping_plane_near = 3 [default = 0.1];
// Perspective projection matrix z-clipping far plane value.
optional float z_clipping_plane_far = 4 [default = 1000.0];
// Speed at which to play the animation (in frames per second).
optional float animation_speed_fps = 5 [default = 25.0];
}

View File

@ -0,0 +1,168 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <unordered_map>
#include <vector>
#include "Eigen/Dense"
#include "absl/memory/memory.h"
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/decoder.h"
#include "mediapipe/graphs/object_detection_3d/calculators/lift_2d_frame_annotation_to_3d_calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/tensor_util.h"
namespace {
constexpr char kInputStreamTag[] = "FRAME_ANNOTATION";
constexpr char kOutputStreamTag[] = "LIFTED_FRAME_ANNOTATION";
// Each detected object will be assigned a unique id that starts from 1.
static int object_id = 0;
inline int GetNextObjectId() { return ++object_id; }
} // namespace
namespace mediapipe {
// Lifts the 2D points in a tracked frame annotation to 3D.
//
// Input:
// FRAME_ANNOTATION - Frame annotation with detected 2D points.
// Output:
// LIFTED_FRAME_ANNOTATION - Resulting FrameAnnotation with lifted 3D points.
//
// Usage example:
// node {
// calculator: "Lift2DFrameAnnotationTo3DCalculator"
// input_stream: "FRAME_ANNOTATIONS:tracked_annotations"
// output_stream: "LIFTED_FRAME_ANNOTATIONS:lifted_3d_annotations"
// }
class Lift2DFrameAnnotationTo3DCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
private:
::mediapipe::Status ProcessCPU(CalculatorContext* cc,
FrameAnnotation* output_objects);
::mediapipe::Status LoadOptions(CalculatorContext* cc);
// Increments and assigns an object ID for each detected object. Within a
// single MediaPipe session, the IDs are unique.
// Also sets the timestamp of the FrameAnnotation to the input packet
// timestamp.
void AssignObjectIdAndTimestamp(int64 timestamp_us,
FrameAnnotation* annotation);
std::unique_ptr<Decoder> decoder_;
::mediapipe::Lift2DFrameAnnotationTo3DCalculatorOptions options_;
Eigen::Matrix<float, 4, 4, Eigen::RowMajor> projection_matrix_;
};
REGISTER_CALCULATOR(Lift2DFrameAnnotationTo3DCalculator);
::mediapipe::Status Lift2DFrameAnnotationTo3DCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(cc->Inputs().HasTag(kInputStreamTag));
RET_CHECK(cc->Outputs().HasTag(kOutputStreamTag));
cc->Inputs().Tag(kInputStreamTag).Set<FrameAnnotation>();
cc->Outputs().Tag(kOutputStreamTag).Set<FrameAnnotation>();
return ::mediapipe::OkStatus();
}
::mediapipe::Status Lift2DFrameAnnotationTo3DCalculator::Open(
CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadOptions(cc));
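// Hard-coded projection matrix used by the decoder to lift the tracked 2D
// keypoints back to 3D (portrait orientation; see ProcessCPU below).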
// clang-format off
projection_matrix_ <<
1.5731, 0, 0, 0,
0, 2.0975, 0, 0,
0, 0, -1.0002, -0.2,
0, 0, -1, 0;
// clang-format on
decoder_ = absl::make_unique<Decoder>(
BeliefDecoderConfig(options_.decoder_config()));
return ::mediapipe::OkStatus();
}
::mediapipe::Status Lift2DFrameAnnotationTo3DCalculator::Process(
CalculatorContext* cc) {
if (cc->Inputs().Tag(kInputStreamTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
auto output_objects = absl::make_unique<FrameAnnotation>();
MP_RETURN_IF_ERROR(ProcessCPU(cc, output_objects.get()));
// Output
if (cc->Outputs().HasTag(kOutputStreamTag)) {
cc->Outputs()
.Tag(kOutputStreamTag)
.Add(output_objects.release(), cc->InputTimestamp());
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status Lift2DFrameAnnotationTo3DCalculator::ProcessCPU(
CalculatorContext* cc, FrameAnnotation* output_objects) {
const auto& input_frame_annotations =
cc->Inputs().Tag(kInputStreamTag).Get<FrameAnnotation>();
// Copy the input frame annotation to the output
*output_objects = input_frame_annotations;
auto status = decoder_->Lift2DTo3D(projection_matrix_, /*portrait*/ true,
output_objects);
if (!status.ok()) {
LOG(ERROR) << status;
return status;
}
AssignObjectIdAndTimestamp(cc->InputTimestamp().Microseconds(),
output_objects);
return ::mediapipe::OkStatus();
}
::mediapipe::Status Lift2DFrameAnnotationTo3DCalculator::Close(
CalculatorContext* cc) {
return ::mediapipe::OkStatus();
}
::mediapipe::Status Lift2DFrameAnnotationTo3DCalculator::LoadOptions(
CalculatorContext* cc) {
// Get calculator options specified in the graph.
options_ =
cc->Options<::mediapipe::Lift2DFrameAnnotationTo3DCalculatorOptions>();
return ::mediapipe::OkStatus();
}
void Lift2DFrameAnnotationTo3DCalculator::AssignObjectIdAndTimestamp(
int64 timestamp_us, FrameAnnotation* annotation) {
for (auto& ann : *annotation->mutable_annotations()) {
ann.set_object_id(GetNextObjectId());
}
annotation->set_timestamp(timestamp_us);
}
} // namespace mediapipe

View File

@ -0,0 +1,30 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the Lift2DFrameAnnotationTo3DCalculatorOptions.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/graphs/object_detection_3d/calculators/belief_decoder_config.proto";
message Lift2DFrameAnnotationTo3DCalculatorOptions {
extend CalculatorOptions {
optional Lift2DFrameAnnotationTo3DCalculatorOptions ext = 290166284;
}
optional BeliefDecoderConfig decoder_config = 1;
}

View File

@ -0,0 +1,101 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/model.h"
#include "mediapipe/framework/port/logging.h"
namespace mediapipe {
void Model::SetTransformation(const Eigen::Matrix4f& transform) {
transformation_ = transform;
}
void Model::SetTranslation(const Eigen::Vector3f& translation) {
transformation_.col(3).template head<3>() = translation;
}
void Model::SetRotation(float roll, float pitch, float yaw) {
// In our coordinate system, Y is up. We first rotate the object around Y
// (yaw), then around Z (pitch), and finally around X (roll).
Eigen::Matrix3f r;
r = Eigen::AngleAxisf(yaw, Eigen::Vector3f::UnitY()) *
Eigen::AngleAxisf(pitch, Eigen::Vector3f::UnitZ()) *
Eigen::AngleAxisf(roll, Eigen::Vector3f::UnitX());
transformation_.topLeftCorner<3, 3>() = r;
}
void Model::SetRotation(const Eigen::Matrix3f& rotation) {
transformation_.topLeftCorner<3, 3>() = rotation;
}
void Model::SetScale(const Eigen::Vector3f& scale) { scale_ = scale; }
void Model::SetCategory(const std::string& category) { category_ = category; }
const Eigen::Vector3f Model::GetRotationAngles() const {
Vector3f ypr = transformation_.topLeftCorner<3, 3>().eulerAngles(1, 2, 0);
return Vector3f(ypr(2), ypr(1), ypr(0)); // swap YPR with RPY
}
const Eigen::Matrix4f& Model::GetTransformation() const {
return transformation_;
}
const Eigen::Vector3f& Model::GetScale() const { return scale_; }
const Eigen::Ref<const Eigen::Vector3f> Model::GetTranslation() const {
return transformation_.col(3).template head<3>();
}
const Eigen::Ref<const Eigen::Matrix3f> Model::GetRotation() const {
return transformation_.template topLeftCorner<3, 3>();
}
const std::string& Model::GetCategory() const { return category_; }
void Model::Deserialize(const Object& obj) {
CHECK_EQ(obj.rotation_size(), 9);
CHECK_EQ(obj.translation_size(), 3);
CHECK_EQ(obj.scale_size(), 3);
category_ = obj.category();
using RotationMatrix = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
transformation_.setIdentity();
transformation_.topLeftCorner<3, 3>() =
Eigen::Map<const RotationMatrix>(obj.rotation().data());
transformation_.col(3).head<3>() =
Eigen::Map<const Eigen::Vector3f>(obj.translation().data());
scale_ = Eigen::Map<const Eigen::Vector3f>(obj.scale().data());
Update();
}
void Model::Serialize(Object* obj) {
obj->set_category(category_);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
obj->add_rotation(transformation_(i, j));
}
}
for (int i = 0; i < 3; ++i) {
obj->add_translation(transformation_(i, 3));
}
for (int i = 0; i < 3; ++i) {
obj->add_scale(scale_[i]);
}
}
} // namespace mediapipe

View File

@ -0,0 +1,92 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_MODEL_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_MODEL_H_
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/object.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/types.h"
namespace mediapipe {
class Model {
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
enum Type {
kVisualizationOnly = 0,
kBoundingBox,
kSkeleton,
kShape, // Shape is a virtual object.
kNumModes,
};
virtual ~Model() = default;
virtual void SetTransformation(const Eigen::Matrix4f& transform);
virtual void SetTranslation(const Eigen::Vector3f& translation);
// Computes the rotation matrix from these angles and updates the
// transformation matrix accordingly.
virtual void SetRotation(float roll, float pitch, float yaw);
virtual void SetRotation(const Eigen::Matrix3f& rotation);
virtual void SetScale(const Eigen::Vector3f& scale);
virtual void SetCategory(const std::string& category);
virtual size_t GetNumberKeypoints() const { return number_keypoints_; }
// Gets Euler angles in the order of roll, pitch, yaw.
virtual const Eigen::Vector3f GetRotationAngles() const;
virtual const Eigen::Matrix4f& GetTransformation() const;
virtual const Eigen::Vector3f& GetScale() const;
virtual const Eigen::Ref<const Eigen::Vector3f> GetTranslation() const;
virtual const Eigen::Ref<const Eigen::Matrix3f> GetRotation() const;
virtual const std::string& GetCategory() const;
// Update the model's keypoints in the world-coordinate system.
// The update includes transforming the model to the world-coordinate system
// as well as scaling the model.
// The user is expected to call this function after setting the rotation,
// orientation, or scale of the model to get an updated model.
virtual void Update() = 0;
// Update the model's parameters (orientation, position, and scale) from the
// user-provided variables.
virtual void Adjust(const std::vector<float>& variables) = 0;
// Returns a pointer to the model's keypoints.
// Use Eigen::Map to cast the pointer back to Vector3 or Vector4
virtual const float* GetVertex(size_t id) const = 0;
virtual float* GetVertex(size_t id) = 0;
virtual void Deserialize(const Object& obj);
virtual void Serialize(Object* obj);
// TODO: make member variables protected, and add public apis.
// 4x4 transformation matrix mapping the first keypoint to the world-coordinate
// system.
Eigen::Matrix4f transformation_;
Eigen::Vector3f scale_; // width, height, depth
Type model_type_;
size_t number_keypoints_;
std::string category_;
protected:
Model(Type type, size_t number_keypoints, const std::string& category)
: model_type_(type),
number_keypoints_(number_keypoints),
category_(category) {}
};
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_MODEL_H_
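
Update(), Adjust(), and GetVertex() above are pure virtual, so keypoint storage is left to subclasses. As a rough, hypothetical sketch only (this subclass, its keypoint layout, the variable ordering in Adjust(), and the model.h include path are not part of this change), a minimal box-style implementation could look like the following:

// Hypothetical minimal Model subclass: keeps keypoints as homogeneous
// 4-vectors in the local frame and, in Update(), maps them to world space as
// transformation * diag(scale, 1) * local_keypoint.
#include <vector>
#include "Eigen/Dense"
#include "mediapipe/graphs/object_detection_3d/calculators/model.h"  // assumed path

namespace mediapipe {

class ToyBoxModel : public Model {
 public:
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
  ToyBoxModel() : Model(kBoundingBox, /*number_keypoints=*/9, "toy_box") {
    transformation_.setIdentity();
    scale_.setOnes();
    local_.resize(number_keypoints_, Eigen::Vector4f(0.f, 0.f, 0.f, 1.f));
    world_ = local_;
  }

  void Update() override {
    Eigen::Matrix4f scale = Eigen::Matrix4f::Identity();
    scale(0, 0) = scale_.x();
    scale(1, 1) = scale_.y();
    scale(2, 2) = scale_.z();
    for (size_t i = 0; i < number_keypoints_; ++i) {
      world_[i] = transformation_ * scale * local_[i];
    }
  }

  // Made-up variable layout for this sketch: [tx, ty, tz, roll, pitch, yaw].
  void Adjust(const std::vector<float>& variables) override {
    SetTranslation(Eigen::Vector3f(variables[0], variables[1], variables[2]));
    SetRotation(variables[3], variables[4], variables[5]);
    Update();
  }

  const float* GetVertex(size_t id) const override { return world_[id].data(); }
  float* GetVertex(size_t id) override { return world_[id].data(); }

 private:
  std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f>> local_;
  std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f>> world_;
};

}  // namespace mediapipe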

View File

@ -0,0 +1,48 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package mediapipe;
message TimedModelMatrixProto {
// 4x4 model matrix stored in ROW major order.
repeated float matrix_entries = 1 [packed = true];
// Timestamp of this model matrix in milliseconds.
optional int64 time_msec = 2 [default = 0];
// Unique id per object.
optional int32 id = 3 [default = -1];
}
message TimedModelMatrixProtoList {
repeated TimedModelMatrixProto model_matrix = 1;
}
// For convenience, use these messages when the desired information or
// transformation can be encoded into vectors (e.g. when the matrix represents
// a scale or an Euler-angle-based rotation operation).
message TimedVectorProto {
// The vector values themselves.
repeated float vector_entries = 1 [packed = true];
// Timestamp of this vector in milliseconds.
optional int64 time_msec = 2 [default = 0];
// Unique id per object.
optional int32 id = 3 [default = -1];
}
message TimedVectorProtoList {
repeated TimedVectorProto vector_list = 1;
}
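
As a usage note (not part of this change), a producer can pack an Eigen transform into TimedModelMatrixProto directly; a minimal sketch, assuming the generated header is named model_matrix.pb.h and lives next to this proto:

// Sketch: fills a TimedModelMatrixProto from a row-major Eigen 4x4 matrix.
// matrix_entries is stored in row-major order, so the Eigen matrix is declared
// RowMajor and copied linearly.
#include <cstdint>
#include "Eigen/Dense"
#include "mediapipe/graphs/object_detection_3d/calculators/model_matrix.pb.h"  // assumed path

namespace mediapipe {

TimedModelMatrixProto PackModelMatrix(
    const Eigen::Matrix<float, 4, 4, Eigen::RowMajor>& m, int64_t time_msec,
    int id) {
  TimedModelMatrixProto proto;
  for (int i = 0; i < 16; ++i) {
    proto.add_matrix_entries(m.data()[i]);  // Linear row-major traversal.
  }
  proto.set_time_msec(time_msec);
  proto.set_id(id);
  return proto;
}

}  // namespace mediapipe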

View File

@ -0,0 +1,124 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package mediapipe;
message KeyPoint {
// The position of the keypoint in the local coordinate system of the rigid
// object.
float x = 1;
float y = 2;
float z = 3;
// Sphere around the keypoint, indicating the annotator's confidence in the
// position, in meters.
float confidence_radius = 4;
// The name of the keypoint (e.g. legs, head, etc.).
// Does not have to be unique.
string name = 5;
// Indicates whether the keypoint is hidden or not.
bool hidden = 6;
}
message Object {
// Unique object id throughout a sequence. There might be multiple objects of
// the same label in this sequence.
int32 id = 1;
// Describes what category an object is. E.g. object class, attribute,
// instance or person identity. This provides additional context for the
// object type.
string category = 2;
enum Type {
UNDEFINED_TYPE = 0;
BOUNDING_BOX = 1;
SKELETON = 2;
}
Type type = 3;
// 3x3 row-major rotation matrix describing the orientation of the rigid
// object's frame of reference in the world-coordinate system.
repeated float rotation = 4;
// 3x1 vector describing the translation of the rigid object's frame of
// reference in the world-coordinate system in meters.
repeated float translation = 5;
// 3x1 vector describing the scale of the rigid object's frame of reference in
// the world-coordinate system in meters.
repeated float scale = 6;
// List of all the key points associated with this object in the object
// coordinate system.
// The first keypoint is always the object's frame of reference,
// e.g. the centroid of the box.
// E.g. for a bounding box with its center as the frame of reference, the 9
// keypoints are:
// {0., 0., 0.},
// {-.5, -.5, -.5}, {-.5, -.5, +.5}, {-.5, +.5, -.5}, {-.5, +.5, +.5},
// {+.5, -.5, -.5}, {+.5, -.5, +.5}, {+.5, +.5, -.5}, {+.5, +.5, +.5}
// To get the bounding box in the world-coordinate system, we first scale the
// box and then transform the scaled box.
// For example, the bounding box in the world-coordinate system is
// rotation * scale * keypoints + translation
repeated KeyPoint keypoints = 7;
// Enum to reflect how this object is created.
enum Method {
UNKNOWN_METHOD = 0;
ANNOTATION = 1; // Created by data annotation.
AUGMENTATION = 2; // Created by data augmentation.
}
Method method = 8;
}
// The edge connecting two keypoints together
message Edge {
// keypoint id of the edge's source
int32 source = 1;
// keypoint id of the edge's sink
int32 sink = 2;
}
// The skeleton template for different objects (e.g. humans, chairs, hands, etc)
// The annotation tool reads the skeleton template dictionary.
message Skeleton {
// The origin keypoint in the object coordinate system, i.e. the point (0, 0, 0).
int32 reference_keypoint = 1;
// The skeleton's category (e.g. human, chair, hand). Should be unique in the
// dictionary.
string category = 2;
// Initialization value for all the keypoints in the skeleton in the object's
// local coordinate system. Pursuit will transform these points using the
// object's transformation to get the keypoints in the world coordinate system.
repeated KeyPoint keypoints = 3;
// List of edges connecting keypoints
repeated Edge edges = 4;
}
// The list of all the modeled skeletons in our library. These models can be
// objects (chairs, desks, etc.), humans (full pose, hands, faces, etc.), or boxes.
// We can have multiple skeletons in the same file.
message Skeletons {
repeated Skeleton object = 1;
}
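
The keypoints comment in Object above fixes the local-to-world convention as rotation * scale * keypoint + translation. A small Eigen sketch of that arithmetic (the helper name is illustrative only, and it assumes rotation/translation/scale are fully populated with 9/3/3 entries):

// Sketch: maps one of the Object's local keypoints into world coordinates,
// following the comment above: world = rotation * scale * keypoint + translation.
#include "Eigen/Dense"
#include "mediapipe/graphs/object_detection_3d/calculators/object.pb.h"

namespace mediapipe {

Eigen::Vector3f KeypointToWorld(const Object& obj, int index) {
  using RowMajor3x3 = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
  const Eigen::Map<const RowMajor3x3> rotation(obj.rotation().data());
  const Eigen::Map<const Eigen::Vector3f> translation(obj.translation().data());
  const Eigen::Map<const Eigen::Vector3f> scale(obj.scale().data());
  const KeyPoint& kp = obj.keypoints(index);
  const Eigen::Vector3f local(kp.x(), kp.y(), kp.z());
  return rotation * scale.asDiagonal() * local + translation;
}

}  // namespace mediapipe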

View File

@ -0,0 +1,33 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mediapipe/graphs/object_detection_3d/calculators/tensor_util.h"
#include "mediapipe/framework/port/logging.h"
namespace mediapipe {
cv::Mat ConvertTfliteTensorToCvMat(const TfLiteTensor& tensor) {
// Check that the tensor is 4-D (batch, height, width, channels) and that the
// batch size is one (data[0] == 1).
CHECK(tensor.dims->size == 4 && tensor.dims->data[0] == 1);
CHECK_EQ(kTfLiteFloat32, tensor.type) << "tflite_tensor type is not float";
const size_t num_output_channels = tensor.dims->data[3];
const int dims = 2;
const int sizes[] = {tensor.dims->data[1], tensor.dims->data[2]};
const int type = CV_MAKETYPE(CV_32F, num_output_channels);
return cv::Mat(dims, sizes, type, reinterpret_cast<void*>(tensor.data.f));
}
} // namespace mediapipe
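
For reference, a typical call site wraps an interpreter output in place; a minimal sketch in which the interpreter plumbing is assumed and only ConvertTfliteTensorToCvMat comes from this file:

// Sketch: views a float output tensor of a tflite::Interpreter as a cv::Mat.
// The returned cv::Mat aliases the tensor's memory, so it is only valid while
// the interpreter (and its tensor buffer) stays alive and unmodified.
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/graphs/object_detection_3d/calculators/tensor_util.h"
#include "tensorflow/lite/interpreter.h"

namespace mediapipe {

cv::Mat OutputTensorAsMat(const tflite::Interpreter& interpreter, int output) {
  const TfLiteTensor* tensor =
      interpreter.tensor(interpreter.outputs()[output]);
  return ConvertTfliteTensorToCvMat(*tensor);
}

}  // namespace mediapipe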

View File

@ -0,0 +1,27 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_TENSOR_UTIL_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_TENSOR_UTIL_H_
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "tensorflow/lite/interpreter.h"
namespace mediapipe {
// Converts a kTfLiteFloat32 tensor to a cv::Mat that aliases the tensor's data,
// with one cv::Mat channel per tensor channel.
cv::Mat ConvertTfliteTensorToCvMat(const TfLiteTensor& tensor);
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_TENSOR_UTIL_H_

View File

@ -0,0 +1,216 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <unordered_map>
#include <vector>
#include "Eigen/Dense"
#include "absl/memory/memory.h"
#include "absl/strings/str_format.h"
#include "absl/types/span.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/graphs/object_detection_3d/calculators/annotation_data.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/belief_decoder_config.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/decoder.h"
#include "mediapipe/graphs/object_detection_3d/calculators/tensor_util.h"
#include "mediapipe/graphs/object_detection_3d/calculators/tflite_tensors_to_objects_calculator.pb.h"
#include "tensorflow/lite/interpreter.h"
namespace {
constexpr char kInputStreamTag[] = "TENSORS";
constexpr char kOutputStreamTag[] = "ANNOTATIONS";
// Each detected object will be assigned a unique id that starts from 1.
static int object_id = 0;
inline int GetNextObjectId() { return ++object_id; }
} // namespace
namespace mediapipe {
// Converts the resulting TFLite tensors from the deep pursuit 3d model into a
// FrameAnnotation.
//
// Input:
// TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32.
// Output:
// ANNOTATIONS - Result FrameAnnotation.
//
// Usage example:
// node {
// calculator: "TfLiteTensorsToObjectsCalculator"
// input_stream: "TENSORS:tensors"
// output_stream: "ANNOTATIONS:annotations"
// }
class TfLiteTensorsToObjectsCalculator : public CalculatorBase {
public:
static ::mediapipe::Status GetContract(CalculatorContract* cc);
::mediapipe::Status Open(CalculatorContext* cc) override;
::mediapipe::Status Process(CalculatorContext* cc) override;
::mediapipe::Status Close(CalculatorContext* cc) override;
private:
::mediapipe::Status ProcessCPU(CalculatorContext* cc,
FrameAnnotation* output_objects);
::mediapipe::Status LoadOptions(CalculatorContext* cc);
// Takes point_3d in FrameAnnotation, projects it to 2D, and overwrites the
// point_2d field with the projection.
void Project3DTo2D(bool portrait, FrameAnnotation* annotation) const;
// Increments and assigns an object ID for each detected object.
// Within a single MediaPipe session, the IDs are unique.
// Also sets the FrameAnnotation's timestamp to the input packet timestamp.
void AssignObjectIdAndTimestamp(int64 timestamp_us,
FrameAnnotation* annotation);
int num_classes_ = 0;
int num_keypoints_ = 0;
::mediapipe::TfLiteTensorsToObjectsCalculatorOptions options_;
std::unique_ptr<Decoder> decoder_;
Eigen::Matrix<float, 4, 4, Eigen::RowMajor> projection_matrix_;
};
REGISTER_CALCULATOR(TfLiteTensorsToObjectsCalculator);
::mediapipe::Status TfLiteTensorsToObjectsCalculator::GetContract(
CalculatorContract* cc) {
RET_CHECK(!cc->Inputs().GetTags().empty());
RET_CHECK(!cc->Outputs().GetTags().empty());
if (cc->Inputs().HasTag(kInputStreamTag)) {
cc->Inputs().Tag(kInputStreamTag).Set<std::vector<TfLiteTensor>>();
}
if (cc->Outputs().HasTag(kOutputStreamTag)) {
cc->Outputs().Tag(kOutputStreamTag).Set<FrameAnnotation>();
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToObjectsCalculator::Open(
CalculatorContext* cc) {
MP_RETURN_IF_ERROR(LoadOptions(cc));
// clang-format off
projection_matrix_ <<
1.5731, 0, 0, 0,
0, 2.0975, 0, 0,
0, 0, -1.0002, -0.2,
0, 0, -1, 0;
// clang-format on
decoder_ = absl::make_unique<Decoder>(
BeliefDecoderConfig(options_.decoder_config()));
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToObjectsCalculator::Process(
CalculatorContext* cc) {
if (cc->Inputs().Tag(kInputStreamTag).IsEmpty()) {
return ::mediapipe::OkStatus();
}
auto output_objects = absl::make_unique<FrameAnnotation>();
MP_RETURN_IF_ERROR(ProcessCPU(cc, output_objects.get()));
// Output
if (cc->Outputs().HasTag(kOutputStreamTag)) {
cc->Outputs()
.Tag(kOutputStreamTag)
.Add(output_objects.release(), cc->InputTimestamp());
}
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToObjectsCalculator::ProcessCPU(
CalculatorContext* cc, FrameAnnotation* output_objects) {
const auto& input_tensors =
cc->Inputs().Tag(kInputStreamTag).Get<std::vector<TfLiteTensor>>();
cv::Mat prediction_heatmap = ConvertTfliteTensorToCvMat(input_tensors[0]);
cv::Mat offsetmap = ConvertTfliteTensorToCvMat(input_tensors[1]);
*output_objects =
decoder_->DecodeBoundingBoxKeypoints(prediction_heatmap, offsetmap);
auto status = decoder_->Lift2DTo3D(projection_matrix_, /*portrait*/ true,
output_objects);
if (!status.ok()) {
LOG(ERROR) << status;
return status;
}
Project3DTo2D(/*portrait*/ true, output_objects);
AssignObjectIdAndTimestamp(cc->InputTimestamp().Microseconds(),
output_objects);
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToObjectsCalculator::Close(
CalculatorContext* cc) {
return ::mediapipe::OkStatus();
}
::mediapipe::Status TfLiteTensorsToObjectsCalculator::LoadOptions(
CalculatorContext* cc) {
// Get calculator options specified in the graph.
options_ =
cc->Options<::mediapipe::TfLiteTensorsToObjectsCalculatorOptions>();
num_classes_ = options_.num_classes();
num_keypoints_ = options_.num_keypoints();
// Currently only 2D keypoints are supported, i.e. num_values_per_keypoint must
// equal 2.
CHECK_EQ(options_.num_values_per_keypoint(), 2);
return ::mediapipe::OkStatus();
}
void TfLiteTensorsToObjectsCalculator::Project3DTo2D(
bool portrait, FrameAnnotation* annotation) const {
for (auto& ann : *annotation->mutable_annotations()) {
for (auto& key_point : *ann.mutable_keypoints()) {
Eigen::Vector4f point3d;
point3d << key_point.point_3d().x(), key_point.point_3d().y(),
key_point.point_3d().z(), 1.0f;
Eigen::Vector4f point3d_projection = projection_matrix_ * point3d;
float u, v;
const float inv_w = 1.0f / point3d_projection(3);
if (portrait) {
u = (point3d_projection(1) * inv_w + 1.0f) * 0.5f;
v = (point3d_projection(0) * inv_w + 1.0f) * 0.5f;
} else {
u = (point3d_projection(0) * inv_w + 1.0f) * 0.5f;
v = (1.0f - point3d_projection(1) * inv_w) * 0.5f;
}
key_point.mutable_point_2d()->set_x(u);
key_point.mutable_point_2d()->set_y(v);
}
}
}
void TfLiteTensorsToObjectsCalculator::AssignObjectIdAndTimestamp(
int64 timestamp_us, FrameAnnotation* annotation) {
for (auto& ann : *annotation->mutable_annotations()) {
ann.set_object_id(GetNextObjectId());
}
annotation->set_timestamp(timestamp_us);
}
} // namespace mediapipe

View File

@ -0,0 +1,39 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The option proto for the TfLiteTensorsToObjectsCalculatorOptions.
syntax = "proto2";
package mediapipe;
import "mediapipe/framework/calculator.proto";
import "mediapipe/graphs/object_detection_3d/calculators/belief_decoder_config.proto";
message TfLiteTensorsToObjectsCalculatorOptions {
extend CalculatorOptions {
optional TfLiteTensorsToObjectsCalculatorOptions ext = 263667646;
}
// The number of output classes predicted by the detection model.
optional int32 num_classes = 1;
// The number of predicted keypoints.
optional int32 num_keypoints = 2;
// The dimension of each keypoint, i.e. the number of values predicted for
// each keypoint.
optional int32 num_values_per_keypoint = 3 [default = 2];
optional BeliefDecoderConfig decoder_config = 4;
}
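
When a graph is assembled in C++ rather than from a .pbtxt, the same options can be set through the proto2 extension declared above. A minimal sketch (the stream names are placeholders chosen for illustration):

// Sketch: configures a TfLiteTensorsToObjectsCalculator node in C++ using the
// proto2 extension declared above.
#include "mediapipe/framework/calculator.pb.h"
#include "mediapipe/graphs/object_detection_3d/calculators/tflite_tensors_to_objects_calculator.pb.h"

mediapipe::CalculatorGraphConfig::Node MakeDecoderNode() {
  mediapipe::CalculatorGraphConfig::Node node;
  node.set_calculator("TfLiteTensorsToObjectsCalculator");
  node.add_input_stream("TENSORS:detection_tensors");
  node.add_output_stream("ANNOTATIONS:objects");
  auto* opts = node.mutable_options()->MutableExtension(
      mediapipe::TfLiteTensorsToObjectsCalculatorOptions::ext);
  opts->set_num_classes(1);
  opts->set_num_keypoints(9);
  opts->mutable_decoder_config()->set_heatmap_threshold(0.6f);
  return node;
}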

View File

@ -0,0 +1,56 @@
// Copyright 2020 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_TYPES_H_
#define MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_TYPES_H_
#include <array>
#include "Eigen/Geometry"
namespace mediapipe {
using Eigen::Map;
using Eigen::Vector2f;
using Eigen::Vector3f;
using Eigen::Vector4f;
using Matrix4f_RM = Eigen::Matrix<float, 4, 4, Eigen::RowMajor>;
using Matrix3f_RM = Eigen::Matrix<float, 3, 3, Eigen::RowMajor>;
using Face = std::array<int, 4>;
struct SuperPoint {
enum PointSourceType { kPointCloud = 0, kBoundingBox = 1, kSkeleton = 2 };
// The id of the point in the point-cloud
int reference_point;
// The source of the
PointSourceType source;
// The id of the point in set of points in current frame
int id;
// If source is kBoundingBox or kSkeleton, object_id stores the id of which \
// object this point belongs to.
int object_id;
// projected u-v value
Vector2f uv;
Vector2f pixel;
// the 3D point
Vector3f point_3d;
// Color
Eigen::Matrix<unsigned char, 4, 1> color;
bool rendered;
};
} // namespace mediapipe
#endif // MEDIAPIPE_GRAPHS_OBJECT_DETECTION_3D_TYPES_H_

View File

@ -0,0 +1,133 @@
# MediaPipe object detection 3D with tracking graph.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
input_stream: "input_width"
input_stream: "input_height"
output_stream: "output_video"
# Crops the image from the center to the size WIDTHxHEIGHT.
node: {
calculator: "ImageCroppingCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:input_video_4x3"
input_stream: "WIDTH:input_width"
input_stream: "HEIGHT:input_height"
node_options: {
[type.googleapis.com/mediapipe.ImageCroppingCalculatorOptions] {
border_mode: BORDER_REPLICATE
}
}
}
# Creates a copy of the input_video stream. At the end of the graph, the
# GlAnimationOverlayCalculator will consume the input_video texture and draw
# on top of it.
node: {
calculator: "GlScalerCalculator"
input_stream: "VIDEO:input_video_4x3"
output_stream: "VIDEO:input_video_copy"
}
# Resamples the images to a specific frame rate. This calculator is used to
# control the frequency of subsequent calculators/subgraphs, e.g. to reduce
# power consumption for expensive processing.
node {
calculator: "PacketResamplerCalculator"
input_stream: "DATA:input_video_copy"
output_stream: "DATA:sampled_input_video"
node_options: {
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
frame_rate: 5
}
}
}
node {
calculator: "ObjectronDetectionSubgraphGpu"
input_stream: "IMAGE_GPU:sampled_input_video"
output_stream: "ANNOTATIONS:objects"
}
node {
calculator: "ObjectronTrackingSubgraphGpu"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "IMAGE_GPU:input_video_copy"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
}
# The rendering nodes:
# We are rendering two meshes: 1) a 3D bounding box, which we overlay directly
# on the texture, and 2) a shoe CAD model, which we use as an occlusion mask.
# These models are designed using different tools, so we supply a transformation
# to bring both of them to the Objectron's coordinate system.
# Creates model matrices for the tracked object given the lifted 3D points.
# This calculator does two things: 1) estimates the object's pose (orientation,
# translation, and scale) from the 3D vertices, and
# 2) brings the object from the Objectron coordinate system to the renderer
# (OpenGL) coordinate system. Since the final goal is to render a mesh file on
# top of the object, we also supply a transformation to bring the mesh to the
# Objectron coordinate system, and rescale the mesh to unit size.
node {
calculator: "AnnotationsToModelMatricesCalculator"
input_stream: "ANNOTATIONS:lifted_tracked_objects"
output_stream: "MODEL_MATRICES:model_matrices"
node_options: {
[type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
# Re-scale the CAD model to the size of a unit box
model_scale: [0.05, 0.05, 0.05]
# Bring the box CAD model to objectron's coordinate system. This
# is equivalent to a -pi/2 rotation about the y-axis (right-hand rule):
# Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitY())
model_transformation: [0.0, 0.0, -1.0, 0.0]
model_transformation: [0.0, 1.0, 0.0, 0.0]
model_transformation: [1.0, 0.0, 0.0, 0.0]
model_transformation: [0.0, 0.0, 0.0, 1.0]
}
}
}
# Compute the model matrices for the CAD model of the chair, to be used as an
# occlusion mask. The model will be rendered at the exact same location as the
# bounding box.
node {
calculator: "AnnotationsToModelMatricesCalculator"
input_stream: "ANNOTATIONS:lifted_tracked_objects"
output_stream: "MODEL_MATRICES:mask_model_matrices"
node_options: {
[type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
# Re-scale the CAD model to the size of a unit box
model_scale: [0.15, 0.1, 0.15]
# Bring the shoe CAD model to Deep Pursuit 3D's coordinate system. This
# is equivalent to a -pi/2 rotation about the x-axis:
# Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitX())
model_transformation: [1.0, 0.0, 0.0, 0.0]
model_transformation: [0.0, 1.0, 0.0, -10.0]
model_transformation: [0.0, 0.0, -1.0, 0.0]
model_transformation: [0.0, 0.0, 0.0, 1.0]
}
}
}
# Render everything together. First we render the 3D bounding box animation,
# then we render the occlusion mask.
node:{
calculator:"GlAnimationOverlayCalculator"
input_stream:"VIDEO:input_video_4x3"
input_stream:"MODEL_MATRICES:model_matrices"
input_stream:"MASK_MODEL_MATRICES:mask_model_matrices"
output_stream:"output_video"
input_side_packet:"TEXTURE:box_texture"
input_side_packet:"ANIMATION_ASSET:box_asset_name"
input_side_packet:"MASK_TEXTURE:obj_texture"
input_side_packet:"MASK_ASSET:obj_asset_name"
node_options: {
[type.googleapis.com/mediapipe.GlAnimationOverlayCalculatorOptions] {
# Output resolution is 480x640 with the aspect ratio of 0.75
aspect_ratio: 0.75
vertical_fov_degrees: 70.
animation_speed_fps: 25
}
}
}
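
The model_transformation rows in the first AnnotationsToModelMatricesCalculator node above are the homogeneous form of the Eigen::AngleAxisf rotation quoted in the comment. A quick standalone check that reproduces them:

// Sketch: builds the 4x4 model_transformation used above from a -pi/2 rotation
// about the y-axis (right-hand rule), as in the pbtxt comment:
//   Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitY())
#include <cmath>
#include <iostream>
#include "Eigen/Geometry"

int main() {
  Eigen::Matrix4f transform = Eigen::Matrix4f::Identity();
  transform.topLeftCorner<3, 3>() =
      Eigen::AngleAxisf(static_cast<float>(-M_PI / 2.0),
                        Eigen::Vector3f::UnitY())
          .toRotationMatrix();
  // Prints, up to floating-point rounding:
  //    0  0 -1  0
  //    0  1  0  0
  //    1  0  0  0
  //    0  0  0  1
  std::cout << transform << std::endl;
  return 0;
}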

View File

@ -0,0 +1,134 @@
# MediaPipe object detection 3D with tracking graph.
# Images on GPU coming into and out of the graph.
input_stream: "input_video"
input_stream: "input_width"
input_stream: "input_height"
output_stream: "output_video"
# Crops the image from the center to the size WIDTHxHEIGHT.
node: {
calculator: "ImageCroppingCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:input_video_4x3"
input_stream: "WIDTH:input_width"
input_stream: "HEIGHT:input_height"
node_options: {
[type.googleapis.com/mediapipe.ImageCroppingCalculatorOptions] {
border_mode: BORDER_REPLICATE
}
}
}
# Creates a copy of the input_video stream. At the end of the graph, the
# GlAnimationOverlayCalculator will consume the input_video texture and draw
# on top of it.
node: {
calculator: "GlScalerCalculator"
input_stream: "VIDEO:input_video_4x3"
output_stream: "VIDEO:input_video_copy"
}
# Resamples the images to a specific frame rate. This calculator is used to
# control the frequency of subsequent calculators/subgraphs, e.g. to reduce
# power consumption for expensive processing.
node {
calculator: "PacketResamplerCalculator"
input_stream: "DATA:input_video_copy"
output_stream: "DATA:sampled_input_video"
node_options: {
[type.googleapis.com/mediapipe.PacketResamplerCalculatorOptions] {
frame_rate: 5
}
}
}
node {
calculator: "ObjectronDetectionSubgraphGpu"
input_stream: "IMAGE_GPU:sampled_input_video"
output_stream: "ANNOTATIONS:objects"
}
node {
calculator: "ObjectronTrackingSubgraphGpu"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "IMAGE_GPU:input_video_copy"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
}
# The rendering nodes:
# We are rendering two meshes: 1) a 3D bounding box, which we overlay directly
# on the texture, and 2) a shoe CAD model, which we use as an occlusion mask.
# These models are designed using different tools, so we supply a transformation
# to bring both of them to the Objectron's coordinate system.
# Creates model matrices for the tracked object given the lifted 3D points.
# This calculator does two things: 1) estimates the object's pose (orientation,
# translation, and scale) from the 3D vertices, and
# 2) brings the object from the Objectron coordinate system to the renderer
# (OpenGL) coordinate system. Since the final goal is to render a mesh file on
# top of the object, we also supply a transformation to bring the mesh to the
# Objectron coordinate system, and rescale the mesh to unit size.
node {
calculator: "AnnotationsToModelMatricesCalculator"
input_stream: "ANNOTATIONS:lifted_tracked_objects"
output_stream: "MODEL_MATRICES:model_matrices"
node_options: {
[type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
# Re-scale the CAD model to the size of a unit box
model_scale: [0.05, 0.05, 0.05]
# Bring the box CAD model to objectron's coordinate system. This
# is equivalent to a -pi/2 rotation about the y-axis (right-hand rule):
# Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitY())
model_transformation: [0.0, 0.0, -1.0, 0.0]
model_transformation: [0.0, 1.0, 0.0, 0.0]
model_transformation: [1.0, 0.0, 0.0, 0.0]
model_transformation: [0.0, 0.0, 0.0, 1.0]
}
}
}
# Compute the model matrices for the CAD model of the shoe, to be used as an
# occlusion mask. The model will be rendered at the exact same location as the
# bounding box.
node {
calculator: "AnnotationsToModelMatricesCalculator"
input_stream: "ANNOTATIONS:lifted_tracked_objects"
output_stream: "MODEL_MATRICES:mask_model_matrices"
#input_side_packet: "MODEL_SCALE:model_scale"
node_options: {
[type.googleapis.com/mediapipe.AnnotationsToModelMatricesCalculatorOptions] {
# Re-scale the CAD model to the size of a unit box
model_scale: [0.45, 0.25, 0.15]
# Bring the shoe CAD model to Deep Pursuit 3D's coordinate system. This
# is equivalent to a -pi/2 rotation about the x-axis (right-hand rule):
# Eigen::AngleAxisf(-M_PI / 2., Eigen::Vector3f::UnitX())
model_transformation: [1.0, 0.0, 0.0, 0.0]
model_transformation: [0.0, 0.0, 1.0, 0.0]
model_transformation: [0.0, -1.0, 0.0, 0.0]
model_transformation: [0.0, 0.0, 0.0, 1.0]
}
}
}
# Render everything together. First we render the 3D bounding box animation,
# then we render the occlusion mask.
node: {
calculator: "GlAnimationOverlayCalculator"
input_stream: "VIDEO:input_video_4x3"
input_stream: "MODEL_MATRICES:model_matrices"
input_stream: "MASK_MODEL_MATRICES:mask_model_matrices"
output_stream: "output_video"
input_side_packet: "TEXTURE:box_texture"
input_side_packet: "ANIMATION_ASSET:box_asset_name"
input_side_packet: "MASK_TEXTURE:obj_texture"
input_side_packet: "MASK_ASSET:obj_asset_name"
node_options: {
[type.googleapis.com/mediapipe.GlAnimationOverlayCalculatorOptions] {
# Output resolution is 480x640 with the aspect ratio of 0.75
aspect_ratio: 0.75
vertical_fov_degrees: 70.
animation_speed_fps: 25
}
}
}

View File

@ -0,0 +1,52 @@
# Copyright 2020 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_simple_subgraph",
)
licenses(["notice"]) # Apache 2.0
package(default_visibility = ["//visibility:public"])
mediapipe_simple_subgraph(
name = "objectron_detection_gpu",
graph = "objectron_detection_gpu.pbtxt",
register_as = "ObjectronDetectionSubgraphGpu",
deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/tflite:tflite_converter_calculator",
"//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator",
"//mediapipe/calculators/tflite:tflite_inference_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:tflite_tensors_to_objects_calculator",
],
)
mediapipe_simple_subgraph(
name = "objectron_tracking_gpu",
graph = "objectron_tracking_gpu.pbtxt",
register_as = "ObjectronTrackingSubgraphGpu",
deps = [
"//mediapipe/calculators/image:image_transformation_calculator",
"//mediapipe/calculators/video:box_tracker_calculator",
"//mediapipe/calculators/video:flow_packager_calculator",
"//mediapipe/calculators/video:motion_analysis_calculator",
"//mediapipe/framework/stream_handler:sync_set_input_stream_handler",
"//mediapipe/gpu:gpu_buffer_to_image_frame_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:frame_annotation_to_timed_box_list_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:frame_annotation_tracker_calculator",
"//mediapipe/graphs/object_detection_3d/calculators:lift_2d_frame_annotation_to_3d_calculator",
],
)

View File

@ -0,0 +1,81 @@
# MediaPipe Objectron detection gpu subgraph
type: "ObjectronDetectionSubgraphGpu"
input_stream: "IMAGE_GPU:input_video"
output_stream: "ANNOTATIONS:objects"
# Transforms the input image on GPU to a 480x640 image. To scale the input
# image, the scale_mode option is set to FIT to preserve the aspect ratio,
# resulting in potential letterboxing in the transformed image.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 480
output_height: 640
scale_mode: FIT
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Generates a single side packet containing a TensorFlow Lite op resolver that
# supports custom ops needed by the model used in this graph.
node {
calculator: "TfLiteCustomOpResolverCalculator"
output_side_packet: "opresolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] {
use_gpu: true
}
}
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS:detection_tensors"
input_side_packet: "CUSTOM_OP_RESOLVER:opresolver"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "object_detection_3d.tflite"
}
}
}
# Decodes the model's output tensor (the heatmap and the distance fields) to 2D
# keypoints. There are nine 2D keypoints: one center keypoint and eight vertices
# for the 3D bounding box. The calculator parameters determine the decoder's
# sensitivity.
node {
calculator: "TfLiteTensorsToObjectsCalculator"
input_stream: "TENSORS:detection_tensors"
output_stream: "ANNOTATIONS:objects"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToObjectsCalculatorOptions] {
num_classes: 1
num_keypoints: 9
decoder_config {
heatmap_threshold: 0.6
local_max_distance: 2
offset_scale_coef: 1.0
voting_radius: 2
voting_allowance: 1
voting_threshold: 0.2
}
}
}
}

View File

@ -0,0 +1,170 @@
# MediaPipe Objectron tracking gpu subgraph
type: "ObjectronTrackingSubgraphGpu"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "IMAGE_GPU:input_video"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
# Converts the detected keypoints to Boxes, used by the tracking subgraph.
node {
calculator: "FrameAnnotationToTimedBoxListCalculator"
input_stream: "FRAME_ANNOTATION:objects"
output_stream: "BOXES:start_pos"
}
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:input_video"
output_stream: "IMAGE_GPU:downscaled_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 240
output_height: 320
}
}
}
# Converts the GPU buffer to an ImageFrame for the tracking computation.
node: {
calculator: "GpuBufferToImageFrameCalculator"
input_stream: "downscaled_input_video"
output_stream: "downscaled_input_video_cpu"
}
# Performs motion analysis on an incoming video stream.
node: {
calculator: "MotionAnalysisCalculator"
input_stream: "VIDEO:downscaled_input_video_cpu"
output_stream: "CAMERA:camera_motion"
output_stream: "FLOW:region_flow"
node_options: {
[type.googleapis.com/mediapipe.MotionAnalysisCalculatorOptions]: {
analysis_options {
analysis_policy: ANALYSIS_POLICY_CAMERA_MOBILE
flow_options {
fast_estimation_min_block_size: 100
top_inlier_sets: 1
frac_inlier_error_threshold: 3e-3
downsample_mode: DOWNSAMPLE_TO_INPUT_SIZE
verification_distance: 5.0
verify_long_feature_acceleration: true
verify_long_feature_trigger_ratio: 0.1
tracking_options {
max_features: 500
adaptive_extraction_levels: 2
min_eig_val_settings {
adaptive_lowest_quality_level: 2e-4
}
klt_tracker_implementation: KLT_OPENCV
}
}
}
}
}
}
# Reads optical flow fields defined in
# mediapipe/framework/formats/motion/optical_flow_field.h and returns a
# VideoFrame with 2 channels (v_x and v_y); each channel is quantized
# to 0-255.
node: {
calculator: "FlowPackagerCalculator"
input_stream: "FLOW:region_flow"
input_stream: "CAMERA:camera_motion"
output_stream: "TRACKING:tracking_data"
node_options: {
[type.googleapis.com/mediapipe.FlowPackagerCalculatorOptions]: {
flow_packager_options: {
binary_tracking_data_support: false
}
}
}
}
# Tracks box positions over time.
node: {
calculator: "BoxTrackerCalculator"
input_stream: "TRACKING:tracking_data"
input_stream: "TRACK_TIME:input_video"
input_stream: "START_POS:start_pos"
input_stream: "CANCEL_OBJECT_ID:cancel_object_id"
input_stream_info: {
tag_index: "CANCEL_OBJECT_ID"
back_edge: true
}
output_stream: "BOXES:boxes"
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "TRACKING"
tag_index: "TRACK_TIME"
}
sync_set {
tag_index: "START_POS"
}
sync_set {
tag_index: "CANCEL_OBJECT_ID"
}
}
}
}
node_options: {
[type.googleapis.com/mediapipe.BoxTrackerCalculatorOptions]: {
tracker_options: {
track_step_options {
track_object_and_camera: true
tracking_degrees: TRACKING_DEGREE_OBJECT_ROTATION_SCALE
inlier_spring_force: 0.0
static_motion_temporal_ratio: 3e-2
}
}
visualize_tracking_data: false
streaming_track_data_cache_size: 100
}
}
}
# Consolidates tracking and detection results.
node {
calculator: "FrameAnnotationTrackerCalculator"
input_stream: "FRAME_ANNOTATION:objects"
input_stream: "TRACKED_BOXES:boxes"
output_stream: "TRACKED_FRAME_ANNOTATION:tracked_objects"
output_stream: "CANCEL_OBJECT_ID:cancel_object_id"
node_options: {
[type.googleapis.com/mediapipe.FrameAnnotationTrackerCalculatorOptions] {
img_width: 240
img_height: 320
iou_threshold: 0.1
}
}
input_stream_handler {
input_stream_handler: "SyncSetInputStreamHandler"
options {
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
sync_set {
tag_index: "FRAME_ANNOTATION"
}
sync_set {
tag_index: "TRACKED_BOXES"
}
}
}
}
}
# Lifts the tracked 2D keypoints to 3D using the EPnP algorithm.
node {
calculator: "Lift2DFrameAnnotationTo3DCalculator"
input_stream: "FRAME_ANNOTATION:tracked_objects"
output_stream: "LIFTED_FRAME_ANNOTATION:lifted_tracked_objects"
}

View File

@ -67,15 +67,19 @@ public class CameraXPreviewHelper extends CameraHelper {
private int cameraTimestampSource = CameraCharacteristics.SENSOR_INFO_TIMESTAMP_SOURCE_UNKNOWN;
@Override
@SuppressWarnings("RestrictTo") // See b/132705545.
public void startCamera(
Activity context, CameraFacing cameraFacing, SurfaceTexture surfaceTexture) {
startCamera(context, cameraFacing, surfaceTexture, TARGET_SIZE);
}
public void startCamera(
Activity context, CameraFacing cameraFacing, SurfaceTexture surfaceTexture, Size targetSize) {
LensFacing cameraLensFacing =
cameraFacing == CameraHelper.CameraFacing.FRONT ? LensFacing.FRONT : LensFacing.BACK;
PreviewConfig previewConfig =
new PreviewConfig.Builder()
.setLensFacing(cameraLensFacing)
.setTargetResolution(TARGET_SIZE)
.setTargetResolution(targetSize)
.build();
preview = new Preview(previewConfig);
@ -110,7 +114,6 @@ public class CameraXPreviewHelper extends CameraHelper {
}
});
CameraX.bindToLifecycle(/*lifecycleOwner=*/ (LifecycleOwner) context, preview);
}
@Override
@ -210,6 +213,10 @@ public class CameraXPreviewHelper extends CameraHelper {
return focalLengthPixels;
}
public Size getFrameSize() {
return frameSize;
}
// Computes the focal length of the camera in pixels based on lens and sensor properties.
private float calculateFocalLengthInPixels() {
// Focal length of the camera in millimeters.

Binary file not shown.

Binary file not shown.

View File

@ -41,3 +41,37 @@ cc_library(
"@org_tensorflow//tensorflow/lite/kernels:builtin_ops",
],
)
cc_library(
name = "tensor_buffer",
srcs = ["tensor_buffer.cc"],
hdrs = ["tensor_buffer.h"],
deps = [
"@org_tensorflow//tensorflow/lite:framework",
"@com_google_absl//absl/memory",
"//mediapipe/framework:port",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//mediapipe:ios": [
"//mediapipe/gpu:MPPMetalUtil",
"//mediapipe/gpu:gl_base",
],
"//conditions:default": [
"@org_tensorflow//tensorflow/lite/delegates/gpu/gl:gl_buffer",
"//mediapipe/gpu:gl_base",
"//mediapipe/gpu:gl_context",
],
}),
)
cc_test(
name = "tensor_buffer_test",
srcs = ["tensor_buffer_test.cc"],
deps = [
":tensor_buffer",
"//mediapipe/framework/port:gtest_main",
] + select({
"//mediapipe/gpu:disable_gpu": [],
"//conditions:default": [],
}),
)

View File

@ -0,0 +1,43 @@
#include "mediapipe/util/tflite/tensor_buffer.h"
namespace mediapipe {
TensorBuffer::TensorBuffer() {}
TensorBuffer::~TensorBuffer() { uses_gpu_ = false; }
TensorBuffer::TensorBuffer(TfLiteTensor& tensor) {
cpu_ = tensor;
uses_gpu_ = false;
}
#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
TensorBuffer::TensorBuffer(std::shared_ptr<tflite::gpu::gl::GlBuffer> tensor) {
gpu_ = std::move(tensor);
uses_gpu_ = true;
}
// static
std::shared_ptr<tflite::gpu::gl::GlBuffer> TensorBuffer::CreateGlBuffer(
std::shared_ptr<mediapipe::GlContext> context) {
std::shared_ptr<tflite::gpu::gl::GlBuffer> ptr(
new tflite::gpu::gl::GlBuffer, [context](tflite::gpu::gl::GlBuffer* ref) {
if (context) {
context->Run([ref]() {
if (ref) delete ref;
});
} else {
if (ref) delete ref; // No context provided.
}
});
return ptr;
}
#endif // MEDIAPIPE_DISABLE_GL_COMPUTE
#if defined(MEDIAPIPE_IOS)
TensorBuffer::TensorBuffer(id<MTLBuffer> tensor) {
gpu_ = tensor;
uses_gpu_ = true;
}
#endif // MEDIAPIPE_IOS
} // namespace mediapipe
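
A minimal CPU-side usage sketch (not part of this change; the interpreter plumbing is assumed). The GPU constructors follow the same pattern with a GlBuffer or MTLBuffer instead:

// Sketch: wraps an interpreter's first output tensor in a TensorBuffer (CPU
// path). TensorBuffer(TfLiteTensor&) stores the tensor on the CPU side (see
// cpu_ = tensor above), so the underlying tensor data must outlive its use.
#include "mediapipe/util/tflite/tensor_buffer.h"
#include "tensorflow/lite/interpreter.h"

mediapipe::TensorBuffer WrapOutputTensor(tflite::Interpreter* interpreter) {
  TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
  return mediapipe::TensorBuffer(*tensor);
}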

Some files were not shown because too many files have changed in this diff.