From 1722d4b8a25ad7c919576f9b1bab4ffa7a9299bc Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 20 Mar 2020 13:09:58 -0700 Subject: [PATCH] Project import generated by Copybara. GitOrigin-RevId: 43cd697ec87dcc5cab5051f27960bb77a057399d --- WORKSPACE | 6 +- mediapipe/calculators/core/BUILD | 13 +- .../begin_end_loop_calculator_graph_test.cc | 389 ++- .../calculators/core/begin_loop_calculator.cc | 6 + .../calculators/core/begin_loop_calculator.h | 20 +- .../core/clip_vector_size_calculator.cc | 5 + .../calculators/core/end_loop_calculator.cc | 5 + .../core/previous_loopback_calculator.cc | 148 +- .../core/previous_loopback_calculator_test.cc | 551 ++- .../core/split_vector_calculator.cc | 10 + mediapipe/calculators/image/BUILD | 3 + .../calculators/image/recolor_calculator.cc | 79 +- mediapipe/calculators/tensorflow/BUILD | 1 + mediapipe/calculators/tflite/BUILD | 41 + .../tflite/tflite_inference_calculator.cc | 131 +- .../tflite/tflite_inference_calculator.proto | 6 + .../tflite_inference_calculator_test.cc | 85 +- .../tflite/tflite_model_calculator.cc | 86 + .../tflite/tflite_model_calculator_test.cc | 88 + ...te_tensors_to_classification_calculator.cc | 39 +- ...tensors_to_classification_calculator.proto | 6 + mediapipe/calculators/util/BUILD | 14 + .../collection_has_min_size_calculator.cc | 8 + ...collection_has_min_size_calculator_test.cc | 156 + .../util/filter_collection_calculator.cc | 5 + .../landmarks_to_render_data_calculator.cc | 233 +- .../util/packet_latency_calculator_test.cc | 1 + mediapipe/calculators/video/BUILD | 26 +- .../java/com/google/mediapipe/apps/METADATA | 7 + .../polynomial_regression_path_solver.cc | 11 +- .../polynomial_regression_path_solver.h | 4 +- .../examples/desktop/hair_segmentation/BUILD | 14 + mediapipe/framework/BUILD | 1 + mediapipe/framework/calculator_contract.h | 27 +- mediapipe/framework/calculator_graph.h | 2 +- .../framework/calculator_graph_bounds_test.cc | 275 +- mediapipe/framework/calculator_node.cc | 9 + mediapipe/framework/deps/registration.cc | 4 + mediapipe/framework/formats/annotation/BUILD | 7 + .../formats/annotation/rasterization.proto | 3 + .../framework/formats/location_data.proto | 3 + mediapipe/framework/input_stream_handler.cc | 90 + mediapipe/framework/input_stream_handler.h | 62 +- .../framework/legacy_calculator_support.h | 3 + mediapipe/framework/output_stream_handler.h | 7 +- mediapipe/framework/output_stream_poller.h | 3 + .../testdata/profile_latency_test.pbtxt | 97 + .../testdata/profile_process_test.pbtxt | 122 + mediapipe/framework/scheduler_queue.cc | 4 + .../default_input_stream_handler.cc | 53 +- .../default_input_stream_handler.h | 3 + .../immediate_input_stream_handler.cc | 79 +- .../sync_set_input_stream_handler.cc | 70 +- mediapipe/framework/timestamp.cc | 1 - mediapipe/framework/timestamp.h | 6 + mediapipe/framework/validated_graph_config.cc | 21 +- mediapipe/gpu/gl_base.h | 5 +- mediapipe/gpu/gl_calculator_helper_impl.h | 4 + .../gpu/gl_calculator_helper_impl_common.cc | 17 +- mediapipe/gpu/gl_context.cc | 72 +- mediapipe/gpu/gl_context.h | 10 + mediapipe/gpu/gl_simple_shaders.cc | 26 +- mediapipe/graphs/hair_segmentation/BUILD | 21 +- .../hair_segmentation_desktop_live.pbtxt | 152 + .../com/google/mediapipe/mediapipe_aar.bzl | 30 + mediapipe/objc/BUILD | 49 +- mediapipe/util/annotation_renderer.cc | 2 +- mediapipe/util/sequence/BUILD | 2 - setup_opencv.sh | 6 +- ...6366bcadab23a25c773b3ed405bac8ded4d0d.diff | 112 + ...31e324c8de6b52f752a39cb161d99d853ca99.diff | 3083 +++++++++++++++++ 71 files 
changed, 6114 insertions(+), 626 deletions(-) create mode 100644 mediapipe/calculators/tflite/tflite_model_calculator.cc create mode 100644 mediapipe/calculators/tflite/tflite_model_calculator_test.cc create mode 100644 mediapipe/calculators/util/collection_has_min_size_calculator_test.cc create mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA create mode 100644 mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt create mode 100644 mediapipe/framework/profiler/testdata/profile_process_test.pbtxt create mode 100644 mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt create mode 100644 third_party/org_tensorflow_9696366bcadab23a25c773b3ed405bac8ded4d0d.diff create mode 100644 third_party/org_tensorflow_cfc31e324c8de6b52f752a39cb161d99d853ca99.diff diff --git a/WORKSPACE b/WORKSPACE index 411f21b94..eb2b07c4d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -129,7 +129,11 @@ http_archive( ], # A compatibility patch patches = [ - "@//third_party:org_tensorflow_528e22eae8bf3206189a066032c66e9e5c9b4a61.diff" + "@//third_party:org_tensorflow_528e22eae8bf3206189a066032c66e9e5c9b4a61.diff", + # Updates for XNNPACK: https://github.com/tensorflow/tensorflow/commit/cfc31e324c8de6b52f752a39cb161d99d853ca99 + "@//third_party:org_tensorflow_cfc31e324c8de6b52f752a39cb161d99d853ca99.diff", + # CpuInfo's build rule fixes. + "@//third_party:org_tensorflow_9696366bcadab23a25c773b3ed405bac8ded4d0d.diff", ], patch_args = [ "-p1", diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index c3074c2c8..65c426489 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -228,6 +228,7 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework:collection_item_id", "//mediapipe/framework:packet", + "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:integral_types", @@ -249,6 +250,7 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework:collection_item_id", "//mediapipe/framework:packet", + "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:integral_types", @@ -265,10 +267,11 @@ cc_test( deps = [ ":begin_loop_calculator", ":end_loop_calculator", - "//mediapipe/calculators/core:packet_cloner_calculator", + ":gate_calculator", "//mediapipe/framework:calculator_context", "//mediapipe/framework:calculator_contract", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", @@ -334,6 +337,7 @@ cc_library( deps = [ ":clip_vector_size_calculator_cc_proto", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -693,15 +697,17 @@ cc_test( name = "previous_loopback_calculator_test", srcs = ["previous_loopback_calculator_test.cc"], deps = [ + ":gate_calculator", + ":make_pair_calculator", + ":pass_through_calculator", ":previous_loopback_calculator", - "//mediapipe/calculators/core:make_pair_calculator", - "//mediapipe/calculators/core:pass_through_calculator", 
"//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework:timestamp", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", "//mediapipe/framework/stream_handler:immediate_input_stream_handler", "//mediapipe/framework/tool:sink", "@com_google_absl//absl/time", @@ -769,6 +775,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":split_vector_calculator_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", diff --git a/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc b/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc index 03e1a4439..716151b69 100644 --- a/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc +++ b/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc @@ -20,6 +20,8 @@ #include "mediapipe/calculators/core/end_loop_calculator.h" #include "mediapipe/framework/calculator_contract.h" #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/integral_types.h" #include "mediapipe/framework/port/parse_text_proto.h" @@ -28,6 +30,13 @@ namespace mediapipe { namespace { +MATCHER_P2(PacketOfIntsEq, timestamp, value, "") { + Timestamp actual_timestamp = arg.Timestamp(); + const auto& actual_value = arg.template Get>(); + return testing::Value(actual_timestamp, testing::Eq(timestamp)) && + testing::Value(actual_value, testing::ElementsAreArray(value)); +} + typedef BeginLoopCalculator> BeginLoopIntegerCalculator; REGISTER_CALCULATOR(BeginLoopIntegerCalculator); @@ -59,8 +68,8 @@ REGISTER_CALCULATOR(EndLoopIntegersCalculator); class BeginEndLoopCalculatorGraphTest : public ::testing::Test { protected: - BeginEndLoopCalculatorGraphTest() { - graph_config_ = ParseTextProtoOrDie( + void SetUp() override { + auto graph_config = ParseTextProtoOrDie( R"( num_threads: 4 input_stream: "ints" @@ -82,94 +91,222 @@ class BeginEndLoopCalculatorGraphTest : public ::testing::Test { output_stream: "ITERABLE:ints_plus_one" } )"); - tool::AddVectorSink("ints_plus_one", &graph_config_, &output_packets_); + tool::AddVectorSink("ints_plus_one", &graph_config, &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config)); + MP_ASSERT_OK(graph_.StartRun({})); } - CalculatorGraphConfig graph_config_; + void SendPacketOfInts(Timestamp timestamp, std::vector ints) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "ints", MakePacket>(std::move(ints)).At(timestamp))); + } + + CalculatorGraph graph_; std::vector output_packets_; }; +TEST_F(BeginEndLoopCalculatorGraphTest, InputStreamForIterableIsEmpty) { + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + // EndLoopCalc will forward the timestamp bound because there are no packets + // to process. 
+ ASSERT_EQ(0, output_packets_.size()); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + TEST_F(BeginEndLoopCalculatorGraphTest, SingleEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); - Timestamp input_timestamp = Timestamp(0); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendPacketOfInts(Timestamp(0), {}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); // EndLoopCalc will forward the timestamp bound because there are no elements // in collection to output. - ASSERT_EQ(0, output_packets_.size()); + EXPECT_TRUE(output_packets_.empty()); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphTest, SingleNonEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); - input_vector->emplace_back(0); - input_vector->emplace_back(1); - input_vector->emplace_back(2); Timestamp input_timestamp = Timestamp(0); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendPacketOfInts(input_timestamp, {0, 1, 2}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); - ASSERT_EQ(1, output_packets_.size()); - EXPECT_EQ(input_timestamp, output_packets_[0].Timestamp()); - std::vector expected_output_vector = {1, 2, 3}; - EXPECT_EQ(expected_output_vector, output_packets_[0].Get>()); + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp, std::vector{1, 2, 3}))); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphTest, MultipleVectors) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - - auto input_vector0 = absl::make_unique>(); - input_vector0->emplace_back(0); - input_vector0->emplace_back(1); Timestamp input_timestamp0 = Timestamp(0); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector0.release()).At(input_timestamp0))); + SendPacketOfInts(input_timestamp0, {0, 1}); - auto input_vector1 = absl::make_unique>(); Timestamp input_timestamp1 = Timestamp(1); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector1.release()).At(input_timestamp1))); + SendPacketOfInts(input_timestamp1, {}); - auto input_vector2 = absl::make_unique>(); - input_vector2->emplace_back(2); - input_vector2->emplace_back(3); Timestamp input_timestamp2 = Timestamp(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector2.release()).At(input_timestamp2))); + SendPacketOfInts(input_timestamp2, {2, 3}); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); - - ASSERT_EQ(2, output_packets_.size()); - - EXPECT_EQ(input_timestamp0, output_packets_[0].Timestamp()); - std::vector expected_output_vector0 = {1, 2}; - EXPECT_EQ(expected_output_vector0, - output_packets_[0].Get>()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); // At input_timestamp1, EndLoopCalc will 
forward timestamp bound as there are // no elements in vector to process. + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp0, std::vector{1, 2}), + PacketOfIntsEq(input_timestamp2, std::vector{3, 4}))); +} - EXPECT_EQ(input_timestamp2, output_packets_[1].Timestamp()); - std::vector expected_output_vector2 = {3, 4}; - EXPECT_EQ(expected_output_vector2, - output_packets_[1].Get>()); +// Passes non empty vector through or outputs empty vector in case of timestamp +// bound update. +class PassThroughOrEmptyVectorCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->SetProcessTimestampBounds(true); + cc->Inputs().Index(0).Set>(); + cc->Outputs().Index(0).Set>(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (!cc->Inputs().Index(0).IsEmpty()) { + cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); + } else { + cc->Outputs().Index(0).AddPacket( + MakePacket>(std::vector()) + .At(cc->InputTimestamp())); + } + return ::mediapipe::OkStatus(); + } +}; + +REGISTER_CALCULATOR(PassThroughOrEmptyVectorCalculator); + +class BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest + : public ::testing::Test { + protected: + void SetUp() override { + auto graph_config = ParseTextProtoOrDie( + R"( + num_threads: 4 + input_stream: "ints" + input_stream: "force_ints_to_be_timestamp_bound_update" + node { + calculator: "GateCalculator" + input_stream: "ints" + input_stream: "DISALLOW:force_ints_to_be_timestamp_bound_update" + output_stream: "ints_passed_through" + } + node { + calculator: "BeginLoopIntegerCalculator" + input_stream: "ITERABLE:ints_passed_through" + output_stream: "ITEM:int" + output_stream: "BATCH_END:timestamp" + } + node { + calculator: "IncrementCalculator" + input_stream: "int" + output_stream: "int_plus_one" + } + node { + calculator: "EndLoopIntegersCalculator" + input_stream: "ITEM:int_plus_one" + input_stream: "BATCH_END:timestamp" + output_stream: "ITERABLE:ints_plus_one" + } + node { + calculator: "PassThroughOrEmptyVectorCalculator" + input_stream: "ints_plus_one" + output_stream: "ints_plus_one_passed_through" + } + )"); + tool::AddVectorSink("ints_plus_one_passed_through", &graph_config, + &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config)); + MP_ASSERT_OK(graph_.StartRun({})); + } + + void SendPacketOfIntsOrBound(Timestamp timestamp, std::vector ints) { + // All "ints" packets which are empty are forced to be just timestamp + // bound updates for begin loop calculator. 
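The fixture above turns selected inputs into pure timestamp bound updates by routing them through GateCalculator with a DISALLOW side input. A compact sketch of that gating idiom on its own follows; the stream names and function name are placeholders, not part of this patch.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

// When the "drop" packet is true, GateCalculator discards the "data" packet,
// so downstream calculators observe only a timestamp bound update at that
// timestamp instead of a data packet.
CalculatorGraphConfig GatingSketch() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "data"
    input_stream: "drop"
    node {
      calculator: "GateCalculator"
      input_stream: "data"
      input_stream: "DISALLOW:drop"
      output_stream: "gated_data"
    }
  )pb");
}

}  // namespace mediapipe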
+ bool force_ints_to_be_timestamp_bound_update = ints.empty(); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "force_ints_to_be_timestamp_bound_update", + MakePacket(force_ints_to_be_timestamp_bound_update) + .At(timestamp))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "ints", MakePacket>(std::move(ints)).At(timestamp))); + } + + CalculatorGraph graph_; + std::vector output_packets_; +}; + +TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, + SingleEmptyVector) { + SendPacketOfIntsOrBound(Timestamp(0), {}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + EXPECT_THAT(output_packets_, testing::ElementsAre(PacketOfIntsEq( + Timestamp(0), std::vector{}))); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, + SingleNonEmptyVector) { + SendPacketOfIntsOrBound(Timestamp(0), {0, 1, 2}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + EXPECT_THAT(output_packets_, testing::ElementsAre(PacketOfIntsEq( + Timestamp(0), std::vector{1, 2, 3}))); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, MultipleVectors) { + SendPacketOfIntsOrBound(Timestamp(0), {}); + // Waiting until idle to guarantee all timestamp bound updates are processed + // individually. (Timestamp bounds updates occur in the provide config only + // if input is an empty vector.) + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + SendPacketOfIntsOrBound(Timestamp(1), {0, 1}); + SendPacketOfIntsOrBound(Timestamp(2), {}); + // Waiting until idle to guarantee all timestamp bound updates are processed + // individually. (Timestamp bounds updates occur in the provide config only + // if input is an empty vector.) + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + SendPacketOfIntsOrBound(Timestamp(3), {2, 3}); + SendPacketOfIntsOrBound(Timestamp(4), {}); + // Waiting until idle to guarantee all timestamp bound updates are processed + // individually. (Timestamp bounds updates occur in the provide config only + // if input is an empty vector.) 
+ MP_ASSERT_OK(graph_.WaitUntilIdle()); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); + + EXPECT_THAT( + output_packets_, + testing::ElementsAre(PacketOfIntsEq(Timestamp(0), std::vector{}), + PacketOfIntsEq(Timestamp(1), std::vector{1, 2}), + PacketOfIntsEq(Timestamp(2), std::vector{}), + PacketOfIntsEq(Timestamp(3), std::vector{3, 4}), + PacketOfIntsEq(Timestamp(4), std::vector{}))); } class MultiplierCalculator : public CalculatorBase { @@ -199,8 +336,8 @@ REGISTER_CALCULATOR(MultiplierCalculator); class BeginEndLoopCalculatorGraphWithClonedInputsTest : public ::testing::Test { protected: - BeginEndLoopCalculatorGraphWithClonedInputsTest() { - graph_config_ = ParseTextProtoOrDie( + void SetUp() override { + auto graph_config = ParseTextProtoOrDie( R"( num_threads: 4 input_stream: "ints" @@ -226,109 +363,85 @@ class BeginEndLoopCalculatorGraphWithClonedInputsTest : public ::testing::Test { output_stream: "ITERABLE:multiplied_ints" } )"); - tool::AddVectorSink("multiplied_ints", &graph_config_, &output_packets_); + tool::AddVectorSink("multiplied_ints", &graph_config, &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config)); + MP_ASSERT_OK(graph_.StartRun({})); } - CalculatorGraphConfig graph_config_; + void SendPackets(Timestamp timestamp, int multiplier, std::vector ints) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "ints", MakePacket>(std::move(ints)).At(timestamp))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "multiplier", MakePacket(multiplier).At(timestamp))); + } + + void SendMultiplier(Timestamp timestamp, int multiplier) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "multiplier", MakePacket(multiplier).At(timestamp))); + } + + CalculatorGraph graph_; std::vector output_packets_; }; -TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, + InputStreamForIterableIsEmpty) { Timestamp input_timestamp = Timestamp(42); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - auto multiplier = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendMultiplier(input_timestamp, /*multiplier=*/2); + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + // EndLoopCalc will forward the timestamp bound because there are no packets + // to process. + ASSERT_EQ(0, output_packets_.size()); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleEmptyVector) { + SendPackets(Timestamp(0), /*multiplier=*/2, /*ints=*/{}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); // EndLoopCalc will forward the timestamp bound because there are no elements // in collection to output. 
- ASSERT_EQ(0, output_packets_.size()); + EXPECT_TRUE(output_packets_.empty()); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleNonEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); - input_vector->emplace_back(0); - input_vector->emplace_back(1); - input_vector->emplace_back(2); Timestamp input_timestamp = Timestamp(42); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - auto multiplier = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendPackets(input_timestamp, /*multiplier=*/2, /*ints=*/{0, 1, 2}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); - ASSERT_EQ(1, output_packets_.size()); - EXPECT_EQ(input_timestamp, output_packets_[0].Timestamp()); - std::vector expected_output_vector = {0, 2, 4}; - EXPECT_EQ(expected_output_vector, output_packets_[0].Get>()); + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp, std::vector{0, 2, 4}))); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, MultipleVectors) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - - auto input_vector0 = absl::make_unique>(); - input_vector0->emplace_back(0); - input_vector0->emplace_back(1); Timestamp input_timestamp0 = Timestamp(42); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector0.release()).At(input_timestamp0))); - auto multiplier0 = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier0.release()).At(input_timestamp0))); + SendPackets(input_timestamp0, /*multiplier=*/2, /*ints=*/{0, 1}); - auto input_vector1 = absl::make_unique>(); Timestamp input_timestamp1 = Timestamp(43); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector1.release()).At(input_timestamp1))); - auto multiplier1 = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier1.release()).At(input_timestamp1))); + SendPackets(input_timestamp1, /*multiplier=*/2, /*ints=*/{}); - auto input_vector2 = absl::make_unique>(); - input_vector2->emplace_back(2); - input_vector2->emplace_back(3); Timestamp input_timestamp2 = Timestamp(44); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector2.release()).At(input_timestamp2))); - auto multiplier2 = absl::make_unique(3); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier2.release()).At(input_timestamp2))); + SendPackets(input_timestamp2, /*multiplier=*/3, /*ints=*/{2, 3}); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); - - ASSERT_EQ(2, output_packets_.size()); - - EXPECT_EQ(input_timestamp0, output_packets_[0].Timestamp()); - std::vector expected_output_vector0 = {0, 2}; - EXPECT_EQ(expected_output_vector0, - output_packets_[0].Get>()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); // At 
input_timestamp1, EndLoopCalc will forward timestamp bound as there are // no elements in vector to process. - - EXPECT_EQ(input_timestamp2, output_packets_[1].Timestamp()); - std::vector expected_output_vector2 = {6, 9}; - EXPECT_EQ(expected_output_vector2, - output_packets_[1].Get>()); + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp0, std::vector{0, 2}), + PacketOfIntsEq(input_timestamp2, std::vector{6, 9}))); } } // namespace diff --git a/mediapipe/calculators/core/begin_loop_calculator.cc b/mediapipe/calculators/core/begin_loop_calculator.cc index 6c1ac20bf..bd4e554e1 100644 --- a/mediapipe/calculators/core/begin_loop_calculator.cc +++ b/mediapipe/calculators/core/begin_loop_calculator.cc @@ -16,6 +16,7 @@ #include +#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" @@ -31,4 +32,9 @@ typedef BeginLoopCalculator> BeginLoopNormalizedRectCalculator; REGISTER_CALCULATOR(BeginLoopNormalizedRectCalculator); +// A calculator to process std::vector. +typedef BeginLoopCalculator> + BeginLoopDetectionCalculator; +REGISTER_CALCULATOR(BeginLoopDetectionCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/core/begin_loop_calculator.h b/mediapipe/calculators/core/begin_loop_calculator.h index 7258b4bf7..ec59e1012 100644 --- a/mediapipe/calculators/core/begin_loop_calculator.h +++ b/mediapipe/calculators/core/begin_loop_calculator.h @@ -52,20 +52,28 @@ namespace mediapipe { // output_stream: "OUTPUT:aggregated_result" # IterableU @ext_ts // } // -// BeginLoopCalculator accepts an optional input stream tagged with "TICK" -// which if non-empty, wakes up the calculator and calls -// BeginLoopCalculator::Process(). Input streams tagged with "CLONE" are cloned -// to the corresponding output streams at loop timestamps. This ensures that a -// MediaPipe graph or sub-graph can run multiple times, once per element in the -// "ITERABLE" for each pakcet clone of the packets in the "CLONE" input streams. +// Input streams tagged with "CLONE" are cloned to the corresponding output +// streams at loop timestamps. This ensures that a MediaPipe graph or sub-graph +// can run multiple times, once per element in the "ITERABLE" for each pakcet +// clone of the packets in the "CLONE" input streams. template class BeginLoopCalculator : public CalculatorBase { using ItemT = typename IterableT::value_type; public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { + // The below enables processing of timestamp bound updates, and that enables + // correct timestamp propagation by the companion EndLoopCalculator. + // + // For instance, Process() function will be still invoked even if upstream + // calculator has updated timestamp bound for ITERABLE input instead of + // providing actual value. + cc->SetProcessTimestampBounds(true); + // A non-empty packet in the optional "TICK" input stream wakes up the // calculator. + // DEPRECATED as timestamp bound updates are processed by default in this + // calculator. 
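Since this patch registers BeginLoopDetectionCalculator and makes BeginLoopCalculator process timestamp bound updates by default, a hedged usage sketch of a per-detection loop may help. The middle calculator name is a hypothetical stand-in for whatever per-item processing a graph needs; EndLoopRenderDataCalculator is one of the aggregators already registered in end_loop_calculator.cc.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

// Per-element loop over a std::vector<Detection>: BeginLoopDetectionCalculator
// emits one ITEM per detection, the (hypothetical) PerDetectionCalculator
// processes it, and an EndLoopCalculator specialization re-assembles a vector
// at the original input timestamp.
CalculatorGraphConfig DetectionLoopSketch() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "detections"
    node {
      calculator: "BeginLoopDetectionCalculator"
      input_stream: "ITERABLE:detections"
      output_stream: "ITEM:detection"
      output_stream: "BATCH_END:batch_end_ts"
    }
    node {
      calculator: "PerDetectionCalculator"  # hypothetical per-item step
      input_stream: "detection"
      output_stream: "render_data"
    }
    node {
      calculator: "EndLoopRenderDataCalculator"
      input_stream: "ITEM:render_data"
      input_stream: "BATCH_END:batch_end_ts"
      output_stream: "ITERABLE:render_data_vector"
    }
  )pb");
}

}  // namespace mediapipe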
if (cc->Inputs().HasTag("TICK")) { cc->Inputs().Tag("TICK").SetAny(); } diff --git a/mediapipe/calculators/core/clip_vector_size_calculator.cc b/mediapipe/calculators/core/clip_vector_size_calculator.cc index 388cc3a6a..89ac0b9ef 100644 --- a/mediapipe/calculators/core/clip_vector_size_calculator.cc +++ b/mediapipe/calculators/core/clip_vector_size_calculator.cc @@ -17,6 +17,7 @@ #include #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/rect.pb.h" namespace mediapipe { @@ -25,4 +26,8 @@ typedef ClipVectorSizeCalculator<::mediapipe::NormalizedRect> ClipNormalizedRectVectorSizeCalculator; REGISTER_CALCULATOR(ClipNormalizedRectVectorSizeCalculator); +typedef ClipVectorSizeCalculator<::mediapipe::Detection> + ClipDetectionVectorSizeCalculator; +REGISTER_CALCULATOR(ClipDetectionVectorSizeCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/core/end_loop_calculator.cc b/mediapipe/calculators/core/end_loop_calculator.cc index e27ab11ea..61e8c6ac0 100644 --- a/mediapipe/calculators/core/end_loop_calculator.cc +++ b/mediapipe/calculators/core/end_loop_calculator.cc @@ -16,6 +16,7 @@ #include +#include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/util/render_data.pb.h" @@ -37,4 +38,8 @@ typedef EndLoopCalculator> EndLoopRenderDataCalculator; REGISTER_CALCULATOR(EndLoopRenderDataCalculator); +typedef EndLoopCalculator> + EndLoopClassificationListCalculator; +REGISTER_CALCULATOR(EndLoopClassificationListCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc index f9abb35a2..feefd6a56 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator.cc @@ -25,13 +25,17 @@ namespace mediapipe { // together with some previous output. // // For the first packet that arrives on the MAIN input, the timestamp bound is -// advanced on the output. Downstream calculators will see this as an empty +// advanced on the PREV_LOOP. Downstream calculators will see this as an empty // packet. This way they are not kept waiting for the previous output, which // for the first iteration does not exist. // -// Thereafter, each packet received on MAIN is matched with a packet received -// on LOOP; the LOOP packet's timestamp is changed to that of the MAIN packet, -// and it is output on PREV_LOOP. +// Thereafter, +// - Each non-empty MAIN packet results in: +// a) a PREV_LOOP packet with contents of the LOOP packet received at the +// timestamp of the previous non-empty MAIN packet +// b) or in a PREV_LOOP timestamp bound update if the LOOP packet was empty. +// - Each empty MAIN packet indicating timestamp bound update results in a +// PREV_LOOP timestamp bound update. // // Example config: // node { @@ -56,83 +60,115 @@ class PreviousLoopbackCalculator : public CalculatorBase { // TODO: an optional PREV_TIMESTAMP output could be added to // carry the original timestamp of the packet on PREV_LOOP. cc->SetInputStreamHandler("ImmediateInputStreamHandler"); + // Process() function is invoked in response to MAIN/LOOP stream timestamp + // bound updates. 
+ cc->SetProcessTimestampBounds(true); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { main_id_ = cc->Inputs().GetId("MAIN", 0); loop_id_ = cc->Inputs().GetId("LOOP", 0); - loop_out_id_ = cc->Outputs().GetId("PREV_LOOP", 0); + prev_loop_id_ = cc->Outputs().GetId("PREV_LOOP", 0); cc->Outputs() - .Get(loop_out_id_) + .Get(prev_loop_id_) .SetHeader(cc->Inputs().Get(loop_id_).Header()); - - // Use an empty packet for the first round, since there is no previous - // output. - loopback_packets_.push_back({}); - return ::mediapipe::OkStatus(); } ::mediapipe::Status Process(CalculatorContext* cc) final { - Packet& main_packet = cc->Inputs().Get(main_id_).Value(); - if (!main_packet.IsEmpty()) { - main_ts_.push_back(main_packet.Timestamp()); - } - Packet& loopback_packet = cc->Inputs().Get(loop_id_).Value(); - if (!loopback_packet.IsEmpty()) { - loopback_packets_.push_back(loopback_packet); - while (!main_ts_.empty() && - main_ts_.front() <= loopback_packets_.front().Timestamp()) { - main_ts_.pop_front(); - } - } - auto& loop_out = cc->Outputs().Get(loop_out_id_); + // Non-empty packets and empty packets indicating timestamp bound updates + // are guaranteed to have timestamps greater than timestamps of previous + // packets within the same stream. Calculator tracks and operates on such + // packets. - while (!main_ts_.empty() && !loopback_packets_.empty()) { - Timestamp main_timestamp = main_ts_.front(); - main_ts_.pop_front(); - Packet previous_loopback = loopback_packets_.front().At(main_timestamp); - loopback_packets_.pop_front(); - - if (previous_loopback.IsEmpty()) { - // TODO: SetCompleteTimestampBound would be more useful. - loop_out.SetNextTimestampBound(main_timestamp + 1); + const Packet& main_packet = cc->Inputs().Get(main_id_).Value(); + if (prev_main_ts_ < main_packet.Timestamp()) { + Timestamp loop_timestamp; + if (!main_packet.IsEmpty()) { + loop_timestamp = prev_non_empty_main_ts_; + prev_non_empty_main_ts_ = main_packet.Timestamp(); } else { - loop_out.AddPacket(std::move(previous_loopback)); + // Calculator advances PREV_LOOP timestamp bound in response to empty + // MAIN packet, hence not caring about corresponding loop packet. + loop_timestamp = Timestamp::Unset(); + } + main_packet_specs_.push_back({.timestamp = main_packet.Timestamp(), + .loop_timestamp = loop_timestamp}); + prev_main_ts_ = main_packet.Timestamp(); + } + + const Packet& loop_packet = cc->Inputs().Get(loop_id_).Value(); + if (prev_loop_ts_ < loop_packet.Timestamp()) { + loop_packets_.push_back(loop_packet); + prev_loop_ts_ = loop_packet.Timestamp(); + } + + auto& prev_loop = cc->Outputs().Get(prev_loop_id_); + while (!main_packet_specs_.empty() && !loop_packets_.empty()) { + // The earliest MAIN packet. + const MainPacketSpec& main_spec = main_packet_specs_.front(); + // The earliest LOOP packet. + const Packet& loop_candidate = loop_packets_.front(); + // Match LOOP and MAIN packets. + if (main_spec.loop_timestamp < loop_candidate.Timestamp()) { + // No LOOP packet can match the MAIN packet under review. + prev_loop.SetNextTimestampBound(main_spec.timestamp + 1); + main_packet_specs_.pop_front(); + } else if (main_spec.loop_timestamp > loop_candidate.Timestamp()) { + // No MAIN packet can match the LOOP packet under review. + loop_packets_.pop_front(); + } else { + // Exact match found. + if (loop_candidate.IsEmpty()) { + // However, LOOP packet is empty. 
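The three-way comparison above is the core of the new PreviousLoopbackCalculator implementation. As a reading aid, here is an illustrative-only model of that matching loop over plain ints; it is not MediaPipe code and deliberately ignores the empty-LOOP-packet case, which the real calculator also turns into a bound update.

#include <deque>
#include <iostream>

// Each MAIN spec records its own timestamp and the LOOP timestamp it expects
// (the first MAIN packet expects an "unset" value, modeled here as -1).
struct MainSpec {
  int timestamp;
  int expected_loop_timestamp;
};

void MatchSketch(std::deque<MainSpec> specs, std::deque<int> loop_timestamps) {
  while (!specs.empty() && !loop_timestamps.empty()) {
    if (specs.front().expected_loop_timestamp < loop_timestamps.front()) {
      // No LOOP packet can match this MAIN packet: only advance the bound.
      std::cout << "bound update at " << specs.front().timestamp << "\n";
      specs.pop_front();
    } else if (specs.front().expected_loop_timestamp >
               loop_timestamps.front()) {
      // This LOOP packet will never be needed: drop it.
      loop_timestamps.pop_front();
    } else {
      // Exact match: emit PREV_LOOP at the MAIN timestamp.
      std::cout << "emit PREV_LOOP at " << specs.front().timestamp << "\n";
      specs.pop_front();
      loop_timestamps.pop_front();
    }
  }
}

int main() {
  // MAIN at 1 (expects nothing), 2 (expects LOOP@1), 3 (expects LOOP@2);
  // LOOP packets arrived at 1 and 2.
  MatchSketch({{1, -1}, {2, 1}, {3, 2}}, {1, 2});
  return 0;
}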
+ prev_loop.SetNextTimestampBound(main_spec.timestamp + 1); + } else { + prev_loop.AddPacket(loop_candidate.At(main_spec.timestamp)); + } + loop_packets_.pop_front(); + main_packet_specs_.pop_front(); } } - // In case of an empty loopback input, the next timestamp bound for - // loopback input is the loopback timestamp + 1. The next timestamp bound - // for output is set and the main_ts_ vector is truncated accordingly. - if (loopback_packet.IsEmpty() && - loopback_packet.Timestamp() != Timestamp::Unstarted()) { - Timestamp loopback_bound = - loopback_packet.Timestamp().NextAllowedInStream(); - while (!main_ts_.empty() && main_ts_.front() <= loopback_bound) { - main_ts_.pop_front(); - } - if (main_ts_.empty()) { - loop_out.SetNextTimestampBound(loopback_bound.NextAllowedInStream()); - } - } - if (!main_ts_.empty()) { - loop_out.SetNextTimestampBound(main_ts_.front()); - } - if (cc->Inputs().Get(main_id_).IsDone() && main_ts_.empty()) { - loop_out.Close(); + if (main_packet_specs_.empty() && cc->Inputs().Get(main_id_).IsDone()) { + prev_loop.Close(); } return ::mediapipe::OkStatus(); } private: + struct MainPacketSpec { + Timestamp timestamp; + // Expected timestamp of the packet from LOOP stream that corresponds to the + // packet from MAIN stream descirbed by this spec. + Timestamp loop_timestamp; + }; + CollectionItemId main_id_; CollectionItemId loop_id_; - CollectionItemId loop_out_id_; + CollectionItemId prev_loop_id_; - std::deque main_ts_; - std::deque loopback_packets_; + // Contains specs for MAIN packets which only can be: + // - non-empty packets + // - empty packets indicating timestamp bound updates + // + // Sorted according to packet timestamps. + std::deque main_packet_specs_; + Timestamp prev_main_ts_ = Timestamp::Unstarted(); + Timestamp prev_non_empty_main_ts_ = Timestamp::Unstarted(); + + // Contains LOOP packets which only can be: + // - the very first empty packet + // - non empty packets + // - empty packets indicating timestamp bound updates + // + // Sorted according to packet timestamps. + std::deque loop_packets_; + // Using "Timestamp::Unset" instead of "Timestamp::Unstarted" in order to + // allow addition of the very first empty packet (which doesn't indicate + // timestamp bound change necessarily). + Timestamp prev_loop_ts_ = Timestamp::Unset(); }; REGISTER_CALCULATOR(PreviousLoopbackCalculator); diff --git a/mediapipe/calculators/core/previous_loopback_calculator_test.cc b/mediapipe/calculators/core/previous_loopback_calculator_test.cc index 5ef98257f..0fabacd57 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator_test.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -25,12 +26,17 @@ #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/integral_types.h" #include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/timestamp.h" #include "mediapipe/framework/tool/sink.h" namespace mediapipe { +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Pair; +using ::testing::Value; namespace { // Returns the timestamp values for a vector of Packets. @@ -43,6 +49,23 @@ std::vector TimestampValues(const std::vector& packets) { return result; } +MATCHER(EmptyPacket, negation ? 
"isn't empty" : "is empty") { + if (arg.IsEmpty()) { + return true; + } + return false; +} + +MATCHER_P(IntPacket, value, "") { + return Value(arg.template Get(), Eq(value)); +} + +MATCHER_P2(PairPacket, timestamp, pair, "") { + Timestamp actual_timestamp = arg.Timestamp(); + const auto& actual_pair = arg.template Get>(); + return Value(actual_timestamp, Eq(timestamp)) && Value(actual_pair, pair); +} + TEST(PreviousLoopbackCalculator, CorrectTimestamps) { std::vector in_prev; CalculatorGraphConfig graph_config_ = @@ -81,32 +104,30 @@ TEST(PreviousLoopbackCalculator, CorrectTimestamps) { MP_EXPECT_OK(graph_.AddPacketToInputStream( input_name, MakePacket(n).At(Timestamp(n)))); }; - auto pair_values = [](const Packet& packet) { - auto pair = packet.Get>(); - int first = pair.first.IsEmpty() ? -1 : pair.first.Get(); - int second = pair.second.IsEmpty() ? -1 : pair.second.Get(); - return std::make_pair(first, second); - }; send_packet("in", 1); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(1, -1)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket()))); send_packet("in", 2); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(2, 1)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1, 2)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1)))); send_packet("in", 5); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2, 5})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(5, 2)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1, 2, 5)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(2)))); send_packet("in", 15); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2, 5, 15})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(15, 5)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1, 2, 5, 15)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(15), Pair(IntPacket(15), IntPacket(5)))); MP_EXPECT_OK(graph_.CloseAllInputStreams()); MP_EXPECT_OK(graph_.WaitUntilDone()); @@ -185,24 +206,24 @@ TEST(PreviousLoopbackCalculator, ClosesCorrectly) { send_packet("in", 1); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1)); send_packet("in", 2); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1, 2)); send_packet("in", 5); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2, 5})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1, 2, 5)); send_packet("in", 15); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2, 5, 15})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1, 2, 5, 15)); MP_EXPECT_OK(graph_.CloseAllInputStreams()); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), - (std::vector{1, 2, 5, 15, Timestamp::Max().Value()})); + EXPECT_THAT(TimestampValues(outputs), + ElementsAre(1, 2, 5, 15, Timestamp::Max().Value())); MP_EXPECT_OK(graph_.WaitUntilDone()); } @@ -247,16 +268,12 @@ 
TEST(PreviousLoopbackCalculator, EmptyLoopForever) { input_name, MakePacket(n).At(Timestamp(n)))); }; - send_packet("in", 0); - MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{0})); - - for (int main_ts = 1; main_ts < 50; ++main_ts) { + for (int main_ts = 0; main_ts < 50; ++main_ts) { send_packet("in", main_ts); MP_EXPECT_OK(graph_.WaitUntilIdle()); std::vector ts_values = TimestampValues(outputs); EXPECT_EQ(ts_values.size(), main_ts + 1); - for (int j = 0; j < main_ts; ++j) { + for (int j = 0; j < main_ts + 1; ++j) { EXPECT_EQ(ts_values[j], j); } } @@ -266,5 +283,487 @@ TEST(PreviousLoopbackCalculator, EmptyLoopForever) { MP_EXPECT_OK(graph_.WaitUntilDone()); } +class PreviousLoopbackCalculatorProcessingTimestampsTest + : public testing::Test { + protected: + void SetUp() override { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: 'input' + input_stream: 'force_main_empty' + input_stream: 'force_loop_empty' + # Used to indicate "main" timestamp bound updates. + node { + calculator: 'GateCalculator' + input_stream: 'input' + input_stream: 'DISALLOW:force_main_empty' + output_stream: 'main' + } + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:main' + input_stream: 'LOOP:loop' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:prev_loop' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'input' + input_stream: 'prev_loop' + output_stream: 'passed_through_input' + output_stream: 'passed_through_prev_loop' + } + # Used to indicate "loop" timestamp bound updates. + node { + calculator: 'GateCalculator' + input_stream: 'input' + input_stream: 'DISALLOW:force_loop_empty' + output_stream: 'loop' + } + node { + calculator: 'MakePairCalculator' + input_stream: 'passed_through_input' + input_stream: 'passed_through_prev_loop' + output_stream: 'passed_through_input_and_prev_loop' + } + )"); + tool::AddVectorSink("passed_through_input_and_prev_loop", &graph_config, + &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config, {})); + MP_ASSERT_OK(graph_.StartRun({})); + } + + void SendPackets(int timestamp, int input, bool force_main_empty, + bool force_loop_empty) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "input", MakePacket(input).At(Timestamp(timestamp)))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "force_main_empty", + MakePacket(force_main_empty).At(Timestamp(timestamp)))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "force_loop_empty", + MakePacket(force_loop_empty).At(Timestamp(timestamp)))); + } + + CalculatorGraph graph_; + std::vector output_packets_; +}; + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsEmptyMainNonEmptyLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + 
ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsNonEmptyMainEmptyLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsAlteringMainNonEmptyLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, + /*force_main_empty=*/false, + 
/*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), IntPacket(1))))); + + SendPackets(/*timestamp=*/5, /*input=*/5, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), IntPacket(1))), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), IntPacket(1))), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsNonEmptyMainAlteringLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))))); + + SendPackets(/*timestamp=*/3, /*input=*/3, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + 
MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsCheckIfLastCorrectAlteringMainAlteringLoop) { + int num_packets = 1000; + for (int i = 0; i < num_packets; ++i) { + bool force_main_empty = i % 3 == 0 ? true : false; + bool force_loop_empty = i % 2 == 0 ? true : false; + SendPackets(/*timestamp=*/i + 1, /*input=*/i + 1, force_main_empty, + force_loop_empty); + } + SendPackets(/*timestamp=*/num_packets + 1, + /*input=*/num_packets + 1, /*force_main_empty=*/false, + /*force_loop_empty=*/false); + SendPackets(/*timestamp=*/num_packets + 2, + /*input=*/num_packets + 2, /*force_main_empty=*/false, + /*force_loop_empty=*/false); + + MP_EXPECT_OK(graph_.WaitUntilIdle()); + ASSERT_FALSE(output_packets_.empty()); + EXPECT_THAT( + output_packets_.back(), + PairPacket(Timestamp(num_packets + 2), + Pair(IntPacket(num_packets + 2), IntPacket(num_packets + 1)))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +// Similar to GateCalculator, but it doesn't propagate timestamp bound updates. +class DroppingGateCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).SetAny(); + cc->Inputs().Tag("DISALLOW").Set(); + cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + if (!cc->Inputs().Index(0).IsEmpty() && + !cc->Inputs().Tag("DISALLOW").Get()) { + cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(DroppingGateCalculator); + +// Tests PreviousLoopbackCalculator in cases when there are no "LOOP" timestamp +// bound updates and non-empty packets for a while and the aforementioned start +// to arrive at some point. So, "PREV_LOOP" is delayed for a couple of inputs. +class PreviousLoopbackCalculatorDelayBehaviorTest : public testing::Test { + protected: + void SetUp() override { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: 'input' + # Drops "loop" when set to "true", delaying output of prev_loop, hence + # delaying output of the graph. 
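DroppingGateCalculator above intentionally swallows both the packet and the timestamp bound, which is what delays PREV_LOOP in these tests. For contrast, a hedged sketch of the same gate with a declared zero offset, which would keep the downstream bound advancing even while packets are dropped; the calculator name is made up for illustration.

#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Like DroppingGateCalculator, but SetOffset(TimestampDiff(0)) tells the
// framework that the output timestamp bound tracks the input timestamp, so
// dropping a packet still advances the bound for downstream calculators.
class BoundPropagatingDroppingGateCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).SetAny();
    cc->Inputs().Tag("DISALLOW").Set<bool>();
    cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Open(CalculatorContext* cc) override {
    cc->SetOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override {
    if (!cc->Inputs().Index(0).IsEmpty() &&
        !cc->Inputs().Tag("DISALLOW").Get<bool>()) {
      cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
    }
    return ::mediapipe::OkStatus();
  }
};
REGISTER_CALCULATOR(BoundPropagatingDroppingGateCalculator);

}  // namespace mediapipe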
+ input_stream: 'delay_next_output' + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:input' + input_stream: 'LOOP:loop' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:prev_loop' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'input' + input_stream: 'prev_loop' + output_stream: 'passed_through_input' + output_stream: 'passed_through_prev_loop' + } + node { + calculator: 'DroppingGateCalculator' + input_stream: 'input' + input_stream: 'DISALLOW:delay_next_output' + output_stream: 'loop' + } + node { + calculator: 'MakePairCalculator' + input_stream: 'passed_through_input' + input_stream: 'passed_through_prev_loop' + output_stream: 'passed_through_input_and_prev_loop' + } + )"); + tool::AddVectorSink("passed_through_input_and_prev_loop", &graph_config, + &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config, {})); + MP_ASSERT_OK(graph_.StartRun({})); + } + + void SendPackets(int timestamp, int input, bool delay_next_output) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "input", MakePacket(input).At(Timestamp(timestamp)))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "delay_next_output", + MakePacket(delay_next_output).At(Timestamp(timestamp)))); + } + + CalculatorGraph graph_; + std::vector output_packets_; +}; + +TEST_F(PreviousLoopbackCalculatorDelayBehaviorTest, MultipleDelayedOutputs) { + SendPackets(/*timestamp=*/1, /*input=*/1, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), IntPacket(5))))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorDelayBehaviorTest, + NonDelayedOutputFollowedByMultipleDelayedOutputs) { + SendPackets(/*timestamp=*/1, /*input=*/1, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + 
output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))))); + + SendPackets(/*timestamp=*/3, /*input=*/3, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))))); + + SendPackets(/*timestamp=*/5, /*input=*/5, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), IntPacket(5))))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + } // anonymous namespace } // namespace mediapipe diff --git a/mediapipe/calculators/core/split_vector_calculator.cc b/mediapipe/calculators/core/split_vector_calculator.cc index 79c884e43..d993387df 100644 --- a/mediapipe/calculators/core/split_vector_calculator.cc +++ b/mediapipe/calculators/core/split_vector_calculator.cc @@ -16,6 +16,7 @@ #include +#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" #include "tensorflow/lite/interpreter.h" @@ -48,6 +49,10 @@ typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark, false> SplitLandmarkVectorCalculator; REGISTER_CALCULATOR(SplitLandmarkVectorCalculator); +typedef SplitVectorCalculator<::mediapipe::NormalizedLandmarkList, false> + SplitNormalizedLandmarkListVectorCalculator; +REGISTER_CALCULATOR(SplitNormalizedLandmarkListVectorCalculator); + typedef SplitVectorCalculator<::mediapipe::NormalizedRect, false> SplitNormalizedRectVectorCalculator; REGISTER_CALCULATOR(SplitNormalizedRectVectorCalculator); @@ -57,4 +62,9 @@ typedef SplitVectorCalculator<::tflite::gpu::gl::GlBuffer, true> MovableSplitGlBufferVectorCalculator; REGISTER_CALCULATOR(MovableSplitGlBufferVectorCalculator); #endif + +typedef SplitVectorCalculator<::mediapipe::Detection, false> + SplitDetectionVectorCalculator; +REGISTER_CALCULATOR(SplitDetectionVectorCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index a6159b554..1ef87d314 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -422,9 +422,12 @@ cc_library( ":recolor_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/port:status", "//mediapipe/framework/port:ret_check", "//mediapipe/util:color_cc_proto", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", ] + select({ "//mediapipe/gpu:disable_gpu": [], 
"//conditions:default": [ diff --git a/mediapipe/calculators/image/recolor_calculator.cc b/mediapipe/calculators/image/recolor_calculator.cc index fff26b704..07f347a15 100644 --- a/mediapipe/calculators/image/recolor_calculator.cc +++ b/mediapipe/calculators/image/recolor_calculator.cc @@ -17,6 +17,9 @@ #include "mediapipe/calculators/image/recolor_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/util/color.pb.h" @@ -39,8 +42,6 @@ namespace mediapipe { // The luminance of the input image is used to adjust the blending weight, // to help preserve image textures. // -// TODO implement cpu support. -// // Inputs: // One of the following IMAGE tags: // IMAGE: An ImageFrame input image, RGB or RGBA. @@ -71,6 +72,8 @@ namespace mediapipe { // } // } // +// Note: Cannot mix-match CPU & GPU inputs/outputs. +// CPU-in & CPU-out GPU-in & GPU-out class RecolorCalculator : public CalculatorBase { public: RecolorCalculator() = default; @@ -138,6 +141,11 @@ REGISTER_CALCULATOR(RecolorCalculator); cc->Outputs().Tag("IMAGE").Set(); } + // Confirm only one of the input streams is present. + RET_CHECK(cc->Inputs().HasTag("IMAGE") ^ cc->Inputs().HasTag("IMAGE_GPU")); + // Confirm only one of the output streams is present. + RET_CHECK(cc->Outputs().HasTag("IMAGE") ^ cc->Outputs().HasTag("IMAGE_GPU")); + if (use_gpu) { #if !defined(MEDIAPIPE_DISABLE_GPU) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); @@ -193,7 +201,62 @@ REGISTER_CALCULATOR(RecolorCalculator); } ::mediapipe::Status RecolorCalculator::RenderCpu(CalculatorContext* cc) { - return ::mediapipe::UnimplementedError("CPU support is not implemented yet."); + if (cc->Inputs().Tag("MASK").IsEmpty()) { + return ::mediapipe::OkStatus(); + } + // Get inputs and setup output. + const auto& input_img = cc->Inputs().Tag("IMAGE").Get(); + const auto& mask_img = cc->Inputs().Tag("MASK").Get(); + + cv::Mat input_mat = formats::MatView(&input_img); + cv::Mat mask_mat = formats::MatView(&mask_img); + + RET_CHECK(input_mat.channels() == 3); // RGB only. 
+ + if (mask_mat.channels() > 1) { + std::vector channels; + cv::split(mask_mat, channels); + if (mask_channel_ == mediapipe::RecolorCalculatorOptions_MaskChannel_ALPHA) + mask_mat = channels[3]; + else + mask_mat = channels[0]; + } + cv::Mat mask_full; + cv::resize(mask_mat, mask_full, input_mat.size()); + + auto output_img = absl::make_unique( + input_img.Format(), input_mat.cols, input_mat.rows); + cv::Mat output_mat = mediapipe::formats::MatView(output_img.get()); + + // From GPU shader: + /* + vec4 weight = texture2D(mask, sample_coordinate); + vec4 color1 = texture2D(frame, sample_coordinate); + vec4 color2 = vec4(recolor, 1.0); + + float luminance = dot(color1.rgb, vec3(0.299, 0.587, 0.114)); + float mix_value = weight.MASK_COMPONENT * luminance; + + fragColor = mix(color1, color2, mix_value); + */ + for (int i = 0; i < output_mat.rows; ++i) { + for (int j = 0; j < output_mat.cols; ++j) { + float weight = mask_full.at(i, j) * (1.0 / 255.0); + cv::Vec3f color1 = input_mat.at(i, j); + cv::Vec3f color2 = {color_[0], color_[1], color_[2]}; + + float luminance = + (color1[0] * 0.299 + color1[1] * 0.587 + color1[2] * 0.114) / 255; + float mix_value = weight * luminance; + + cv::Vec3b mix_color = color1 * (1.0 - mix_value) + color2 * mix_value; + output_mat.at(i, j) = mix_color; + } + } + + cc->Outputs().Tag("IMAGE").Add(output_img.release(), cc->InputTimestamp()); + + return ::mediapipe::OkStatus(); } ::mediapipe::Status RecolorCalculator::RenderGpu(CalculatorContext* cc) { @@ -303,9 +366,9 @@ void RecolorCalculator::GlRender() { if (!options.has_color()) RET_CHECK_FAIL() << "Missing color option."; - color_.push_back(options.color().r() / 255.0); - color_.push_back(options.color().g() / 255.0); - color_.push_back(options.color().b() / 255.0); + color_.push_back(options.color().r()); + color_.push_back(options.color().g()); + color_.push_back(options.color().b()); return ::mediapipe::OkStatus(); } @@ -378,8 +441,8 @@ void RecolorCalculator::GlRender() { glUseProgram(program_); glUniform1i(glGetUniformLocation(program_, "frame"), 1); glUniform1i(glGetUniformLocation(program_, "mask"), 2); - glUniform3f(glGetUniformLocation(program_, "recolor"), color_[0], color_[1], - color_[2]); + glUniform3f(glGetUniformLocation(program_, "recolor"), color_[0] / 255.0, + color_[1] / 255.0, color_[2] / 255.0); #endif // !MEDIAPIPE_DISABLE_GPU return ::mediapipe::OkStatus(); diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index 93c4f751e..f774fe717 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -1110,6 +1110,7 @@ cc_test( ], "//mediapipe:android": [ "@org_tensorflow//tensorflow/core:android_tensorflow_lib_with_ops_lite_proto_no_rtti_lib", + "@org_tensorflow//tensorflow/core:android_tensorflow_test_lib", ], "//mediapipe:ios": [ "@org_tensorflow//tensorflow/core:ios_tensorflow_test_lib", diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index 7c711e842..45ef317c8 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -222,9 +222,11 @@ cc_library( deps = [ ":util", ":tflite_inference_calculator_cc_proto", + "@com_google_absl//absl/memory", "//mediapipe/framework:calculator_framework", "//mediapipe/util:resource_util", "@org_tensorflow//tensorflow/lite:framework", + "@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate", "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", 
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", "//mediapipe/framework/port:ret_check", @@ -254,6 +256,10 @@ cc_library( "//mediapipe:android": [ "@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate", ], + }) + select({ + "//conditions:default": [ + "//mediapipe/util:cpu_util", + ], }), alwayslink = 1, ) @@ -308,6 +314,20 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tflite_model_calculator", + srcs = ["tflite_model_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":util", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", + "//mediapipe/framework/port:ret_check", + "@org_tensorflow//tensorflow/lite:framework", + ], + alwayslink = 1, +) + cc_library( name = "tflite_tensors_to_segmentation_calculator", srcs = ["tflite_tensors_to_segmentation_calculator.cc"], @@ -478,6 +498,9 @@ cc_test( deps = [ ":tflite_inference_calculator", ":tflite_inference_calculator_cc_proto", + ":tflite_model_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/deps:file_path", @@ -485,7 +508,9 @@ cc_test( "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/tool:validate_type", + "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", "@org_tensorflow//tensorflow/lite:framework", "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", ], @@ -511,3 +536,19 @@ cc_test( "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", ], ) + +cc_test( + name = "tflite_model_calculator_test", + srcs = ["tflite_model_calculator_test.cc"], + data = ["testdata/add.bin"], + deps = [ + ":tflite_model_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "@org_tensorflow//tensorflow/lite:framework", + ], +) diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 7634fe251..665bd89f7 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -17,10 +17,16 @@ #include #include +#include "absl/memory/memory.h" #include "mediapipe/calculators/tflite/tflite_inference_calculator.pb.h" #include "mediapipe/calculators/tflite/util.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/ret_check.h" + +#if !defined(__EMSCRIPTEN__) +#include "mediapipe/util/cpu_util.h" +#endif // !__EMSCRIPTEN__ + #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/interpreter.h" @@ -50,7 +56,7 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate.h" #include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h" #endif // iOS - +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" #if defined(MEDIAPIPE_ANDROID) #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #endif // ANDROID @@ -113,6 +119,23 @@ struct GPUData { }; #endif +// Returns number of threads to configure XNNPACK delegate with. 
+// (Equal to user provided value if specified. Otherwise, it returns number of +// high cores (hard-coded to 1 for __EMSCRIPTEN__)) +int GetXnnpackNumThreads( + const mediapipe::TfLiteInferenceCalculatorOptions& opts) { + static constexpr int kDefaultNumThreads = -1; + if (opts.has_delegate() && opts.delegate().has_xnnpack() && + opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) { + return opts.delegate().xnnpack().num_threads(); + } +#if !defined(__EMSCRIPTEN__) + return InferHigherCoreIds().size(); +#else + return 1; +#endif // !__EMSCRIPTEN__ +} + // Calculator Header Section // Runs inference on the provided input TFLite tensors and TFLite model. @@ -139,6 +162,9 @@ struct GPUData { // Input side packet: // CUSTOM_OP_RESOLVER (optional) - Use a custom op resolver, // instead of the builtin one. +// MODEL (optional) - Use to specify TfLite model +// (std::unique_ptr>) // // Example use: // node { @@ -153,6 +179,20 @@ struct GPUData { // } // } // +// or +// +// node { +// calculator: "TfLiteInferenceCalculator" +// input_stream: "TENSORS:tensor_image" +// input_side_packet: "MODEL:model" +// output_stream: "TENSORS:tensors" +// options: { +// [mediapipe.TfLiteInferenceCalculatorOptions.ext] { +// delegate { gpu {} } +// } +// } +// } +// // IMPORTANT Notes: // Tensors are assumed to be ordered correctly (sequentially added to model). // Input tensors are assumed to be of the correct size and already normalized. @@ -165,6 +205,9 @@ class TfLiteInferenceCalculator : public CalculatorBase { public: using TfLiteDelegatePtr = std::unique_ptr>; + using TfLiteModelPtr = + std::unique_ptr>; static ::mediapipe::Status GetContract(CalculatorContract* cc); @@ -173,12 +216,12 @@ class TfLiteInferenceCalculator : public CalculatorBase { ::mediapipe::Status Close(CalculatorContext* cc) override; private: - ::mediapipe::Status LoadOptions(CalculatorContext* cc); ::mediapipe::Status LoadModel(CalculatorContext* cc); + ::mediapipe::StatusOr GetModelAsPacket(const CalculatorContext& cc); ::mediapipe::Status LoadDelegate(CalculatorContext* cc); + Packet model_packet_; std::unique_ptr interpreter_; - std::unique_ptr model_; TfLiteDelegatePtr delegate_; #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) @@ -198,7 +241,6 @@ class TfLiteInferenceCalculator : public CalculatorBase { edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice(); #endif - std::string model_path_ = ""; bool gpu_inference_ = false; bool gpu_input_ = false; bool gpu_output_ = false; @@ -217,6 +259,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); const auto& options = cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); + RET_CHECK(!options.model_path().empty() ^ + cc->InputSidePackets().HasTag("MODEL")) + << "Either model as side packet or model path in options is required."; + bool use_gpu = options.has_delegate() ? 
options.delegate().has_gpu() : options.use_gpu(); @@ -249,6 +295,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); .Tag("CUSTOM_OP_RESOLVER") .Set(); } + if (cc->InputSidePackets().HasTag("MODEL")) { + cc->InputSidePackets().Tag("MODEL").Set(); + } if (use_gpu) { #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) @@ -267,7 +316,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); ::mediapipe::Status TfLiteInferenceCalculator::Open(CalculatorContext* cc) { cc->SetOffset(TimestampDiff(0)); - MP_RETURN_IF_ERROR(LoadOptions(cc)); + const auto& options = + cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); + gpu_inference_ = options.use_gpu(); if (cc->Inputs().HasTag("TENSORS_GPU")) { #if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) @@ -492,34 +543,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // Calculator Auxiliary Section -::mediapipe::Status TfLiteInferenceCalculator::LoadOptions( - CalculatorContext* cc) { - // Get calculator options specified in the graph. - const auto& options = - cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); - - // Get model name. - if (!options.model_path().empty()) { - std::string model_path = options.model_path(); - - ASSIGN_OR_RETURN(model_path_, mediapipe::PathToResourceAsFile(model_path)); - } else { - LOG(ERROR) << "Must specify path to TFLite model."; - return ::mediapipe::Status(::mediapipe::StatusCode::kNotFound, - "Must specify path to TFLite model."); - } - - // Get execution modes. - gpu_inference_ = - options.has_delegate() ? options.delegate().has_gpu() : options.use_gpu(); - - return ::mediapipe::OkStatus(); -} - ::mediapipe::Status TfLiteInferenceCalculator::LoadModel( CalculatorContext* cc) { - model_ = tflite::FlatBufferModel::BuildFromFile(model_path_.c_str()); - RET_CHECK(model_); + ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc)); + const auto& model = *model_packet_.Get(); tflite::ops::builtin::BuiltinOpResolver op_resolver; if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { @@ -529,9 +556,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } #if defined(MEDIAPIPE_EDGE_TPU) interpreter_ = - BuildEdgeTpuInterpreter(*model_, &op_resolver, edgetpu_context_.get()); + BuildEdgeTpuInterpreter(model, &op_resolver, edgetpu_context_.get()); #else - tflite::InterpreterBuilder(*model_, op_resolver)(&interpreter_); + tflite::InterpreterBuilder(model, op_resolver)(&interpreter_); #endif // MEDIAPIPE_EDGE_TPU RET_CHECK(interpreter_); @@ -557,6 +584,28 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); return ::mediapipe::OkStatus(); } +::mediapipe::StatusOr TfLiteInferenceCalculator::GetModelAsPacket( + const CalculatorContext& cc) { + const auto& options = + cc.Options(); + if (!options.model_path().empty()) { + std::string model_path = options.model_path(); + + ASSIGN_OR_RETURN(model_path, mediapipe::PathToResourceAsFile(model_path)); + + auto model = tflite::FlatBufferModel::BuildFromFile(model_path.c_str()); + RET_CHECK(model) << "Failed to load model from path."; + return MakePacket(TfLiteModelPtr( + model.release(), [](tflite::FlatBufferModel* model) { delete model; })); + } + if (cc.InputSidePackets().HasTag("MODEL")) { + return cc.InputSidePackets().Tag("MODEL"); + } + return ::mediapipe::Status( + ::mediapipe::StatusCode::kNotFound, + "Must specify TFLite model as path or loaded model."); +} + ::mediapipe::Status TfLiteInferenceCalculator::LoadDelegate( CalculatorContext* cc) { const auto& calculator_opts = @@ -587,6 +636,22 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } #endif // 
MEDIAPIPE_ANDROID +#if defined(__EMSCRIPTEN__) + const bool xnnpack_requested = true; +#else + const bool xnnpack_requested = calculator_opts.has_delegate() && + calculator_opts.delegate().has_xnnpack(); +#endif // __EMSCRIPTEN__ + + if (xnnpack_requested) { + TfLiteXNNPackDelegateOptions xnnpack_opts{}; + xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts); + delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts), + &TfLiteXNNPackDelegateDelete); + RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()), + kTfLiteOk); + } + // Return, no need for GPU delegate below. return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.proto b/mediapipe/calculators/tflite/tflite_inference_calculator.proto index 893574b6a..a764e89fd 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.proto @@ -45,11 +45,17 @@ message TfLiteInferenceCalculatorOptions { message Gpu {} // Android only. message Nnapi {} + message Xnnpack { + // Number of threads for XNNPACK delegate. (By default, calculator tries + // to choose optimal number of threads depending on the device.) + optional int32 num_threads = 1 [default = -1]; + } oneof delegate { TfLite tflite = 1; Gpu gpu = 2; Nnapi nnapi = 3; + Xnnpack xnnpack = 4; } } diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc b/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc index 9529a8ecb..c3df07191 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc @@ -41,7 +41,7 @@ namespace mediapipe { using ::tflite::Interpreter; -void DoSmokeTest(absl::string_view delegate) { +void DoSmokeTest(const std::string& graph_proto) { const int width = 8; const int height = 8; const int channels = 3; @@ -69,24 +69,9 @@ void DoSmokeTest(absl::string_view delegate) { auto input_vec = absl::make_unique>(); input_vec->emplace_back(*tensor); - std::string graph_proto = R"( - input_stream: "tensor_in" - node { - calculator: "TfLiteInferenceCalculator" - input_stream: "TENSORS:tensor_in" - output_stream: "TENSORS:tensor_out" - options { - [mediapipe.TfLiteInferenceCalculatorOptions.ext] { - model_path: "mediapipe/calculators/tflite/testdata/add.bin" - $delegate - } - } - } - )"; - ASSERT_EQ(absl::StrReplaceAll({{"$delegate", delegate}}, &graph_proto), 1); // Prepare single calculator graph to and wait for packets. CalculatorGraphConfig graph_config = - ::mediapipe::ParseTextProtoOrDie(graph_proto); + ParseTextProtoOrDie(graph_proto); std::vector output_packets; tool::AddVectorSink("tensor_out", &graph_config, &output_packets); CalculatorGraph graph(graph_config); @@ -119,8 +104,70 @@ void DoSmokeTest(absl::string_view delegate) { // Tests a simple add model that adds an input tensor to itself. 
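+// The graph below is run with no delegate, with the TFLite delegate selected
+// explicitly, and with the XNNPACK delegate (default and explicit num_threads).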
TEST(TfLiteInferenceCalculatorTest, SmokeTest) { - DoSmokeTest(/*delegate=*/""); - DoSmokeTest(/*delegate=*/"delegate { tflite {} }"); + std::string graph_proto = R"( + input_stream: "tensor_in" + node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:tensor_in" + output_stream: "TENSORS:tensor_out" + options { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/calculators/tflite/testdata/add.bin" + $delegate + } + } + } + )"; + DoSmokeTest( + /*graph_proto=*/absl::StrReplaceAll(graph_proto, {{"$delegate", ""}})); + DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll( + graph_proto, {{"$delegate", "delegate { tflite {} }"}})); + DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll( + graph_proto, {{"$delegate", "delegate { xnnpack {} }"}})); + DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll( + graph_proto, + {{"$delegate", "delegate { xnnpack { num_threads: 10 } }"}})); +} + +TEST(TfLiteInferenceCalculatorTest, SmokeTest_ModelAsInputSidePacket) { + std::string graph_proto = R"( + input_stream: "tensor_in" + + node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { string_value: "mediapipe/calculators/tflite/testdata/add.bin" } + } + } + } + + node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + } + + node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" + } + + node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:tensor_in" + output_stream: "TENSORS:tensor_out" + input_side_packet: "MODEL:model" + options { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + use_gpu: false + } + } + } + )"; + DoSmokeTest(graph_proto); } } // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_model_calculator.cc b/mediapipe/calculators/tflite/tflite_model_calculator.cc new file mode 100644 index 000000000..d24c55b14 --- /dev/null +++ b/mediapipe/calculators/tflite/tflite_model_calculator.cc @@ -0,0 +1,86 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/ret_check.h" +#include "tensorflow/lite/model.h" + +namespace mediapipe { + +// Loads TfLite model from model blob specified as input side packet and outputs +// corresponding side packet. +// +// Input side packets: +// MODEL_BLOB - TfLite model blob/file-contents (std::string). You can read +// model blob from file (using whatever APIs you have) and pass +// it to the graph as input side packet or you can use some of +// calculators like LocalFileContentsCalculator to get model +// blob and use it as input here. +// +// Output side packets: +// MODEL - TfLite model. 
(std::unique_ptr>) +// +// Example use: +// +// node { +// calculator: "TfLiteModelCalculator" +// input_side_packet: "MODEL_BLOB:model_blob" +// output_side_packet: "MODEL:model" +// } +// +class TfLiteModelCalculator : public CalculatorBase { + public: + using TfLiteModelPtr = + std::unique_ptr>; + + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets().Tag("MODEL_BLOB").Set(); + cc->OutputSidePackets().Tag("MODEL").Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + const Packet& model_packet = cc->InputSidePackets().Tag("MODEL_BLOB"); + const std::string& model_blob = model_packet.Get(); + std::unique_ptr model = + tflite::FlatBufferModel::BuildFromBuffer(model_blob.data(), + model_blob.size()); + RET_CHECK(model) << "Failed to load TfLite model from blob."; + + cc->OutputSidePackets().Tag("MODEL").Set( + MakePacket(TfLiteModelPtr( + model.release(), [model_packet](tflite::FlatBufferModel* model) { + // Keeping model_packet in order to keep underlying model blob + // which can be released only after TfLite model is not needed + // anymore (deleted). + delete model; + }))); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(TfLiteModelCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_model_calculator_test.cc b/mediapipe/calculators/tflite/tflite_model_calculator_test.cc new file mode 100644 index 000000000..fed3743a5 --- /dev/null +++ b/mediapipe/calculators/tflite/tflite_model_calculator_test.cc @@ -0,0 +1,88 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT +#include "tensorflow/lite/model.h" + +namespace mediapipe { + +TEST(TfLiteModelCalculatorTest, SmokeTest) { + // Prepare single calculator graph to and wait for packets. 
+ CalculatorGraphConfig graph_config = ParseTextProtoOrDie< + CalculatorGraphConfig>( + R"( + node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/calculators/tflite/testdata/add.bin" + } + } + } + } + + node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + } + + node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" + } + )"); + CalculatorGraph graph(graph_config); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + auto status_or_packet = graph.GetOutputSidePacket("model"); + MP_ASSERT_OK(status_or_packet); + auto model_packet = status_or_packet.ValueOrDie(); + const auto& model = model_packet.Get< + std::unique_ptr>>(); + + auto expected_model = tflite::FlatBufferModel::BuildFromFile( + "mediapipe/calculators/tflite/testdata/add.bin"); + + EXPECT_EQ(model->GetModel()->version(), + expected_model->GetModel()->version()); + EXPECT_EQ(model->GetModel()->buffers()->size(), + expected_model->GetModel()->buffers()->size()); + const int num_subgraphs = expected_model->GetModel()->subgraphs()->size(); + EXPECT_EQ(model->GetModel()->subgraphs()->size(), num_subgraphs); + for (int i = 0; i < num_subgraphs; ++i) { + const auto* expected_subgraph = + expected_model->GetModel()->subgraphs()->Get(i); + const auto* subgraph = model->GetModel()->subgraphs()->Get(i); + const int num_tensors = expected_subgraph->tensors()->size(); + EXPECT_EQ(subgraph->tensors()->size(), num_tensors); + for (int j = 0; j < num_tensors; ++j) { + EXPECT_EQ(subgraph->tensors()->Get(j)->name()->str(), + expected_subgraph->tensors()->Get(j)->name()->str()); + } + } +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc index 6e1c6e1e6..e9c09169b 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc @@ -129,22 +129,43 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); num_classes *= raw_score_tensor->dims->data[i]; } + if (options_.binary_classification()) { + RET_CHECK_EQ(num_classes, 1); + // Number of classes for binary classification. + num_classes = 2; + } if (label_map_loaded_) { RET_CHECK_EQ(num_classes, label_map_.size()); } const float* raw_scores = raw_score_tensor->data.f; auto classification_list = absl::make_unique(); - for (int i = 0; i < num_classes; ++i) { - if (options_.has_min_score_threshold() && - raw_scores[i] < options_.min_score_threshold()) { - continue; - } - Classification* classification = classification_list->add_classification(); - classification->set_index(i); - classification->set_score(raw_scores[i]); + if (options_.binary_classification()) { + Classification* class_first = classification_list->add_classification(); + Classification* class_second = classification_list->add_classification(); + class_first->set_index(0); + class_second->set_index(1); + class_first->set_score(raw_scores[0]); + class_second->set_score(1. 
- raw_scores[0]); + if (label_map_loaded_) { - classification->set_label(label_map_[i]); + class_first->set_label(label_map_[0]); + class_second->set_label(label_map_[1]); + } + } else { + for (int i = 0; i < num_classes; ++i) { + if (options_.has_min_score_threshold() && + raw_scores[i] < options_.min_score_threshold()) { + continue; + } + Classification* classification = + classification_list->add_classification(); + classification->set_index(i); + classification->set_score(raw_scores[i]); + + if (label_map_loaded_) { + classification->set_label(label_map_[i]); + } } } diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto index a2b5dd224..c6c9d915d 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto @@ -32,4 +32,10 @@ message TfLiteTensorsToClassificationCalculatorOptions { optional int32 top_k = 2; // Path to a label map file for getting the actual name of class ids. optional string label_map_path = 3; + // Whether the input is a single float for binary classification. + // When true, only a single float is expected in the input tensor and the + // label map, if provided, is expected to have exactly two labels. + // The single score(float) represent the probability of first label, and + // 1 - score is the probabilility of the second label. + optional bool binary_classification = 4; } diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index b5df9ff04..3d2210ca4 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -998,6 +998,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", @@ -1015,6 +1016,7 @@ cc_library( deps = [ ":collection_has_min_size_calculator_cc_proto", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -1022,6 +1024,18 @@ cc_library( alwayslink = 1, ) +cc_test( + name = "collection_has_min_size_calculator_test", + srcs = ["collection_has_min_size_calculator_test.cc"], + deps = [ + ":collection_has_min_size_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + ], +) + cc_library( name = "association_calculator", hdrs = ["association_calculator.h"], diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator.cc b/mediapipe/calculators/util/collection_has_min_size_calculator.cc index 5ff43c605..22bfb9c4c 100644 --- a/mediapipe/calculators/util/collection_has_min_size_calculator.cc +++ b/mediapipe/calculators/util/collection_has_min_size_calculator.cc @@ -15,6 +15,9 @@ #include "mediapipe/calculators/util/collection_has_min_size_calculator.h" +#include + +#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" namespace mediapipe { @@ -23,4 +26,9 @@ typedef CollectionHasMinSizeCalculator> NormalizedRectVectorHasMinSizeCalculator; 
REGISTER_CALCULATOR(NormalizedRectVectorHasMinSizeCalculator); +typedef CollectionHasMinSizeCalculator< + std::vector<::mediapipe::NormalizedLandmarkList>> + NormalizedLandmarkListVectorHasMinSizeCalculator; +REGISTER_CALCULATOR(NormalizedLandmarkListVectorHasMinSizeCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator_test.cc b/mediapipe/calculators/util/collection_has_min_size_calculator_test.cc new file mode 100644 index 000000000..be3dc41e6 --- /dev/null +++ b/mediapipe/calculators/util/collection_has_min_size_calculator_test.cc @@ -0,0 +1,156 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/collection_has_min_size_calculator.h" + +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { + +typedef CollectionHasMinSizeCalculator> + TestIntCollectionHasMinSizeCalculator; +REGISTER_CALCULATOR(TestIntCollectionHasMinSizeCalculator); + +void AddInputVector(const std::vector& input, int64 timestamp, + CalculatorRunner* runner) { + runner->MutableInputs() + ->Tag("ITERABLE") + .packets.push_back( + MakePacket>(input).At(Timestamp(timestamp))); +} + +TEST(TestIntCollectionHasMinSizeCalculator, DoesHaveMinSize) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + output_stream: "output_vector" + options { + [mediapipe.CollectionHasMinSizeCalculatorOptions.ext] { min_size: 2 } + } + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + AddInputVector({1, 2}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_TRUE(outputs[0].Get()); + + AddInputVector({1, 2, 3}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_TRUE(outputs[1].Get()); +} + +TEST(TestIntCollectionHasMinSizeCalculator, + DoesHaveMinSize_MinSizeAsSidePacket) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + input_side_packet: "min_size" + output_stream: "output_vector" + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + runner.MutableSidePackets()->Index(0) = MakePacket(2); + + AddInputVector({1, 2}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), 
outputs[0].Timestamp()); + EXPECT_TRUE(outputs[0].Get()); + + AddInputVector({1, 2, 3}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_TRUE(outputs[1].Get()); +} + +TEST(TestIntCollectionHasMinSizeCalculator, DoesNotHaveMinSize) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + output_stream: "output_vector" + options { + [mediapipe.CollectionHasMinSizeCalculatorOptions.ext] { min_size: 3 } + } + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + AddInputVector({1}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_FALSE(outputs[0].Get()); + + AddInputVector({1, 2}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_FALSE(outputs[1].Get()); +} + +TEST(TestIntCollectionHasMinSizeCalculator, + DoesNotHaveMinSize_MinSizeAsSidePacket) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + input_side_packet: "min_size" + output_stream: "output_vector" + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + runner.MutableSidePackets()->Index(0) = MakePacket(3); + + AddInputVector({1}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_FALSE(outputs[0].Get()); + + AddInputVector({1, 2}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_FALSE(outputs[1].Get()); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/filter_collection_calculator.cc b/mediapipe/calculators/util/filter_collection_calculator.cc index e110afe7d..356b03dd6 100644 --- a/mediapipe/calculators/util/filter_collection_calculator.cc +++ b/mediapipe/calculators/util/filter_collection_calculator.cc @@ -17,6 +17,7 @@ #include +#include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" @@ -31,4 +32,8 @@ typedef FilterCollectionCalculator< FilterLandmarkListCollectionCalculator; REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator); +typedef FilterCollectionCalculator> + FilterClassificationListCollectionCalculator; +REGISTER_CALCULATOR(FilterClassificationListCollectionCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc index c2b318a3d..d83df435d 100644 --- a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc @@ -29,6 +29,7 @@ namespace { constexpr char kLandmarksTag[] = "LANDMARKS"; constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS"; +constexpr char kRenderScaleTag[] = "RENDER_SCALE"; constexpr char kRenderDataTag[] = "RENDER_DATA"; constexpr char kLandmarkLabel[] = "KEYPOINT"; constexpr int kMaxLandmarkThickness = 18; @@ -71,6 +72,83 @@ void 
SetColorSizeValueFromZ(float z, float z_min, float z_max, render_annotation->set_thickness(thickness); } +template +void AddConnectionToRenderData(const LandmarkType& start, + const LandmarkType& end, int gray_val1, + int gray_val2, float thickness, bool normalized, + RenderData* render_data) { + auto* connection_annotation = render_data->add_render_annotations(); + RenderAnnotation::GradientLine* line = + connection_annotation->mutable_gradient_line(); + line->set_x_start(start.x()); + line->set_y_start(start.y()); + line->set_x_end(end.x()); + line->set_y_end(end.y()); + line->set_normalized(normalized); + line->mutable_color1()->set_r(gray_val1); + line->mutable_color1()->set_g(gray_val1); + line->mutable_color1()->set_b(gray_val1); + line->mutable_color2()->set_r(gray_val2); + line->mutable_color2()->set_g(gray_val2); + line->mutable_color2()->set_b(gray_val2); + connection_annotation->set_thickness(thickness); +} + +template +void AddConnectionsWithDepth(const LandmarkListType& landmarks, + const std::vector& landmark_connections, + float thickness, bool normalized, float min_z, + float max_z, RenderData* render_data) { + for (int i = 0; i < landmark_connections.size(); i += 2) { + const auto& ld0 = landmarks.landmark(landmark_connections[i]); + const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]); + const int gray_val1 = + 255 - static_cast(Remap(ld0.z(), min_z, max_z, 255)); + const int gray_val2 = + 255 - static_cast(Remap(ld1.z(), min_z, max_z, 255)); + AddConnectionToRenderData(ld0, ld1, gray_val1, gray_val2, + thickness, normalized, render_data); + } +} + +template +void AddConnectionToRenderData(const LandmarkType& start, + const LandmarkType& end, + const Color& connection_color, float thickness, + bool normalized, RenderData* render_data) { + auto* connection_annotation = render_data->add_render_annotations(); + RenderAnnotation::Line* line = connection_annotation->mutable_line(); + line->set_x_start(start.x()); + line->set_y_start(start.y()); + line->set_x_end(end.x()); + line->set_y_end(end.y()); + line->set_normalized(normalized); + SetColor(connection_annotation, connection_color); + connection_annotation->set_thickness(thickness); +} + +template +void AddConnections(const LandmarkListType& landmarks, + const std::vector& landmark_connections, + const Color& connection_color, float thickness, + bool normalized, RenderData* render_data) { + for (int i = 0; i < landmark_connections.size(); i += 2) { + const auto& ld0 = landmarks.landmark(landmark_connections[i]); + const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]); + AddConnectionToRenderData(ld0, ld1, connection_color, + thickness, normalized, render_data); + } +} + +RenderAnnotation* AddPointRenderData(const Color& landmark_color, + float thickness, RenderData* render_data) { + auto* landmark_data_annotation = render_data->add_render_annotations(); + landmark_data_annotation->set_scene_tag(kLandmarkLabel); + SetColor(landmark_data_annotation, landmark_color); + landmark_data_annotation->set_thickness(thickness); + return landmark_data_annotation; +} + } // namespace // A calculator that converts Landmark proto to RenderData proto for @@ -107,29 +185,6 @@ class LandmarksToRenderDataCalculator : public CalculatorBase { ::mediapipe::Status Process(CalculatorContext* cc) override; private: - static void AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - RenderData* render_data); - static 
void SetRenderAnnotationColorThickness( - const LandmarksToRenderDataCalculatorOptions& options, - RenderAnnotation* render_annotation); - static RenderAnnotation* AddPointRenderData( - const LandmarksToRenderDataCalculatorOptions& options, - RenderData* render_data); - static void AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - int gray_val1, int gray_val2, RenderData* render_data); - - template - void AddConnections(const LandmarkListType& landmarks, bool normalized, - RenderData* render_data); - template - void AddConnectionsWithDepth(const LandmarkListType& landmarks, - bool normalized, float min_z, float max_z, - RenderData* render_data); - LandmarksToRenderDataCalculatorOptions options_; }; REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); @@ -150,6 +205,9 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); if (cc->Inputs().HasTag(kNormLandmarksTag)) { cc->Inputs().Tag(kNormLandmarksTag).Set(); } + if (cc->Inputs().HasTag(kRenderScaleTag)) { + cc->Inputs().Tag(kRenderScaleTag).Set(); + } cc->Outputs().Tag(kRenderDataTag).Set(); return ::mediapipe::OkStatus(); } @@ -169,11 +227,26 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); float z_min = 0.f; float z_max = 0.f; + // Apply scale to `thickness` of rendered landmarks and connections to make + // them bigger when object (e.g. pose, hand or face) is closer/bigger and + // snaller when object is further/smaller. + float thickness = options_.thickness(); + if (cc->Inputs().HasTag(kRenderScaleTag)) { + const float render_scale = cc->Inputs().Tag(kRenderScaleTag).Get(); + thickness *= render_scale; + } + + // Parse landmarks connections to a vector. + RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) + << "Number of entries in landmark connections must be a multiple of 2"; + std::vector landmark_connections; + for (int i = 0; i < options_.landmark_connections_size(); i += 1) { + landmark_connections.push_back(options_.landmark_connections(i)); + } + if (cc->Inputs().HasTag(kLandmarksTag)) { const LandmarkList& landmarks = cc->Inputs().Tag(kLandmarksTag).Get(); - RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) - << "Number of entries in landmark connections must be a multiple of 2"; if (visualize_depth) { GetMinMaxZ(landmarks, &z_min, &z_max); } @@ -181,8 +254,8 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); visualize_depth &= ((z_max - z_min) > 1e-3); for (int i = 0; i < landmarks.landmark_size(); ++i) { const Landmark& landmark = landmarks.landmark(i); - auto* landmark_data_render = - AddPointRenderData(options_, render_data.get()); + auto* landmark_data_render = AddPointRenderData( + options_.landmark_color(), thickness, render_data.get()); if (visualize_depth) { SetColorSizeValueFromZ(landmark.z(), z_min, z_max, landmark_data_render); @@ -193,19 +266,19 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); landmark_data->set_y(landmark.y()); } if (visualize_depth) { - AddConnectionsWithDepth(landmarks, /*normalized=*/false, - z_min, z_max, render_data.get()); + AddConnectionsWithDepth( + landmarks, landmark_connections, thickness, /*normalized=*/false, + z_min, z_max, render_data.get()); } else { - AddConnections(landmarks, /*normalized=*/false, - render_data.get()); + AddConnections( + landmarks, landmark_connections, options_.connection_color(), + thickness, /*normalized=*/false, render_data.get()); } } if (cc->Inputs().HasTag(kNormLandmarksTag)) { const 
NormalizedLandmarkList& landmarks = cc->Inputs().Tag(kNormLandmarksTag).Get(); - RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) - << "Number of entries in landmark connections must be a multiple of 2"; if (visualize_depth) { GetMinMaxZ(landmarks, &z_min, &z_max); @@ -214,8 +287,8 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); visualize_depth &= ((z_max - z_min) > 1e-3); for (int i = 0; i < landmarks.landmark_size(); ++i) { const NormalizedLandmark& landmark = landmarks.landmark(i); - auto* landmark_data_render = - AddPointRenderData(options_, render_data.get()); + auto* landmark_data_render = AddPointRenderData( + options_.landmark_color(), thickness, render_data.get()); if (visualize_depth) { SetColorSizeValueFromZ(landmark.z(), z_min, z_max, landmark_data_render); @@ -226,11 +299,13 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); landmark_data->set_y(landmark.y()); } if (visualize_depth) { - AddConnectionsWithDepth( - landmarks, /*normalized=*/true, z_min, z_max, render_data.get()); + AddConnectionsWithDepth( + landmarks, landmark_connections, thickness, /*normalized=*/true, + z_min, z_max, render_data.get()); } else { - AddConnections(landmarks, /*normalized=*/true, - render_data.get()); + AddConnections( + landmarks, landmark_connections, options_.connection_color(), + thickness, /*normalized=*/true, render_data.get()); } } @@ -240,84 +315,4 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); return ::mediapipe::OkStatus(); } -template -void LandmarksToRenderDataCalculator::AddConnectionsWithDepth( - const LandmarkListType& landmarks, bool normalized, float min_z, - float max_z, RenderData* render_data) { - for (int i = 0; i < options_.landmark_connections_size(); i += 2) { - const auto& ld0 = landmarks.landmark(options_.landmark_connections(i)); - const auto& ld1 = landmarks.landmark(options_.landmark_connections(i + 1)); - const int gray_val1 = - 255 - static_cast(Remap(ld0.z(), min_z, max_z, 255)); - const int gray_val2 = - 255 - static_cast(Remap(ld1.z(), min_z, max_z, 255)); - AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(), options_, - normalized, gray_val1, gray_val2, render_data); - } -} - -void LandmarksToRenderDataCalculator::AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - int gray_val1, int gray_val2, RenderData* render_data) { - auto* connection_annotation = render_data->add_render_annotations(); - RenderAnnotation::GradientLine* line = - connection_annotation->mutable_gradient_line(); - line->set_x_start(start_x); - line->set_y_start(start_y); - line->set_x_end(end_x); - line->set_y_end(end_y); - line->set_normalized(normalized); - line->mutable_color1()->set_r(gray_val1); - line->mutable_color1()->set_g(gray_val1); - line->mutable_color1()->set_b(gray_val1); - line->mutable_color2()->set_r(gray_val2); - line->mutable_color2()->set_g(gray_val2); - line->mutable_color2()->set_b(gray_val2); - connection_annotation->set_thickness(options.thickness()); -} - -template -void LandmarksToRenderDataCalculator::AddConnections( - const LandmarkListType& landmarks, bool normalized, - RenderData* render_data) { - for (int i = 0; i < options_.landmark_connections_size(); i += 2) { - const auto& ld0 = landmarks.landmark(options_.landmark_connections(i)); - const auto& ld1 = landmarks.landmark(options_.landmark_connections(i + 1)); - AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(), options_, - normalized, 
render_data); - } -} - -void LandmarksToRenderDataCalculator::AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - RenderData* render_data) { - auto* connection_annotation = render_data->add_render_annotations(); - RenderAnnotation::Line* line = connection_annotation->mutable_line(); - line->set_x_start(start_x); - line->set_y_start(start_y); - line->set_x_end(end_x); - line->set_y_end(end_y); - line->set_normalized(normalized); - SetColor(connection_annotation, options.connection_color()); - connection_annotation->set_thickness(options.thickness()); -} - -RenderAnnotation* LandmarksToRenderDataCalculator::AddPointRenderData( - const LandmarksToRenderDataCalculatorOptions& options, - RenderData* render_data) { - auto* landmark_data_annotation = render_data->add_render_annotations(); - landmark_data_annotation->set_scene_tag(kLandmarkLabel); - SetRenderAnnotationColorThickness(options, landmark_data_annotation); - return landmark_data_annotation; -} - -void LandmarksToRenderDataCalculator::SetRenderAnnotationColorThickness( - const LandmarksToRenderDataCalculatorOptions& options, - RenderAnnotation* render_annotation) { - SetColor(render_annotation, options.landmark_color()); - render_annotation->set_thickness(options.thickness()); -} - } // namespace mediapipe diff --git a/mediapipe/calculators/util/packet_latency_calculator_test.cc b/mediapipe/calculators/util/packet_latency_calculator_test.cc index 25d28d061..9ba7f70bf 100644 --- a/mediapipe/calculators/util/packet_latency_calculator_test.cc +++ b/mediapipe/calculators/util/packet_latency_calculator_test.cc @@ -276,6 +276,7 @@ TEST_F(PacketLatencyCalculatorTest, DoesNotOutputUntilReferencePacketReceived) { "delayed_packet_0", Adopt(new double()).At(Timestamp(2)))); // Send a reference packet with timestamp 10 usec. 
+ simulation_clock_->Sleep(absl::Microseconds(1)); MP_ASSERT_OK(graph_.AddPacketToInputStream( "camera_frames", Adopt(new double()).At(Timestamp(10)))); simulation_clock_->Sleep(absl::Microseconds(1)); diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index da2bc7fbd..f4fca811a 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -138,7 +138,7 @@ cc_library( srcs = ["flow_to_image_calculator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/video:flow_to_image_calculator_cc_proto", + ":flow_to_image_calculator_cc_proto", "//mediapipe/calculators/video/tool:flow_quantizer_model", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_format_cc_proto", @@ -384,20 +384,18 @@ cc_test( ], ) -MEDIAPIPE_DEPS = [ - "//mediapipe/calculators/video:box_tracker_calculator", - "//mediapipe/calculators/video:flow_packager_calculator", - "//mediapipe/calculators/video:motion_analysis_calculator", - "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", - "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", -] - mediapipe_binary_graph( name = "parallel_tracker_binarypb", graph = "testdata/parallel_tracker_graph.pbtxt", output_name = "testdata/parallel_tracker.binarypb", visibility = ["//visibility:public"], - deps = MEDIAPIPE_DEPS, + deps = [ + ":box_tracker_calculator", + ":flow_packager_calculator", + ":motion_analysis_calculator", + "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + ], ) mediapipe_binary_graph( @@ -405,7 +403,13 @@ mediapipe_binary_graph( graph = "testdata/tracker_graph.pbtxt", output_name = "testdata/tracker.binarypb", visibility = ["//visibility:public"], - deps = MEDIAPIPE_DEPS, + deps = [ + ":box_tracker_calculator", + ":flow_packager_calculator", + ":motion_analysis_calculator", + "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + ], ) cc_test( diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA new file mode 100644 index 000000000..aee0b0fe7 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA @@ -0,0 +1,7 @@ +tricorder: { + options: { + builder: { + config: "android_arm64" + } + } +} diff --git a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc index fa9d4781a..b038b0f3c 100644 --- a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc +++ b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc @@ -95,7 +95,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem( const std::vector& focus_point_frames, const std::vector& prior_focus_point_frames, const int original_width, const int original_height, const int output_width, - const int output_height, std::vector* all_xforms) { + const int output_height, std::vector* all_transforms) { RET_CHECK_GE(original_width, output_width); RET_CHECK_GE(original_height, output_height); const bool should_solve_x_problem = original_width != output_width; @@ -138,9 +138,10 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem( Solver::Options options; 
options.linear_solver_type = ceres::DENSE_QR; - Solver::Summary summary; - Solve(options, &problem_x, &summary); - all_xforms->clear(); + Solver::Summary summary_x, summary_y; + Solve(options, &problem_x, &summary_x); + Solve(options, &problem_y, &summary_y); + all_transforms->clear(); for (int i = 0; i < focus_point_frames.size() + prior_focus_point_frames.size(); i++) { // Code below assigns values into an affine model, defined as: @@ -160,7 +161,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem( yb_, yc_, yd_, yk_); transform.at(1, 2) = delta; } - all_xforms->push_back(transform); + all_transforms->push_back(transform); } return mediapipe::OkStatus(); } diff --git a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h index cbcb85858..514f8760d 100644 --- a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h +++ b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h @@ -40,14 +40,14 @@ class PolynomialRegressionPathSolver { // Given a series of focus points on frames, uses polynomial regression to // compute a best guess of a 1D camera movement trajectory along x-axis and // y-axis, such that focus points can be preserved as much as possible. The - // returned |all_xforms| hold the camera location at each timestamp + // returned |all_transforms| hold the camera location at each timestamp // corresponding to each input frame. ::mediapipe::Status ComputeCameraPath( const std::vector& focus_point_frames, const std::vector& prior_focus_point_frames, const int original_width, const int original_height, const int output_width, const int output_height, - std::vector* all_xforms); + std::vector* all_transforms); private: // Adds a new cost function, constructed using |in| and |out|, into |problem|. diff --git a/mediapipe/examples/desktop/hair_segmentation/BUILD b/mediapipe/examples/desktop/hair_segmentation/BUILD index 0338feddf..69948e437 100644 --- a/mediapipe/examples/desktop/hair_segmentation/BUILD +++ b/mediapipe/examples/desktop/hair_segmentation/BUILD @@ -24,3 +24,17 @@ cc_binary( "//mediapipe/graphs/hair_segmentation:mobile_calculators", ], ) + +cc_binary( + name = "hair_segmentation_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + ] + select({ + "//mediapipe/gpu:disable_gpu": [ + "//mediapipe/graphs/hair_segmentation:desktop_calculators", + ], + "//conditions:default": [ + "//mediapipe/graphs/hair_segmentation:mobile_calculators", + ], + }), +) diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index 4176bcd9c..4ab28fbd2 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -361,6 +361,7 @@ cc_library( "//mediapipe/framework:mediapipe_options_cc_proto", "//mediapipe/framework:packet_generator_cc_proto", "//mediapipe/framework:status_handler_cc_proto", + "//mediapipe/framework:stream_handler_cc_proto", "//mediapipe/framework/port:any_proto", "//mediapipe/framework/port:status", "//mediapipe/framework/tool:options_util", diff --git a/mediapipe/framework/calculator_contract.h b/mediapipe/framework/calculator_contract.h index 2402c2525..a47632fc9 100644 --- a/mediapipe/framework/calculator_contract.h +++ b/mediapipe/framework/calculator_contract.h @@ -84,7 +84,7 @@ class CalculatorContract { return *output_side_packets_; } - // Set this Node's default InputStreamHandler. + // Specifies the preferred InputStreamHandler for this Node. 
// If there is an InputStreamHandler specified in the graph (.pbtxt) for this // Node, then the graph's InputStreamHandler will take priority. void SetInputStreamHandler(const std::string& name) { @@ -104,6 +104,29 @@ class CalculatorContract { return input_stream_handler_options_; } + // The next few methods are concerned with timestamp bound propagation + // (see scheduling_sync.md#input-policies). Every calculator that processes + // live inputs should specify either ProcessTimestampBounds or + // TimestampOffset. Calculators that produce output at the same timestamp as + // the input, or with a fixed offset, should declare this fact using + // SetTimestampOffset. Calculators that require custom timestamp bound + // calculations should use SetProcessTimestampBounds. + + // When true, Process is called for every new timestamp bound, with or without + // new packets. A call to Process with only an input timestamp bound is + // normally used to compute a new output timestamp bound. + void SetProcessTimestampBounds(bool process_timestamps) { + process_timestamps_ = process_timestamps; + } + bool GetProcessTimestampBounds() const { return process_timestamps_; } + + // Specifies the maximum difference between input and output timestamps. + // When specified, the mediapipe framework automatically computes output + // timestamp bounds based on input timestamps. The special value + // TimestampDiff::Unset disables the timestamp offset. + void SetTimestampOffset(TimestampDiff offset) { timestamp_offset_ = offset; } + TimestampDiff GetTimestampOffset() const { return timestamp_offset_; } + class GraphServiceRequest { public: // APIs that should be used by calculators. @@ -147,6 +170,8 @@ class CalculatorContract { MediaPipeOptions input_stream_handler_options_; std::string node_name_; std::map service_requests_; + bool process_timestamps_ = false; + TimestampDiff timestamp_offset_ = TimestampDiff::Unset(); }; } // namespace mediapipe diff --git a/mediapipe/framework/calculator_graph.h b/mediapipe/framework/calculator_graph.h index 63520b90d..50a4069a8 100644 --- a/mediapipe/framework/calculator_graph.h +++ b/mediapipe/framework/calculator_graph.h @@ -143,7 +143,7 @@ class CalculatorGraph { const std::string& graph_type = "", const Subgraph::SubgraphOptions* options = nullptr); - // Resturns the canonicalized CalculatorGraphConfig for this graph. + // Returns the canonicalized CalculatorGraphConfig for this graph. const CalculatorGraphConfig& Config() const { return validated_graph_->Config(); } diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc index 17998a1ff..4de8ffb7b 100644 --- a/mediapipe/framework/calculator_graph_bounds_test.cc +++ b/mediapipe/framework/calculator_graph_bounds_test.cc @@ -31,6 +31,17 @@ namespace { typedef std::function<::mediapipe::Status(CalculatorContext* cc)> CalculatorContextFunction; +// Returns the contents of a set of Packets. +// The contents must be copyable. +template +std::vector GetContents(const std::vector& packets) { + std::vector result; + for (Packet p : packets) { + result.push_back(p.Get()); + } + return result; +} + // A simple Semaphore for synchronizing test threads. class AtomicSemaphore { public: @@ -671,9 +682,9 @@ REGISTER_CALCULATOR(BoundToPacketCalculator); // A Calculator that produces packets at timestamps beyond the input timestamp. 
class FuturePacketCalculator : public CalculatorBase { + public: static constexpr int64 kOutputFutureMicros = 3; - public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set(); cc->Outputs().Index(0).Set(); @@ -742,9 +753,8 @@ TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) { MP_ASSERT_OK(graph.WaitUntilDone()); } -// Shows that bounds changes alone do not invoke Process. -// Note: Bounds changes alone will invoke Process eventually -// when SetOffset is cleared, see: go/mediapipe-realtime-graph. +// Shows that timestamp bounds changes alone do not invoke Process, +// without SetProcessTimestampBounds(true). TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { // OffsetBoundCalculator produces only timestamp bounds. // The BoundToPacketCalculator delivers an output packet whenever the @@ -753,8 +763,13 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { ::mediapipe::ParseTextProtoOrDie(R"( input_stream: 'input' node { - calculator: 'OffsetBoundCalculator' + calculator: 'FuturePacketCalculator' input_stream: 'input' + output_stream: 'input_2' + } + node { + calculator: 'OffsetBoundCalculator' + input_stream: 'input_2' output_stream: 'bounds' } node { @@ -778,6 +793,7 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { for (int i = 0; i < kNumInputs; ++i) { Packet p = MakePacket(33).At(Timestamp(i)); MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); } // No packets arrive, because updated timestamp bounds do not invoke @@ -1104,5 +1120,254 @@ TEST(CalculatorGraphBoundsTest, BoundsForEmptyInputs_SyncSets) { )"); } +// A Calculator that produces a packet for each timestamp bounds update. +class ProcessBoundToPacketCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + cc->Inputs().Index(i).SetAny(); + } + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + cc->Outputs().Index(i).Set(); + } + cc->SetInputStreamHandler("ImmediateInputStreamHandler"); + cc->SetProcessTimestampBounds(true); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + Timestamp t = cc->Inputs().Index(i).Value().Timestamp(); + if (t == cc->InputTimestamp() && + t >= cc->Outputs().Index(i).NextTimestampBound()) { + cc->Outputs().Index(i).Add(new auto(t), t); + } + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(ProcessBoundToPacketCalculator); + +// A Calculator that passes through each packet and timestamp immediately. 
+class ImmediatePassthroughCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + cc->Inputs().Index(i).SetAny(); + } + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + cc->Outputs().Index(i).SetSameAs(&cc->Inputs().Index(i)); + } + cc->SetInputStreamHandler("ImmediateInputStreamHandler"); + cc->SetProcessTimestampBounds(true); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + if (!cc->Inputs().Index(i).IsEmpty()) { + cc->Outputs().Index(i).AddPacket(cc->Inputs().Index(i).Value()); + } else { + Timestamp input_bound = + cc->Inputs().Index(i).Value().Timestamp().NextAllowedInStream(); + if (cc->Outputs().Index(i).NextTimestampBound() < input_bound) { + cc->Outputs().Index(i).SetNextTimestampBound(input_bound); + } + } + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(ImmediatePassthroughCalculator); + +// Shows that Process is called for input-sets without input packets. +void TestProcessForEmptyInputs(const std::string& input_stream_handler) { + // FuturePacketCalculator and OffsetBoundCalculator produce only ts bounds, + // The ProcessBoundToPacketCalculator has SetProcessTimestampBounds(true), + // and produces an output packet for every timestamp bound update. + std::string config_str = R"( + input_stream: 'input' + node { + calculator: 'FuturePacketCalculator' + input_stream: 'input' + output_stream: 'futures' + } + node { + calculator: 'OffsetBoundCalculator' + input_stream: 'futures' + output_stream: 'bounds' + } + node { + calculator: 'ProcessBoundToPacketCalculator' + input_stream: 'bounds' + output_stream: 'bounds_ts' + input_stream_handler { $input_stream_handler } + } + )"; + absl::StrReplaceAll({{"$input_stream_handler", input_stream_handler}}, + &config_str); + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(config_str); + CalculatorGraph graph; + std::vector input_ts_packets; + std::vector bounds_ts_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("bounds_ts", [&](const Packet& p) { + bounds_ts_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Add four packets into the graph. + constexpr int kFutureMicros = FuturePacketCalculator::kOutputFutureMicros; + Packet p; + p = MakePacket(33).At(Timestamp(0)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + p = MakePacket(33).At(Timestamp(10)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + p = MakePacket(33).At(Timestamp(20)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + p = MakePacket(33).At(Timestamp(30)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Packets arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(bounds_ts_packets.size(), 4); + + std::vector expected = { + Timestamp(0 + kFutureMicros), Timestamp(10 + kFutureMicros), + Timestamp(20 + kFutureMicros), Timestamp(30 + kFutureMicros)}; + EXPECT_EQ(GetContents(bounds_ts_packets), expected); + + // Shutdown the graph. 
+ MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +// Shows that Process is called for input-sets without input packets +// using a DefaultInputStreamHandler. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Default) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "DefaultInputStreamHandler")"); +} + +// Shows that Process is called for input-sets without input packets +// using an ImmediateInputStreamHandler. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Immediate) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "ImmediateInputStreamHandler")"); +} + +// Shows that Process is called for input-sets without input packets +// using a SyncSetInputStreamHandler with a single sync-set. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_SyncSet) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "SyncSetInputStreamHandler")"); +} + +// Shows that Process is called for input-sets without input packets +// using a SyncSetInputStreamHandler with multiple sync-sets. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_SyncSets) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { tag_index: ":0" } + } + } + )"); +} + +// Demonstrates the functionality of an "ImmediatePassthroughCalculator". +// The ImmediatePassthroughCalculator simply relays each input packet to +// the corresponding output stream. ProcessTimestampBounds is needed to +// relay timestamp bounds as well as packets. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Passthrough) { + // OffsetBoundCalculator produces timestamp bounds. + // ImmediatePassthroughCalculator relays packets and bounds. + // ProcessBoundToPacketCalculator reports packets and bounds as packets. + std::string config_str = R"( + input_stream: "input_0" + input_stream: "input_1" + node { + calculator: "OffsetBoundCalculator" + input_stream: "input_1" + output_stream: "bound_1" + } + node { + calculator: "ImmediatePassthroughCalculator" + input_stream: "input_0" + input_stream: "bound_1" + output_stream: "same_0" + output_stream: "same_1" + } + node { + calculator: "ProcessBoundToPacketCalculator" + input_stream: "same_0" + input_stream: "same_1" + output_stream: "output_0" + output_stream: "output_1" + } + )"; + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(config_str); + CalculatorGraph graph; + std::vector output_0_packets; + std::vector output_1_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("output_0", [&](const Packet& p) { + output_0_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.ObserveOutputStream("output_1", [&](const Packet& p) { + output_1_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Add four packets to input_0. + for (int i = 0; i < 4; ++i) { + Packet p = MakePacket(33).At(Timestamp(i * 10)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + } + + // Packets arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(output_0_packets.size(), 4); + EXPECT_EQ(output_1_packets.size(), 0); + std::vector expected = // + {Timestamp(0), Timestamp(10), Timestamp(20), Timestamp(30)}; + EXPECT_EQ(GetContents(output_0_packets), expected); + + // Add two timestamp bounds to bound_1.
+ for (int i = 0; i < 2; ++i) { + Packet p = MakePacket(33).At(Timestamp(10 + i * 10)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input_1", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + } + + // Bounds arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(output_0_packets.size(), 4); + EXPECT_EQ(output_1_packets.size(), 2); + expected = // + {Timestamp(10), Timestamp(20)}; + EXPECT_EQ(GetContents(output_1_packets), expected); + + // Shutdown the graph. + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/framework/calculator_node.cc b/mediapipe/framework/calculator_node.cc index 2834f011f..f69254db0 100644 --- a/mediapipe/framework/calculator_node.cc +++ b/mediapipe/framework/calculator_node.cc @@ -97,6 +97,7 @@ Timestamp CalculatorNode::SourceProcessOrder( const NodeTypeInfo& node_type_info = validated_graph_->CalculatorInfos()[node_id_]; + const CalculatorContract& contract = node_type_info.Contract(); uses_gpu_ = node_type_info.InputSidePacketTypes().HasTag(kGpuSharedTagName) || @@ -147,6 +148,14 @@ Timestamp CalculatorNode::SourceProcessOrder( use_calc_specified ? handler_config : node_config.input_stream_handler(), node_type_info.InputStreamTypes())); + for (auto& stream : output_stream_handler_->OutputStreams()) { + stream->Spec()->offset_enabled = + (contract.GetTimestampOffset() != TimestampDiff::Unset()); + stream->Spec()->offset = contract.GetTimestampOffset(); + } + input_stream_handler_->SetProcessTimestampBounds( + contract.GetProcessTimestampBounds()); + return InitializeInputStreams(input_stream_managers, output_stream_managers); } diff --git a/mediapipe/framework/deps/registration.cc b/mediapipe/framework/deps/registration.cc index f12a3834f..c467b700b 100644 --- a/mediapipe/framework/deps/registration.cc +++ b/mediapipe/framework/deps/registration.cc @@ -18,6 +18,10 @@ namespace mediapipe { namespace { +// List of namespaces that can register calculators inside the namespace +// and still refer to them using an unqualified name. This whitelist +// is meant to facilitate migration from unqualified to fully qualified +// calculator names. constexpr char const* kTopNamespaces[] = { "mediapipe", }; diff --git a/mediapipe/framework/formats/annotation/BUILD b/mediapipe/framework/formats/annotation/BUILD index 5ea495abe..d501901a7 100644 --- a/mediapipe/framework/formats/annotation/BUILD +++ b/mediapipe/framework/formats/annotation/BUILD @@ -49,3 +49,10 @@ mediapipe_cc_proto_library( visibility = ["//visibility:public"], deps = [":rasterization_proto"], ) + +# Expose the proto source files for building mediapipe AAR. +filegroup( + name = "protos_src", + srcs = glob(["*.proto"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/framework/formats/annotation/rasterization.proto b/mediapipe/framework/formats/annotation/rasterization.proto index 9aad7e88f..38414dff3 100644 --- a/mediapipe/framework/formats/annotation/rasterization.proto +++ b/mediapipe/framework/formats/annotation/rasterization.proto @@ -16,6 +16,9 @@ syntax = "proto2"; package mediapipe; +option java_package = "com.google.mediapipe.formats.annotation.proto"; +option java_outer_classname = "RasterizationProto"; + // A Region can be represented in each frame as a set of scanlines // (compressed RLE, similar to rasterization of polygons). 
// For each scanline with y-coordinate y, we save (possibly multiple) intervals diff --git a/mediapipe/framework/formats/location_data.proto b/mediapipe/framework/formats/location_data.proto index dbbf2909d..3edd54208 100644 --- a/mediapipe/framework/formats/location_data.proto +++ b/mediapipe/framework/formats/location_data.proto @@ -23,6 +23,9 @@ package mediapipe; import "mediapipe/framework/formats/annotation/rasterization.proto"; +option java_package = "com.google.mediapipe.formats.proto"; +option java_outer_classname = "LocationDataProto"; + message LocationData { // The supported formats for representing location data. A single location // must store its data in exactly one way. diff --git a/mediapipe/framework/input_stream_handler.cc b/mediapipe/framework/input_stream_handler.cc index 858360d25..0d6965056 100644 --- a/mediapipe/framework/input_stream_handler.cc +++ b/mediapipe/framework/input_stream_handler.cc @@ -22,6 +22,8 @@ namespace mediapipe { +using SyncSet = InputStreamHandler::SyncSet; + ::mediapipe::Status InputStreamHandler::InitializeInputStreamManagers( InputStreamManager* flat_input_stream_managers) { for (CollectionItemId id = input_stream_managers_.BeginId(); @@ -300,4 +302,92 @@ void InputStreamHandler::SetLatePreparation(bool late_preparation) { late_preparation_ = late_preparation; } +SyncSet::SyncSet(InputStreamHandler* input_stream_handler, + std::vector stream_ids) + : input_stream_handler_(input_stream_handler), + stream_ids_(std::move(stream_ids)) {} + +NodeReadiness SyncSet::GetReadiness(Timestamp* min_stream_timestamp) { + Timestamp min_bound = Timestamp::Done(); + Timestamp min_packet = Timestamp::Done(); + for (CollectionItemId id : stream_ids_) { + const auto& stream = input_stream_handler_->input_stream_managers_.Get(id); + bool empty; + Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); + if (empty) { + min_bound = std::min(min_bound, stream_timestamp); + } else { + min_packet = std::min(min_packet, stream_timestamp); + } + } + *min_stream_timestamp = std::min(min_packet, min_bound); + if (*min_stream_timestamp == Timestamp::Done()) { + last_processed_ts_ = Timestamp::Done().PreviousAllowedInStream(); + return NodeReadiness::kReadyForClose; + } + if (!input_stream_handler_->process_timestamps_) { + // Only an input_ts with packets can be processed. + // Note that (min_bound - 1) is the highest fully settled timestamp. + if (min_bound > min_packet) { + last_processed_ts_ = *min_stream_timestamp; + return NodeReadiness::kReadyForProcess; + } + } else { + // Any unprocessed input_ts can be processed. + // Note that (min_bound - 1) is the highest fully settled timestamp. 
+ Timestamp input_timestamp = + std::min(min_packet, min_bound.PreviousAllowedInStream()); + if (input_timestamp > + std::max(last_processed_ts_, Timestamp::Unstarted())) { + *min_stream_timestamp = input_timestamp; + last_processed_ts_ = input_timestamp; + return NodeReadiness::kReadyForProcess; + } + } + return NodeReadiness::kNotReady; +} + +Timestamp SyncSet::LastProcessed() const { return last_processed_ts_; } + +Timestamp SyncSet::MinPacketTimestamp() const { + Timestamp result = Timestamp::Done(); + for (CollectionItemId id : stream_ids_) { + const auto& stream = input_stream_handler_->input_stream_managers_.Get(id); + bool empty; + Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); + if (!empty) { + result = std::min(result, stream_timestamp); + } + } + return result; +} + +void SyncSet::FillInputSet(Timestamp input_timestamp, + InputStreamShardSet* input_set) { + CHECK(input_timestamp.IsAllowedInStream()); + CHECK(input_set); + for (CollectionItemId id : stream_ids_) { + const auto& stream = input_stream_handler_->input_stream_managers_.Get(id); + int num_packets_dropped = 0; + bool stream_is_done = false; + Packet current_packet = stream->PopPacketAtTimestamp( + input_timestamp, &num_packets_dropped, &stream_is_done); + CHECK_EQ(num_packets_dropped, 0) + << absl::Substitute("Dropped $0 packet(s) on input stream \"$1\".", + num_packets_dropped, stream->Name()); + input_stream_handler_->AddPacketToShard( + &input_set->Get(id), std::move(current_packet), stream_is_done); + } +} + +void SyncSet::FillInputBounds(InputStreamShardSet* input_set) { + for (CollectionItemId id : stream_ids_) { + const auto* stream = input_stream_handler_->input_stream_managers_.Get(id); + Timestamp bound = stream->MinTimestampOrBound(nullptr); + input_stream_handler_->AddPacketToShard( + &input_set->Get(id), Packet().At(bound.PreviousAllowedInStream()), + bound == Timestamp::Done()); + } +} + } // namespace mediapipe diff --git a/mediapipe/framework/input_stream_handler.h b/mediapipe/framework/input_stream_handler.h index 859610397..8d8b5a716 100644 --- a/mediapipe/framework/input_stream_handler.h +++ b/mediapipe/framework/input_stream_handler.h @@ -74,9 +74,7 @@ class InputStreamHandler { : input_stream_managers_(std::move(tag_map)), calculator_context_manager_(calculator_context_manager), options_(options), - calculator_run_in_parallel_(calculator_run_in_parallel), - late_preparation_(false), - batch_size_(1) {} + calculator_run_in_parallel_(calculator_run_in_parallel) {} virtual ~InputStreamHandler() = default; @@ -174,6 +172,57 @@ class InputStreamHandler { return unset_header_count_.load(std::memory_order_relaxed); } + // When true, Calculator::Process is called for any increase in the + // timestamp bound, whether or not any packets are available. + // Calculator::Process is called when the minimum timestamp bound + // increases for any synchronized set of input streams. + // DefaultInputStreamHandler groups all input streams into a single set. + // ImmediateInputStreamHandler treats each input stream as a separate set. + void SetProcessTimestampBounds(bool process_ts) { + process_timestamps_ = process_ts; + } + + // When true, Calculator::Process is called for every input timestamp bound. + bool ProcessTimestampBounds() { return process_timestamps_; } + + // A helper class to build input packet sets for a certain set of streams. + // + // ReadyForProcess requires all of the streams to be fully determined + // at the same input-timestamp. 
+ // This is the readiness policy for all streams in DefaultInputStreamHandler. + // It is also the policy for each sync-set in SyncSetInputStreamHandler. + // It is also the policy for each input-stream in ImmediateInputStreamHandler. + // + // If ProcessTimestampBounds() is set, then a fully determined input timestamp + // with only empty input packets will qualify as ReadyForProcess. + class SyncSet { + public: + // Creates a SyncSet for a certain set of streams, |stream_ids|. + SyncSet(InputStreamHandler* input_stream_handler, + std::vector stream_ids); + + // Answers whether this stream is ready for Process or Close. + NodeReadiness GetReadiness(Timestamp* min_stream_timestamp); + + // Returns the latest timestamp returned for processing. + Timestamp LastProcessed() const; + + // The earliest available packet timestamp, or Timestamp::Done. + Timestamp MinPacketTimestamp() const; + + // Moves packets from all input streams to the input_set. + void FillInputSet(Timestamp input_timestamp, + InputStreamShardSet* input_set); + + // Copies timestamp bounds from all input streams to the input_set. + void FillInputBounds(InputStreamShardSet* input_set); + + private: + InputStreamHandler* input_stream_handler_; + std::vector stream_ids_; + Timestamp last_processed_ts_ = Timestamp::Unset(); + }; + protected: typedef internal::Collection InputStreamManagerSet; @@ -240,11 +289,14 @@ class InputStreamHandler { // The variable is set to false by default. A subclass should set it to true // with SetLatePreparation(true) in the constructor if the input sets need to // be filled in ProcessNode(). - bool late_preparation_; + bool late_preparation_ = false; // Determines how many sets of input packets are collected before a // CalculatorNode is scheduled. - int batch_size_; + int batch_size_ = 1; + + // When true, any increase in timestamp bound invokes Calculator::Process. + bool process_timestamps_ = false; // A callback to notify the observer when all the input stream headers // (excluding headers of back edges) become available. diff --git a/mediapipe/framework/legacy_calculator_support.h b/mediapipe/framework/legacy_calculator_support.h index a78a21b91..019473e67 100644 --- a/mediapipe/framework/legacy_calculator_support.h +++ b/mediapipe/framework/legacy_calculator_support.h @@ -107,6 +107,9 @@ CalculatorContext* LegacyCalculatorSupport::Scoped::current_; template <> CalculatorContract* LegacyCalculatorSupport::Scoped::current_; +#elif _MSC_VER +// MSVC interprets these declarations as definitions and during linking it +// generates an error about multiple definitions of current_. #else template <> thread_local CalculatorContext* diff --git a/mediapipe/framework/output_stream_handler.h b/mediapipe/framework/output_stream_handler.h index db1d4089a..b9ec42b92 100644 --- a/mediapipe/framework/output_stream_handler.h +++ b/mediapipe/framework/output_stream_handler.h @@ -46,6 +46,7 @@ class OutputStreamHandler { // ids of upstream sources that affect it. typedef std::unordered_map> OutputStreamToSourcesMap; + typedef internal::Collection OutputStreamManagerSet; // The constructor of the OutputStreamHandler takes four arguments. // The tag_map argument holds the information needed for tag/index retrieval @@ -119,9 +120,11 @@ class OutputStreamHandler { // collection for debugging purpose. 
std::string FirstStreamName() const; - protected: - typedef internal::Collection OutputStreamManagerSet; + const OutputStreamManagerSet& OutputStreams() { + return output_stream_managers_; + } + protected: // Checks if the given input bound should be propagated or not. If any output // streams with OffsetEnabled() need to have the timestamp bounds updated, // then propagates the timestamp bounds of all output streams with diff --git a/mediapipe/framework/output_stream_poller.h b/mediapipe/framework/output_stream_poller.h index ff803317f..26c0e72b2 100644 --- a/mediapipe/framework/output_stream_poller.h +++ b/mediapipe/framework/output_stream_poller.h @@ -27,6 +27,9 @@ class OutputStreamPoller { OutputStreamPoller(const OutputStreamPoller&) = delete; OutputStreamPoller& operator=(const OutputStreamPoller&) = delete; OutputStreamPoller(OutputStreamPoller&&) = default; + // Move assignment needs to be explicitly defaulted to allow ASSIGN_OR_RETURN + // on `StatusOr`. + OutputStreamPoller& operator=(OutputStreamPoller&&) = default; // Resets OutputStramPollerImpl and cleans the internal packet queue. void Reset() { diff --git a/mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt b/mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt new file mode 100644 index 000000000..60e7a0e47 --- /dev/null +++ b/mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt @@ -0,0 +1,97 @@ +graph_trace: { + calculator_name : ["ACalculator", "BCalculator"] + stream_name : [ "", "input1", "a_b"] + base_time : 0 + base_timestamp : 100 + + # Fire off three input packets and have them spend time in Calculator A. + # Drop the middle packet. + + calculator_trace: { + node_id: -1 + input_timestamp: 100 + event_type : PROCESS + finish_time : 1000 + output_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + calculator_trace: { + node_id: -1 + input_timestamp: 101 + event_type : PROCESS + finish_time : 2000 + output_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 0 + input_timestamp: 100 + event_type : PROCESS + start_time : 1200 # 200 after initial input (emits at 1000) + finish_time : 1500 # Speed to delivery is 500 (1500 - 1000) + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 0 + input_timestamp: 101 + event_type : PROCESS + start_time : 2100 # 100 after initial input (emits at 2000) + finish_time : 2500 # Speed to delivery is 500 (2500 - 2000) + input_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 1 + input_timestamp: 100 + event_type : PROCESS + start_time : 1600 # 600 after the initial input (emits at 1000) + finish_time : 2000 # Speed to delivery is 1000 (2000 - 1000) + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 1 + input_timestamp: 101 + event_type : PROCESS + start_time : 2900 # 700 after the initial input (emits at 2000) + finish_time : 3100 # Speed to delivery is 1000 (3000 - 2000) + input_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } +} +config: { + node: { + name: "ACalculator" + calculator: "ACalculator" + input_stream: "input1" + output_stream: "a_b" + } + node: { + name: "BCalculator" + calculator: "BCalculator" + input_stream: "a_b" + } +} diff --git a/mediapipe/framework/profiler/testdata/profile_process_test.pbtxt 
b/mediapipe/framework/profiler/testdata/profile_process_test.pbtxt new file mode 100644 index 000000000..0f4bfab52 --- /dev/null +++ b/mediapipe/framework/profiler/testdata/profile_process_test.pbtxt @@ -0,0 +1,122 @@ +graph_trace: { + calculator_name : ["ACalculator", "BCalculator"] + stream_name : [ "", "input1"] + base_time : 0 + base_timestamp : 100 + + # Fire off three input packets and have them spend time in Calculator A. + # Drop the middle packet. + + calculator_trace: { + node_id: -1 + input_timestamp: 100 + event_type : PROCESS + finish_time : 1000 + output_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + calculator_trace: { + node_id: -1 + input_timestamp: 101 + event_type : PROCESS + finish_time : 2000 + output_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + calculator_trace: { + node_id: -1 + input_timestamp: 102 + event_type : PROCESS + finish_time : 3000 + output_trace: { + packet_timestamp: 102 + stream_id : 1 + } + thread_id : 1 + } + + # First event is disconnected. We'll see the output_trace later. + calculator_trace: { + node_id: 0 + input_timestamp: 100 + event_type : PROCESS + start_time : 1100 + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + + # # We're going to drop this packet. + calculator_trace: { + node_id: 0 + input_timestamp: 101 + event_type : PROCESS + start_time : 2100 + input_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + # # Here's that matching output trace. + calculator_trace: { + node_id: 0 + input_timestamp: 100 + event_type : PROCESS + finish_time : 1500 + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + # Third packet is processed all at the same time. + calculator_trace: { + node_id: 0 + input_timestamp: 102 + event_type : PROCESS + start_time : 3100 + finish_time : 3600 + input_trace: { + packet_timestamp: 102 + stream_id : 1 + } + thread_id : 1 + } + + # A second calculator will process an input in order to affect the + # time_percent. + + calculator_trace: { + node_id: 1 + input_timestamp: 102 + event_type : PROCESS + start_time : 3200 + finish_time : 3500 + input_trace: { + packet_timestamp: 102 + stream_id : 1 + } + thread_id : 1 + } +} +config: { + node: { + name: "ACalculator" + calculator: "ACalculator" + input_stream: "input1" + } + node: { + name: "BCalculator" + calculator: "BCalculator" + input_stream: "input1" + } +} diff --git a/mediapipe/framework/scheduler_queue.cc b/mediapipe/framework/scheduler_queue.cc index 06f8a50cd..1b491cc36 100644 --- a/mediapipe/framework/scheduler_queue.cc +++ b/mediapipe/framework/scheduler_queue.cc @@ -25,7 +25,11 @@ #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/status.h" +#ifdef __APPLE__ +#define AUTORELEASEPOOL @autoreleasepool +#else #define AUTORELEASEPOOL +#endif // __APPLE__ namespace mediapipe { namespace internal { diff --git a/mediapipe/framework/stream_handler/default_input_stream_handler.cc b/mediapipe/framework/stream_handler/default_input_stream_handler.cc index 4d825ce92..a5f6878b1 100644 --- a/mediapipe/framework/stream_handler/default_input_stream_handler.cc +++ b/mediapipe/framework/stream_handler/default_input_stream_handler.cc @@ -17,16 +17,28 @@ #include #include "absl/strings/substitute.h" +#include "mediapipe/framework/input_stream_handler.h" namespace mediapipe { REGISTER_INPUT_STREAM_HANDLER(DefaultInputStreamHandler); +// Returns all CollectionItemId's for a Collection TagMap. 
+std::vector GetIds( + const std::shared_ptr& tag_map) { + std::vector result; + for (auto id = tag_map->BeginId(); id < tag_map->EndId(); ++id) { + result.push_back(id); + } + return result; +} + DefaultInputStreamHandler::DefaultInputStreamHandler( std::shared_ptr tag_map, CalculatorContextManager* cc_manager, const MediaPipeOptions& options, bool calculator_run_in_parallel) : InputStreamHandler(std::move(tag_map), cc_manager, options, - calculator_run_in_parallel) { + calculator_run_in_parallel), + sync_set_(this, GetIds(input_stream_managers_.TagMap())) { if (options.HasExtension(DefaultInputStreamHandlerOptions::ext)) { SetBatchSize(options.GetExtension(DefaultInputStreamHandlerOptions::ext) .batch_size()); @@ -35,47 +47,12 @@ DefaultInputStreamHandler::DefaultInputStreamHandler( NodeReadiness DefaultInputStreamHandler::GetNodeReadiness( Timestamp* min_stream_timestamp) { - DCHECK(min_stream_timestamp); - *min_stream_timestamp = Timestamp::Done(); - Timestamp min_bound = Timestamp::Done(); - for (const auto& stream : input_stream_managers_) { - bool empty; - Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); - if (empty) { - min_bound = std::min(min_bound, stream_timestamp); - } - *min_stream_timestamp = std::min(*min_stream_timestamp, stream_timestamp); - } - - if (*min_stream_timestamp == Timestamp::Done()) { - return NodeReadiness::kReadyForClose; - } - - if (min_bound > *min_stream_timestamp) { - return NodeReadiness::kReadyForProcess; - } - - CHECK_EQ(min_bound, *min_stream_timestamp); - return NodeReadiness::kNotReady; + return sync_set_.GetReadiness(min_stream_timestamp); } void DefaultInputStreamHandler::FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) { - CHECK(input_timestamp.IsAllowedInStream()); - CHECK(input_set); - for (CollectionItemId id = input_stream_managers_.BeginId(); - id < input_stream_managers_.EndId(); ++id) { - auto& stream = input_stream_managers_.Get(id); - int num_packets_dropped = 0; - bool stream_is_done = false; - Packet current_packet = stream->PopPacketAtTimestamp( - input_timestamp, &num_packets_dropped, &stream_is_done); - CHECK_EQ(num_packets_dropped, 0) - << absl::Substitute("Dropped $0 packet(s) on input stream \"$1\".", - num_packets_dropped, stream->Name()); - AddPacketToShard(&input_set->Get(id), std::move(current_packet), - stream_is_done); - } + sync_set_.FillInputSet(input_timestamp, input_set); } } // namespace mediapipe diff --git a/mediapipe/framework/stream_handler/default_input_stream_handler.h b/mediapipe/framework/stream_handler/default_input_stream_handler.h index fc90b3b25..b83dc98b4 100644 --- a/mediapipe/framework/stream_handler/default_input_stream_handler.h +++ b/mediapipe/framework/stream_handler/default_input_stream_handler.h @@ -45,6 +45,9 @@ class DefaultInputStreamHandler : public InputStreamHandler { // Only invoked when associated GetNodeReadiness() returned kReadyForProcess. void FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) override; + + // The packet-set builder. 
+ SyncSet sync_set_; }; } // namespace mediapipe diff --git a/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc b/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc index 97b0ad782..b34d08498 100644 --- a/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc +++ b/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc @@ -19,6 +19,8 @@ namespace mediapipe { +using SyncSet = InputStreamHandler::SyncSet; + // An input stream handler that delivers input packets to the Calculator // immediately, with no dependency between input streams. It also invokes // Calculator::Process when any input stream becomes done. @@ -47,8 +49,11 @@ class ImmediateInputStreamHandler : public InputStreamHandler { void FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) override; - // Record of the last reported timestamp bound for each input stream. - mediapipe::internal::Collection timestamp_bounds_; + absl::Mutex mutex_; + // The packet-set builder for each input stream. + std::vector sync_sets_ ABSL_GUARDED_BY(mutex_); + // The input timestamp for each kReadyForProcess input stream. + std::vector ready_timestamps_ ABSL_GUARDED_BY(mutex_); }; REGISTER_INPUT_STREAM_HANDLER(ImmediateInputStreamHandler); @@ -57,31 +62,47 @@ ImmediateInputStreamHandler::ImmediateInputStreamHandler( CalculatorContextManager* calculator_context_manager, const MediaPipeOptions& options, bool calculator_run_in_parallel) : InputStreamHandler(tag_map, calculator_context_manager, options, - calculator_run_in_parallel), - timestamp_bounds_(std::move(tag_map)) {} + calculator_run_in_parallel) { + for (auto id = tag_map->BeginId(); id < tag_map->EndId(); ++id) { + sync_sets_.emplace_back(this, std::vector{id}); + ready_timestamps_.push_back(Timestamp::Unset()); + } +} NodeReadiness ImmediateInputStreamHandler::GetNodeReadiness( Timestamp* min_stream_timestamp) { - *min_stream_timestamp = Timestamp::Done(); + absl::MutexLock lock(&mutex_); Timestamp input_timestamp = Timestamp::Done(); + Timestamp min_bound = Timestamp::Done(); bool stream_became_done = false; - - for (CollectionItemId i = input_stream_managers_.BeginId(); - i < input_stream_managers_.EndId(); ++i) { - const auto& stream = input_stream_managers_.Get(i); - bool empty; - Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); - if (!empty) { - input_timestamp = std::min(input_timestamp, stream_timestamp); + for (int i = 0; i < sync_sets_.size(); ++i) { + if (ready_timestamps_[i] > Timestamp::Unset()) { + min_bound = std::min(min_bound, ready_timestamps_[i]); + input_timestamp = std::min(input_timestamp, ready_timestamps_[i]); + continue; } - *min_stream_timestamp = std::min(*min_stream_timestamp, stream_timestamp); - if (stream_timestamp != timestamp_bounds_.Get(i)) { - if (stream_timestamp == Timestamp::Done()) { + Timestamp prev_ts = sync_sets_[i].LastProcessed(); + Timestamp stream_ts; + NodeReadiness readiness = sync_sets_[i].GetReadiness(&stream_ts); + min_bound = std::min(min_bound, stream_ts); + if (readiness == NodeReadiness::kReadyForProcess) { + ready_timestamps_[i] = stream_ts; + input_timestamp = std::min(input_timestamp, stream_ts); + } else if (readiness == NodeReadiness::kReadyForClose) { + CHECK_EQ(stream_ts, Timestamp::Done()); + if (ProcessTimestampBounds()) { + // With kReadyForClose, the timestamp-bound Done is returned. + // This bound is processed using the preceding input-timestamp. + // TODO: Make all InputStreamHandlers process Done() like this. 
+ ready_timestamps_[i] = stream_ts.PreviousAllowedInStream(); + input_timestamp = std::min(input_timestamp, ready_timestamps_[i]); + } else if (prev_ts < Timestamp::Done()) { stream_became_done = true; + ready_timestamps_[i] = Timestamp::Done(); } - timestamp_bounds_.Get(i) = stream_timestamp; } } + *min_stream_timestamp = min_bound; if (*min_stream_timestamp == Timestamp::Done()) { return NodeReadiness::kReadyForClose; @@ -94,6 +115,8 @@ NodeReadiness ImmediateInputStreamHandler::GetNodeReadiness( } if (stream_became_done) { + // The stream_became_done logic is kept for backward compatibility. + // Note that the minimum bound is returned in min_stream_timestamp. return NodeReadiness::kReadyForProcess; } @@ -102,23 +125,13 @@ NodeReadiness ImmediateInputStreamHandler::GetNodeReadiness( void ImmediateInputStreamHandler::FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) { - CHECK(input_timestamp.IsAllowedInStream()); - CHECK(input_set); - for (CollectionItemId id = input_stream_managers_.BeginId(); - id < input_stream_managers_.EndId(); ++id) { - auto& stream = input_stream_managers_.Get(id); - if (stream->QueueHead().Timestamp() == input_timestamp) { - int num_packets_dropped = 0; - bool stream_is_done = false; - Packet current_packet = stream->PopPacketAtTimestamp( - input_timestamp, &num_packets_dropped, &stream_is_done); - AddPacketToShard(&input_set->Get(id), std::move(current_packet), - stream_is_done); + absl::MutexLock lock(&mutex_); + for (int i = 0; i < sync_sets_.size(); ++i) { + if (ready_timestamps_[i] == input_timestamp) { + sync_sets_[i].FillInputSet(input_timestamp, input_set); + ready_timestamps_[i] = Timestamp::Unset(); } else { - Timestamp bound = stream->MinTimestampOrBound(nullptr); - AddPacketToShard(&input_set->Get(id), - Packet().At(bound.PreviousAllowedInStream()), - bound == Timestamp::Done()); + sync_sets_[i].FillInputBounds(input_set); } } } diff --git a/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc b/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc index d30040bbc..5217366a4 100644 --- a/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc +++ b/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc @@ -17,6 +17,7 @@ // TODO: Move protos in another CL after the C++ code migration. #include "absl/strings/substitute.h" #include "absl/synchronization/mutex.h" +#include "mediapipe/framework/collection_item_id.h" #include "mediapipe/framework/input_stream_handler.h" #include "mediapipe/framework/mediapipe_options.pb.h" #include "mediapipe/framework/packet_set.h" @@ -69,7 +70,7 @@ class SyncSetInputStreamHandler : public InputStreamHandler { private: absl::Mutex mutex_; // The ids of each set of inputs. - std::vector> sync_sets_ ABSL_GUARDED_BY(mutex_); + std::vector sync_sets_ ABSL_GUARDED_BY(mutex_); // The index of the ready sync set. A value of -1 indicates that no // sync sets are ready. 
int ready_sync_set_index_ ABSL_GUARDED_BY(mutex_) = -1; @@ -98,7 +99,7 @@ void SyncSetInputStreamHandler::PrepareForRun( sync_sets_.clear(); std::set used_ids; for (const auto& sync_set : handler_options.sync_set()) { - sync_sets_.emplace_back(); + std::vector stream_ids; CHECK_LT(0, sync_set.tag_index_size()); for (const auto& tag_index : sync_set.tag_index()) { std::string tag; @@ -109,8 +110,9 @@ void SyncSetInputStreamHandler::PrepareForRun( CHECK(!::mediapipe::ContainsKey(used_ids, id)) << "stream \"" << tag_index << "\" is in more than one sync set."; used_ids.insert(id); - sync_sets_.back().push_back(id); + stream_ids.push_back(id); } + sync_sets_.emplace_back(this, std::move(stream_ids)); } std::vector remaining_ids; for (CollectionItemId id = input_stream_managers_.BeginId(); @@ -120,7 +122,7 @@ void SyncSetInputStreamHandler::PrepareForRun( } } if (!remaining_ids.empty()) { - sync_sets_.push_back(std::move(remaining_ids)); + sync_sets_.emplace_back(this, std::move(remaining_ids)); } ready_sync_set_index_ = -1; ready_timestamp_ = Timestamp::Done(); @@ -137,24 +139,14 @@ NodeReadiness SyncSetInputStreamHandler::GetNodeReadiness( absl::MutexLock lock(&mutex_); if (ready_sync_set_index_ >= 0) { *min_stream_timestamp = ready_timestamp_; + // TODO: Return kNotReady unless a new ready syncset is found. return NodeReadiness::kReadyForProcess; } for (int sync_set_index = 0; sync_set_index < sync_sets_.size(); ++sync_set_index) { - const std::vector& sync_set = sync_sets_[sync_set_index]; - *min_stream_timestamp = Timestamp::Done(); - Timestamp min_bound = Timestamp::Done(); - for (CollectionItemId id : sync_set) { - const auto& stream = input_stream_managers_.Get(id); - bool empty; - Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); - if (empty) { - min_bound = std::min(min_bound, stream_timestamp); - } - *min_stream_timestamp = std::min(*min_stream_timestamp, stream_timestamp); - } - - if (*min_stream_timestamp == Timestamp::Done()) { + NodeReadiness readiness = + sync_sets_[sync_set_index].GetReadiness(min_stream_timestamp); + if (readiness == NodeReadiness::kReadyForClose) { // This sync set is done, remove it. Note that this invalidates // sync set indexes higher than sync_set_index. However, we are // guaranteed that we were not ready before entering the outer @@ -165,15 +157,14 @@ NodeReadiness SyncSetInputStreamHandler::GetNodeReadiness( continue; } - if (min_bound > *min_stream_timestamp) { + if (readiness == NodeReadiness::kReadyForProcess) { + // TODO: Prioritize sync-sets to avoid starvation. if (*min_stream_timestamp < ready_timestamp_) { // Store the timestamp and corresponding sync set index for the // sync set with the earliest arrival timestamp. ready_timestamp_ = *min_stream_timestamp; ready_sync_set_index_ = sync_set_index; } - } else { - CHECK_EQ(min_bound, *min_stream_timestamp); } } if (ready_sync_set_index_ >= 0) { @@ -188,44 +179,17 @@ NodeReadiness SyncSetInputStreamHandler::GetNodeReadiness( return NodeReadiness::kNotReady; } -void SyncSetInputStreamHandler::FillInputBounds( - Timestamp input_timestamp, InputStreamShardSet* input_set) { - for (int i = 0; i < sync_sets_.size(); ++i) { - if (i != ready_sync_set_index_) { - // Set the input streams for the not-ready sync sets. 
- for (CollectionItemId id : sync_sets_[i]) { - const auto stream = input_stream_managers_.Get(id); - Timestamp bound = stream->MinTimestampOrBound(nullptr); - AddPacketToShard(&input_set->Get(id), - Packet().At(bound.PreviousAllowedInStream()), - bound == Timestamp::Done()); - } - } - } -} - void SyncSetInputStreamHandler::FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) { // Assume that all current packets are already cleared. - CHECK(input_timestamp.IsAllowedInStream()); - CHECK(input_set); absl::MutexLock lock(&mutex_); CHECK_LE(0, ready_sync_set_index_); - CHECK_EQ(input_timestamp, ready_timestamp_); - // Set the input streams for the ready sync set. - for (CollectionItemId id : sync_sets_[ready_sync_set_index_]) { - const auto& stream = input_stream_managers_.Get(id); - int num_packets_dropped = 0; - bool stream_is_done = false; - Packet current_packet = stream->PopPacketAtTimestamp( - input_timestamp, &num_packets_dropped, &stream_is_done); - CHECK_EQ(num_packets_dropped, 0) - << absl::Substitute("Dropped $0 packet(s) on input stream \"$1\".", - num_packets_dropped, stream->Name()); - AddPacketToShard(&input_set->Get(id), std::move(current_packet), - stream_is_done); + sync_sets_[ready_sync_set_index_].FillInputSet(input_timestamp, input_set); + for (int i = 0; i < sync_sets_.size(); ++i) { + if (i != ready_sync_set_index_) { + sync_sets_[i].FillInputBounds(input_set); + } } - FillInputBounds(input_timestamp, input_set); ready_sync_set_index_ = -1; ready_timestamp_ = Timestamp::Done(); } diff --git a/mediapipe/framework/timestamp.cc b/mediapipe/framework/timestamp.cc index 99d48b14b..05b69747f 100644 --- a/mediapipe/framework/timestamp.cc +++ b/mediapipe/framework/timestamp.cc @@ -122,7 +122,6 @@ std::string TimestampDiff::DebugString() const { } Timestamp Timestamp::NextAllowedInStream() const { - CHECK(IsAllowedInStream()) << "Timestamp is: " << DebugString(); if (*this >= Max() || *this == PreStream()) { // Indicates that no further timestamps may occur. return OneOverPostStream(); } diff --git a/mediapipe/framework/timestamp.h b/mediapipe/framework/timestamp.h index dc574cbdc..179388942 100644 --- a/mediapipe/framework/timestamp.h +++ b/mediapipe/framework/timestamp.h @@ -247,6 +247,12 @@ class TimestampDiff { TimestampDiff operator-(const TimestampDiff other) const; Timestamp operator+(const Timestamp other) const; + // Special values. + + static TimestampDiff Unset() { + return TimestampDiff(Timestamp::Unset().Value()); + } + private: TimestampBaseType timestamp_; }; diff --git a/mediapipe/framework/validated_graph_config.cc b/mediapipe/framework/validated_graph_config.cc index 31ade5845..13d236560 100644 --- a/mediapipe/framework/validated_graph_config.cc +++ b/mediapipe/framework/validated_graph_config.cc @@ -815,16 +815,25 @@ NodeTypeInfo::NodeRef ValidatedGraphConfig::NodeForSorterIndex( sorted_nodes_.push_back(&tmp_calculators.back()); } } + if (cyclic) { + // This reads from partially altered config_ (by node Swap()) but we assume + // the nodes in the cycle are not altered, as TopologicalSorter reports + // cyclicity before processing any node in the cycle. + auto node_name_formatter = [this](std::string* out, int i) { + const auto& n = NodeForSorterIndex(i); + absl::StrAppend(out, n.type == NodeTypeInfo::NodeType::CALCULATOR + ?
tool::CanonicalNodeName(Config(), n.index) + : DebugName(Config(), n.type, n.index)); + }; + return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) + << "Generator side packet cycle or calculator stream cycle detected " + "in graph: [" + << absl::StrJoin(cycle_indexes, ", ", node_name_formatter) << "]"; + } generator_configs.Swap(config_.mutable_packet_generator()); tmp_generators.swap(generators_); node_configs.Swap(config_.mutable_node()); tmp_calculators.swap(calculators_); - if (cyclic) { - return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) - << "Generator side packet cycle or calculator stream cycle detected " - "in graph. Cycle indexes: " - << absl::StrJoin(cycle_indexes, ", "); - } #if !(defined(MEDIAPIPE_LITE) || defined(MEDIAPIPE_MOBILE)) VLOG(2) << "AFTER TOPOLOGICAL SORT:\n" << config_.DebugString(); #endif // !(MEDIAPIPE_LITE || MEDIAPIPE_MOBILE) diff --git a/mediapipe/gpu/gl_base.h b/mediapipe/gpu/gl_base.h index 3fd823388..9aa8b02dc 100644 --- a/mediapipe/gpu/gl_base.h +++ b/mediapipe/gpu/gl_base.h @@ -57,15 +57,14 @@ #include #include #include - -#ifdef __ANDROID__ +#if defined(__ANDROID__) // Weak-link all GL APIs included from this point on. // TODO: Annotate these with availability attributes for the // appropriate versions of Android, by including gl{3,31,31}.h and resetting // GL_APICALL for each. #undef GL_APICALL #define GL_APICALL __attribute__((weak_import)) KHRONOS_APICALL -#endif // __ANDROID__ +#endif // defined(__ANDROID__) #include diff --git a/mediapipe/gpu/gl_calculator_helper_impl.h b/mediapipe/gpu/gl_calculator_helper_impl.h index 3d92ca671..8f90eb5fd 100644 --- a/mediapipe/gpu/gl_calculator_helper_impl.h +++ b/mediapipe/gpu/gl_calculator_helper_impl.h @@ -83,6 +83,10 @@ class GlCalculatorHelperImpl { GLuint framebuffer_ = 0; GpuResources& gpu_resources_; + + // Necessary to compute for a given GlContext in order to properly enforce the + // SetStandardTextureParams. + bool can_linear_filter_float_textures_; }; } // namespace mediapipe diff --git a/mediapipe/gpu/gl_calculator_helper_impl_common.cc b/mediapipe/gpu/gl_calculator_helper_impl_common.cc index cf2dcf582..d26888ace 100644 --- a/mediapipe/gpu/gl_calculator_helper_impl_common.cc +++ b/mediapipe/gpu/gl_calculator_helper_impl_common.cc @@ -22,6 +22,17 @@ GlCalculatorHelperImpl::GlCalculatorHelperImpl(CalculatorContext* cc, GpuResources* gpu_resources) : gpu_resources_(*gpu_resources) { gl_context_ = gpu_resources_.gl_context(cc); +// GL_ES_VERSION_2_0 and up (at least through ES 3.2) may contain the extension. +// Checking against one also checks against higher ES versions. So this checks +// against GLES >= 2.0. +#if GL_ES_VERSION_2_0 + // No linear float filtering by default, check extensions. + can_linear_filter_float_textures_ = + gl_context_->HasGlExtension("OES_texture_float_linear"); +#else + // Any float32 texture we create should automatically have linear filtering. + can_linear_filter_float_textures_ = true; +#endif // GL_ES_VERSION_2_0 } GlCalculatorHelperImpl::~GlCalculatorHelperImpl() { @@ -89,13 +100,15 @@ void GlCalculatorHelperImpl::BindFramebuffer(const GlTexture& dst) { void GlCalculatorHelperImpl::SetStandardTextureParams(GLenum target, GLint internal_format) { + // Default to using linear filter everywhere. For float32 textures, fall back + // to GL_NEAREST if linear filtering unsupported. 
GLint filter; switch (internal_format) { case GL_R32F: case GL_RGBA32F: - // 32F (unlike 16f) textures do not support texture filtering + // 32F (unlike 16f) textures do not always support texture filtering // (According to OpenGL ES specification [TEXTURE IMAGE SPECIFICATION]) - filter = GL_NEAREST; + filter = can_linear_filter_float_textures_ ? GL_LINEAR : GL_NEAREST; break; default: filter = GL_LINEAR; diff --git a/mediapipe/gpu/gl_context.cc b/mediapipe/gpu/gl_context.cc index 497a28e1f..dd1b6fa21 100644 --- a/mediapipe/gpu/gl_context.cc +++ b/mediapipe/gpu/gl_context.cc @@ -203,6 +203,69 @@ bool GlContext::ParseGlVersion(absl::string_view version_string, GLint* major, return true; } +bool GlContext::HasGlExtension(absl::string_view extension) const { + return gl_extensions_.find(extension) != gl_extensions_.end(); +} + +// Function for GL3.0+ to query for and store all of our available GL extensions +// in an easily-accessible set. The glGetString call is actually *not* required +// to work with GL_EXTENSIONS for newer GL versions, so we must maintain both +// variations of this function. +::mediapipe::Status GlContext::GetGlExtensions() { + gl_extensions_.clear(); + // glGetStringi only introduced in GL 3.0+; so we exit out this function if + // we don't have that function defined, regardless of version number reported. + // The function itself is also fully stubbed out if we're linking against an + // API version without a glGetStringi declaration. Although Emscripten + // sometimes provides this function, its default library implementation + // appears to only provide glGetString, so we skip this for Emscripten + // platforms to avoid possible undefined symbol or runtime errors. +#if (GL_VERSION_3_0 || GL_ES_VERSION_3_0) && !defined(__EMSCRIPTEN__) + if (!SymbolAvailable(&glGetStringi)) { + LOG(ERROR) << "GL major version > 3.0 indicated, but glGetStringi not " + << "defined. Falling back to deprecated GL extensions querying " + << "method."; + return ::mediapipe::InternalError("glGetStringi not defined, but queried"); + } + int num_extensions = 0; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); + if (glGetError() != 0) { + return ::mediapipe::InternalError( + "Error querying for number of extensions"); + } + + for (int i = 0; i < num_extensions; ++i) { + const GLubyte* res = glGetStringi(GL_EXTENSIONS, i); + if (glGetError() != 0 || res == nullptr) { + return ::mediapipe::InternalError( + "Error querying for an extension by index"); + } + const char* signed_res = reinterpret_cast(res); + gl_extensions_.insert(signed_res); + } + + return ::mediapipe::OkStatus(); +#else + return ::mediapipe::InternalError("GL version mismatch in GlGetExtensions"); +#endif // (GL_VERSION_3_0 || GL_ES_VERSION_3_0) && !defined(__EMSCRIPTEN__) +} + +// Same as GetGlExtensions() above, but for pre-GL3.0, where glGetStringi did +// not exist. 
+::mediapipe::Status GlContext::GetGlExtensionsCompat() {
+  gl_extensions_.clear();
+
+  const GLubyte* res = glGetString(GL_EXTENSIONS);
+  if (glGetError() != 0 || res == nullptr) {
+    LOG(ERROR) << "Error querying for GL extensions";
+    return ::mediapipe::InternalError("Error querying for GL extensions");
+  }
+  const char* signed_res = reinterpret_cast<const char*>(res);
+  gl_extensions_ = absl::StrSplit(signed_res, ' ');
+
+  return ::mediapipe::OkStatus();
+}
+
 ::mediapipe::Status GlContext::FinishInitialization(bool create_thread) {
   if (create_thread) {
     thread_ = absl::make_unique<GlContext::DedicatedThread>();
@@ -232,8 +295,13 @@ bool GlContext::ParseGlVersion(absl::string_view version_string, GLint* major,
     LOG(INFO) << "GL version: " << gl_major_version_ << "." << gl_minor_version_
               << " (" << glGetString(GL_VERSION) << ")";
-
-    return ::mediapipe::OkStatus();
+    if (gl_major_version_ >= 3) {
+      auto status = GetGlExtensions();
+      if (status.ok()) {
+        return ::mediapipe::OkStatus();
+      }
+    }
+    return GetGlExtensionsCompat();
   });
 }
diff --git a/mediapipe/gpu/gl_context.h b/mediapipe/gpu/gl_context.h
index c28e310b6..455b232d7 100644
--- a/mediapipe/gpu/gl_context.h
+++ b/mediapipe/gpu/gl_context.h
@@ -237,6 +237,10 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
   static bool ParseGlVersion(absl::string_view version_string, GLint* major,
                              GLint* minor);
 
+  // Simple query for GL extension support; only valid after GlContext has
+  // finished its initialization successfully.
+  bool HasGlExtension(absl::string_view extension) const;
+
   int64_t gl_finish_count() { return gl_finish_count_; }
 
   // Used by GlFinishSyncPoint. The count_to_pass cannot exceed the current
@@ -346,6 +350,8 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
   bool HasContext() const;
   bool CheckForGlErrors();
   void LogUncheckedGlErrors(bool had_gl_errors);
+  ::mediapipe::Status GetGlExtensions();
+  ::mediapipe::Status GetGlExtensionsCompat();
 
   // The following ContextBinding functions have platform-specific
   // implementations.
@@ -366,6 +372,10 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
   GLint gl_major_version_ = 0;
   GLint gl_minor_version_ = 0;
 
+  // glGetString and glGetStringi both return pointers to static strings,
+  // so we should be fine storing the extension pieces as string_view's.
+  std::set<absl::string_view> gl_extensions_;
+
   // Number of glFinish calls completed on the GL thread.
   // Changes should be guarded by mutex_. However, we use simple atomic
   // loads for efficiency on the fast path.
diff --git a/mediapipe/gpu/gl_simple_shaders.cc b/mediapipe/gpu/gl_simple_shaders.cc
index 6b56288a2..b038c88aa 100644
--- a/mediapipe/gpu/gl_simple_shaders.cc
+++ b/mediapipe/gpu/gl_simple_shaders.cc
@@ -24,6 +24,19 @@ namespace mediapipe {
 #define _STRINGIFY(_x) __STRINGIFY(_x)
 #endif
 
+// Our fragment shaders use DEFAULT_PRECISION to define the default precision
+// for a type. The macro strips out the precision declaration on desktop GL,
+// where it's not supported.
+//
+// Note: this does not use a raw std::string because some compilers don't handle
+// raw strings inside macros correctly. It uses a macro because we want to be
+// able to concatenate strings by juxtaposition. We want to concatenate strings
+// by juxtaposition so we can export const char* static data containing the
+// pre-expanded strings.
+//
+// TODO: this was written before we could rely on C++11 support.
+// Consider replacing it with constexpr std::string concatenation, or replacing
+// the static variables with functions.
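To make the new plumbing easier to follow, here is a minimal sketch of how the pieces above are meant to combine: the context caches its extension set once during FinishInitialization, callers ask HasGlExtension("OES_texture_float_linear") on GLES, and float32 textures fall back to GL_NEAREST when linear filtering is unavailable. The helper name ChooseTextureFilter and its exact shape are illustrative assumptions, not part of this patch or of the MediaPipe API; only HasGlExtension() and the GL enums come from the code above.

#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gl_context.h"

// Hypothetical helper: pick the min/mag filter for a texture of the given
// internal format, honoring the OES_texture_float_linear availability that
// GlContext now exposes via HasGlExtension().
GLint ChooseTextureFilter(const mediapipe::GlContext& gl_context,
                          GLint internal_format) {
#if GL_ES_VERSION_2_0
  // On GLES, float32 textures are linearly filterable only with the extension.
  const bool can_linear_filter_float =
      gl_context.HasGlExtension("OES_texture_float_linear");
#else
  // Desktop GL: float32 textures are filterable without an extension.
  const bool can_linear_filter_float = true;
#endif
  switch (internal_format) {
    case GL_R32F:
    case GL_RGBA32F:
      return can_linear_filter_float ? GL_LINEAR : GL_NEAREST;
    default:
      return GL_LINEAR;
  }
}

// Usage, mirroring SetStandardTextureParams() above (run on the GL thread):
//   GLint filter = ChooseTextureFilter(*gl_context, GL_RGBA32F);
//   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
//   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);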
#define PRECISION_COMPAT \ GLES_VERSION_COMPAT \ "#ifdef GL_ES \n" \ @@ -42,10 +55,15 @@ namespace mediapipe { "#define out varying\n" \ "#endif // __VERSION__ < 130\n" -#define FRAGMENT_PREAMBLE \ - PRECISION_COMPAT \ - "#if __VERSION__ < 130\n" \ - "#define in varying\n" \ +// Note: on systems where highp precision for floats is not supported (look up +// GL_FRAGMENT_PRECISION_HIGH), we replace it with mediump. +#define FRAGMENT_PREAMBLE \ + PRECISION_COMPAT \ + "#if __VERSION__ < 130\n" \ + "#define in varying\n" \ + "#if GL_ES && !GL_FRAGMENT_PRECISION_HIGH\n" \ + "#define highp mediump\n" \ + "#endif // GL_ES && !GL_FRAGMENT_PRECISION_HIGH\n" \ "#endif // __VERSION__ < 130\n" const GLchar* const kMediaPipeVertexShaderPreamble = VERTEX_PREAMBLE; diff --git a/mediapipe/graphs/hair_segmentation/BUILD b/mediapipe/graphs/hair_segmentation/BUILD index eec0732e3..52598175f 100644 --- a/mediapipe/graphs/hair_segmentation/BUILD +++ b/mediapipe/graphs/hair_segmentation/BUILD @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:public"]) @@ -33,9 +38,19 @@ cc_library( ], ) -load( - "//mediapipe/framework/tool:mediapipe_graph.bzl", - "mediapipe_binary_graph", +cc_library( + name = "desktop_calculators", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/image:recolor_calculator", + "//mediapipe/calculators/image:set_alpha_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator", + ], ) mediapipe_binary_graph( diff --git a/mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt b/mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt new file mode 100644 index 000000000..36c6970e1 --- /dev/null +++ b/mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt @@ -0,0 +1,152 @@ +# MediaPipe graph that performs hair segmentation with TensorFlow Lite on CPU. +# Used in the example in +# mediapipie/examples/desktop/hair_segmentation:hair_segmentation_cpu + +# Images on CPU coming into and out of the graph. +input_stream: "input_video" +output_stream: "output_video" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for +# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish +# generating the corresponding hair mask before it passes through another +# image. All images that come in while waiting are dropped, limiting the number +# of in-flight images between this calculator and +# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between +# from queuing up incoming images and data excessively, which leads to increased +# latency and memory usage, unwanted in real-time mobile applications. 
It also +# eliminates unnecessarily computation, e.g., a transformed image produced by +# ImageTransformationCalculator may get dropped downstream if the subsequent +# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy +# processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:hair_mask" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Transforms the input image on CPU to a 512x512 image. To scale the image, by +# default it uses the STRETCH scale mode that maps the entire input image to the +# entire transformed image. As a result, image aspect ratio may be changed and +# objects in the image may be deformed (stretched or squeezed), but the hair +# segmentation model used in this graph is agnostic to that deformation. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:throttled_input_video" + output_stream: "IMAGE:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 512 + output_height: 512 + } + } +} + +# Caches a mask fed back from the previous round of hair segmentation, and upon +# the arrival of the next input image sends out the cached mask with the +# timestamp replaced by that of the input image, essentially generating a packet +# that carries the previous mask. Note that upon the arrival of the very first +# input image, an empty packet is sent out to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:throttled_input_video" + input_stream: "LOOP:hair_mask" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:previous_hair_mask" +} + +# Embeds the hair mask generated from the previous round of hair segmentation +# as the alpha channel of the current input image. +node { + calculator: "SetAlphaCalculator" + input_stream: "IMAGE:transformed_input_video" + input_stream: "ALPHA:previous_hair_mask" + output_stream: "IMAGE:mask_embedded_input_video" +} + +# Converts the transformed input image on CPU into an image tensor stored in +# TfLiteTensor. The zero_center option is set to false to normalize the +# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the +# max_num_channels option set to 4, all 4 RGBA channels are contained in the +# image tensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:mask_embedded_input_video" + output_stream: "TENSORS:image_tensor" + node_options: { + [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] { + zero_center: false + max_num_channels: 4 + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] { + use_gpu: false + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# tensor representing the hair segmentation, which has the same width and height +# as the input image tensor. 
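For context on how a config like this is consumed, the sketch below is loosely modeled on MediaPipe's generic desktop runner: it parses the .pbtxt, polls "output_video", and pushes ImageFrame packets into "input_video" with increasing timestamps. The function name RunHairSegmentationCpu, the 640x480 frame size, and the fixed 100-frame loop are placeholders, not part of this patch; the remaining nodes of the graph continue right below the sketch.

#include <cstdint>
#include <string>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

// Hypothetical driver for hair_segmentation_desktop_live.pbtxt.
::mediapipe::Status RunHairSegmentationCpu(const std::string& graph_contents) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          graph_contents);

  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("output_video"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  for (int64_t ts = 0; ts < 100; ++ts) {  // One iteration per camera frame.
    // Fill this frame from a camera or decoder; SRGB, any resolution works.
    auto frame = absl::make_unique<mediapipe::ImageFrame>(
        mediapipe::ImageFormat::SRGB, /*width=*/640, /*height=*/480);
    MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
        "input_video",
        mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(ts))));

    mediapipe::Packet output;
    if (!poller.Next(&output)) break;
    const auto& recolored = output.Get<mediapipe::ImageFrame>();
    (void)recolored;  // Display or encode the recolored frame here.
  }

  MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));
  return graph.WaitUntilDone();
}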
+node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:segmentation_tensor" + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/hair_segmentation.tflite" + use_gpu: false + } + } +} + +# Decodes the segmentation tensor generated by the TensorFlow Lite model into a +# mask of values in [0, 255], stored in a CPU buffer. It also +# takes the mask generated previously as another input to improve the temporal +# consistency. +node { + calculator: "TfLiteTensorsToSegmentationCalculator" + input_stream: "TENSORS:segmentation_tensor" + input_stream: "PREV_MASK:previous_hair_mask" + output_stream: "MASK:hair_mask" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] { + tensor_width: 512 + tensor_height: 512 + tensor_channels: 2 + combine_with_previous_ratio: 0.9 + output_layer_index: 1 + } + } +} + +# Colors the hair segmentation with the color specified in the option. +node { + calculator: "RecolorCalculator" + input_stream: "IMAGE:throttled_input_video" + input_stream: "MASK:hair_mask" + output_stream: "IMAGE:output_video" + node_options: { + [type.googleapis.com/mediapipe.RecolorCalculatorOptions] { + color { r: 0 g: 0 b: 255 } + mask_channel: RED + } + } +} diff --git a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl index 0c0d2aac4..6078318ac 100644 --- a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl +++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl @@ -78,6 +78,33 @@ cat > $(OUTS) < $(OUTS) <:d>") ++- ELSEIF(CPUINFO_RUNTIME_TYPE STREQUAL "static") ++- TARGET_COMPILE_OPTIONS(${target} PRIVATE ++- "/MT$<$:d>") ++- ENDIF() ++- ENDIF() ++-ENDMACRO() ++- ++-# ---[ Build flags ++-SET(CPUINFO_SUPPORTED_PLATFORM TRUE) ++-IF(NOT CMAKE_SYSTEM_PROCESSOR) ++- IF(NOT IOS) ++- MESSAGE(WARNING ++- "Target processor architecture is not specified. " ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++- ENDIF() ++-ELSEIF(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64)$") ++- MESSAGE(WARNING ++- "Target processor architecture \"${CMAKE_SYSTEM_PROCESSOR}\" is not supported in cpuinfo. " ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++-ENDIF() ++- ++-IF(NOT CMAKE_SYSTEM_NAME) ++- MESSAGE(WARNING ++- "Target operating system is not specified. " ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++-ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$") ++- IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") ++- MESSAGE(WARNING ++- "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. 
" ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++- ENDIF() ++-ENDIF() ++- ++-# ---[ Download deps ++-SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps ++- CACHE PATH "Confu-style dependencies source directory") ++-SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps ++- CACHE PATH "Confu-style dependencies binary directory") ++- ++-IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS) ++- IF(CPUINFO_SUPPORTED_PLATFORM AND NOT DEFINED GOOGLETEST_SOURCE_DIR) ++- MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)") ++- CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . ++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . ++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") ++- SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory") ++- ENDIF() ++-ENDIF() ++- ++-IF(CPUINFO_BUILD_BENCHMARKS) ++- IF(CPUINFO_SUPPORTED_PLATFORM AND NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR) ++- MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)") ++- CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . ++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . 
++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download") ++- SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory") ++- ENDIF() ++-ENDIF() ++- ++-# ---[ cpuinfo library ++-SET(CPUINFO_SRCS ++- src/init.c ++- src/api.c) ++- ++-IF(CPUINFO_SUPPORTED_PLATFORM) ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$") ++- LIST(APPEND CPUINFO_SRCS ++- src/x86/init.c ++- src/x86/info.c ++- src/x86/vendor.c ++- src/x86/uarch.c ++- src/x86/name.c ++- src/x86/topology.c ++- src/x86/isa.c ++- src/x86/cache/init.c ++- src/x86/cache/descriptor.c ++- src/x86/cache/deterministic.c) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/x86/linux/init.c ++- src/x86/linux/cpuinfo.c) ++- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") ++- LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c) ++- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows") ++- LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c) ++- ENDIF() ++- ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$") ++- LIST(APPEND CPUINFO_SRCS ++- src/arm/uarch.c ++- src/arm/cache.c) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/arm/linux/init.c ++- src/arm/linux/cpuinfo.c ++- src/arm/linux/clusters.c ++- src/arm/linux/chipset.c ++- src/arm/linux/midr.c ++- src/arm/linux/hwcap.c) ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") ++- LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi") ++- SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm) ++- ENDIF() ++- ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") ++- LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c) ++- ENDIF() ++- ELSEIF(IOS) ++- LIST(APPEND CPUINFO_SRCS src/arm/mach/init.c) ++- ENDIF() ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/arm/android/properties.c) ++- ENDIF() ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/linux/smallfile.c ++- src/linux/multiline.c ++- src/linux/current.c ++- src/linux/cpulist.c ++- src/linux/processors.c) ++- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") ++- LIST(APPEND CPUINFO_SRCS src/mach/topology.c) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) ++- SET(THREADS_PREFER_PTHREAD_FLAG TRUE) ++- FIND_PACKAGE(Threads REQUIRED) ++- ENDIF() ++-ENDIF() ++- ++-IF(CPUINFO_LIBRARY_TYPE STREQUAL "default") ++- ADD_LIBRARY(cpuinfo ${CPUINFO_SRCS}) ++-ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "shared") ++- ADD_LIBRARY(cpuinfo SHARED ${CPUINFO_SRCS}) ++-ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "static") ++- ADD_LIBRARY(cpuinfo STATIC ${CPUINFO_SRCS}) ++-ELSE() ++- MESSAGE(FATAL_ERROR "Unsupported library type ${CPUINFO_LIBRARY_TYPE}") ++-ENDIF() ++-ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS}) ++-CPUINFO_TARGET_ENABLE_C99(cpuinfo) ++-CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals) ++-CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo) ++-SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h) ++-TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include) 
++-TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src) ++-TARGET_INCLUDE_DIRECTORIES(cpuinfo_internals BEFORE PUBLIC include src) ++-IF(CPUINFO_LOG_LEVEL STREQUAL "default") ++- # default logging level: error (subject to change) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "debug") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=5) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "info") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=4) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "warning") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=3) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "error") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "fatal") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=1) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "none") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=0) ++-ELSE() ++- MESSAGE(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}") ++-ENDIF() ++-TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE CPUINFO_LOG_LEVEL=0) ++- ++-IF(CPUINFO_SUPPORTED_PLATFORM) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- TARGET_LINK_LIBRARIES(cpuinfo PUBLIC ${CMAKE_THREAD_LIBS_INIT}) ++- TARGET_LINK_LIBRARIES(cpuinfo_internals PUBLIC ${CMAKE_THREAD_LIBS_INIT}) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _GNU_SOURCE=1) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _GNU_SOURCE=1) ++- ENDIF() ++-ELSE() ++- TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0) ++-ENDIF() ++- ++-# ---[ cpuinfo dependencies: clog ++-IF(NOT DEFINED CLOG_SOURCE_DIR) ++- SET(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog") ++-ENDIF() ++-IF(NOT TARGET clog) ++- SET(CLOG_BUILD_TESTS OFF CACHE BOOL "") ++- SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "") ++- ADD_SUBDIRECTORY( ++- "${CLOG_SOURCE_DIR}") ++- # We build static version of clog but a dynamic library may indirectly depend on it ++- SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON) ++-ENDIF() ++-TARGET_LINK_LIBRARIES(cpuinfo PRIVATE clog) ++-TARGET_LINK_LIBRARIES(cpuinfo_internals PRIVATE clog) ++- ++-INSTALL(TARGETS cpuinfo ++- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ++- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ++- PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) ++- ++-# ---[ cpuinfo micro-benchmarks ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS) ++- # ---[ Build google benchmark ++- IF(NOT TARGET benchmark) ++- SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "") ++- ADD_SUBDIRECTORY( ++- "${GOOGLEBENCHMARK_SOURCE_DIR}" ++- "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark") ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android)$") ++- ADD_EXECUTABLE(get-current-bench bench/get-current.cc) ++- TARGET_LINK_LIBRARIES(get-current-bench cpuinfo benchmark) ++- ENDIF() ++- ++- ADD_EXECUTABLE(init-bench bench/init.cc) ++- TARGET_LINK_LIBRARIES(init-bench cpuinfo benchmark) ++-ENDIF() ++- ++-IF(CPUINFO_SUPPORTED_PLATFORM) ++- IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS) ++- # ---[ Build google test ++- IF(NOT TARGET gtest) ++- IF(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "static") ++- SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE) ++- ENDIF() ++- ADD_SUBDIRECTORY( ++- "${GOOGLETEST_SOURCE_DIR}" ++- 
"${CONFU_DEPENDENCIES_BINARY_DIR}/googletest") ++- ENDIF() ++- ENDIF() ++-ENDIF() ++- ++-# ---[ cpuinfo mock library and mock tests ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) ++- SET(CPUINFO_MOCK_SRCS "${CPUINFO_SRCS}") ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86_64)$") ++- LIST(APPEND CPUINFO_MOCK_SRCS src/x86/mockcpuid.c) ++- ENDIF() ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_MOCK_SRCS src/linux/mockfile.c) ++- ENDIF() ++- ++- ADD_LIBRARY(cpuinfo_mock STATIC ${CPUINFO_MOCK_SRCS}) ++- CPUINFO_TARGET_ENABLE_C99(cpuinfo_mock) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo_mock) ++- SET_TARGET_PROPERTIES(cpuinfo_mock PROPERTIES PUBLIC_HEADER include/cpuinfo.h) ++- TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PUBLIC include) ++- TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PRIVATE src) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PUBLIC CPUINFO_MOCK=1) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE CLOG_LOG_TO_STDIO=1) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- TARGET_LINK_LIBRARIES(cpuinfo_mock PUBLIC ${CMAKE_THREAD_LIBS_INIT}) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE _GNU_SOURCE=1) ++- ENDIF() ++- TARGET_LINK_LIBRARIES(cpuinfo_mock PRIVATE clog) ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a)$") ++- ADD_EXECUTABLE(atm7029b-tablet-test test/mock/atm7029b-tablet.cc) ++- TARGET_INCLUDE_DIRECTORIES(atm7029b-tablet-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(atm7029b-tablet-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(atm7029b-tablet-test atm7029b-tablet-test) ++- ++- ADD_EXECUTABLE(blu-r1-hd-test test/mock/blu-r1-hd.cc) ++- TARGET_INCLUDE_DIRECTORIES(blu-r1-hd-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(blu-r1-hd-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(blu-r1-hd-test blu-r1-hd-test) ++- ++- ADD_EXECUTABLE(galaxy-a3-2016-eu-test test/mock/galaxy-a3-2016-eu.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-a3-2016-eu-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-a3-2016-eu-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-a3-2016-eu-test galaxy-a3-2016-eu-test) ++- ++- ADD_EXECUTABLE(galaxy-a8-2016-duos-test test/mock/galaxy-a8-2016-duos.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2016-duos-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-a8-2016-duos-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-a8-2016-duos-test galaxy-a8-2016-duos-test) ++- ++- ADD_EXECUTABLE(galaxy-grand-prime-value-edition-test test/mock/galaxy-grand-prime-value-edition.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-grand-prime-value-edition-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-grand-prime-value-edition-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-grand-prime-value-edition-test galaxy-grand-prime-value-edition-test) ++- ++- ADD_EXECUTABLE(galaxy-j1-2016-test test/mock/galaxy-j1-2016.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j1-2016-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j1-2016-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j1-2016-test galaxy-j1-2016-test) ++- ++- ADD_EXECUTABLE(galaxy-j5-test test/mock/galaxy-j5.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j5-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j5-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j5-test galaxy-j5-test) ++- ++- ADD_EXECUTABLE(galaxy-j7-prime-test test/mock/galaxy-j7-prime.cc) ++- 
TARGET_INCLUDE_DIRECTORIES(galaxy-j7-prime-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j7-prime-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j7-prime-test galaxy-j7-prime-test) ++- ++- ADD_EXECUTABLE(galaxy-j7-tmobile-test test/mock/galaxy-j7-tmobile.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j7-tmobile-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j7-tmobile-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j7-tmobile-test galaxy-j7-tmobile-test) ++- ++- ADD_EXECUTABLE(galaxy-j7-uae-test test/mock/galaxy-j7-uae.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j7-uae-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j7-uae-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j7-uae-test galaxy-j7-uae-test) ++- ++- ADD_EXECUTABLE(galaxy-s3-us-test test/mock/galaxy-s3-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s3-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s3-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s3-us-test galaxy-s3-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s4-us-test test/mock/galaxy-s4-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s4-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s4-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s4-us-test galaxy-s4-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s5-global-test test/mock/galaxy-s5-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s5-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s5-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s5-global-test galaxy-s5-global-test) ++- ++- ADD_EXECUTABLE(galaxy-s5-us-test test/mock/galaxy-s5-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s5-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s5-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s5-us-test galaxy-s5-us-test) ++- ++- ADD_EXECUTABLE(galaxy-tab-3-7.0-test test/mock/galaxy-tab-3-7.0.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-7.0-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-tab-3-7.0-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-tab-3-7.0-test galaxy-tab-3-7.0-test) ++- ++- ADD_EXECUTABLE(galaxy-tab-3-lite-test test/mock/galaxy-tab-3-lite.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-lite-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-tab-3-lite-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-tab-3-lite-test galaxy-tab-3-lite-test) ++- ++- ADD_EXECUTABLE(galaxy-win-duos-test test/mock/galaxy-win-duos.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-win-duos-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-win-duos-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-win-duos-test galaxy-win-duos-test) ++- ++- ADD_EXECUTABLE(huawei-ascend-p7-test test/mock/huawei-ascend-p7.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-ascend-p7-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-ascend-p7-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-ascend-p7-test huawei-ascend-p7-test) ++- ++- ADD_EXECUTABLE(huawei-honor-6-test test/mock/huawei-honor-6.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-honor-6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-honor-6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-honor-6-test huawei-honor-6-test) ++- ++- ADD_EXECUTABLE(lenovo-a6600-plus-test test/mock/lenovo-a6600-plus.cc) ++- TARGET_INCLUDE_DIRECTORIES(lenovo-a6600-plus-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lenovo-a6600-plus-test PRIVATE cpuinfo_mock gtest) ++- 
ADD_TEST(lenovo-a6600-plus-test lenovo-a6600-plus-test) ++- ++- ADD_EXECUTABLE(lenovo-vibe-x2-test test/mock/lenovo-vibe-x2.cc) ++- TARGET_INCLUDE_DIRECTORIES(lenovo-vibe-x2-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lenovo-vibe-x2-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(lenovo-vibe-x2-test lenovo-vibe-x2-test) ++- ++- ADD_EXECUTABLE(lg-k10-eu-test test/mock/lg-k10-eu.cc) ++- TARGET_INCLUDE_DIRECTORIES(lg-k10-eu-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lg-k10-eu-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(lg-k10-eu-test lg-k10-eu-test) ++- ++- ADD_EXECUTABLE(lg-optimus-g-pro-test test/mock/lg-optimus-g-pro.cc) ++- TARGET_INCLUDE_DIRECTORIES(lg-optimus-g-pro-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lg-optimus-g-pro-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(lg-optimus-g-pro-test lg-optimus-g-pro-test) ++- ++- ADD_EXECUTABLE(moto-e-gen1-test test/mock/moto-e-gen1.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-e-gen1-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-e-gen1-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-e-gen1-test moto-e-gen1-test) ++- ++- ADD_EXECUTABLE(moto-g-gen1-test test/mock/moto-g-gen1.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen1-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen1-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen1-test moto-g-gen1-test) ++- ++- ADD_EXECUTABLE(moto-g-gen2-test test/mock/moto-g-gen2.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen2-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen2-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen2-test moto-g-gen2-test) ++- ++- ADD_EXECUTABLE(moto-g-gen3-test test/mock/moto-g-gen3.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen3-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen3-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen3-test moto-g-gen3-test) ++- ++- ADD_EXECUTABLE(moto-g-gen4-test test/mock/moto-g-gen4.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen4-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen4-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen4-test moto-g-gen4-test) ++- ++- ADD_EXECUTABLE(moto-g-gen5-test test/mock/moto-g-gen5.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen5-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen5-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen5-test moto-g-gen5-test) ++- ++- ADD_EXECUTABLE(nexus-s-test test/mock/nexus-s.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus-s-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus-s-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus-s-test nexus-s-test) ++- ++- ADD_EXECUTABLE(nexus4-test test/mock/nexus4.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus4-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus4-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus4-test nexus4-test) ++- ++- ADD_EXECUTABLE(nexus6-test test/mock/nexus6.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus6-test nexus6-test) ++- ++- ADD_EXECUTABLE(nexus10-test test/mock/nexus10.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus10-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus10-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus10-test nexus10-test) ++- ++- ADD_EXECUTABLE(padcod-10.1-test test/mock/padcod-10.1.cc) ++- TARGET_INCLUDE_DIRECTORIES(padcod-10.1-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(padcod-10.1-test PRIVATE cpuinfo_mock 
gtest) ++- ADD_TEST(padcod-10.1-test padcod-10.1-test) ++- ++- ADD_EXECUTABLE(xiaomi-redmi-2a-test test/mock/xiaomi-redmi-2a.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-2a-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-redmi-2a-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-redmi-2a-test xiaomi-redmi-2a-test) ++- ++- ADD_EXECUTABLE(xperia-sl-test test/mock/xperia-sl.cc) ++- TARGET_INCLUDE_DIRECTORIES(xperia-sl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xperia-sl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xperia-sl-test xperia-sl-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") ++- ADD_EXECUTABLE(alcatel-revvl-test test/mock/alcatel-revvl.cc) ++- TARGET_INCLUDE_DIRECTORIES(alcatel-revvl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(alcatel-revvl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(alcatel-revvl-test alcatel-revvl-test) ++- ++- ADD_EXECUTABLE(galaxy-a8-2018-test test/mock/galaxy-a8-2018.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2018-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-a8-2018-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-a8-2018-test galaxy-a8-2018-test) ++- ++- ADD_EXECUTABLE(galaxy-c9-pro-test test/mock/galaxy-c9-pro.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-c9-pro-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-c9-pro-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-c9-pro-test galaxy-c9-pro-test) ++- ++- ADD_EXECUTABLE(galaxy-s6-test test/mock/galaxy-s6.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s6-test galaxy-s6-test) ++- ++- ADD_EXECUTABLE(galaxy-s7-us-test test/mock/galaxy-s7-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s7-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s7-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s7-us-test galaxy-s7-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s7-global-test test/mock/galaxy-s7-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s7-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s7-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s7-global-test galaxy-s7-global-test) ++- ++- ADD_EXECUTABLE(galaxy-s8-us-test test/mock/galaxy-s8-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s8-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s8-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s8-us-test galaxy-s8-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s8-global-test test/mock/galaxy-s8-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s8-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s8-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s8-global-test galaxy-s8-global-test) ++- ++- ADD_EXECUTABLE(galaxy-s9-us-test test/mock/galaxy-s9-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s9-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s9-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s9-us-test galaxy-s9-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s9-global-test test/mock/galaxy-s9-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s9-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s9-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s9-global-test galaxy-s9-global-test) ++- ++- ADD_EXECUTABLE(huawei-mate-8-test test/mock/huawei-mate-8.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-8-test 
BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-8-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-8-test huawei-mate-8-test) ++- ++- ADD_EXECUTABLE(huawei-mate-9-test test/mock/huawei-mate-9.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-9-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-9-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-9-test huawei-mate-9-test) ++- ++- ADD_EXECUTABLE(huawei-mate-10-test test/mock/huawei-mate-10.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-10-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-10-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-10-test huawei-mate-10-test) ++- ++- ADD_EXECUTABLE(huawei-mate-20-test test/mock/huawei-mate-20.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-20-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-20-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-20-test huawei-mate-20-test) ++- ++- ADD_EXECUTABLE(huawei-p8-lite-test test/mock/huawei-p8-lite.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-p8-lite-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-p8-lite-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-p8-lite-test huawei-p8-lite-test) ++- ++- ADD_EXECUTABLE(huawei-p9-lite-test test/mock/huawei-p9-lite.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-p9-lite-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-p9-lite-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-p9-lite-test huawei-p9-lite-test) ++- ++- ADD_EXECUTABLE(huawei-p20-pro-test test/mock/huawei-p20-pro.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-p20-pro-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-p20-pro-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-p20-pro-test huawei-p20-pro-test) ++- ++- ADD_EXECUTABLE(iconia-one-10-test test/mock/iconia-one-10.cc) ++- TARGET_INCLUDE_DIRECTORIES(iconia-one-10-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(iconia-one-10-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(iconia-one-10-test iconia-one-10-test) ++- ++- ADD_EXECUTABLE(meizu-pro-6-test test/mock/meizu-pro-6.cc) ++- TARGET_INCLUDE_DIRECTORIES(meizu-pro-6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(meizu-pro-6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(meizu-pro-6-test meizu-pro-6-test) ++- ++- ADD_EXECUTABLE(meizu-pro-6s-test test/mock/meizu-pro-6s.cc) ++- TARGET_INCLUDE_DIRECTORIES(meizu-pro-6s-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(meizu-pro-6s-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(meizu-pro-6s-test meizu-pro-6s-test) ++- ++- ADD_EXECUTABLE(meizu-pro-7-plus-test test/mock/meizu-pro-7-plus.cc) ++- TARGET_INCLUDE_DIRECTORIES(meizu-pro-7-plus-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(meizu-pro-7-plus-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(meizu-pro-7-plus-test meizu-pro-7-plus-test) ++- ++- ADD_EXECUTABLE(nexus5x-test test/mock/nexus5x.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus5x-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus5x-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus5x-test nexus5x-test) ++- ++- ADD_EXECUTABLE(nexus6p-test test/mock/nexus6p.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus6p-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus6p-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus6p-test nexus6p-test) ++- ++- ADD_EXECUTABLE(nexus9-test test/mock/nexus9.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus9-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus9-test PRIVATE cpuinfo_mock gtest) 
++- ADD_TEST(nexus9-test nexus9-test) ++- ++- ADD_EXECUTABLE(oneplus-3t-test test/mock/oneplus-3t.cc) ++- TARGET_INCLUDE_DIRECTORIES(oneplus-3t-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oneplus-3t-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oneplus-3t-test oneplus-3t-test) ++- ++- ADD_EXECUTABLE(oneplus-5-test test/mock/oneplus-5.cc) ++- TARGET_INCLUDE_DIRECTORIES(oneplus-5-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oneplus-5-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oneplus-5-test oneplus-5-test) ++- ++- ADD_EXECUTABLE(oneplus-5t-test test/mock/oneplus-5t.cc) ++- TARGET_INCLUDE_DIRECTORIES(oneplus-5t-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oneplus-5t-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oneplus-5t-test oneplus-5t-test) ++- ++- ADD_EXECUTABLE(oppo-a37-test test/mock/oppo-a37.cc) ++- TARGET_INCLUDE_DIRECTORIES(oppo-a37-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oppo-a37-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oppo-a37-test oppo-a37-test) ++- ++- ADD_EXECUTABLE(oppo-r9-test test/mock/oppo-r9.cc) ++- TARGET_INCLUDE_DIRECTORIES(oppo-r9-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oppo-r9-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oppo-r9-test oppo-r9-test) ++- ++- ADD_EXECUTABLE(oppo-r15-test test/mock/oppo-r15.cc) ++- TARGET_INCLUDE_DIRECTORIES(oppo-r15-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oppo-r15-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oppo-r15-test oppo-r15-test) ++- ++- ADD_EXECUTABLE(pixel-test test/mock/pixel.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-test pixel-test) ++- ++- ADD_EXECUTABLE(pixel-c-test test/mock/pixel-c.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-c-test pixel-c-test) ++- ++- ADD_EXECUTABLE(pixel-xl-test test/mock/pixel-xl.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-xl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-xl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-xl-test pixel-xl-test) ++- ++- ADD_EXECUTABLE(pixel-2-xl-test test/mock/pixel-2-xl.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-2-xl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-2-xl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-2-xl-test pixel-2-xl-test) ++- ++- ADD_EXECUTABLE(xiaomi-mi-5c-test test/mock/xiaomi-mi-5c.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-mi-5c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-mi-5c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-mi-5c-test xiaomi-mi-5c-test) ++- ++- ADD_EXECUTABLE(xiaomi-redmi-note-3-test test/mock/xiaomi-redmi-note-3.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-3-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-redmi-note-3-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-redmi-note-3-test xiaomi-redmi-note-3-test) ++- ++- ADD_EXECUTABLE(xiaomi-redmi-note-4-test test/mock/xiaomi-redmi-note-4.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-4-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-redmi-note-4-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-redmi-note-4-test xiaomi-redmi-note-4-test) ++- ++- ADD_EXECUTABLE(xperia-c4-dual-test test/mock/xperia-c4-dual.cc) ++- TARGET_INCLUDE_DIRECTORIES(xperia-c4-dual-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xperia-c4-dual-test PRIVATE cpuinfo_mock 
gtest) ++- ADD_TEST(xperia-c4-dual-test xperia-c4-dual-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64)$") ++- ADD_EXECUTABLE(alldocube-iwork8-test test/mock/alldocube-iwork8.cc) ++- TARGET_INCLUDE_DIRECTORIES(alldocube-iwork8-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(alldocube-iwork8-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(alldocube-iwork8-test alldocube-iwork8-test) ++- ++- ADD_EXECUTABLE(leagoo-t5c-test test/mock/leagoo-t5c.cc) ++- TARGET_INCLUDE_DIRECTORIES(leagoo-t5c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(leagoo-t5c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(leagoo-t5c-test leagoo-t5c-test) ++- ++- ADD_EXECUTABLE(memo-pad-7-test test/mock/memo-pad-7.cc) ++- TARGET_INCLUDE_DIRECTORIES(memo-pad-7-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(memo-pad-7-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(memo-pad-7-test memo-pad-7-test) ++- ++- ADD_EXECUTABLE(zenfone-c-test test/mock/zenfone-c.cc) ++- TARGET_INCLUDE_DIRECTORIES(zenfone-c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(zenfone-c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(zenfone-c-test zenfone-c-test) ++- ++- ADD_EXECUTABLE(zenfone-2-test test/mock/zenfone-2.cc) ++- TARGET_INCLUDE_DIRECTORIES(zenfone-2-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(zenfone-2-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(zenfone-2-test zenfone-2-test) ++- ++- ADD_EXECUTABLE(zenfone-2e-test test/mock/zenfone-2e.cc) ++- TARGET_INCLUDE_DIRECTORIES(zenfone-2e-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(zenfone-2e-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(zenfone-2e-test zenfone-2e-test) ++- ENDIF() ++-ENDIF() ++- ++-# ---[ cpuinfo unit tests ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_TESTS) ++- ADD_EXECUTABLE(init-test test/init.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(init-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(init-test) ++- TARGET_LINK_LIBRARIES(init-test PRIVATE cpuinfo gtest gtest_main) ++- ADD_TEST(init-test init-test) ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- ADD_EXECUTABLE(get-current-test test/get-current.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(get-current-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(get-current-test) ++- TARGET_LINK_LIBRARIES(get-current-test PRIVATE cpuinfo gtest gtest_main) ++- ADD_TEST(get-current-test get-current-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86_64)$") ++- ADD_EXECUTABLE(brand-string-test test/name/brand-string.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(brand-string-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(brand-string-test) ++- TARGET_LINK_LIBRARIES(brand-string-test PRIVATE cpuinfo_internals gtest gtest_main) ++- ADD_TEST(brand-string-test brand-string-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") ++- ADD_LIBRARY(android_properties_interface STATIC test/name/android-properties-interface.c) ++- CPUINFO_TARGET_ENABLE_C99(android_properties_interface) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(android_properties_interface) ++- TARGET_LINK_LIBRARIES(android_properties_interface PRIVATE cpuinfo_internals) ++- ++- ADD_EXECUTABLE(chipset-test ++- test/name/proc-cpuinfo-hardware.cc ++- test/name/ro-product-board.cc ++- test/name/ro-board-platform.cc ++- test/name/ro-mediatek-platform.cc ++- test/name/ro-arch.cc ++- test/name/ro-chipname.cc ++- test/name/android-properties.cc) ++- 
CPUINFO_TARGET_ENABLE_CXX11(chipset-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(chipset-test) ++- TARGET_LINK_LIBRARIES(chipset-test PRIVATE android_properties_interface gtest gtest_main) ++- ADD_TEST(chipset-test chipset-test) ++- ++- ADD_EXECUTABLE(cache-test test/arm-cache.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(cache-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cache-test) ++- TARGET_COMPILE_DEFINITIONS(cache-test PRIVATE __STDC_LIMIT_MACROS=1 __STDC_CONSTANT_MACROS=1) ++- TARGET_LINK_LIBRARIES(cache-test PRIVATE cpuinfo_internals gtest gtest_main) ++- ADD_TEST(cache-test, cache-test) ++- ENDIF() ++-ENDIF() ++- ++-# ---[ Helper and debug tools ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS) ++- ADD_EXECUTABLE(isa-info tools/isa-info.c) ++- CPUINFO_TARGET_ENABLE_C99(isa-info) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(isa-info) ++- TARGET_LINK_LIBRARIES(isa-info PRIVATE cpuinfo) ++- INSTALL(TARGETS isa-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ++- ADD_EXECUTABLE(cpu-info tools/cpu-info.c) ++- CPUINFO_TARGET_ENABLE_C99(cpu-info) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpu-info) ++- TARGET_LINK_LIBRARIES(cpu-info PRIVATE cpuinfo) ++- INSTALL(TARGETS cpu-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ++- ADD_EXECUTABLE(cache-info tools/cache-info.c) ++- CPUINFO_TARGET_ENABLE_C99(cache-info) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cache-info) ++- TARGET_LINK_LIBRARIES(cache-info PRIVATE cpuinfo) ++- INSTALL(TARGETS cache-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ++- IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") ++- ADD_EXECUTABLE(auxv-dump tools/auxv-dump.c) ++- CPUINFO_TARGET_ENABLE_C99(auxv-dump) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(auxv-dump) ++- TARGET_LINK_LIBRARIES(auxv-dump PRIVATE ${CMAKE_DL_LIBS} cpuinfo) ++- ++- ADD_EXECUTABLE(cpuinfo-dump tools/cpuinfo-dump.c) ++- CPUINFO_TARGET_ENABLE_C99(cpuinfo-dump) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo-dump) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86_64)$") ++- ADD_EXECUTABLE(cpuid-dump tools/cpuid-dump.c) ++- CPUINFO_TARGET_ENABLE_C99(cpuid-dump) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpuid-dump) ++- TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE src) ++- TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE include) ++- INSTALL(TARGETS cpuid-dump RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ENDIF() ++-ENDIF() ++diff --git a/README.md b/README.md ++index 7d383ff..ee5fb82 100644 ++--- a/README.md +++++ b/README.md ++@@ -152,21 +152,20 @@ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set); ++ - [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android) ++ - [ ] Using kernel log (`dmesg`) on ARM Linux ++ - Vendor and microarchitecture detection ++- - [x] Intel-designed x86/x86-64 cores (up to Kaby Lake, Airmont, and Knights Mill) ++- - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen) +++ - [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill) +++ - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2) ++ - [ ] VIA-designed x86/x86-64 cores ++ - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise) ++- - [x] ARM-designed ARM cores (up to Cortex-A55 and Cortex-A75) ++- - [x] Qualcomm-designed ARM cores (up to Kryo, Kryo-280, and Kryo-385) ++- - [x] Nvidia-designed ARM cores (Denver) +++ - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1) +++ - [x] Qualcomm-designed 
ARM cores (Scorpion, Krait, and Kryo) +++ - [x] Nvidia-designed ARM cores (Denver and Carmel) ++ - [x] Samsung-designed ARM cores (Exynos) ++ - [x] Intel-designed ARM cores (XScale up to 3rd-gen) ++- - [x] Apple-designed ARM cores (up to Hurricane) +++ - [x] Apple-designed ARM cores (up to Lightning and Thunder) ++ - [x] Cavium-designed ARM cores (ThunderX) ++ - [x] AppliedMicro-designed ARM cores (X-Gene) ++ - Instruction set detection ++ - [x] Using CPUID (x86/x86-64) ++- - [x] Using dynamic code generation validator (Native Client/x86-64) ++ - [x] Using `/proc/cpuinfo` on 32-bit ARM EABI (Linux) ++ - [x] Using microarchitecture heuristics on (32-bit ARM) ++ - [x] Using `FPSID` and `WCID` registers (32-bit ARM) ++diff --git a/bench/get-current.cc b/bench/get-current.cc ++index 91b35a0..b547df0 100644 ++--- a/bench/get-current.cc +++++ b/bench/get-current.cc ++@@ -21,4 +21,13 @@ static void cpuinfo_get_current_core(benchmark::State& state) { ++ } ++ BENCHMARK(cpuinfo_get_current_core)->Unit(benchmark::kNanosecond); ++ +++static void cpuinfo_get_current_uarch_index(benchmark::State& state) { +++ cpuinfo_initialize(); +++ while (state.KeepRunning()) { +++ const uint32_t uarch_index = cpuinfo_get_current_uarch_index(); +++ benchmark::DoNotOptimize(uarch_index); +++ } +++} +++BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond); +++ ++ BENCHMARK_MAIN(); ++diff --git a/cmake/DownloadGoogleTest.cmake b/cmake/DownloadGoogleTest.cmake ++index d69d19a..dc86c9c 100644 ++--- a/cmake/DownloadGoogleTest.cmake +++++ b/cmake/DownloadGoogleTest.cmake ++@@ -4,8 +4,8 @@ PROJECT(googletest-download NONE) ++ ++ INCLUDE(ExternalProject) ++ ExternalProject_Add(googletest ++- URL https://github.com/google/googletest/archive/release-1.8.0.zip ++- URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf +++ URL https://github.com/google/googletest/archive/release-1.10.0.zip +++ URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91 ++ SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" ++ BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest" ++ CONFIGURE_COMMAND "" ++diff --git a/configure.py b/configure.py ++index a340c4c..0e58dba 100755 ++--- a/configure.py +++++ b/configure.py ++@@ -26,8 +26,8 @@ def main(args): ++ sources = ["init.c", "api.c"] ++ if build.target.is_x86 or build.target.is_x86_64: ++ sources += [ ++- "x86/init.c", "x86/info.c", "x86/vendor.c", "x86/uarch.c", "x86/name.c", ++- "x86/topology.c", +++ "x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c", +++ "x86/uarch.c", "x86/name.c", "x86/topology.c", ++ "x86/cache/init.c", "x86/cache/descriptor.c", "x86/cache/deterministic.c", ++ ] ++ if build.target.is_macos: ++@@ -37,7 +37,6 @@ def main(args): ++ "x86/linux/init.c", ++ "x86/linux/cpuinfo.c", ++ ] ++- sources.append("x86/isa.c" if not build.target.is_nacl else "x86/nacl/isa.c") ++ if build.target.is_arm or build.target.is_arm64: ++ sources += ["arm/uarch.c", "arm/cache.c"] ++ if build.target.is_linux or build.target.is_android: ++diff --git a/include/cpuinfo.h b/include/cpuinfo.h ++index 9938d2b..e4d2d0c 100644 ++--- a/include/cpuinfo.h +++++ b/include/cpuinfo.h ++@@ -34,10 +34,6 @@ ++ #define CPUINFO_ARCH_PPC64 1 ++ #endif ++ ++-#if defined(__pnacl__) ++- #define CPUINFO_ARCH_PNACL 1 ++-#endif ++- ++ #if defined(__asmjs__) ++ #define CPUINFO_ARCH_ASMJS 1 ++ #endif ++@@ -80,10 +76,6 @@ ++ #define CPUINFO_ARCH_PPC64 0 ++ #endif ++ ++-#ifndef CPUINFO_ARCH_PNACL ++- #define CPUINFO_ARCH_PNACL 0 ++-#endif ++- ++ 
#ifndef CPUINFO_ARCH_ASMJS ++ #define CPUINFO_ARCH_ASMJS 0 ++ #endif ++@@ -190,6 +182,12 @@ enum cpuinfo_vendor { ++ * Processors are designed by HiSilicon, a subsidiary of Huawei. ++ */ ++ cpuinfo_vendor_huawei = 15, +++ /** +++ * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor of x86-64 processor microarchitectures. +++ * +++ * Processors are variants of AMD cores. +++ */ +++ cpuinfo_vendor_hygon = 16, ++ ++ /* Active vendors of embedded CPUs */ ++ ++@@ -401,6 +399,8 @@ enum cpuinfo_uarch { ++ cpuinfo_uarch_cortex_a35 = 0x00300335, ++ /** ARM Cortex-A53. */ ++ cpuinfo_uarch_cortex_a53 = 0x00300353, +++ /** ARM Cortex-A55 revision 0 (restricted dual-issue capabilities compared to revision 1+). */ +++ cpuinfo_uarch_cortex_a55r0 = 0x00300354, ++ /** ARM Cortex-A55. */ ++ cpuinfo_uarch_cortex_a55 = 0x00300355, ++ /** ARM Cortex-A57. */ ++@@ -478,6 +478,10 @@ enum cpuinfo_uarch { ++ cpuinfo_uarch_vortex = 0x00700107, ++ /** Apple A12 processor (little cores). */ ++ cpuinfo_uarch_tempest = 0x00700108, +++ /** Apple A13 processor (big cores). */ +++ cpuinfo_uarch_lightning = 0x00700109, +++ /** Apple A13 processor (little cores). */ +++ cpuinfo_uarch_thunder = 0x0070010A, ++ ++ /** Cavium ThunderX. */ ++ cpuinfo_uarch_thunderx = 0x00800100, ++@@ -494,6 +498,9 @@ enum cpuinfo_uarch { ++ ++ /** Applied Micro X-Gene. */ ++ cpuinfo_uarch_xgene = 0x00B00100, +++ +++ /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ +++ cpuinfo_uarch_dhyana = 0x01000100, ++ }; ++ ++ struct cpuinfo_processor { ++@@ -613,6 +620,22 @@ struct cpuinfo_package { ++ uint32_t cluster_count; ++ }; ++ +++struct cpuinfo_uarch_info { +++ /** Type of CPU microarchitecture */ +++ enum cpuinfo_uarch uarch; +++#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +++ /** Value of CPUID leaf 1 EAX register for the microarchitecture */ +++ uint32_t cpuid; +++#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ /** Value of Main ID Register (MIDR) for the microarchitecture */ +++ uint32_t midr; +++#endif +++ /** Number of logical processors with the microarchitecture */ +++ uint32_t processor_count; +++ /** Number of cores with the microarchitecture */ +++ uint32_t core_count; +++}; +++ ++ #ifdef __cplusplus ++ extern "C" { ++ #endif ++@@ -1721,6 +1744,7 @@ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); ++ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); ++ const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); ++ const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_packages(void); +++const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarchs(void); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void); ++@@ -1731,6 +1755,7 @@ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processor(uint32_t index ++ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_core(uint32_t index); ++ const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_cluster(uint32_t index); ++ const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_package(uint32_t index); +++const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarch(uint32_t index); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index); ++@@ -1741,6 +1766,7 @@ 
uint32_t CPUINFO_ABI cpuinfo_get_processors_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_cores_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_clusters_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_packages_count(void); +++uint32_t CPUINFO_ABI cpuinfo_get_uarchs_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); ++@@ -1752,9 +1778,31 @@ uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); ++ */ ++ uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); ++ +++/** +++ * Identify the logical processor that executes the current thread. +++ * +++ * There is no guarantee that the thread will stay on the same logical processor for any time. +++ * Callers should treat the result as only a hint, and be prepared to handle NULL return value. +++ */ ++ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void); +++ +++/** +++ * Identify the core that executes the current thread. +++ * +++ * There is no guarantee that the thread will stay on the same core for any time. +++ * Callers should treat the result as only a hint, and be prepared to handle NULL return value. +++ */ ++ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void); ++ +++/** +++ * Identify the microarchitecture index of the core that executes the current thread. +++ * If the system does not support such identification, the function return 0. +++ * +++ * There is no guarantee that the thread will stay on the same type of core for any time. +++ * Callers should treat the result as only a hint. +++ */ +++uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); +++ ++ #ifdef __cplusplus ++ } /* extern "C" */ ++ #endif ++diff --git a/src/api.c b/src/api.c ++index b180d80..0cc5d4e 100644 ++--- a/src/api.c +++++ b/src/api.c ++@@ -1,9 +1,16 @@ +++#include ++ #include ++ ++ #include ++ #include ++ #include ++ +++#ifdef __linux__ +++ #include +++ +++ #include +++ #include +++#endif ++ ++ bool cpuinfo_is_initialized = false; ++ ++@@ -20,235 +27,347 @@ uint32_t cpuinfo_packages_count = 0; ++ uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 }; ++ uint32_t cpuinfo_max_cache_size = 0; ++ +++#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL; +++ uint32_t cpuinfo_uarchs_count = 0; +++#else +++ struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown }; +++#endif +++ +++#ifdef __linux__ +++ uint32_t cpuinfo_linux_cpu_max = 0; +++ const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; +++ const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL; +++ #endif +++#endif +++ ++ ++ const struct cpuinfo_processor* cpuinfo_get_processors(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors"); ++ } ++ return cpuinfo_processors; ++ } ++ ++ const struct cpuinfo_core* cpuinfo_get_cores(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); ++ } ++ return cpuinfo_cores; ++ } ++ ++ const struct cpuinfo_cluster* cpuinfo_get_clusters(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { 
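As a usage sketch for the per-microarchitecture API declared above (struct cpuinfo_uarch_info, cpuinfo_get_uarchs, cpuinfo_get_uarch and cpuinfo_get_uarchs_count), the following standalone C program, illustrative only and not part of the patched sources, enumerates the core types reported after initialization:

    #include <inttypes.h>
    #include <stdio.h>
    #include <cpuinfo.h>

    int main(void) {
        if (!cpuinfo_initialize()) {
            fprintf(stderr, "cpuinfo_initialize() failed\n");
            return 1;
        }
        /* One cpuinfo_uarch_info entry per distinct core microarchitecture */
        const uint32_t uarchs = cpuinfo_get_uarchs_count();
        for (uint32_t i = 0; i < uarchs; i++) {
            const struct cpuinfo_uarch_info* info = cpuinfo_get_uarch(i);
            printf("uarch 0x%08" PRIX32 ": %" PRIu32 " cores, %" PRIu32 " logical processors\n",
                (uint32_t) info->uarch, info->core_count, info->processor_count);
        }
        cpuinfo_deinitialize();
        return 0;
    }

On x86/x86-64 this loop visits exactly one entry; on ARM/ARM64 it visits one entry per heterogeneous core type.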
++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters"); ++ } ++ return cpuinfo_clusters; ++ } ++ ++ const struct cpuinfo_package* cpuinfo_get_packages(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages"); ++ } ++ return cpuinfo_packages; ++ } ++ ++-const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { +++const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() { ++ if (!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ return cpuinfo_uarchs; +++ #else +++ return &cpuinfo_global_uarch; +++ #endif +++} +++ +++const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor"); ++ } ++- if (index < cpuinfo_processors_count) { ++- return cpuinfo_processors + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_processors_count) { ++ return NULL; ++ } +++ return &cpuinfo_processors[index]; ++ } ++ ++ const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); ++ } ++- if (index < cpuinfo_cores_count) { ++- return cpuinfo_cores + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) { ++ return NULL; ++ } +++ return &cpuinfo_cores[index]; ++ } ++ ++ const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster"); ++ } ++- if (index < cpuinfo_clusters_count) { ++- return cpuinfo_clusters + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) { ++ return NULL; ++ } +++ return &cpuinfo_clusters[index]; ++ } ++ ++ const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package"); ++ } ++- if (index < cpuinfo_packages_count) { ++- return cpuinfo_packages + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) { ++ return NULL; ++ } +++ return &cpuinfo_packages[index]; ++ } ++ ++-uint32_t cpuinfo_get_processors_count(void) { +++const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) { ++ if (!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) { +++ return NULL; +++ } +++ return &cpuinfo_uarchs[index]; +++ #else +++ if CPUINFO_UNLIKELY(index != 0) { +++ return NULL; +++ } +++ return &cpuinfo_global_uarch; +++ #endif +++} +++ +++uint32_t cpuinfo_get_processors_count(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count"); ++ } ++ return cpuinfo_processors_count; ++ } ++ ++ uint32_t cpuinfo_get_cores_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ 
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count"); ++ } ++ return cpuinfo_cores_count; ++ } ++ ++ uint32_t cpuinfo_get_clusters_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count"); ++ } ++ return cpuinfo_clusters_count; ++ } ++ ++ uint32_t cpuinfo_get_packages_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count"); ++ } ++ return cpuinfo_packages_count; ++ } ++ ++-const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { +++uint32_t cpuinfo_get_uarchs_count(void) { ++ if (!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ return cpuinfo_uarchs_count; +++ #else +++ return 1; +++ #endif +++} +++ +++const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_1i]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_1d]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_2]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_3]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_4]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_1i]) { ++- return cpuinfo_cache[cpuinfo_cache_level_1i] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_1i][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_1d]) { ++- return cpuinfo_cache[cpuinfo_cache_level_1d] + index; ++- } else { +++ if 
CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_1d][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_2]) { ++- return cpuinfo_cache[cpuinfo_cache_level_2] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_2][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_3]) { ++- return cpuinfo_cache[cpuinfo_cache_level_3] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_3][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_4]) { ++- return cpuinfo_cache[cpuinfo_cache_level_4] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_4][index]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_1i]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_1d]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_2]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_3]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_4]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ 
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size"); ++ } ++ return cpuinfo_max_cache_size; ++ } +++ +++const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); +++ } +++ #ifdef __linux__ +++ unsigned cpu; +++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { +++ return 0; +++ } +++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { +++ return 0; +++ } +++ return cpuinfo_linux_cpu_to_processor_map[cpu]; +++ #else +++ return NULL; +++ #endif +++} +++ +++const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); +++ } +++ #ifdef __linux__ +++ unsigned cpu; +++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { +++ return 0; +++ } +++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { +++ return 0; +++ } +++ return cpuinfo_linux_cpu_to_core_map[cpu]; +++ #else +++ return NULL; +++ #endif +++} +++ +++uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ #ifdef __linux__ +++ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { +++ /* Special case: avoid syscall on systems with only a single type of cores */ +++ return 0; +++ } +++ +++ /* General case */ +++ unsigned cpu; +++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { +++ return 0; +++ } +++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { +++ return 0; +++ } +++ return cpuinfo_linux_cpu_to_uarch_index_map[cpu]; +++ #else +++ /* Fallback: pretend to be on the big core. */ +++ return 0; +++ #endif +++ #else +++ /* Only ARM/ARM64 processors may include cores of different types in the same package. */ +++ return 0; +++ #endif +++} ++diff --git a/src/arm/cache.c b/src/arm/cache.c ++index ccadeb4..c2bc7d2 100644 ++--- a/src/arm/cache.c +++++ b/src/arm/cache.c ++@@ -659,6 +659,7 @@ void cpuinfo_arm_decode_cache( ++ }; ++ } ++ break; +++ case cpuinfo_uarch_cortex_a55r0: ++ case cpuinfo_uarch_cortex_a55: ++ /* ++ * ARM Cortex-A55 Core Technical Reference Manual ++diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h ++index 275d072..f99da66 100644 ++--- a/src/arm/linux/api.h +++++ b/src/arm/linux/api.h ++@@ -153,6 +153,7 @@ struct cpuinfo_arm_linux_processor { ++ uint32_t midr; ++ enum cpuinfo_vendor vendor; ++ enum cpuinfo_uarch uarch; +++ uint32_t uarch_index; ++ /** ++ * ID of the physical package which includes this logical processor. 
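The doc comment above positions cpuinfo_get_current_uarch_index() as a cheap hint for per-core-type dispatch. A minimal sketch of that pattern follows; kernel_fn, per_uarch_kernels and run_kernel are illustrative names, not part of cpuinfo:

    #include <cpuinfo.h>

    typedef void (*kernel_fn)(void);

    /* per_uarch_kernels is assumed to hold cpuinfo_get_uarchs_count() entries */
    static void run_kernel(const kernel_fn* per_uarch_kernels) {
        /* Returns 0 on single-uarch systems or when identification is unavailable */
        const uint32_t index = cpuinfo_get_current_uarch_index();
        per_uarch_kernels[index]();
    }

Because the result is only a hint and the thread may migrate at any time, every kernel in the table must still be correct on every core type; the index should only steer tuning choices.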
++ * The value is parsed from /sys/devices/system/cpu/cpu/topology/physical_package_id ++@@ -346,3 +347,6 @@ CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr( ++ uint32_t max_processors, ++ uint32_t usable_processors, ++ struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); +++ +++extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; +++extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; ++diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c ++index f0c432c..6272abf 100644 ++--- a/src/arm/linux/init.c +++++ b/src/arm/linux/init.c ++@@ -106,12 +106,14 @@ void cpuinfo_arm_linux_init(void) { ++ struct cpuinfo_processor* processors = NULL; ++ struct cpuinfo_core* cores = NULL; ++ struct cpuinfo_cluster* clusters = NULL; ++- const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; ++- const struct cpuinfo_core** linux_cpu_to_core_map = NULL; +++ struct cpuinfo_uarch_info* uarchs = NULL; ++ struct cpuinfo_cache* l1i = NULL; ++ struct cpuinfo_cache* l1d = NULL; ++ struct cpuinfo_cache* l2 = NULL; ++ struct cpuinfo_cache* l3 = NULL; +++ const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; +++ const struct cpuinfo_core** linux_cpu_to_core_map = NULL; +++ uint32_t* linux_cpu_to_uarch_index_map = NULL; ++ ++ const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); ++ cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); ++@@ -400,6 +402,18 @@ void cpuinfo_arm_linux_init(void) { ++ } ++ } ++ +++ uint32_t uarchs_count = 0; +++ enum cpuinfo_uarch last_uarch; +++ for (uint32_t i = 0; i < arm_linux_processors_count; i++) { +++ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { +++ if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) { +++ last_uarch = arm_linux_processors[i].uarch; +++ uarchs_count += 1; +++ } +++ arm_linux_processors[i].uarch_index = uarchs_count - 1; +++ } +++ } +++ ++ /* ++ * Assumptions: ++ * - No SMP (i.e. each core supports only one hardware thread). 
++@@ -432,6 +446,13 @@ void cpuinfo_arm_linux_init(void) { ++ goto cleanup; ++ } ++ +++ uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); +++ if (uarchs == NULL) { +++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", +++ uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count); +++ goto cleanup; +++ } +++ ++ linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*)); ++ if (linux_cpu_to_processor_map == NULL) { ++ cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries", ++@@ -446,6 +467,15 @@ void cpuinfo_arm_linux_init(void) { ++ goto cleanup; ++ } ++ +++ if (uarchs_count > 1) { +++ linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t)); +++ if (linux_cpu_to_uarch_index_map == NULL) { +++ cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries", +++ arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count); +++ goto cleanup; +++ } +++ } +++ ++ l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache)); ++ if (l1i == NULL) { ++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", ++@@ -460,6 +490,22 @@ void cpuinfo_arm_linux_init(void) { ++ goto cleanup; ++ } ++ +++ uint32_t uarchs_index = 0; +++ for (uint32_t i = 0; i < arm_linux_processors_count; i++) { +++ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { +++ if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) { +++ last_uarch = arm_linux_processors[i].uarch; +++ uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { +++ .uarch = arm_linux_processors[i].uarch, +++ .midr = arm_linux_processors[i].midr, +++ }; +++ uarchs_index += 1; +++ } +++ uarchs[uarchs_index - 1].processor_count += 1; +++ uarchs[uarchs_index - 1].core_count += 1; +++ } +++ } +++ ++ uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; ++ /* Indication whether L3 (if it exists) is shared between all cores */ ++ bool shared_l3 = true; ++@@ -499,6 +545,11 @@ void cpuinfo_arm_linux_init(void) { ++ cores[i].midr = arm_linux_processors[i].midr; ++ linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i]; ++ +++ if (linux_cpu_to_uarch_index_map != NULL) { +++ linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] = +++ arm_linux_processors[i].uarch_index; +++ } +++ ++ struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; ++ cpuinfo_arm_decode_cache( ++ arm_linux_processors[i].uarch, ++@@ -658,12 +709,11 @@ void cpuinfo_arm_linux_init(void) { ++ } ++ ++ /* Commit */ ++- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; ++- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; ++ cpuinfo_processors = processors; ++ cpuinfo_cores = cores; ++ cpuinfo_clusters = clusters; ++ cpuinfo_packages = &package; +++ cpuinfo_uarchs = uarchs; ++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; ++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; ++ cpuinfo_cache[cpuinfo_cache_level_2] = l2; ++@@ -673,33 +723,42 @@ void cpuinfo_arm_linux_init(void) { ++ cpuinfo_cores_count = valid_processors; ++ cpuinfo_clusters_count = cluster_count; ++ cpuinfo_packages_count = 1; +++ cpuinfo_uarchs_count = uarchs_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; ++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; ++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++ 
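The two loops added to src/arm/linux/init.c above group valid processors into runs that share a uarch: the first pass counts runs to size the uarchs array, the second fills it and accumulates per-uarch processor and core counts. A toy, standalone restatement of that counting rule (the uarch_of_cpu values are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* e.g. a big.LITTLE system listed as 4 little cores then 4 big cores */
        const uint32_t uarch_of_cpu[8] = { 1, 1, 1, 1, 2, 2, 2, 2 };
        uint32_t uarchs_count = 0, last_uarch = 0;
        for (uint32_t i = 0; i < 8; i++) {
            if (uarchs_count == 0 || uarch_of_cpu[i] != last_uarch) {
                last_uarch = uarch_of_cpu[i];
                uarchs_count += 1; /* a new run of identical core types starts here */
            }
        }
        printf("distinct core types: %u\n", (unsigned) uarchs_count); /* prints 2 */
        return 0;
    }

Note this counts runs rather than unique values, which relies on same-uarch processors appearing contiguously in the processor list.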
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++- ++ cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]); ++ +++ cpuinfo_linux_cpu_max = arm_linux_processors_count; +++ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; +++ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; +++ cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map; +++ ++ __sync_synchronize(); ++ ++ cpuinfo_is_initialized = true; ++ ++- linux_cpu_to_processor_map = NULL; ++- linux_cpu_to_core_map = NULL; ++ processors = NULL; ++ cores = NULL; ++ clusters = NULL; +++ uarchs = NULL; ++ l1i = l1d = l2 = l3 = NULL; +++ linux_cpu_to_processor_map = NULL; +++ linux_cpu_to_core_map = NULL; +++ linux_cpu_to_uarch_index_map = NULL; ++ ++ cleanup: ++ free(arm_linux_processors); ++- free(linux_cpu_to_processor_map); ++- free(linux_cpu_to_core_map); ++ free(processors); ++ free(cores); ++ free(clusters); +++ free(uarchs); ++ free(l1i); ++ free(l1d); ++ free(l2); ++ free(l3); +++ free(linux_cpu_to_processor_map); +++ free(linux_cpu_to_core_map); +++ free(linux_cpu_to_uarch_index_map); ++ } ++diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c ++index e64cc18..bd27259 100644 ++--- a/src/arm/mach/init.c +++++ b/src/arm/mach/init.c ++@@ -14,6 +14,16 @@ ++ #include ++ #include ++ +++/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ +++#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL +++ #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 +++#endif +++#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST +++ #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F +++#endif +++#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER +++ #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 +++#endif ++ ++ struct cpuinfo_arm_isa cpuinfo_isa = { ++ #if CPUINFO_ARCH_ARM ++@@ -82,37 +92,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype ++ return cpuinfo_uarch_twister; ++ case CPUFAMILY_ARM_HURRICANE: ++ return cpuinfo_uarch_hurricane; ++-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL ++ case CPUFAMILY_ARM_MONSOON_MISTRAL: ++-#else ++- case 0xe81e7ef6: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ ++-#endif ++ /* 2x Monsoon + 4x Mistral cores */ ++ return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral; ++-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST ++ case CPUFAMILY_ARM_VORTEX_TEMPEST: ++-#else ++- case 0x07d34b9f: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ ++-#endif ++ /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */ ++ return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest; +++ case CPUFAMILY_ARM_LIGHTNING_THUNDER: +++ /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */ +++ return core_index + 4 < core_count ? 
cpuinfo_uarch_lightning : cpuinfo_uarch_thunder; ++ default: ++ /* Use hw.cpusubtype for detection */ ++ break; ++ } ++ ++- switch (cpu_subtype) { ++- case CPU_SUBTYPE_ARM_V7: ++- return cpuinfo_uarch_cortex_a8; ++- case CPU_SUBTYPE_ARM_V7F: ++- return cpuinfo_uarch_cortex_a9; ++- case CPU_SUBTYPE_ARM_V7K: ++- return cpuinfo_uarch_cortex_a7; ++- default: ++- return cpuinfo_uarch_unknown; ++- } +++ #if CPUINFO_ARCH_ARM +++ switch (cpu_subtype) { +++ case CPU_SUBTYPE_ARM_V7: +++ return cpuinfo_uarch_cortex_a8; +++ case CPU_SUBTYPE_ARM_V7F: +++ return cpuinfo_uarch_cortex_a9; +++ case CPU_SUBTYPE_ARM_V7K: +++ return cpuinfo_uarch_cortex_a7; +++ default: +++ return cpuinfo_uarch_unknown; +++ } +++ #else +++ return cpuinfo_uarch_unknown; +++ #endif ++ } ++ ++ static void decode_package_name(char* package_name) { ++@@ -244,6 +251,7 @@ void cpuinfo_arm_mach_init(void) { ++ struct cpuinfo_core* cores = NULL; ++ struct cpuinfo_cluster* clusters = NULL; ++ struct cpuinfo_package* packages = NULL; +++ struct cpuinfo_uarch_info* uarchs = NULL; ++ struct cpuinfo_cache* l1i = NULL; ++ struct cpuinfo_cache* l1d = NULL; ++ struct cpuinfo_cache* l2 = NULL; ++@@ -330,21 +338,12 @@ void cpuinfo_arm_mach_init(void) { ++ * Thus, we whitelist CPUs known to support these instructions. ++ */ ++ switch (cpu_family) { ++-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL ++ case CPUFAMILY_ARM_MONSOON_MISTRAL: ++-#else ++- case 0xe81e7ef6: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ ++-#endif ++-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST ++ case CPUFAMILY_ARM_VORTEX_TEMPEST: ++-#else ++- case 0x07d34b9f: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ ++-#endif ++-#if CPUINFO_ARCH_ARM64 ++- cpuinfo_isa.atomics = true; ++-#endif +++ case CPUFAMILY_ARM_LIGHTNING_THUNDER: +++ #if CPUINFO_ARCH_ARM64 +++ cpuinfo_isa.atomics = true; +++ #endif ++ cpuinfo_isa.fp16arith = true; ++ } ++ ++@@ -379,10 +378,22 @@ void cpuinfo_arm_mach_init(void) { ++ num_clusters * sizeof(struct cpuinfo_cluster), num_clusters); ++ goto cleanup; ++ } +++ uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info)); +++ if (uarchs == NULL) { +++ cpuinfo_log_error( +++ "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs", +++ num_clusters * sizeof(enum cpuinfo_uarch), num_clusters); +++ goto cleanup; +++ } ++ uint32_t cluster_idx = UINT32_MAX; ++ for (uint32_t i = 0; i < mach_topology.cores; i++) { ++ if (i == 0 || cores[i].uarch != cores[i - 1].uarch) { ++ cluster_idx++; +++ uarchs[cluster_idx] = (struct cpuinfo_uarch_info) { +++ .uarch = cores[i].uarch, +++ .processor_count = 1, +++ .core_count = 1, +++ }; ++ clusters[cluster_idx] = (struct cpuinfo_cluster) { ++ .processor_start = i * threads_per_core, ++ .processor_count = 1, ++@@ -394,6 +405,8 @@ void cpuinfo_arm_mach_init(void) { ++ .uarch = cores[i].uarch, ++ }; ++ } else { +++ uarchs[cluster_idx].processor_count++; +++ uarchs[cluster_idx].core_count++; ++ clusters[cluster_idx].processor_count++; ++ clusters[cluster_idx].core_count++; ++ } ++@@ -542,26 +555,25 @@ void cpuinfo_arm_mach_init(void) { ++ } ++ ++ /* Commit changes */ ++- cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; ++- cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; ++- cpuinfo_cache[cpuinfo_cache_level_2] = l2; ++- cpuinfo_cache[cpuinfo_cache_level_3] = l3; ++- ++ cpuinfo_processors = processors; ++ cpuinfo_cores = cores; ++ cpuinfo_clusters = clusters; ++ cpuinfo_packages = packages; ++- ++- cpuinfo_cache_count[cpuinfo_cache_level_1i] = 
l1_count; ++- cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; ++- cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++- cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; +++ cpuinfo_uarchs = uarchs; +++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; +++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; +++ cpuinfo_cache[cpuinfo_cache_level_2] = l2; +++ cpuinfo_cache[cpuinfo_cache_level_3] = l3; ++ ++ cpuinfo_processors_count = mach_topology.threads; ++ cpuinfo_cores_count = mach_topology.cores; ++ cpuinfo_clusters_count = num_clusters; ++ cpuinfo_packages_count = mach_topology.packages; ++- +++ cpuinfo_uarchs_count = num_clusters; +++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; +++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; +++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; +++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); ++ ++ __sync_synchronize(); ++@@ -572,6 +584,7 @@ void cpuinfo_arm_mach_init(void) { ++ cores = NULL; ++ clusters = NULL; ++ packages = NULL; +++ uarchs = NULL; ++ l1i = l1d = l2 = l3 = NULL; ++ ++ cleanup: ++@@ -579,6 +592,7 @@ cleanup: ++ free(cores); ++ free(clusters); ++ free(packages); +++ free(uarchs); ++ free(l1i); ++ free(l1d); ++ free(l2); ++diff --git a/src/arm/uarch.c b/src/arm/uarch.c ++index a38250a..2aef9e7 100644 ++--- a/src/arm/uarch.c +++++ b/src/arm/uarch.c ++@@ -58,7 +58,9 @@ void cpuinfo_arm_decode_vendor_uarch( ++ *uarch = cpuinfo_uarch_cortex_a35; ++ break; ++ case 0xD05: ++- *uarch = cpuinfo_uarch_cortex_a55; +++ // Note: use Variant, not Revision, field +++ *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ? +++ cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55; ++ break; ++ case 0xD06: ++ *uarch = cpuinfo_uarch_cortex_a65; ++@@ -257,9 +259,9 @@ void cpuinfo_arm_decode_vendor_uarch( ++ *vendor = cpuinfo_vendor_arm; ++ *uarch = cpuinfo_uarch_cortex_a75; ++ break; ++- case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55 */ +++ case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */ ++ *vendor = cpuinfo_vendor_arm; ++- *uarch = cpuinfo_uarch_cortex_a55; +++ *uarch = cpuinfo_uarch_cortex_a55r0; ++ break; ++ case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */ ++ *vendor = cpuinfo_vendor_arm; ++diff --git a/src/cpuinfo/common.h b/src/cpuinfo/common.h ++index 6ba746e..b2b404d 100644 ++--- a/src/cpuinfo/common.h +++++ b/src/cpuinfo/common.h ++@@ -12,29 +12,29 @@ ++ #define CPUINFO_COUNT_OF(array) (sizeof(array) / sizeof(0[array])) ++ ++ #if defined(__GNUC__) ++- #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) ++- #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) +++ #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) +++ #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) ++ #else ++- #define CPUINFO_LIKELY(condition) (!!(condition)) ++- #define CPUINFO_UNLIKELY(condition) (!!(condition)) +++ #define CPUINFO_LIKELY(condition) (!!(condition)) +++ #define CPUINFO_UNLIKELY(condition) (!!(condition)) ++ #endif ++ ++ #ifndef CPUINFO_INTERNAL ++- #if defined(__ELF__) ++- #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) ++- #elif defined(__MACH__) ++- #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) ++- #else ++- #define CPUINFO_INTERNAL ++- #endif +++ #if defined(__ELF__) +++ #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) +++ #elif 
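The src/arm/uarch.c hunk above starts distinguishing Cortex-A55 revision 0 by the MIDR Variant field rather than the Revision field. In the ARM MIDR layout the Variant field occupies bits [23:20] and the primary part number bits [15:4], so the check reduces to the following sketch (is_cortex_a55_r0 is an illustrative helper, not a cpuinfo function; CPUINFO_ARM_MIDR_VARIANT_MASK is presumed to be 0x00F00000):

    #include <stdbool.h>
    #include <stdint.h>

    static bool is_cortex_a55_r0(uint32_t midr) {
        const uint32_t part    = (midr >> 4) & UINT32_C(0xFFF); /* MIDR bits [15:4] */
        const uint32_t variant = (midr >> 20) & UINT32_C(0xF);  /* MIDR bits [23:20] */
        return part == UINT32_C(0xD05) && variant == 0;         /* Cortex-A55, variant 0 */
    }

For example, MIDR 0x410FD051 (the Exynos 9810 little cores, as used in the updated tests later in this patch) has part 0xD05 and variant 0, so it now decodes as cpuinfo_uarch_cortex_a55r0.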
defined(__MACH__) +++ #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) +++ #else +++ #define CPUINFO_INTERNAL +++ #endif ++ #endif ++ ++ #ifndef CPUINFO_PRIVATE ++- #if defined(__ELF__) ++- #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) ++- #elif defined(__MACH__) ++- #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) ++- #else ++- #define CPUINFO_PRIVATE ++- #endif +++ #if defined(__ELF__) +++ #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) +++ #elif defined(__MACH__) +++ #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) +++ #else +++ #define CPUINFO_PRIVATE +++ #endif ++ #endif ++diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h ++index f12c48d..c6eed0b 100644 ++--- a/src/cpuinfo/internal-api.h +++++ b/src/cpuinfo/internal-api.h ++@@ -21,11 +21,13 @@ enum cpuinfo_cache_level { ++ }; ++ ++ extern CPUINFO_INTERNAL bool cpuinfo_is_initialized; +++ ++ extern CPUINFO_INTERNAL struct cpuinfo_processor* cpuinfo_processors; ++ extern CPUINFO_INTERNAL struct cpuinfo_core* cpuinfo_cores; ++ extern CPUINFO_INTERNAL struct cpuinfo_cluster* cpuinfo_clusters; ++ extern CPUINFO_INTERNAL struct cpuinfo_package* cpuinfo_packages; ++ extern CPUINFO_INTERNAL struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max]; +++ ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_processors_count; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count; ++@@ -33,6 +35,19 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; ++ +++#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs; +++ extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count; +++#else +++ extern CPUINFO_INTERNAL struct cpuinfo_uarch_info cpuinfo_global_uarch; +++#endif +++ +++#ifdef __linux__ +++ extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_max; +++ extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map; +++ extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map; +++#endif +++ ++ CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); ++ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); ++ #ifdef _WIN32 ++diff --git a/src/linux/current.c b/src/linux/current.c ++deleted file mode 100644 ++index 472a4c9..0000000 ++--- a/src/linux/current.c +++++ /dev/null ++@@ -1,41 +0,0 @@ ++-#include ++-#include ++-#include ++-#include ++-#include ++- ++-#include ++- ++-#include ++-#include ++-#include ++-#include ++- ++- ++-const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; ++-const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; ++- ++- ++-const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { ++- if (!cpuinfo_is_initialized) { ++- cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); ++- } ++- const int cpu = sched_getcpu(); ++- if (cpu >= 0) { ++- return cpuinfo_linux_cpu_to_processor_map[cpu]; ++- } else { ++- return &cpuinfo_processors[0]; ++- } ++-} ++- ++-const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { ++- if (!cpuinfo_is_initialized) { ++- cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); ++- } ++- const int cpu = sched_getcpu(); ++- if (cpu >= 0) { ++- return 
cpuinfo_linux_cpu_to_core_map[cpu]; ++- } else { ++- return &cpuinfo_cores[0]; ++- } ++-} ++diff --git a/src/x86/api.h b/src/x86/api.h ++index 5f5e76d..213c2d8 100644 ++--- a/src/x86/api.h +++++ b/src/x86/api.h ++@@ -93,7 +93,6 @@ CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( ++ const struct cpuid_regs basic_info, const struct cpuid_regs extended_info, ++ uint32_t max_base_index, uint32_t max_extended_index, ++ enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch); ++-CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void); ++ ++ CPUINFO_INTERNAL void cpuinfo_x86_detect_topology( ++ uint32_t max_base_index, ++diff --git a/src/x86/cache/init.c b/src/x86/cache/init.c ++index d581016..dd1f1ea 100644 ++--- a/src/x86/cache/init.c +++++ b/src/x86/cache/init.c ++@@ -65,7 +65,7 @@ iterate_descriptors: ++ } ++ } ++ ++- if (vendor != cpuinfo_vendor_amd && max_base_index >= 4) { +++ if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) { ++ struct cpuid_regs leaf4; ++ uint32_t input_ecx = 0; ++ uint32_t package_cores_max = 0; ++diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h ++index 829ec21..9e9e013 100644 ++--- a/src/x86/cpuid.h +++++ b/src/x86/cpuid.h ++@@ -67,18 +67,13 @@ ++ } ++ #endif ++ ++-/* ++- * This instruction may be not supported by Native Client validator, ++- * make sure it doesn't appear in the binary ++- */ ++-#ifndef __native_client__ ++- static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { ++- #ifdef _MSC_VER ++- return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); ++- #else ++- uint32_t lo, hi; ++- __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); ++- return ((uint64_t) hi << 32) | (uint64_t) lo; ++- #endif ++- } ++-#endif +++static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { +++ #ifdef _MSC_VER +++ return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); +++ #else +++ uint32_t lo, hi; +++ __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); +++ return ((uint64_t) hi << 32) | (uint64_t) lo; +++ #endif +++} +++ ++diff --git a/src/x86/init.c b/src/x86/init.c ++index d736578..244359c 100644 ++--- a/src/x86/init.c +++++ b/src/x86/init.c ++@@ -61,12 +61,8 @@ void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) { ++ ++ cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology); ++ ++- #ifdef __native_client__ ++- cpuinfo_isa = cpuinfo_x86_nacl_detect_isa(); ++- #else ++- cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, ++- max_base_index, max_extended_index, vendor, uarch); ++- #endif +++ cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, +++ max_base_index, max_extended_index, vendor, uarch); ++ } ++ if (max_extended_index >= UINT32_C(0x80000004)) { ++ struct cpuid_regs brand_string[3]; ++diff --git a/src/x86/isa.c b/src/x86/isa.c ++index d27dbca..f2e5a28 100644 ++--- a/src/x86/isa.c +++++ b/src/x86/isa.c ++@@ -244,6 +244,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( ++ */ ++ break; ++ case cpuinfo_vendor_amd: +++ case cpuinfo_vendor_hygon: ++ isa.prefetch = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); ++ break; ++ default: ++@@ -265,6 +266,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( ++ */ ++ switch (vendor) { ++ case cpuinfo_vendor_amd: +++ case cpuinfo_vendor_hygon: ++ isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); ++ break; ++ default: ++diff --git 
a/src/x86/linux/init.c b/src/x86/linux/init.c ++index c096336..f565789 100644 ++--- a/src/x86/linux/init.c +++++ b/src/x86/linux/init.c ++@@ -569,9 +569,6 @@ void cpuinfo_x86_linux_init(void) { ++ } ++ ++ /* Commit changes */ ++- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; ++- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; ++- ++ cpuinfo_processors = processors; ++ cpuinfo_cores = cores; ++ cpuinfo_clusters = clusters; ++@@ -591,24 +588,32 @@ void cpuinfo_x86_linux_init(void) { ++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; ++- ++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); ++ +++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { +++ .uarch = x86_processor.uarch, +++ .cpuid = x86_processor.cpuid, +++ .processor_count = processors_count, +++ .core_count = cores_count, +++ }; +++ +++ cpuinfo_linux_cpu_max = x86_linux_processors_count; +++ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; +++ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; +++ ++ __sync_synchronize(); ++ ++ cpuinfo_is_initialized = true; ++ ++- linux_cpu_to_processor_map = NULL; ++- linux_cpu_to_core_map = NULL; ++ processors = NULL; ++ cores = NULL; ++ clusters = NULL; ++ packages = NULL; ++ l1i = l1d = l2 = l3 = l4 = NULL; +++ linux_cpu_to_processor_map = NULL; +++ linux_cpu_to_core_map = NULL; ++ ++ cleanup: ++- free(linux_cpu_to_processor_map); ++- free(linux_cpu_to_core_map); ++ free(x86_linux_processors); ++ free(processors); ++ free(cores); ++@@ -619,4 +624,6 @@ cleanup: ++ free(l2); ++ free(l3); ++ free(l4); +++ free(linux_cpu_to_processor_map); +++ free(linux_cpu_to_core_map); ++ } ++diff --git a/src/x86/mach/init.c b/src/x86/mach/init.c ++index ae2be33..b44d3ad 100644 ++--- a/src/x86/mach/init.c +++++ b/src/x86/mach/init.c ++@@ -305,30 +305,34 @@ void cpuinfo_x86_mach_init(void) { ++ } ++ ++ /* Commit changes */ +++ cpuinfo_processors = processors; +++ cpuinfo_cores = cores; +++ cpuinfo_clusters = clusters; +++ cpuinfo_packages = packages; ++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; ++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; ++ cpuinfo_cache[cpuinfo_cache_level_2] = l2; ++ cpuinfo_cache[cpuinfo_cache_level_3] = l3; ++ cpuinfo_cache[cpuinfo_cache_level_4] = l4; ++ ++- cpuinfo_processors = processors; ++- cpuinfo_cores = cores; ++- cpuinfo_clusters = clusters; ++- cpuinfo_packages = packages; ++- +++ cpuinfo_processors_count = mach_topology.threads; +++ cpuinfo_cores_count = mach_topology.cores; +++ cpuinfo_clusters_count = mach_topology.packages; +++ cpuinfo_packages_count = mach_topology.packages; ++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; ++- ++- cpuinfo_processors_count = mach_topology.threads; ++- cpuinfo_cores_count = mach_topology.cores; ++- cpuinfo_clusters_count = mach_topology.packages; ++- cpuinfo_packages_count = mach_topology.packages; ++- ++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); ++ +++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { +++ .uarch = x86_processor.uarch, +++ .cpuid = x86_processor.cpuid, +++ .processor_count = mach_topology.threads, +++ .core_count = mach_topology.cores, +++ }; +++ ++ 
__sync_synchronize(); ++ ++ cpuinfo_is_initialized = true; ++diff --git a/src/x86/nacl/isa.c b/src/x86/nacl/isa.c ++deleted file mode 100644 ++index 662be33..0000000 ++--- a/src/x86/nacl/isa.c +++++ /dev/null ++@@ -1,306 +0,0 @@ ++-#include ++-#include ++-#include ++- ++-#include ++- ++-#define NACL_CODE_BUNDLE_SIZE 32 ++-#include ++-#include ++- ++-static const uint8_t cmpxchg16b_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* MOV edi, edi */ ++- 0x89, 0xFF, ++- /* CMPXCHG16B [r15 + rdi * 1] */ ++- 0x49, 0x0F, 0xC7, 0x0C, 0x3F, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t lzcnt_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* LZCNT eax, ecx */ ++- 0xF3, 0x0F, 0xBD, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t popcnt_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* POPCNT eax, ecx */ ++- 0xF3, 0x0F, 0xB8, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t movbe_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* MOV ecx, ecx */ ++- 0x89, 0xC9, ++- /* MOVBE eax, [r15 + rcx * 1] */ ++- 0x41, 0x0F, 0x38, 0xF0, 0x04, 0x0F, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t bmi_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* ANDN eax, ecx, edx */ ++- 0xC4, 0xE2, 0x70, 0xF2, 0xC2, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t tbm_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* BLCS eax, ecx */ ++- 0x8F, 0xE9, 0x78, 0x01, 0xD9, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t three_d_now_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PFADD mm0, mm1 */ ++- 0x0F, 0x0F, 0xC1, 0x9E, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t three_d_now_plus_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PFNACC mm0, mm1 */ ++- 0x0F, 0x0F, 0xC1, 0x8A, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse3_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* HADDPS xmm0, xmm1 */ ++- 0xF2, 0x0F, 0x7C, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t ssse3_bundle[NACL_CODE_BUNDLE_SIZE] 
= { ++- /* PSHUFB xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0x00, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse4_1_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PMULLD xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0x40, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse4_2_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PCMPGTQ xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0x37, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse4a_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* EXTRQ xmm0, xmm1 */ ++- 0x66, 0x0F, 0x79, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t aes_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* AESENC xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0xDC, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t pclmulqdq_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PCLMULQDQ xmm0, xmm1, 0 */ ++- 0x66, 0x0F, 0x3A, 0x44, 0xC1, 0x00, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t avx_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VPERMILPS ymm0, ymm1, 0xAA */ ++- 0xC4, 0xE3, 0x7D, 0x04, 0xC1, 0xAA, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t fma3_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VFMADDSUB213PS ymm0, ymm1, ymm2 */ ++- 0xC4, 0xE2, 0x75, 0xA6, 0xC2, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t fma4_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VFMADDPS ymm0, ymm1, ymm2, ymm3 */ ++- 0xC4, 0xE3, 0xF5, 0x68, 0xC3, 0x20, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t xop_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VPHADDBQ xmm0, xmm1 */ ++- 0x8F, 0xE9, 0x78, 0xC3, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t f16c_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VCVTPH2PS ymm0, xmm1 */ ++- 0xC4, 0xE2, 0x7D, 0x13, 0xC1, ++- /* Fill remainder with HLTs 
*/
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++-};
++-
++-static const uint8_t avx2_bundle[NACL_CODE_BUNDLE_SIZE] = {
++- /* VPERMPS ymm0, ymm1, ymm2 */
++- 0xC4, 0xE2, 0x75, 0x16, 0xC2,
++- /* Fill remainder with HLTs */
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++-};
++-
++-
++-struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void) {
++- /*
++- * Under Native Client sandbox we can't just ask the CPU:
++- * - First, some instructions (XGETBV) necessary to query AVX support are not white-listed in the validator.
++- * - Secondly, even if CPU supports some instruction, but validator doesn't know about it (e.g. due a bug in the
++- * ISA detection in the validator), all instructions from the "unsupported" ISA extensions will be replaced by
++- * HLTs when the module is loaded.
++- * Thus, instead of quering the CPU about supported ISA extensions, we query the validator: we pass bundles with
++- * instructions from ISA extensions to dynamic code generation APIs, and test if they are accepted.
++- */
++-
++- struct cpuinfo_x86_isa isa = { 0 };
++-
++- struct nacl_irt_code_data_alloc nacl_irt_code_data_alloc = { 0 };
++- struct nacl_irt_dyncode nacl_irt_dyncode = { 0 };
++- if (sizeof(nacl_irt_code_data_alloc) != nacl_interface_query(NACL_IRT_CODE_DATA_ALLOC_v0_1,
++- &nacl_irt_code_data_alloc,
++- sizeof(nacl_irt_code_data_alloc)))
++- {
++- goto finish;
++- }
++-
++- if (sizeof(nacl_irt_dyncode) != nacl_interface_query(NACL_IRT_DYNCODE_v0_1,
++- &nacl_irt_dyncode,
++- sizeof(nacl_irt_dyncode)))
++- {
++- goto finish;
++- }
++-
++- const size_t allocation_size = 65536;
++- uintptr_t code_segment = 0;
++- if (0 != nacl_irt_code_data_alloc.allocate_code_data(0, allocation_size, 0, 0, &code_segment))
++- {
++- goto finish;
++- }
++-
++- isa.cmpxchg16b = !nacl_irt_dyncode.dyncode_create((void*) code_segment, cmpxchg16b_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.lzcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, lzcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.popcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, popcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.movbe = !nacl_irt_dyncode.dyncode_create((void*) code_segment, movbe_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.bmi = !nacl_irt_dyncode.dyncode_create((void*) code_segment, bmi_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.tbm = !nacl_irt_dyncode.dyncode_create((void*) code_segment, tbm_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.three_d_now = !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.three_d_now_plus =
++- !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_plus_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.ssse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, ssse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse4_1 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_1_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse4_2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_2_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse4a = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4a_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.aes = !nacl_irt_dyncode.dyncode_create((void*) code_segment, aes_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.pclmulqdq = !nacl_irt_dyncode.dyncode_create((void*) code_segment, pclmulqdq_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.avx = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.fma3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma3_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.fma4 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma4_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.xop = !nacl_irt_dyncode.dyncode_create((void*) code_segment, xop_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.f16c = !nacl_irt_dyncode.dyncode_create((void*) code_segment, f16c_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.avx2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx2_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++-
++-finish:
++- return isa;
++-}
++diff --git a/src/x86/name.c b/src/x86/name.c
++index 708be1d..e0d5a5b 100644
++--- a/src/x86/name.c
+++++ b/src/x86/name.c
++@@ -671,6 +671,7 @@ static const char* vendor_string_map[] = {
++ [cpuinfo_vendor_intel] = "Intel",
++ [cpuinfo_vendor_amd] = "AMD",
++ [cpuinfo_vendor_via] = "VIA",
+++ [cpuinfo_vendor_hygon] = "Hygon",
++ [cpuinfo_vendor_rdc] = "RDC",
++ [cpuinfo_vendor_dmp] = "DM&P",
++ [cpuinfo_vendor_transmeta] = "Transmeta",
++diff --git a/src/x86/uarch.c b/src/x86/uarch.c
++index ba72d8a..ecaa762 100644
++--- a/src/x86/uarch.c
+++++ b/src/x86/uarch.c
++@@ -79,6 +79,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ case 0x5E: // Sky Lake Client DT/H/S
++ case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
++ case 0x9E: // Kaby/Coffee Lake DT/H/S
+++ case 0xA5: // Comet Lake H/S
+++ case 0xA6: // Comet Lake U/Y
++ return cpuinfo_uarch_sky_lake;
++ case 0x66: // Cannon Lake (Core i3-8121U)
++ return cpuinfo_uarch_palm_cove;
++@@ -94,7 +96,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ return cpuinfo_uarch_bonnell;
++ case 0x27: // Medfield
++ case 0x35: // Cloverview
++- case 0x36: // Cedarview, Centerton
+++ case 0x36: // Cedarview, Centerton
++ return cpuinfo_uarch_saltwell;
++ case 0x37: // Bay Trail
++ case 0x4A: // Merrifield
++@@ -110,6 +112,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ return cpuinfo_uarch_goldmont;
++ case 0x7A: // Gemini Lake
++ return cpuinfo_uarch_goldmont_plus;
+++
++ /* Knights-series cores */
++ case 0x57:
++ return cpuinfo_uarch_knights_landing;
++@@ -173,7 +176,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ case 0x38: // Godavari
++ case 0x30: // Kaveri
++ return cpuinfo_uarch_steamroller;
++- case 0x60: // Carrizo
+++ case 0x60: // Carrizo
++ case 0x65: // Bristol Ridge
++ case 0x70: // Stoney Ridge
++ return cpuinfo_uarch_excavator;
++@@ -201,14 +204,22 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ switch (model_info->model) {
++ case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
++ case 0x08: // 12 nm Pinnacle Ridge
++- case 0x11: // 14 nm Raven Ridge
+++ case 0x11: // 14 nm Raven Ridge, Great Horned Owl
++ case 0x18: // 12 nm Picasso
++ return cpuinfo_uarch_zen;
+++ case 0x31: // Rome, Castle Peak
+++ case 0x60: // Renoir
++ case 0x71: // Matisse
++ return cpuinfo_uarch_zen2;
++ }
++ }
++ break;
+++ case cpuinfo_vendor_hygon:
+++ switch (model_info->family) {
+++ case 0x00:
+++ return cpuinfo_uarch_dhyana;
+++ }
+++ break;
++ default:
++ break;
++ }
++diff --git a/src/x86/vendor.c b/src/x86/vendor.c
++index 3f3c753..2bba90d 100644
++--- a/src/x86/vendor.c
+++++ b/src/x86/vendor.c
++@@ -26,6 +26,11 @@
++ #define auls UINT32_C(0x736C7561)
++ #define VIA UINT32_C(0x20414956)
++
+++/* Hygon vendor string: "HygonGenuine" */
+++#define Hygo UINT32_C(0x6F677948)
+++#define nGen UINT32_C(0x6E65476E)
+++#define uine UINT32_C(0x656E6975)
+++
++ /* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */
++ #define ineT UINT32_C(0x54656E69)
++ #define Mx86 UINT32_C(0x3638784D)
++@@ -105,6 +110,12 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32
++ return cpuinfo_vendor_via;
++ }
++ break;
+++ case Hygo:
+++ if (edx == nGen && ecx == uine) {
+++ /* "HygonGenuine" */
+++ return cpuinfo_vendor_hygon;
+++ }
+++ break;
++ #if CPUINFO_ARCH_X86
++ case AMDi:
++ if (edx == sbet && ecx == ter) {
++diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c
++index 7a2090e..2c7e3cd 100644
++--- a/src/x86/windows/init.c
+++++ b/src/x86/windows/init.c
++@@ -417,9 +417,6 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
++ for (uint32_t i = 0; i < processors_count; i++) {
++ const uint32_t apic_id = processors[i].apic_id;
++
++- //linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index;
++- //linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index;
++-
++ if (x86_processor.cache.l1i.size != 0) {
++ const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
++ processors[i].cache.l1i = &l1i[l1i_index];
++@@ -549,30 +546,34 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
++
++
++ /* Commit changes */
+++ cpuinfo_processors = processors;
+++ cpuinfo_cores = cores;
+++ cpuinfo_clusters = clusters;
+++ cpuinfo_packages = packages;
++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
++ cpuinfo_cache[cpuinfo_cache_level_2] = l2;
++ cpuinfo_cache[cpuinfo_cache_level_3] = l3;
++ cpuinfo_cache[cpuinfo_cache_level_4] = l4;
++
++- cpuinfo_processors = processors;
++- cpuinfo_cores = cores;
++- cpuinfo_clusters = clusters;
++- cpuinfo_packages = packages;
++-
+++ cpuinfo_processors_count = processors_count;
+++ cpuinfo_cores_count = cores_count;
+++ cpuinfo_clusters_count = packages_count;
+++ cpuinfo_packages_count = packages_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
++-
++- cpuinfo_processors_count = processors_count;
++- cpuinfo_cores_count = cores_count;
++- cpuinfo_clusters_count = packages_count;
++- cpuinfo_packages_count = packages_count;
++-
++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
+++ .uarch = x86_processor.uarch,
+++ .cpuid = x86_processor.cpuid,
+++ .processor_count = processors_count,
+++ .core_count = cores_count,
+++ };
+++
++ MemoryBarrier();
++
++ cpuinfo_is_initialized = true;
++diff --git a/test/arm-cache.cc b/test/arm-cache.cc
++index 8373f7c..7d2e4a4 100644
++--- a/test/arm-cache.cc
+++++ b/test/arm-cache.cc
++@@ -766,7 +766,7 @@ TEST(QUALCOMM, snapdragon_845) {
++ struct cpuinfo_cache little_l2 = { 0 };
++ struct cpuinfo_cache little_l3 = { 0 };
++ cpuinfo_arm_decode_cache(
++- cpuinfo_uarch_cortex_a55, 4, UINT32_C(0x518F803C),
+++ cpuinfo_uarch_cortex_a55r0, 4, UINT32_C(0x518F803C),
++ &chipset, 1, 8,
++ &little_l1i, &little_l1d, &little_l2, &little_l3);
++
++@@ -910,7 +910,7 @@ TEST(SAMSUNG, exynos_9810) {
++ struct cpuinfo_cache little_l2 = { 0 };
++ struct cpuinfo_cache little_l3 = { 0 };
++ cpuinfo_arm_decode_cache(
++- cpuinfo_uarch_cortex_a55, 4, UINT32_C(0x410FD051),
+++ cpuinfo_uarch_cortex_a55r0, 4, UINT32_C(0x410FD051),
++ &chipset, 1, 8,
++ &little_l1i, &little_l1d, &little_l2, &little_l3);
++
++diff --git a/test/get-current.cc b/test/get-current.cc
++index 4a80cab..f410b12 100644
++--- a/test/get-current.cc
+++++ b/test/get-current.cc
++@@ -3,34 +3,36 @@
++ #include
++
++
++-TEST(CURRENT_PROCESSOR, not_null) {
++- ASSERT_TRUE(cpuinfo_initialize());
++-
++- ASSERT_TRUE(cpuinfo_get_current_processor());
++-}
++-
++ TEST(CURRENT_PROCESSOR, within_bounds) {
++ ASSERT_TRUE(cpuinfo_initialize());
++
++ const struct cpuinfo_processor* current_processor = cpuinfo_get_current_processor();
+++ if (current_processor == nullptr) {
+++ GTEST_SKIP();
+++ }
+++
++ const struct cpuinfo_processor* processors_begin = cpuinfo_get_processors();
++ const struct cpuinfo_processor* processors_end = processors_begin + cpuinfo_get_processors_count();
++ ASSERT_GE(current_processor, processors_begin);
++ ASSERT_LT(current_processor, processors_end);
++ }
++
++-TEST(CURRENT_CORE, not_null) {
++- ASSERT_TRUE(cpuinfo_initialize());
++-
++- ASSERT_TRUE(cpuinfo_get_current_core());
++-}
++-
++ TEST(CURRENT_CORE, within_bounds) {
++ ASSERT_TRUE(cpuinfo_initialize());
++
++ const struct cpuinfo_core* current_core = cpuinfo_get_current_core();
+++ if (current_core == nullptr) {
+++ GTEST_SKIP();
+++ }
+++
++ const struct cpuinfo_core* cores_begin = cpuinfo_get_cores();
++ const struct cpuinfo_core* cores_end = cores_begin + cpuinfo_get_cores_count();
++ ASSERT_GE(current_core, cores_begin);
++ ASSERT_LT(current_core, cores_end);
++ }
+++
+++TEST(CURRENT_UARCH_INDEX, within_bounds) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++
+++ ASSERT_LT(cpuinfo_get_current_uarch_index(), cpuinfo_get_uarchs_count());
+++}
++diff --git a/test/init.cc b/test/init.cc
++index 941cb97..718eb96 100644
++--- a/test/init.cc
+++++ b/test/init.cc
++@@ -678,6 +678,72 @@ TEST(PACKAGE, consistent_cluster) {
++ cpuinfo_deinitialize();
++ }
++
+++TEST(UARCHS_COUNT, within_bounds) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ EXPECT_NE(0, cpuinfo_get_uarchs_count());
+++ EXPECT_LE(cpuinfo_get_packages_count(), cpuinfo_get_cores_count());
+++ EXPECT_LE(cpuinfo_get_packages_count(), cpuinfo_get_processors_count());
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCHS, non_null) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ EXPECT_TRUE(cpuinfo_get_uarchs());
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, non_null) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ EXPECT_TRUE(cpuinfo_get_uarch(i));
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, non_zero_processors) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_NE(0, uarch->processor_count);
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, valid_processors) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_LE(uarch->processor_count, cpuinfo_get_processors_count());
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, non_zero_cores) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_NE(0, uarch->core_count);
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, valid_cores) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_LE(uarch->core_count, cpuinfo_get_cores_count());
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
++ TEST(L1I_CACHES_COUNT, within_bounds) {
++ ASSERT_TRUE(cpuinfo_initialize());
++ EXPECT_NE(0, cpuinfo_get_l1i_caches_count());
++diff --git a/test/mock/galaxy-s9-global.cc b/test/mock/galaxy-s9-global.cc
++index 7a67129..6c72513 100644
++--- a/test/mock/galaxy-s9-global.cc
+++++ b/test/mock/galaxy-s9-global.cc
++@@ -207,7 +207,7 @@ TEST(CORES, uarch) {
++ case 5:
++ case 6:
++ case 7:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_core(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_core(i)->uarch);
++ break;
++ }
++ }
++@@ -329,7 +329,7 @@ TEST(CLUSTERS, uarch) {
++ ASSERT_EQ(cpuinfo_uarch_exynos_m3, cpuinfo_get_cluster(i)->uarch);
++ break;
++ case 1:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_cluster(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_cluster(i)->uarch);
++ break;
++ }
++ }
++diff --git a/test/mock/galaxy-s9-us.cc b/test/mock/galaxy-s9-us.cc
++index 6df7f3c..ceea969 100644
++--- a/test/mock/galaxy-s9-us.cc
+++++ b/test/mock/galaxy-s9-us.cc
++@@ -168,7 +168,7 @@ TEST(CORES, uarch) {
++ case 5:
++ case 6:
++ case 7:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_core(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_core(i)->uarch);
++ break;
++ }
++ }
++@@ -283,7 +283,7 @@ TEST(CLUSTERS, uarch) {
++ ASSERT_EQ(cpuinfo_uarch_cortex_a75, cpuinfo_get_cluster(i)->uarch);
++ break;
++ case 1:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_cluster(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_cluster(i)->uarch);
++ break;
++ }
++ }
++@@ -817,4 +817,4 @@ int main(int argc, char* argv[]) {
++ cpuinfo_initialize();
++ ::testing::InitGoogleTest(&argc, argv);
++ return RUN_ALL_TESTS();
++-}
++\ No newline at end of file
+++}
++diff --git a/tools/cpu-info.c b/tools/cpu-info.c
++index 7fa5187..7963c00 100644
++--- a/tools/cpu-info.c
+++++ b/tools/cpu-info.c
++@@ -14,6 +14,8 @@ static const char* vendor_to_string(enum cpuinfo_vendor vendor) {
++ return "Intel";
++ case cpuinfo_vendor_amd:
++ return "AMD";
+++ case cpuinfo_vendor_hygon:
+++ return "Hygon";
++ case cpuinfo_vendor_arm:
++ return "ARM";
++ case cpuinfo_vendor_qualcomm:
++@@ -161,6 +163,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
++ return "Cortex-A35";
++ case cpuinfo_uarch_cortex_a53:
++ return "Cortex-A53";
+++ case cpuinfo_uarch_cortex_a55r0:
+++ return "Cortex-A55r0";
++ case cpuinfo_uarch_cortex_a55:
++ return "Cortex-A55";
++ case cpuinfo_uarch_cortex_a57:
++@@ -223,6 +227,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
++ return "Vortex";
++ case cpuinfo_uarch_tempest:
++ return "Tempest";
+++ case cpuinfo_uarch_lightning:
+++ return "Lightning";
+++ case cpuinfo_uarch_thunder:
+++ return "Thunder";
++ case cpuinfo_uarch_thunderx:
++ return "ThunderX";
++ case cpuinfo_uarch_thunderx2:
++@@ -253,6 +261,17 @@ int main(int argc, char** argv) {
++ printf("\t%"PRIu32": %s\n", i, cpuinfo_get_package(i)->name);
++ }
++ #endif
+++ printf("Microarchitectures:\n");
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
+++ const char* uarch_string = uarch_to_string(uarch_info->uarch);
+++ if (uarch_string == NULL) {
+++ printf("\t%"PRIu32"x Unknown (0x%08"PRIx32"\n",
+++ uarch_info->core_count, (uint32_t) uarch_info->uarch);
+++ } else {
+++ printf("\t%"PRIu32"x %s\n", uarch_info->core_count, uarch_string);
+++ }
+++ }
++ printf("Cores:\n");
++ for (uint32_t i = 0; i < cpuinfo_get_cores_count(); i++) {
++ const struct cpuinfo_core* core = cpuinfo_get_core(i);
++@@ -277,17 +296,17 @@ int main(int argc, char** argv) {
++ }
++ }
++ printf("Logical processors");
++- #if defined(__linux__)
++- printf(" (System ID)");
++- #endif
++- printf(":\n");
+++ #if defined(__linux__)
+++ printf(" (System ID)");
+++ #endif
+++ printf(":\n");
++ for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
++ const struct cpuinfo_processor* processor = cpuinfo_get_processor(i);
++- printf("\t%"PRIu32"", i);
+++ printf("\t%"PRIu32"", i);
++
++- #if defined(__linux__)
++- printf(" (%"PRId32")", processor->linux_id);
++- #endif
+++ #if defined(__linux__)
+++ printf(" (%"PRId32")", processor->linux_id);
+++ #endif
++
++ #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
++ printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id);
+diff --git a/third_party/cpuinfo/workspace.bzl b/third_party/cpuinfo/workspace.bzl
+index c2eeede8a0..77aecf5a9a 100644
+--- a/third_party/cpuinfo/workspace.bzl
++++ b/third_party/cpuinfo/workspace.bzl
+@@ -2,14 +2,20 @@
+
+ load("//third_party:repo.bzl", "third_party_http_archive")
+
++# Sanitize a dependency so that it works correctly from code that includes
++# TensorFlow as a submodule.
++def clean_dep(dep):
++ return str(Label(dep))
++
+ def repo():
+ third_party_http_archive(
+ name = "cpuinfo",
+- strip_prefix = "cpuinfo-e39a5790059b6b8274ed91f7b5b5b13641dff267",
+- sha256 = "e5caa8b7c58f1623eed88f4d5147e3753ff19cde821526bc9aa551b004f751fe",
++ strip_prefix = "cpuinfo-d6c0f915ee737f961915c9d17f1679b6777af207",
++ sha256 = "146fc61c3cf63d7d88db963876929a4d373f621fb65568b895efa0857f467770",
+ urls = [
+- "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pytorch/cpuinfo/archive/e39a5790059b6b8274ed91f7b5b5b13641dff267.tar.gz",
+- "https://github.com/pytorch/cpuinfo/archive/e39a5790059b6b8274ed91f7b5b5b13641dff267.tar.gz",
++ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pytorch/cpuinfo/archive/d6c0f915ee737f961915c9d17f1679b6777af207.tar.gz",
++ "https://github.com/pytorch/cpuinfo/archive/d6c0f915ee737f961915c9d17f1679b6777af207.tar.gz",
+ ],
+ build_file = "//third_party/cpuinfo:BUILD.bazel",
++ patch_file = clean_dep("//third_party/cpuinfo:cpuinfo.patch"),
+ )
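For reference, the clean_dep() helper added to workspace.bzl above follows the usual Bazel idiom: wrapping a workspace-relative string in Label() resolves it against the repository that owns the .bzl file (here TensorFlow), rather than against whichever workspace happens to load it, which is what keeps patch_file pointing at TensorFlow's own copy of cpuinfo.patch when TensorFlow is consumed as an external repository. A minimal Starlark sketch of the same pattern is shown below; the repository name, file paths, and example_repo() macro are illustrative assumptions, not part of this patch.

    # Sketch of the clean_dep()/Label() pattern (names below are hypothetical).
    def clean_dep(dep):
        # str(Label(...)) canonicalizes the label relative to the repository
        # that defines this .bzl file, e.g. "//third_party/foo:foo.patch"
        # becomes "@my_project//third_party/foo:foo.patch" instead of being
        # resolved against the calling workspace.
        return str(Label(dep))

    def example_repo():
        # Hypothetical repository rule invocation showing clean_dep() in use.
        native.new_local_repository(
            name = "example_dep",
            path = "third_party/example_dep",
            build_file = clean_dep("//third_party/example_dep:BUILD.bazel"),
        )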