From 1722d4b8a25ad7c919576f9b1bab4ffa7a9299bc Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Fri, 20 Mar 2020 13:09:58 -0700 Subject: [PATCH] Project import generated by Copybara. GitOrigin-RevId: 43cd697ec87dcc5cab5051f27960bb77a057399d --- WORKSPACE | 6 +- mediapipe/calculators/core/BUILD | 13 +- .../begin_end_loop_calculator_graph_test.cc | 389 ++- .../calculators/core/begin_loop_calculator.cc | 6 + .../calculators/core/begin_loop_calculator.h | 20 +- .../core/clip_vector_size_calculator.cc | 5 + .../calculators/core/end_loop_calculator.cc | 5 + .../core/previous_loopback_calculator.cc | 148 +- .../core/previous_loopback_calculator_test.cc | 551 ++- .../core/split_vector_calculator.cc | 10 + mediapipe/calculators/image/BUILD | 3 + .../calculators/image/recolor_calculator.cc | 79 +- mediapipe/calculators/tensorflow/BUILD | 1 + mediapipe/calculators/tflite/BUILD | 41 + .../tflite/tflite_inference_calculator.cc | 131 +- .../tflite/tflite_inference_calculator.proto | 6 + .../tflite_inference_calculator_test.cc | 85 +- .../tflite/tflite_model_calculator.cc | 86 + .../tflite/tflite_model_calculator_test.cc | 88 + ...te_tensors_to_classification_calculator.cc | 39 +- ...tensors_to_classification_calculator.proto | 6 + mediapipe/calculators/util/BUILD | 14 + .../collection_has_min_size_calculator.cc | 8 + ...collection_has_min_size_calculator_test.cc | 156 + .../util/filter_collection_calculator.cc | 5 + .../landmarks_to_render_data_calculator.cc | 233 +- .../util/packet_latency_calculator_test.cc | 1 + mediapipe/calculators/video/BUILD | 26 +- .../java/com/google/mediapipe/apps/METADATA | 7 + .../polynomial_regression_path_solver.cc | 11 +- .../polynomial_regression_path_solver.h | 4 +- .../examples/desktop/hair_segmentation/BUILD | 14 + mediapipe/framework/BUILD | 1 + mediapipe/framework/calculator_contract.h | 27 +- mediapipe/framework/calculator_graph.h | 2 +- .../framework/calculator_graph_bounds_test.cc | 275 +- mediapipe/framework/calculator_node.cc | 9 + mediapipe/framework/deps/registration.cc | 4 + mediapipe/framework/formats/annotation/BUILD | 7 + .../formats/annotation/rasterization.proto | 3 + .../framework/formats/location_data.proto | 3 + mediapipe/framework/input_stream_handler.cc | 90 + mediapipe/framework/input_stream_handler.h | 62 +- .../framework/legacy_calculator_support.h | 3 + mediapipe/framework/output_stream_handler.h | 7 +- mediapipe/framework/output_stream_poller.h | 3 + .../testdata/profile_latency_test.pbtxt | 97 + .../testdata/profile_process_test.pbtxt | 122 + mediapipe/framework/scheduler_queue.cc | 4 + .../default_input_stream_handler.cc | 53 +- .../default_input_stream_handler.h | 3 + .../immediate_input_stream_handler.cc | 79 +- .../sync_set_input_stream_handler.cc | 70 +- mediapipe/framework/timestamp.cc | 1 - mediapipe/framework/timestamp.h | 6 + mediapipe/framework/validated_graph_config.cc | 21 +- mediapipe/gpu/gl_base.h | 5 +- mediapipe/gpu/gl_calculator_helper_impl.h | 4 + .../gpu/gl_calculator_helper_impl_common.cc | 17 +- mediapipe/gpu/gl_context.cc | 72 +- mediapipe/gpu/gl_context.h | 10 + mediapipe/gpu/gl_simple_shaders.cc | 26 +- mediapipe/graphs/hair_segmentation/BUILD | 21 +- .../hair_segmentation_desktop_live.pbtxt | 152 + .../com/google/mediapipe/mediapipe_aar.bzl | 30 + mediapipe/objc/BUILD | 49 +- mediapipe/util/annotation_renderer.cc | 2 +- mediapipe/util/sequence/BUILD | 2 - setup_opencv.sh | 6 +- ...6366bcadab23a25c773b3ed405bac8ded4d0d.diff | 112 + ...31e324c8de6b52f752a39cb161d99d853ca99.diff | 3083 +++++++++++++++++ 71 files 
changed, 6114 insertions(+), 626 deletions(-) create mode 100644 mediapipe/calculators/tflite/tflite_model_calculator.cc create mode 100644 mediapipe/calculators/tflite/tflite_model_calculator_test.cc create mode 100644 mediapipe/calculators/util/collection_has_min_size_calculator_test.cc create mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA create mode 100644 mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt create mode 100644 mediapipe/framework/profiler/testdata/profile_process_test.pbtxt create mode 100644 mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt create mode 100644 third_party/org_tensorflow_9696366bcadab23a25c773b3ed405bac8ded4d0d.diff create mode 100644 third_party/org_tensorflow_cfc31e324c8de6b52f752a39cb161d99d853ca99.diff diff --git a/WORKSPACE b/WORKSPACE index 411f21b94..eb2b07c4d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -129,7 +129,11 @@ http_archive( ], # A compatibility patch patches = [ - "@//third_party:org_tensorflow_528e22eae8bf3206189a066032c66e9e5c9b4a61.diff" + "@//third_party:org_tensorflow_528e22eae8bf3206189a066032c66e9e5c9b4a61.diff", + # Updates for XNNPACK: https://github.com/tensorflow/tensorflow/commit/cfc31e324c8de6b52f752a39cb161d99d853ca99 + "@//third_party:org_tensorflow_cfc31e324c8de6b52f752a39cb161d99d853ca99.diff", + # CpuInfo's build rule fixes. + "@//third_party:org_tensorflow_9696366bcadab23a25c773b3ed405bac8ded4d0d.diff", ], patch_args = [ "-p1", diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index c3074c2c8..65c426489 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -228,6 +228,7 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework:collection_item_id", "//mediapipe/framework:packet", + "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:integral_types", @@ -249,6 +250,7 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework:collection_item_id", "//mediapipe/framework:packet", + "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:integral_types", @@ -265,10 +267,11 @@ cc_test( deps = [ ":begin_loop_calculator", ":end_loop_calculator", - "//mediapipe/calculators/core:packet_cloner_calculator", + ":gate_calculator", "//mediapipe/framework:calculator_context", "//mediapipe/framework:calculator_contract", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", @@ -334,6 +337,7 @@ cc_library( deps = [ ":clip_vector_size_calculator_cc_proto", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -693,15 +697,17 @@ cc_test( name = "previous_loopback_calculator_test", srcs = ["previous_loopback_calculator_test.cc"], deps = [ + ":gate_calculator", + ":make_pair_calculator", + ":pass_through_calculator", ":previous_loopback_calculator", - "//mediapipe/calculators/core:make_pair_calculator", - "//mediapipe/calculators/core:pass_through_calculator", 
"//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework:timestamp", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", "//mediapipe/framework/stream_handler:immediate_input_stream_handler", "//mediapipe/framework/tool:sink", "@com_google_absl//absl/time", @@ -769,6 +775,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":split_vector_calculator_cc_proto", + "//mediapipe/framework/formats:detection_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", diff --git a/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc b/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc index 03e1a4439..716151b69 100644 --- a/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc +++ b/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc @@ -20,6 +20,8 @@ #include "mediapipe/calculators/core/end_loop_calculator.h" #include "mediapipe/framework/calculator_contract.h" #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/integral_types.h" #include "mediapipe/framework/port/parse_text_proto.h" @@ -28,6 +30,13 @@ namespace mediapipe { namespace { +MATCHER_P2(PacketOfIntsEq, timestamp, value, "") { + Timestamp actual_timestamp = arg.Timestamp(); + const auto& actual_value = arg.template Get>(); + return testing::Value(actual_timestamp, testing::Eq(timestamp)) && + testing::Value(actual_value, testing::ElementsAreArray(value)); +} + typedef BeginLoopCalculator> BeginLoopIntegerCalculator; REGISTER_CALCULATOR(BeginLoopIntegerCalculator); @@ -59,8 +68,8 @@ REGISTER_CALCULATOR(EndLoopIntegersCalculator); class BeginEndLoopCalculatorGraphTest : public ::testing::Test { protected: - BeginEndLoopCalculatorGraphTest() { - graph_config_ = ParseTextProtoOrDie( + void SetUp() override { + auto graph_config = ParseTextProtoOrDie( R"( num_threads: 4 input_stream: "ints" @@ -82,94 +91,222 @@ class BeginEndLoopCalculatorGraphTest : public ::testing::Test { output_stream: "ITERABLE:ints_plus_one" } )"); - tool::AddVectorSink("ints_plus_one", &graph_config_, &output_packets_); + tool::AddVectorSink("ints_plus_one", &graph_config, &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config)); + MP_ASSERT_OK(graph_.StartRun({})); } - CalculatorGraphConfig graph_config_; + void SendPacketOfInts(Timestamp timestamp, std::vector ints) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "ints", MakePacket>(std::move(ints)).At(timestamp))); + } + + CalculatorGraph graph_; std::vector output_packets_; }; +TEST_F(BeginEndLoopCalculatorGraphTest, InputStreamForIterableIsEmpty) { + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + // EndLoopCalc will forward the timestamp bound because there are no packets + // to process. 
+ ASSERT_EQ(0, output_packets_.size()); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + TEST_F(BeginEndLoopCalculatorGraphTest, SingleEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); - Timestamp input_timestamp = Timestamp(0); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendPacketOfInts(Timestamp(0), {}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); // EndLoopCalc will forward the timestamp bound because there are no elements // in collection to output. - ASSERT_EQ(0, output_packets_.size()); + EXPECT_TRUE(output_packets_.empty()); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphTest, SingleNonEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); - input_vector->emplace_back(0); - input_vector->emplace_back(1); - input_vector->emplace_back(2); Timestamp input_timestamp = Timestamp(0); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendPacketOfInts(input_timestamp, {0, 1, 2}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); - ASSERT_EQ(1, output_packets_.size()); - EXPECT_EQ(input_timestamp, output_packets_[0].Timestamp()); - std::vector expected_output_vector = {1, 2, 3}; - EXPECT_EQ(expected_output_vector, output_packets_[0].Get>()); + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp, std::vector{1, 2, 3}))); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphTest, MultipleVectors) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - - auto input_vector0 = absl::make_unique>(); - input_vector0->emplace_back(0); - input_vector0->emplace_back(1); Timestamp input_timestamp0 = Timestamp(0); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector0.release()).At(input_timestamp0))); + SendPacketOfInts(input_timestamp0, {0, 1}); - auto input_vector1 = absl::make_unique>(); Timestamp input_timestamp1 = Timestamp(1); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector1.release()).At(input_timestamp1))); + SendPacketOfInts(input_timestamp1, {}); - auto input_vector2 = absl::make_unique>(); - input_vector2->emplace_back(2); - input_vector2->emplace_back(3); Timestamp input_timestamp2 = Timestamp(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector2.release()).At(input_timestamp2))); + SendPacketOfInts(input_timestamp2, {2, 3}); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); - - ASSERT_EQ(2, output_packets_.size()); - - EXPECT_EQ(input_timestamp0, output_packets_[0].Timestamp()); - std::vector expected_output_vector0 = {1, 2}; - EXPECT_EQ(expected_output_vector0, - output_packets_[0].Get>()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); // At input_timestamp1, EndLoopCalc will 
forward timestamp bound as there are // no elements in vector to process. + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp0, std::vector{1, 2}), + PacketOfIntsEq(input_timestamp2, std::vector{3, 4}))); +} - EXPECT_EQ(input_timestamp2, output_packets_[1].Timestamp()); - std::vector expected_output_vector2 = {3, 4}; - EXPECT_EQ(expected_output_vector2, - output_packets_[1].Get>()); +// Passes non empty vector through or outputs empty vector in case of timestamp +// bound update. +class PassThroughOrEmptyVectorCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->SetProcessTimestampBounds(true); + cc->Inputs().Index(0).Set>(); + cc->Outputs().Index(0).Set>(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (!cc->Inputs().Index(0).IsEmpty()) { + cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); + } else { + cc->Outputs().Index(0).AddPacket( + MakePacket>(std::vector()) + .At(cc->InputTimestamp())); + } + return ::mediapipe::OkStatus(); + } +}; + +REGISTER_CALCULATOR(PassThroughOrEmptyVectorCalculator); + +class BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest + : public ::testing::Test { + protected: + void SetUp() override { + auto graph_config = ParseTextProtoOrDie( + R"( + num_threads: 4 + input_stream: "ints" + input_stream: "force_ints_to_be_timestamp_bound_update" + node { + calculator: "GateCalculator" + input_stream: "ints" + input_stream: "DISALLOW:force_ints_to_be_timestamp_bound_update" + output_stream: "ints_passed_through" + } + node { + calculator: "BeginLoopIntegerCalculator" + input_stream: "ITERABLE:ints_passed_through" + output_stream: "ITEM:int" + output_stream: "BATCH_END:timestamp" + } + node { + calculator: "IncrementCalculator" + input_stream: "int" + output_stream: "int_plus_one" + } + node { + calculator: "EndLoopIntegersCalculator" + input_stream: "ITEM:int_plus_one" + input_stream: "BATCH_END:timestamp" + output_stream: "ITERABLE:ints_plus_one" + } + node { + calculator: "PassThroughOrEmptyVectorCalculator" + input_stream: "ints_plus_one" + output_stream: "ints_plus_one_passed_through" + } + )"); + tool::AddVectorSink("ints_plus_one_passed_through", &graph_config, + &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config)); + MP_ASSERT_OK(graph_.StartRun({})); + } + + void SendPacketOfIntsOrBound(Timestamp timestamp, std::vector ints) { + // All "ints" packets which are empty are forced to be just timestamp + // bound updates for begin loop calculator. 
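The fixture above turns selected inputs into pure timestamp bound updates by routing them through GateCalculator with a DISALLOW side input. A compact sketch of that gating idiom on its own follows; the stream names and function name are placeholders, not part of this patch.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

// When the "drop" packet is true, GateCalculator discards the "data" packet,
// so downstream calculators observe only a timestamp bound update at that
// timestamp instead of a data packet.
CalculatorGraphConfig GatingSketch() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "data"
    input_stream: "drop"
    node {
      calculator: "GateCalculator"
      input_stream: "data"
      input_stream: "DISALLOW:drop"
      output_stream: "gated_data"
    }
  )pb");
}

}  // namespace mediapipe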
+ bool force_ints_to_be_timestamp_bound_update = ints.empty(); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "force_ints_to_be_timestamp_bound_update", + MakePacket(force_ints_to_be_timestamp_bound_update) + .At(timestamp))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "ints", MakePacket>(std::move(ints)).At(timestamp))); + } + + CalculatorGraph graph_; + std::vector output_packets_; +}; + +TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, + SingleEmptyVector) { + SendPacketOfIntsOrBound(Timestamp(0), {}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + EXPECT_THAT(output_packets_, testing::ElementsAre(PacketOfIntsEq( + Timestamp(0), std::vector{}))); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, + SingleNonEmptyVector) { + SendPacketOfIntsOrBound(Timestamp(0), {0, 1, 2}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + EXPECT_THAT(output_packets_, testing::ElementsAre(PacketOfIntsEq( + Timestamp(0), std::vector{1, 2, 3}))); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphProcessingEmptyPacketsTest, MultipleVectors) { + SendPacketOfIntsOrBound(Timestamp(0), {}); + // Waiting until idle to guarantee all timestamp bound updates are processed + // individually. (Timestamp bounds updates occur in the provide config only + // if input is an empty vector.) + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + SendPacketOfIntsOrBound(Timestamp(1), {0, 1}); + SendPacketOfIntsOrBound(Timestamp(2), {}); + // Waiting until idle to guarantee all timestamp bound updates are processed + // individually. (Timestamp bounds updates occur in the provide config only + // if input is an empty vector.) + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + SendPacketOfIntsOrBound(Timestamp(3), {2, 3}); + SendPacketOfIntsOrBound(Timestamp(4), {}); + // Waiting until idle to guarantee all timestamp bound updates are processed + // individually. (Timestamp bounds updates occur in the provide config only + // if input is an empty vector.) 
+ MP_ASSERT_OK(graph_.WaitUntilIdle()); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); + + EXPECT_THAT( + output_packets_, + testing::ElementsAre(PacketOfIntsEq(Timestamp(0), std::vector{}), + PacketOfIntsEq(Timestamp(1), std::vector{1, 2}), + PacketOfIntsEq(Timestamp(2), std::vector{}), + PacketOfIntsEq(Timestamp(3), std::vector{3, 4}), + PacketOfIntsEq(Timestamp(4), std::vector{}))); } class MultiplierCalculator : public CalculatorBase { @@ -199,8 +336,8 @@ REGISTER_CALCULATOR(MultiplierCalculator); class BeginEndLoopCalculatorGraphWithClonedInputsTest : public ::testing::Test { protected: - BeginEndLoopCalculatorGraphWithClonedInputsTest() { - graph_config_ = ParseTextProtoOrDie( + void SetUp() override { + auto graph_config = ParseTextProtoOrDie( R"( num_threads: 4 input_stream: "ints" @@ -226,109 +363,85 @@ class BeginEndLoopCalculatorGraphWithClonedInputsTest : public ::testing::Test { output_stream: "ITERABLE:multiplied_ints" } )"); - tool::AddVectorSink("multiplied_ints", &graph_config_, &output_packets_); + tool::AddVectorSink("multiplied_ints", &graph_config, &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config)); + MP_ASSERT_OK(graph_.StartRun({})); } - CalculatorGraphConfig graph_config_; + void SendPackets(Timestamp timestamp, int multiplier, std::vector ints) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "ints", MakePacket>(std::move(ints)).At(timestamp))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "multiplier", MakePacket(multiplier).At(timestamp))); + } + + void SendMultiplier(Timestamp timestamp, int multiplier) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "multiplier", MakePacket(multiplier).At(timestamp))); + } + + CalculatorGraph graph_; std::vector output_packets_; }; -TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, + InputStreamForIterableIsEmpty) { Timestamp input_timestamp = Timestamp(42); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - auto multiplier = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendMultiplier(input_timestamp, /*multiplier=*/2); + MP_ASSERT_OK(graph_.WaitUntilIdle()); + + // EndLoopCalc will forward the timestamp bound because there are no packets + // to process. + ASSERT_EQ(0, output_packets_.size()); + + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleEmptyVector) { + SendPackets(Timestamp(0), /*multiplier=*/2, /*ints=*/{}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); // EndLoopCalc will forward the timestamp bound because there are no elements // in collection to output. 
- ASSERT_EQ(0, output_packets_.size()); + EXPECT_TRUE(output_packets_.empty()); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleNonEmptyVector) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - auto input_vector = absl::make_unique>(); - input_vector->emplace_back(0); - input_vector->emplace_back(1); - input_vector->emplace_back(2); Timestamp input_timestamp = Timestamp(42); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector.release()).At(input_timestamp))); - auto multiplier = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier.release()).At(input_timestamp))); - MP_ASSERT_OK(graph.WaitUntilIdle()); + SendPackets(input_timestamp, /*multiplier=*/2, /*ints=*/{0, 1, 2}); + MP_ASSERT_OK(graph_.WaitUntilIdle()); - ASSERT_EQ(1, output_packets_.size()); - EXPECT_EQ(input_timestamp, output_packets_[0].Timestamp()); - std::vector expected_output_vector = {0, 2, 4}; - EXPECT_EQ(expected_output_vector, output_packets_[0].Get>()); + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp, std::vector{0, 2, 4}))); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); } TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, MultipleVectors) { - CalculatorGraph graph; - MP_EXPECT_OK(graph.Initialize(graph_config_)); - MP_EXPECT_OK(graph.StartRun({})); - - auto input_vector0 = absl::make_unique>(); - input_vector0->emplace_back(0); - input_vector0->emplace_back(1); Timestamp input_timestamp0 = Timestamp(42); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector0.release()).At(input_timestamp0))); - auto multiplier0 = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier0.release()).At(input_timestamp0))); + SendPackets(input_timestamp0, /*multiplier=*/2, /*ints=*/{0, 1}); - auto input_vector1 = absl::make_unique>(); Timestamp input_timestamp1 = Timestamp(43); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector1.release()).At(input_timestamp1))); - auto multiplier1 = absl::make_unique(2); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier1.release()).At(input_timestamp1))); + SendPackets(input_timestamp1, /*multiplier=*/2, /*ints=*/{}); - auto input_vector2 = absl::make_unique>(); - input_vector2->emplace_back(2); - input_vector2->emplace_back(3); Timestamp input_timestamp2 = Timestamp(44); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "ints", Adopt(input_vector2.release()).At(input_timestamp2))); - auto multiplier2 = absl::make_unique(3); - MP_ASSERT_OK(graph.AddPacketToInputStream( - "multiplier", Adopt(multiplier2.release()).At(input_timestamp2))); + SendPackets(input_timestamp2, /*multiplier=*/3, /*ints=*/{2, 3}); - MP_ASSERT_OK(graph.CloseAllPacketSources()); - MP_ASSERT_OK(graph.WaitUntilDone()); - - ASSERT_EQ(2, output_packets_.size()); - - EXPECT_EQ(input_timestamp0, output_packets_[0].Timestamp()); - std::vector expected_output_vector0 = {0, 2}; - EXPECT_EQ(expected_output_vector0, - output_packets_[0].Get>()); + MP_ASSERT_OK(graph_.CloseAllPacketSources()); + MP_ASSERT_OK(graph_.WaitUntilDone()); // At 
input_timestamp1, EndLoopCalc will forward timestamp bound as there are // no elements in vector to process. - - EXPECT_EQ(input_timestamp2, output_packets_[1].Timestamp()); - std::vector expected_output_vector2 = {6, 9}; - EXPECT_EQ(expected_output_vector2, - output_packets_[1].Get>()); + EXPECT_THAT(output_packets_, + testing::ElementsAre( + PacketOfIntsEq(input_timestamp0, std::vector{0, 2}), + PacketOfIntsEq(input_timestamp2, std::vector{6, 9}))); } } // namespace diff --git a/mediapipe/calculators/core/begin_loop_calculator.cc b/mediapipe/calculators/core/begin_loop_calculator.cc index 6c1ac20bf..bd4e554e1 100644 --- a/mediapipe/calculators/core/begin_loop_calculator.cc +++ b/mediapipe/calculators/core/begin_loop_calculator.cc @@ -16,6 +16,7 @@ #include +#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" @@ -31,4 +32,9 @@ typedef BeginLoopCalculator> BeginLoopNormalizedRectCalculator; REGISTER_CALCULATOR(BeginLoopNormalizedRectCalculator); +// A calculator to process std::vector. +typedef BeginLoopCalculator> + BeginLoopDetectionCalculator; +REGISTER_CALCULATOR(BeginLoopDetectionCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/core/begin_loop_calculator.h b/mediapipe/calculators/core/begin_loop_calculator.h index 7258b4bf7..ec59e1012 100644 --- a/mediapipe/calculators/core/begin_loop_calculator.h +++ b/mediapipe/calculators/core/begin_loop_calculator.h @@ -52,20 +52,28 @@ namespace mediapipe { // output_stream: "OUTPUT:aggregated_result" # IterableU @ext_ts // } // -// BeginLoopCalculator accepts an optional input stream tagged with "TICK" -// which if non-empty, wakes up the calculator and calls -// BeginLoopCalculator::Process(). Input streams tagged with "CLONE" are cloned -// to the corresponding output streams at loop timestamps. This ensures that a -// MediaPipe graph or sub-graph can run multiple times, once per element in the -// "ITERABLE" for each pakcet clone of the packets in the "CLONE" input streams. +// Input streams tagged with "CLONE" are cloned to the corresponding output +// streams at loop timestamps. This ensures that a MediaPipe graph or sub-graph +// can run multiple times, once per element in the "ITERABLE" for each pakcet +// clone of the packets in the "CLONE" input streams. template class BeginLoopCalculator : public CalculatorBase { using ItemT = typename IterableT::value_type; public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { + // The below enables processing of timestamp bound updates, and that enables + // correct timestamp propagation by the companion EndLoopCalculator. + // + // For instance, Process() function will be still invoked even if upstream + // calculator has updated timestamp bound for ITERABLE input instead of + // providing actual value. + cc->SetProcessTimestampBounds(true); + // A non-empty packet in the optional "TICK" input stream wakes up the // calculator. + // DEPRECATED as timestamp bound updates are processed by default in this + // calculator. 
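Since this patch registers BeginLoopDetectionCalculator and makes BeginLoopCalculator process timestamp bound updates by default, a hedged usage sketch of a per-detection loop may help. The middle calculator name is a hypothetical stand-in for whatever per-item processing a graph needs; EndLoopRenderDataCalculator is one of the aggregators already registered in end_loop_calculator.cc.

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

namespace mediapipe {

// Per-element loop over a std::vector<Detection>: BeginLoopDetectionCalculator
// emits one ITEM per detection, the (hypothetical) PerDetectionCalculator
// processes it, and an EndLoopCalculator specialization re-assembles a vector
// at the original input timestamp.
CalculatorGraphConfig DetectionLoopSketch() {
  return ParseTextProtoOrDie<CalculatorGraphConfig>(R"pb(
    input_stream: "detections"
    node {
      calculator: "BeginLoopDetectionCalculator"
      input_stream: "ITERABLE:detections"
      output_stream: "ITEM:detection"
      output_stream: "BATCH_END:batch_end_ts"
    }
    node {
      calculator: "PerDetectionCalculator"  # hypothetical per-item step
      input_stream: "detection"
      output_stream: "render_data"
    }
    node {
      calculator: "EndLoopRenderDataCalculator"
      input_stream: "ITEM:render_data"
      input_stream: "BATCH_END:batch_end_ts"
      output_stream: "ITERABLE:render_data_vector"
    }
  )pb");
}

}  // namespace mediapipe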
if (cc->Inputs().HasTag("TICK")) { cc->Inputs().Tag("TICK").SetAny(); } diff --git a/mediapipe/calculators/core/clip_vector_size_calculator.cc b/mediapipe/calculators/core/clip_vector_size_calculator.cc index 388cc3a6a..89ac0b9ef 100644 --- a/mediapipe/calculators/core/clip_vector_size_calculator.cc +++ b/mediapipe/calculators/core/clip_vector_size_calculator.cc @@ -17,6 +17,7 @@ #include #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/rect.pb.h" namespace mediapipe { @@ -25,4 +26,8 @@ typedef ClipVectorSizeCalculator<::mediapipe::NormalizedRect> ClipNormalizedRectVectorSizeCalculator; REGISTER_CALCULATOR(ClipNormalizedRectVectorSizeCalculator); +typedef ClipVectorSizeCalculator<::mediapipe::Detection> + ClipDetectionVectorSizeCalculator; +REGISTER_CALCULATOR(ClipDetectionVectorSizeCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/core/end_loop_calculator.cc b/mediapipe/calculators/core/end_loop_calculator.cc index e27ab11ea..61e8c6ac0 100644 --- a/mediapipe/calculators/core/end_loop_calculator.cc +++ b/mediapipe/calculators/core/end_loop_calculator.cc @@ -16,6 +16,7 @@ #include +#include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" #include "mediapipe/util/render_data.pb.h" @@ -37,4 +38,8 @@ typedef EndLoopCalculator> EndLoopRenderDataCalculator; REGISTER_CALCULATOR(EndLoopRenderDataCalculator); +typedef EndLoopCalculator> + EndLoopClassificationListCalculator; +REGISTER_CALCULATOR(EndLoopClassificationListCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc index f9abb35a2..feefd6a56 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator.cc @@ -25,13 +25,17 @@ namespace mediapipe { // together with some previous output. // // For the first packet that arrives on the MAIN input, the timestamp bound is -// advanced on the output. Downstream calculators will see this as an empty +// advanced on the PREV_LOOP. Downstream calculators will see this as an empty // packet. This way they are not kept waiting for the previous output, which // for the first iteration does not exist. // -// Thereafter, each packet received on MAIN is matched with a packet received -// on LOOP; the LOOP packet's timestamp is changed to that of the MAIN packet, -// and it is output on PREV_LOOP. +// Thereafter, +// - Each non-empty MAIN packet results in: +// a) a PREV_LOOP packet with contents of the LOOP packet received at the +// timestamp of the previous non-empty MAIN packet +// b) or in a PREV_LOOP timestamp bound update if the LOOP packet was empty. +// - Each empty MAIN packet indicating timestamp bound update results in a +// PREV_LOOP timestamp bound update. // // Example config: // node { @@ -56,83 +60,115 @@ class PreviousLoopbackCalculator : public CalculatorBase { // TODO: an optional PREV_TIMESTAMP output could be added to // carry the original timestamp of the packet on PREV_LOOP. cc->SetInputStreamHandler("ImmediateInputStreamHandler"); + // Process() function is invoked in response to MAIN/LOOP stream timestamp + // bound updates. 
+ cc->SetProcessTimestampBounds(true); return ::mediapipe::OkStatus(); } ::mediapipe::Status Open(CalculatorContext* cc) final { main_id_ = cc->Inputs().GetId("MAIN", 0); loop_id_ = cc->Inputs().GetId("LOOP", 0); - loop_out_id_ = cc->Outputs().GetId("PREV_LOOP", 0); + prev_loop_id_ = cc->Outputs().GetId("PREV_LOOP", 0); cc->Outputs() - .Get(loop_out_id_) + .Get(prev_loop_id_) .SetHeader(cc->Inputs().Get(loop_id_).Header()); - - // Use an empty packet for the first round, since there is no previous - // output. - loopback_packets_.push_back({}); - return ::mediapipe::OkStatus(); } ::mediapipe::Status Process(CalculatorContext* cc) final { - Packet& main_packet = cc->Inputs().Get(main_id_).Value(); - if (!main_packet.IsEmpty()) { - main_ts_.push_back(main_packet.Timestamp()); - } - Packet& loopback_packet = cc->Inputs().Get(loop_id_).Value(); - if (!loopback_packet.IsEmpty()) { - loopback_packets_.push_back(loopback_packet); - while (!main_ts_.empty() && - main_ts_.front() <= loopback_packets_.front().Timestamp()) { - main_ts_.pop_front(); - } - } - auto& loop_out = cc->Outputs().Get(loop_out_id_); + // Non-empty packets and empty packets indicating timestamp bound updates + // are guaranteed to have timestamps greater than timestamps of previous + // packets within the same stream. Calculator tracks and operates on such + // packets. - while (!main_ts_.empty() && !loopback_packets_.empty()) { - Timestamp main_timestamp = main_ts_.front(); - main_ts_.pop_front(); - Packet previous_loopback = loopback_packets_.front().At(main_timestamp); - loopback_packets_.pop_front(); - - if (previous_loopback.IsEmpty()) { - // TODO: SetCompleteTimestampBound would be more useful. - loop_out.SetNextTimestampBound(main_timestamp + 1); + const Packet& main_packet = cc->Inputs().Get(main_id_).Value(); + if (prev_main_ts_ < main_packet.Timestamp()) { + Timestamp loop_timestamp; + if (!main_packet.IsEmpty()) { + loop_timestamp = prev_non_empty_main_ts_; + prev_non_empty_main_ts_ = main_packet.Timestamp(); } else { - loop_out.AddPacket(std::move(previous_loopback)); + // Calculator advances PREV_LOOP timestamp bound in response to empty + // MAIN packet, hence not caring about corresponding loop packet. + loop_timestamp = Timestamp::Unset(); + } + main_packet_specs_.push_back({.timestamp = main_packet.Timestamp(), + .loop_timestamp = loop_timestamp}); + prev_main_ts_ = main_packet.Timestamp(); + } + + const Packet& loop_packet = cc->Inputs().Get(loop_id_).Value(); + if (prev_loop_ts_ < loop_packet.Timestamp()) { + loop_packets_.push_back(loop_packet); + prev_loop_ts_ = loop_packet.Timestamp(); + } + + auto& prev_loop = cc->Outputs().Get(prev_loop_id_); + while (!main_packet_specs_.empty() && !loop_packets_.empty()) { + // The earliest MAIN packet. + const MainPacketSpec& main_spec = main_packet_specs_.front(); + // The earliest LOOP packet. + const Packet& loop_candidate = loop_packets_.front(); + // Match LOOP and MAIN packets. + if (main_spec.loop_timestamp < loop_candidate.Timestamp()) { + // No LOOP packet can match the MAIN packet under review. + prev_loop.SetNextTimestampBound(main_spec.timestamp + 1); + main_packet_specs_.pop_front(); + } else if (main_spec.loop_timestamp > loop_candidate.Timestamp()) { + // No MAIN packet can match the LOOP packet under review. + loop_packets_.pop_front(); + } else { + // Exact match found. + if (loop_candidate.IsEmpty()) { + // However, LOOP packet is empty. 
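The three-way comparison above is the core of the new PreviousLoopbackCalculator implementation. As a reading aid, here is an illustrative-only model of that matching loop over plain ints; it is not MediaPipe code and deliberately ignores the empty-LOOP-packet case, which the real calculator also turns into a bound update.

#include <deque>
#include <iostream>

// Each MAIN spec records its own timestamp and the LOOP timestamp it expects
// (the first MAIN packet expects an "unset" value, modeled here as -1).
struct MainSpec {
  int timestamp;
  int expected_loop_timestamp;
};

void MatchSketch(std::deque<MainSpec> specs, std::deque<int> loop_timestamps) {
  while (!specs.empty() && !loop_timestamps.empty()) {
    if (specs.front().expected_loop_timestamp < loop_timestamps.front()) {
      // No LOOP packet can match this MAIN packet: only advance the bound.
      std::cout << "bound update at " << specs.front().timestamp << "\n";
      specs.pop_front();
    } else if (specs.front().expected_loop_timestamp >
               loop_timestamps.front()) {
      // This LOOP packet will never be needed: drop it.
      loop_timestamps.pop_front();
    } else {
      // Exact match: emit PREV_LOOP at the MAIN timestamp.
      std::cout << "emit PREV_LOOP at " << specs.front().timestamp << "\n";
      specs.pop_front();
      loop_timestamps.pop_front();
    }
  }
}

int main() {
  // MAIN at 1 (expects nothing), 2 (expects LOOP@1), 3 (expects LOOP@2);
  // LOOP packets arrived at 1 and 2.
  MatchSketch({{1, -1}, {2, 1}, {3, 2}}, {1, 2});
  return 0;
}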
+ prev_loop.SetNextTimestampBound(main_spec.timestamp + 1); + } else { + prev_loop.AddPacket(loop_candidate.At(main_spec.timestamp)); + } + loop_packets_.pop_front(); + main_packet_specs_.pop_front(); } } - // In case of an empty loopback input, the next timestamp bound for - // loopback input is the loopback timestamp + 1. The next timestamp bound - // for output is set and the main_ts_ vector is truncated accordingly. - if (loopback_packet.IsEmpty() && - loopback_packet.Timestamp() != Timestamp::Unstarted()) { - Timestamp loopback_bound = - loopback_packet.Timestamp().NextAllowedInStream(); - while (!main_ts_.empty() && main_ts_.front() <= loopback_bound) { - main_ts_.pop_front(); - } - if (main_ts_.empty()) { - loop_out.SetNextTimestampBound(loopback_bound.NextAllowedInStream()); - } - } - if (!main_ts_.empty()) { - loop_out.SetNextTimestampBound(main_ts_.front()); - } - if (cc->Inputs().Get(main_id_).IsDone() && main_ts_.empty()) { - loop_out.Close(); + if (main_packet_specs_.empty() && cc->Inputs().Get(main_id_).IsDone()) { + prev_loop.Close(); } return ::mediapipe::OkStatus(); } private: + struct MainPacketSpec { + Timestamp timestamp; + // Expected timestamp of the packet from LOOP stream that corresponds to the + // packet from MAIN stream descirbed by this spec. + Timestamp loop_timestamp; + }; + CollectionItemId main_id_; CollectionItemId loop_id_; - CollectionItemId loop_out_id_; + CollectionItemId prev_loop_id_; - std::deque main_ts_; - std::deque loopback_packets_; + // Contains specs for MAIN packets which only can be: + // - non-empty packets + // - empty packets indicating timestamp bound updates + // + // Sorted according to packet timestamps. + std::deque main_packet_specs_; + Timestamp prev_main_ts_ = Timestamp::Unstarted(); + Timestamp prev_non_empty_main_ts_ = Timestamp::Unstarted(); + + // Contains LOOP packets which only can be: + // - the very first empty packet + // - non empty packets + // - empty packets indicating timestamp bound updates + // + // Sorted according to packet timestamps. + std::deque loop_packets_; + // Using "Timestamp::Unset" instead of "Timestamp::Unstarted" in order to + // allow addition of the very first empty packet (which doesn't indicate + // timestamp bound change necessarily). + Timestamp prev_loop_ts_ = Timestamp::Unset(); }; REGISTER_CALCULATOR(PreviousLoopbackCalculator); diff --git a/mediapipe/calculators/core/previous_loopback_calculator_test.cc b/mediapipe/calculators/core/previous_loopback_calculator_test.cc index 5ef98257f..0fabacd57 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator_test.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -25,12 +26,17 @@ #include "mediapipe/framework/port/gtest.h" #include "mediapipe/framework/port/integral_types.h" #include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/status_matchers.h" #include "mediapipe/framework/timestamp.h" #include "mediapipe/framework/tool/sink.h" namespace mediapipe { +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Pair; +using ::testing::Value; namespace { // Returns the timestamp values for a vector of Packets. @@ -43,6 +49,23 @@ std::vector TimestampValues(const std::vector& packets) { return result; } +MATCHER(EmptyPacket, negation ? 
"isn't empty" : "is empty") { + if (arg.IsEmpty()) { + return true; + } + return false; +} + +MATCHER_P(IntPacket, value, "") { + return Value(arg.template Get(), Eq(value)); +} + +MATCHER_P2(PairPacket, timestamp, pair, "") { + Timestamp actual_timestamp = arg.Timestamp(); + const auto& actual_pair = arg.template Get>(); + return Value(actual_timestamp, Eq(timestamp)) && Value(actual_pair, pair); +} + TEST(PreviousLoopbackCalculator, CorrectTimestamps) { std::vector in_prev; CalculatorGraphConfig graph_config_ = @@ -81,32 +104,30 @@ TEST(PreviousLoopbackCalculator, CorrectTimestamps) { MP_EXPECT_OK(graph_.AddPacketToInputStream( input_name, MakePacket(n).At(Timestamp(n)))); }; - auto pair_values = [](const Packet& packet) { - auto pair = packet.Get>(); - int first = pair.first.IsEmpty() ? -1 : pair.first.Get(); - int second = pair.second.IsEmpty() ? -1 : pair.second.Get(); - return std::make_pair(first, second); - }; send_packet("in", 1); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(1, -1)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket()))); send_packet("in", 2); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(2, 1)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1, 2)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1)))); send_packet("in", 5); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2, 5})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(5, 2)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1, 2, 5)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(2)))); send_packet("in", 15); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(in_prev), (std::vector{1, 2, 5, 15})); - EXPECT_EQ(pair_values(in_prev.back()), std::make_pair(15, 5)); + EXPECT_THAT(TimestampValues(in_prev), ElementsAre(1, 2, 5, 15)); + EXPECT_THAT(in_prev.back(), + PairPacket(Timestamp(15), Pair(IntPacket(15), IntPacket(5)))); MP_EXPECT_OK(graph_.CloseAllInputStreams()); MP_EXPECT_OK(graph_.WaitUntilDone()); @@ -185,24 +206,24 @@ TEST(PreviousLoopbackCalculator, ClosesCorrectly) { send_packet("in", 1); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1)); send_packet("in", 2); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1, 2)); send_packet("in", 5); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2, 5})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1, 2, 5)); send_packet("in", 15); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 2, 5, 15})); + EXPECT_THAT(TimestampValues(outputs), ElementsAre(1, 2, 5, 15)); MP_EXPECT_OK(graph_.CloseAllInputStreams()); MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), - (std::vector{1, 2, 5, 15, Timestamp::Max().Value()})); + EXPECT_THAT(TimestampValues(outputs), + ElementsAre(1, 2, 5, 15, Timestamp::Max().Value())); MP_EXPECT_OK(graph_.WaitUntilDone()); } @@ -247,16 +268,12 @@ 
TEST(PreviousLoopbackCalculator, EmptyLoopForever) { input_name, MakePacket(n).At(Timestamp(n)))); }; - send_packet("in", 0); - MP_EXPECT_OK(graph_.WaitUntilIdle()); - EXPECT_EQ(TimestampValues(outputs), (std::vector{0})); - - for (int main_ts = 1; main_ts < 50; ++main_ts) { + for (int main_ts = 0; main_ts < 50; ++main_ts) { send_packet("in", main_ts); MP_EXPECT_OK(graph_.WaitUntilIdle()); std::vector ts_values = TimestampValues(outputs); EXPECT_EQ(ts_values.size(), main_ts + 1); - for (int j = 0; j < main_ts; ++j) { + for (int j = 0; j < main_ts + 1; ++j) { EXPECT_EQ(ts_values[j], j); } } @@ -266,5 +283,487 @@ TEST(PreviousLoopbackCalculator, EmptyLoopForever) { MP_EXPECT_OK(graph_.WaitUntilDone()); } +class PreviousLoopbackCalculatorProcessingTimestampsTest + : public testing::Test { + protected: + void SetUp() override { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: 'input' + input_stream: 'force_main_empty' + input_stream: 'force_loop_empty' + # Used to indicate "main" timestamp bound updates. + node { + calculator: 'GateCalculator' + input_stream: 'input' + input_stream: 'DISALLOW:force_main_empty' + output_stream: 'main' + } + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:main' + input_stream: 'LOOP:loop' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:prev_loop' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'input' + input_stream: 'prev_loop' + output_stream: 'passed_through_input' + output_stream: 'passed_through_prev_loop' + } + # Used to indicate "loop" timestamp bound updates. + node { + calculator: 'GateCalculator' + input_stream: 'input' + input_stream: 'DISALLOW:force_loop_empty' + output_stream: 'loop' + } + node { + calculator: 'MakePairCalculator' + input_stream: 'passed_through_input' + input_stream: 'passed_through_prev_loop' + output_stream: 'passed_through_input_and_prev_loop' + } + )"); + tool::AddVectorSink("passed_through_input_and_prev_loop", &graph_config, + &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config, {})); + MP_ASSERT_OK(graph_.StartRun({})); + } + + void SendPackets(int timestamp, int input, bool force_main_empty, + bool force_loop_empty) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "input", MakePacket(input).At(Timestamp(timestamp)))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "force_main_empty", + MakePacket(force_main_empty).At(Timestamp(timestamp)))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "force_loop_empty", + MakePacket(force_loop_empty).At(Timestamp(timestamp)))); + } + + CalculatorGraph graph_; + std::vector output_packets_; +}; + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsEmptyMainNonEmptyLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + 
ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsNonEmptyMainEmptyLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsAlteringMainNonEmptyLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, + /*force_main_empty=*/false, + 
/*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), IntPacket(1))))); + + SendPackets(/*timestamp=*/5, /*input=*/5, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), IntPacket(1))), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/true, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), IntPacket(1))), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsNonEmptyMainAlteringLoop) { + SendPackets(/*timestamp=*/1, /*input=*/1, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))))); + + SendPackets(/*timestamp=*/3, /*input=*/3, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, + /*force_main_empty=*/false, + /*force_loop_empty=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))))); + + SendPackets(/*timestamp=*/15, /*input=*/15, + /*force_main_empty=*/false, + /*force_loop_empty=*/false); + 
MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), IntPacket(3))), + PairPacket(Timestamp(15), Pair(IntPacket(15), EmptyPacket())))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorProcessingTimestampsTest, + MultiplePacketsCheckIfLastCorrectAlteringMainAlteringLoop) { + int num_packets = 1000; + for (int i = 0; i < num_packets; ++i) { + bool force_main_empty = i % 3 == 0 ? true : false; + bool force_loop_empty = i % 2 == 0 ? true : false; + SendPackets(/*timestamp=*/i + 1, /*input=*/i + 1, force_main_empty, + force_loop_empty); + } + SendPackets(/*timestamp=*/num_packets + 1, + /*input=*/num_packets + 1, /*force_main_empty=*/false, + /*force_loop_empty=*/false); + SendPackets(/*timestamp=*/num_packets + 2, + /*input=*/num_packets + 2, /*force_main_empty=*/false, + /*force_loop_empty=*/false); + + MP_EXPECT_OK(graph_.WaitUntilIdle()); + ASSERT_FALSE(output_packets_.empty()); + EXPECT_THAT( + output_packets_.back(), + PairPacket(Timestamp(num_packets + 2), + Pair(IntPacket(num_packets + 2), IntPacket(num_packets + 1)))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +// Similar to GateCalculator, but it doesn't propagate timestamp bound updates. +class DroppingGateCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).SetAny(); + cc->Inputs().Tag("DISALLOW").Set(); + cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + if (!cc->Inputs().Index(0).IsEmpty() && + !cc->Inputs().Tag("DISALLOW").Get()) { + cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(DroppingGateCalculator); + +// Tests PreviousLoopbackCalculator in cases when there are no "LOOP" timestamp +// bound updates and non-empty packets for a while and the aforementioned start +// to arrive at some point. So, "PREV_LOOP" is delayed for a couple of inputs. +class PreviousLoopbackCalculatorDelayBehaviorTest : public testing::Test { + protected: + void SetUp() override { + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: 'input' + # Drops "loop" when set to "true", delaying output of prev_loop, hence + # delaying output of the graph. 
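DroppingGateCalculator above intentionally swallows both the packet and the timestamp bound, which is what delays PREV_LOOP in these tests. For contrast, a hedged sketch of the same gate with a declared zero offset, which would keep the downstream bound advancing even while packets are dropped; the calculator name is made up for illustration.

#include "mediapipe/framework/calculator_framework.h"

namespace mediapipe {

// Like DroppingGateCalculator, but SetOffset(TimestampDiff(0)) tells the
// framework that the output timestamp bound tracks the input timestamp, so
// dropping a packet still advances the bound for downstream calculators.
class BoundPropagatingDroppingGateCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).SetAny();
    cc->Inputs().Tag("DISALLOW").Set<bool>();
    cc->Outputs().Index(0).SetSameAs(&cc->Inputs().Index(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Open(CalculatorContext* cc) override {
    cc->SetOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override {
    if (!cc->Inputs().Index(0).IsEmpty() &&
        !cc->Inputs().Tag("DISALLOW").Get<bool>()) {
      cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value());
    }
    return ::mediapipe::OkStatus();
  }
};
REGISTER_CALCULATOR(BoundPropagatingDroppingGateCalculator);

}  // namespace mediapipe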
+ input_stream: 'delay_next_output' + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:input' + input_stream: 'LOOP:loop' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:prev_loop' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'input' + input_stream: 'prev_loop' + output_stream: 'passed_through_input' + output_stream: 'passed_through_prev_loop' + } + node { + calculator: 'DroppingGateCalculator' + input_stream: 'input' + input_stream: 'DISALLOW:delay_next_output' + output_stream: 'loop' + } + node { + calculator: 'MakePairCalculator' + input_stream: 'passed_through_input' + input_stream: 'passed_through_prev_loop' + output_stream: 'passed_through_input_and_prev_loop' + } + )"); + tool::AddVectorSink("passed_through_input_and_prev_loop", &graph_config, + &output_packets_); + MP_ASSERT_OK(graph_.Initialize(graph_config, {})); + MP_ASSERT_OK(graph_.StartRun({})); + } + + void SendPackets(int timestamp, int input, bool delay_next_output) { + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "input", MakePacket(input).At(Timestamp(timestamp)))); + MP_ASSERT_OK(graph_.AddPacketToInputStream( + "delay_next_output", + MakePacket(delay_next_output).At(Timestamp(timestamp)))); + } + + CalculatorGraph graph_; + std::vector output_packets_; +}; + +TEST_F(PreviousLoopbackCalculatorDelayBehaviorTest, MultipleDelayedOutputs) { + SendPackets(/*timestamp=*/1, /*input=*/1, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/3, /*input=*/3, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/5, /*input=*/5, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), EmptyPacket())), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), IntPacket(5))))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + +TEST_F(PreviousLoopbackCalculatorDelayBehaviorTest, + NonDelayedOutputFollowedByMultipleDelayedOutputs) { + SendPackets(/*timestamp=*/1, /*input=*/1, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())))); + + SendPackets(/*timestamp=*/2, /*input=*/2, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + 
output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))))); + + SendPackets(/*timestamp=*/3, /*input=*/3, /*delay_next_output=*/true); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))))); + + SendPackets(/*timestamp=*/5, /*input=*/5, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre(PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())))); + + SendPackets(/*timestamp=*/15, /*input=*/15, /*delay_next_output=*/false); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_THAT( + output_packets_, + ElementsAre( + PairPacket(Timestamp(1), Pair(IntPacket(1), EmptyPacket())), + PairPacket(Timestamp(2), Pair(IntPacket(2), IntPacket(1))), + PairPacket(Timestamp(3), Pair(IntPacket(3), EmptyPacket())), + PairPacket(Timestamp(5), Pair(IntPacket(5), EmptyPacket())), + PairPacket(Timestamp(15), Pair(IntPacket(15), IntPacket(5))))); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + } // anonymous namespace } // namespace mediapipe diff --git a/mediapipe/calculators/core/split_vector_calculator.cc b/mediapipe/calculators/core/split_vector_calculator.cc index 79c884e43..d993387df 100644 --- a/mediapipe/calculators/core/split_vector_calculator.cc +++ b/mediapipe/calculators/core/split_vector_calculator.cc @@ -16,6 +16,7 @@ #include +#include "mediapipe/framework/formats/detection.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" #include "tensorflow/lite/interpreter.h" @@ -48,6 +49,10 @@ typedef SplitVectorCalculator<::mediapipe::NormalizedLandmark, false> SplitLandmarkVectorCalculator; REGISTER_CALCULATOR(SplitLandmarkVectorCalculator); +typedef SplitVectorCalculator<::mediapipe::NormalizedLandmarkList, false> + SplitNormalizedLandmarkListVectorCalculator; +REGISTER_CALCULATOR(SplitNormalizedLandmarkListVectorCalculator); + typedef SplitVectorCalculator<::mediapipe::NormalizedRect, false> SplitNormalizedRectVectorCalculator; REGISTER_CALCULATOR(SplitNormalizedRectVectorCalculator); @@ -57,4 +62,9 @@ typedef SplitVectorCalculator<::tflite::gpu::gl::GlBuffer, true> MovableSplitGlBufferVectorCalculator; REGISTER_CALCULATOR(MovableSplitGlBufferVectorCalculator); #endif + +typedef SplitVectorCalculator<::mediapipe::Detection, false> + SplitDetectionVectorCalculator; +REGISTER_CALCULATOR(SplitDetectionVectorCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index a6159b554..1ef87d314 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -422,9 +422,12 @@ cc_library( ":recolor_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/port:status", "//mediapipe/framework/port:ret_check", "//mediapipe/util:color_cc_proto", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", ] + select({ "//mediapipe/gpu:disable_gpu": [], 
"//conditions:default": [ diff --git a/mediapipe/calculators/image/recolor_calculator.cc b/mediapipe/calculators/image/recolor_calculator.cc index fff26b704..07f347a15 100644 --- a/mediapipe/calculators/image/recolor_calculator.cc +++ b/mediapipe/calculators/image/recolor_calculator.cc @@ -17,6 +17,9 @@ #include "mediapipe/calculators/image/recolor_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/port/opencv_core_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/util/color.pb.h" @@ -39,8 +42,6 @@ namespace mediapipe { // The luminance of the input image is used to adjust the blending weight, // to help preserve image textures. // -// TODO implement cpu support. -// // Inputs: // One of the following IMAGE tags: // IMAGE: An ImageFrame input image, RGB or RGBA. @@ -71,6 +72,8 @@ namespace mediapipe { // } // } // +// Note: Cannot mix-match CPU & GPU inputs/outputs. +// CPU-in & CPU-out GPU-in & GPU-out class RecolorCalculator : public CalculatorBase { public: RecolorCalculator() = default; @@ -138,6 +141,11 @@ REGISTER_CALCULATOR(RecolorCalculator); cc->Outputs().Tag("IMAGE").Set(); } + // Confirm only one of the input streams is present. + RET_CHECK(cc->Inputs().HasTag("IMAGE") ^ cc->Inputs().HasTag("IMAGE_GPU")); + // Confirm only one of the output streams is present. + RET_CHECK(cc->Outputs().HasTag("IMAGE") ^ cc->Outputs().HasTag("IMAGE_GPU")); + if (use_gpu) { #if !defined(MEDIAPIPE_DISABLE_GPU) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); @@ -193,7 +201,62 @@ REGISTER_CALCULATOR(RecolorCalculator); } ::mediapipe::Status RecolorCalculator::RenderCpu(CalculatorContext* cc) { - return ::mediapipe::UnimplementedError("CPU support is not implemented yet."); + if (cc->Inputs().Tag("MASK").IsEmpty()) { + return ::mediapipe::OkStatus(); + } + // Get inputs and setup output. + const auto& input_img = cc->Inputs().Tag("IMAGE").Get(); + const auto& mask_img = cc->Inputs().Tag("MASK").Get(); + + cv::Mat input_mat = formats::MatView(&input_img); + cv::Mat mask_mat = formats::MatView(&mask_img); + + RET_CHECK(input_mat.channels() == 3); // RGB only. 
+ + if (mask_mat.channels() > 1) { + std::vector channels; + cv::split(mask_mat, channels); + if (mask_channel_ == mediapipe::RecolorCalculatorOptions_MaskChannel_ALPHA) + mask_mat = channels[3]; + else + mask_mat = channels[0]; + } + cv::Mat mask_full; + cv::resize(mask_mat, mask_full, input_mat.size()); + + auto output_img = absl::make_unique( + input_img.Format(), input_mat.cols, input_mat.rows); + cv::Mat output_mat = mediapipe::formats::MatView(output_img.get()); + + // From GPU shader: + /* + vec4 weight = texture2D(mask, sample_coordinate); + vec4 color1 = texture2D(frame, sample_coordinate); + vec4 color2 = vec4(recolor, 1.0); + + float luminance = dot(color1.rgb, vec3(0.299, 0.587, 0.114)); + float mix_value = weight.MASK_COMPONENT * luminance; + + fragColor = mix(color1, color2, mix_value); + */ + for (int i = 0; i < output_mat.rows; ++i) { + for (int j = 0; j < output_mat.cols; ++j) { + float weight = mask_full.at(i, j) * (1.0 / 255.0); + cv::Vec3f color1 = input_mat.at(i, j); + cv::Vec3f color2 = {color_[0], color_[1], color_[2]}; + + float luminance = + (color1[0] * 0.299 + color1[1] * 0.587 + color1[2] * 0.114) / 255; + float mix_value = weight * luminance; + + cv::Vec3b mix_color = color1 * (1.0 - mix_value) + color2 * mix_value; + output_mat.at(i, j) = mix_color; + } + } + + cc->Outputs().Tag("IMAGE").Add(output_img.release(), cc->InputTimestamp()); + + return ::mediapipe::OkStatus(); } ::mediapipe::Status RecolorCalculator::RenderGpu(CalculatorContext* cc) { @@ -303,9 +366,9 @@ void RecolorCalculator::GlRender() { if (!options.has_color()) RET_CHECK_FAIL() << "Missing color option."; - color_.push_back(options.color().r() / 255.0); - color_.push_back(options.color().g() / 255.0); - color_.push_back(options.color().b() / 255.0); + color_.push_back(options.color().r()); + color_.push_back(options.color().g()); + color_.push_back(options.color().b()); return ::mediapipe::OkStatus(); } @@ -378,8 +441,8 @@ void RecolorCalculator::GlRender() { glUseProgram(program_); glUniform1i(glGetUniformLocation(program_, "frame"), 1); glUniform1i(glGetUniformLocation(program_, "mask"), 2); - glUniform3f(glGetUniformLocation(program_, "recolor"), color_[0], color_[1], - color_[2]); + glUniform3f(glGetUniformLocation(program_, "recolor"), color_[0] / 255.0, + color_[1] / 255.0, color_[2] / 255.0); #endif // !MEDIAPIPE_DISABLE_GPU return ::mediapipe::OkStatus(); diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index 93c4f751e..f774fe717 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -1110,6 +1110,7 @@ cc_test( ], "//mediapipe:android": [ "@org_tensorflow//tensorflow/core:android_tensorflow_lib_with_ops_lite_proto_no_rtti_lib", + "@org_tensorflow//tensorflow/core:android_tensorflow_test_lib", ], "//mediapipe:ios": [ "@org_tensorflow//tensorflow/core:ios_tensorflow_test_lib", diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index 7c711e842..45ef317c8 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -222,9 +222,11 @@ cc_library( deps = [ ":util", ":tflite_inference_calculator_cc_proto", + "@com_google_absl//absl/memory", "//mediapipe/framework:calculator_framework", "//mediapipe/util:resource_util", "@org_tensorflow//tensorflow/lite:framework", + "@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate", "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", 
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", "//mediapipe/framework/port:ret_check", @@ -254,6 +256,10 @@ cc_library( "//mediapipe:android": [ "@org_tensorflow//tensorflow/lite/delegates/nnapi:nnapi_delegate", ], + }) + select({ + "//conditions:default": [ + "//mediapipe/util:cpu_util", + ], }), alwayslink = 1, ) @@ -308,6 +314,20 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tflite_model_calculator", + srcs = ["tflite_model_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":util", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", + "//mediapipe/framework/port:ret_check", + "@org_tensorflow//tensorflow/lite:framework", + ], + alwayslink = 1, +) + cc_library( name = "tflite_tensors_to_segmentation_calculator", srcs = ["tflite_tensors_to_segmentation_calculator.cc"], @@ -478,6 +498,9 @@ cc_test( deps = [ ":tflite_inference_calculator", ":tflite_inference_calculator_cc_proto", + ":tflite_model_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/deps:file_path", @@ -485,7 +508,9 @@ cc_test( "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:parse_text_proto", "//mediapipe/framework/tool:validate_type", + "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", "@org_tensorflow//tensorflow/lite:framework", "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", ], @@ -511,3 +536,19 @@ cc_test( "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", ], ) + +cc_test( + name = "tflite_model_calculator_test", + srcs = ["tflite_model_calculator_test.cc"], + data = ["testdata/add.bin"], + deps = [ + ":tflite_model_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "@org_tensorflow//tensorflow/lite:framework", + ], +) diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 7634fe251..665bd89f7 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -17,10 +17,16 @@ #include #include +#include "absl/memory/memory.h" #include "mediapipe/calculators/tflite/tflite_inference_calculator.pb.h" #include "mediapipe/calculators/tflite/util.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/ret_check.h" + +#if !defined(__EMSCRIPTEN__) +#include "mediapipe/util/cpu_util.h" +#endif // !__EMSCRIPTEN__ + #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/interpreter.h" @@ -50,7 +56,7 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate.h" #include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h" #endif // iOS - +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" #if defined(MEDIAPIPE_ANDROID) #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #endif // ANDROID @@ -113,6 +119,23 @@ struct GPUData { }; #endif +// Returns number of threads to configure XNNPACK delegate with. 
+// (Equal to user provided value if specified. Otherwise, it returns number of +// high cores (hard-coded to 1 for __EMSCRIPTEN__)) +int GetXnnpackNumThreads( + const mediapipe::TfLiteInferenceCalculatorOptions& opts) { + static constexpr int kDefaultNumThreads = -1; + if (opts.has_delegate() && opts.delegate().has_xnnpack() && + opts.delegate().xnnpack().num_threads() != kDefaultNumThreads) { + return opts.delegate().xnnpack().num_threads(); + } +#if !defined(__EMSCRIPTEN__) + return InferHigherCoreIds().size(); +#else + return 1; +#endif // !__EMSCRIPTEN__ +} + // Calculator Header Section // Runs inference on the provided input TFLite tensors and TFLite model. @@ -139,6 +162,9 @@ struct GPUData { // Input side packet: // CUSTOM_OP_RESOLVER (optional) - Use a custom op resolver, // instead of the builtin one. +// MODEL (optional) - Use to specify TfLite model +// (std::unique_ptr>) // // Example use: // node { @@ -153,6 +179,20 @@ struct GPUData { // } // } // +// or +// +// node { +// calculator: "TfLiteInferenceCalculator" +// input_stream: "TENSORS:tensor_image" +// input_side_packet: "MODEL:model" +// output_stream: "TENSORS:tensors" +// options: { +// [mediapipe.TfLiteInferenceCalculatorOptions.ext] { +// delegate { gpu {} } +// } +// } +// } +// // IMPORTANT Notes: // Tensors are assumed to be ordered correctly (sequentially added to model). // Input tensors are assumed to be of the correct size and already normalized. @@ -165,6 +205,9 @@ class TfLiteInferenceCalculator : public CalculatorBase { public: using TfLiteDelegatePtr = std::unique_ptr>; + using TfLiteModelPtr = + std::unique_ptr>; static ::mediapipe::Status GetContract(CalculatorContract* cc); @@ -173,12 +216,12 @@ class TfLiteInferenceCalculator : public CalculatorBase { ::mediapipe::Status Close(CalculatorContext* cc) override; private: - ::mediapipe::Status LoadOptions(CalculatorContext* cc); ::mediapipe::Status LoadModel(CalculatorContext* cc); + ::mediapipe::StatusOr GetModelAsPacket(const CalculatorContext& cc); ::mediapipe::Status LoadDelegate(CalculatorContext* cc); + Packet model_packet_; std::unique_ptr interpreter_; - std::unique_ptr model_; TfLiteDelegatePtr delegate_; #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) @@ -198,7 +241,6 @@ class TfLiteInferenceCalculator : public CalculatorBase { edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice(); #endif - std::string model_path_ = ""; bool gpu_inference_ = false; bool gpu_input_ = false; bool gpu_output_ = false; @@ -217,6 +259,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); const auto& options = cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); + RET_CHECK(!options.model_path().empty() ^ + cc->InputSidePackets().HasTag("MODEL")) + << "Either model as side packet or model path in options is required."; + bool use_gpu = options.has_delegate() ? 
options.delegate().has_gpu() : options.use_gpu(); @@ -249,6 +295,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); .Tag("CUSTOM_OP_RESOLVER") .Set(); } + if (cc->InputSidePackets().HasTag("MODEL")) { + cc->InputSidePackets().Tag("MODEL").Set(); + } if (use_gpu) { #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) @@ -267,7 +316,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); ::mediapipe::Status TfLiteInferenceCalculator::Open(CalculatorContext* cc) { cc->SetOffset(TimestampDiff(0)); - MP_RETURN_IF_ERROR(LoadOptions(cc)); + const auto& options = + cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); + gpu_inference_ = options.use_gpu(); if (cc->Inputs().HasTag("TENSORS_GPU")) { #if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) @@ -492,34 +543,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // Calculator Auxiliary Section -::mediapipe::Status TfLiteInferenceCalculator::LoadOptions( - CalculatorContext* cc) { - // Get calculator options specified in the graph. - const auto& options = - cc->Options<::mediapipe::TfLiteInferenceCalculatorOptions>(); - - // Get model name. - if (!options.model_path().empty()) { - std::string model_path = options.model_path(); - - ASSIGN_OR_RETURN(model_path_, mediapipe::PathToResourceAsFile(model_path)); - } else { - LOG(ERROR) << "Must specify path to TFLite model."; - return ::mediapipe::Status(::mediapipe::StatusCode::kNotFound, - "Must specify path to TFLite model."); - } - - // Get execution modes. - gpu_inference_ = - options.has_delegate() ? options.delegate().has_gpu() : options.use_gpu(); - - return ::mediapipe::OkStatus(); -} - ::mediapipe::Status TfLiteInferenceCalculator::LoadModel( CalculatorContext* cc) { - model_ = tflite::FlatBufferModel::BuildFromFile(model_path_.c_str()); - RET_CHECK(model_); + ASSIGN_OR_RETURN(model_packet_, GetModelAsPacket(*cc)); + const auto& model = *model_packet_.Get(); tflite::ops::builtin::BuiltinOpResolver op_resolver; if (cc->InputSidePackets().HasTag("CUSTOM_OP_RESOLVER")) { @@ -529,9 +556,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } #if defined(MEDIAPIPE_EDGE_TPU) interpreter_ = - BuildEdgeTpuInterpreter(*model_, &op_resolver, edgetpu_context_.get()); + BuildEdgeTpuInterpreter(model, &op_resolver, edgetpu_context_.get()); #else - tflite::InterpreterBuilder(*model_, op_resolver)(&interpreter_); + tflite::InterpreterBuilder(model, op_resolver)(&interpreter_); #endif // MEDIAPIPE_EDGE_TPU RET_CHECK(interpreter_); @@ -557,6 +584,28 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); return ::mediapipe::OkStatus(); } +::mediapipe::StatusOr TfLiteInferenceCalculator::GetModelAsPacket( + const CalculatorContext& cc) { + const auto& options = + cc.Options(); + if (!options.model_path().empty()) { + std::string model_path = options.model_path(); + + ASSIGN_OR_RETURN(model_path, mediapipe::PathToResourceAsFile(model_path)); + + auto model = tflite::FlatBufferModel::BuildFromFile(model_path.c_str()); + RET_CHECK(model) << "Failed to load model from path."; + return MakePacket(TfLiteModelPtr( + model.release(), [](tflite::FlatBufferModel* model) { delete model; })); + } + if (cc.InputSidePackets().HasTag("MODEL")) { + return cc.InputSidePackets().Tag("MODEL"); + } + return ::mediapipe::Status( + ::mediapipe::StatusCode::kNotFound, + "Must specify TFLite model as path or loaded model."); +} + ::mediapipe::Status TfLiteInferenceCalculator::LoadDelegate( CalculatorContext* cc) { const auto& calculator_opts = @@ -587,6 +636,22 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } #endif // 
MEDIAPIPE_ANDROID +#if defined(__EMSCRIPTEN__) + const bool xnnpack_requested = true; +#else + const bool xnnpack_requested = calculator_opts.has_delegate() && + calculator_opts.delegate().has_xnnpack(); +#endif // __EMSCRIPTEN__ + + if (xnnpack_requested) { + TfLiteXNNPackDelegateOptions xnnpack_opts{}; + xnnpack_opts.num_threads = GetXnnpackNumThreads(calculator_opts); + delegate_ = TfLiteDelegatePtr(TfLiteXNNPackDelegateCreate(&xnnpack_opts), + &TfLiteXNNPackDelegateDelete); + RET_CHECK_EQ(interpreter_->ModifyGraphWithDelegate(delegate_.get()), + kTfLiteOk); + } + // Return, no need for GPU delegate below. return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.proto b/mediapipe/calculators/tflite/tflite_inference_calculator.proto index 893574b6a..a764e89fd 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.proto @@ -45,11 +45,17 @@ message TfLiteInferenceCalculatorOptions { message Gpu {} // Android only. message Nnapi {} + message Xnnpack { + // Number of threads for XNNPACK delegate. (By default, calculator tries + // to choose optimal number of threads depending on the device.) + optional int32 num_threads = 1 [default = -1]; + } oneof delegate { TfLite tflite = 1; Gpu gpu = 2; Nnapi nnapi = 3; + Xnnpack xnnpack = 4; } } diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc b/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc index 9529a8ecb..c3df07191 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator_test.cc @@ -41,7 +41,7 @@ namespace mediapipe { using ::tflite::Interpreter; -void DoSmokeTest(absl::string_view delegate) { +void DoSmokeTest(const std::string& graph_proto) { const int width = 8; const int height = 8; const int channels = 3; @@ -69,24 +69,9 @@ void DoSmokeTest(absl::string_view delegate) { auto input_vec = absl::make_unique>(); input_vec->emplace_back(*tensor); - std::string graph_proto = R"( - input_stream: "tensor_in" - node { - calculator: "TfLiteInferenceCalculator" - input_stream: "TENSORS:tensor_in" - output_stream: "TENSORS:tensor_out" - options { - [mediapipe.TfLiteInferenceCalculatorOptions.ext] { - model_path: "mediapipe/calculators/tflite/testdata/add.bin" - $delegate - } - } - } - )"; - ASSERT_EQ(absl::StrReplaceAll({{"$delegate", delegate}}, &graph_proto), 1); // Prepare single calculator graph to and wait for packets. CalculatorGraphConfig graph_config = - ::mediapipe::ParseTextProtoOrDie(graph_proto); + ParseTextProtoOrDie(graph_proto); std::vector output_packets; tool::AddVectorSink("tensor_out", &graph_config, &output_packets); CalculatorGraph graph(graph_config); @@ -119,8 +104,70 @@ void DoSmokeTest(absl::string_view delegate) { // Tests a simple add model that adds an input tensor to itself. 
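+// The graph below is run with no delegate, with the TFLite delegate selected
+// explicitly, and with the XNNPACK delegate (default and explicit num_threads).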
TEST(TfLiteInferenceCalculatorTest, SmokeTest) { - DoSmokeTest(/*delegate=*/""); - DoSmokeTest(/*delegate=*/"delegate { tflite {} }"); + std::string graph_proto = R"( + input_stream: "tensor_in" + node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:tensor_in" + output_stream: "TENSORS:tensor_out" + options { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/calculators/tflite/testdata/add.bin" + $delegate + } + } + } + )"; + DoSmokeTest( + /*graph_proto=*/absl::StrReplaceAll(graph_proto, {{"$delegate", ""}})); + DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll( + graph_proto, {{"$delegate", "delegate { tflite {} }"}})); + DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll( + graph_proto, {{"$delegate", "delegate { xnnpack {} }"}})); + DoSmokeTest(/*graph_proto=*/absl::StrReplaceAll( + graph_proto, + {{"$delegate", "delegate { xnnpack { num_threads: 10 } }"}})); +} + +TEST(TfLiteInferenceCalculatorTest, SmokeTest_ModelAsInputSidePacket) { + std::string graph_proto = R"( + input_stream: "tensor_in" + + node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { string_value: "mediapipe/calculators/tflite/testdata/add.bin" } + } + } + } + + node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + } + + node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" + } + + node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:tensor_in" + output_stream: "TENSORS:tensor_out" + input_side_packet: "MODEL:model" + options { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + use_gpu: false + } + } + } + )"; + DoSmokeTest(graph_proto); } } // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_model_calculator.cc b/mediapipe/calculators/tflite/tflite_model_calculator.cc new file mode 100644 index 000000000..d24c55b14 --- /dev/null +++ b/mediapipe/calculators/tflite/tflite_model_calculator.cc @@ -0,0 +1,86 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/ret_check.h" +#include "tensorflow/lite/model.h" + +namespace mediapipe { + +// Loads TfLite model from model blob specified as input side packet and outputs +// corresponding side packet. +// +// Input side packets: +// MODEL_BLOB - TfLite model blob/file-contents (std::string). You can read +// model blob from file (using whatever APIs you have) and pass +// it to the graph as input side packet or you can use some of +// calculators like LocalFileContentsCalculator to get model +// blob and use it as input here. +// +// Output side packets: +// MODEL - TfLite model. 
(std::unique_ptr>) +// +// Example use: +// +// node { +// calculator: "TfLiteModelCalculator" +// input_side_packet: "MODEL_BLOB:model_blob" +// output_side_packet: "MODEL:model" +// } +// +class TfLiteModelCalculator : public CalculatorBase { + public: + using TfLiteModelPtr = + std::unique_ptr>; + + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets().Tag("MODEL_BLOB").Set(); + cc->OutputSidePackets().Tag("MODEL").Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + const Packet& model_packet = cc->InputSidePackets().Tag("MODEL_BLOB"); + const std::string& model_blob = model_packet.Get(); + std::unique_ptr model = + tflite::FlatBufferModel::BuildFromBuffer(model_blob.data(), + model_blob.size()); + RET_CHECK(model) << "Failed to load TfLite model from blob."; + + cc->OutputSidePackets().Tag("MODEL").Set( + MakePacket(TfLiteModelPtr( + model.release(), [model_packet](tflite::FlatBufferModel* model) { + // Keeping model_packet in order to keep underlying model blob + // which can be released only after TfLite model is not needed + // anymore (deleted). + delete model; + }))); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(TfLiteModelCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_model_calculator_test.cc b/mediapipe/calculators/tflite/tflite_model_calculator_test.cc new file mode 100644 index 000000000..fed3743a5 --- /dev/null +++ b/mediapipe/calculators/tflite/tflite_model_calculator_test.cc @@ -0,0 +1,88 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT +#include "tensorflow/lite/model.h" + +namespace mediapipe { + +TEST(TfLiteModelCalculatorTest, SmokeTest) { + // Prepare single calculator graph to and wait for packets. 
+ CalculatorGraphConfig graph_config = ParseTextProtoOrDie< + CalculatorGraphConfig>( + R"( + node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:model_path" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { + string_value: "mediapipe/calculators/tflite/testdata/add.bin" + } + } + } + } + + node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:model_path" + output_side_packet: "CONTENTS:model_blob" + } + + node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:model_blob" + output_side_packet: "MODEL:model" + } + )"); + CalculatorGraph graph(graph_config); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + auto status_or_packet = graph.GetOutputSidePacket("model"); + MP_ASSERT_OK(status_or_packet); + auto model_packet = status_or_packet.ValueOrDie(); + const auto& model = model_packet.Get< + std::unique_ptr>>(); + + auto expected_model = tflite::FlatBufferModel::BuildFromFile( + "mediapipe/calculators/tflite/testdata/add.bin"); + + EXPECT_EQ(model->GetModel()->version(), + expected_model->GetModel()->version()); + EXPECT_EQ(model->GetModel()->buffers()->size(), + expected_model->GetModel()->buffers()->size()); + const int num_subgraphs = expected_model->GetModel()->subgraphs()->size(); + EXPECT_EQ(model->GetModel()->subgraphs()->size(), num_subgraphs); + for (int i = 0; i < num_subgraphs; ++i) { + const auto* expected_subgraph = + expected_model->GetModel()->subgraphs()->Get(i); + const auto* subgraph = model->GetModel()->subgraphs()->Get(i); + const int num_tensors = expected_subgraph->tensors()->size(); + EXPECT_EQ(subgraph->tensors()->size(), num_tensors); + for (int j = 0; j < num_tensors; ++j) { + EXPECT_EQ(subgraph->tensors()->Get(j)->name()->str(), + expected_subgraph->tensors()->Get(j)->name()->str()); + } + } +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc index 6e1c6e1e6..e9c09169b 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc @@ -129,22 +129,43 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); num_classes *= raw_score_tensor->dims->data[i]; } + if (options_.binary_classification()) { + RET_CHECK_EQ(num_classes, 1); + // Number of classes for binary classification. + num_classes = 2; + } if (label_map_loaded_) { RET_CHECK_EQ(num_classes, label_map_.size()); } const float* raw_scores = raw_score_tensor->data.f; auto classification_list = absl::make_unique(); - for (int i = 0; i < num_classes; ++i) { - if (options_.has_min_score_threshold() && - raw_scores[i] < options_.min_score_threshold()) { - continue; - } - Classification* classification = classification_list->add_classification(); - classification->set_index(i); - classification->set_score(raw_scores[i]); + if (options_.binary_classification()) { + Classification* class_first = classification_list->add_classification(); + Classification* class_second = classification_list->add_classification(); + class_first->set_index(0); + class_second->set_index(1); + class_first->set_score(raw_scores[0]); + class_second->set_score(1. 
- raw_scores[0]); + if (label_map_loaded_) { - classification->set_label(label_map_[i]); + class_first->set_label(label_map_[0]); + class_second->set_label(label_map_[1]); + } + } else { + for (int i = 0; i < num_classes; ++i) { + if (options_.has_min_score_threshold() && + raw_scores[i] < options_.min_score_threshold()) { + continue; + } + Classification* classification = + classification_list->add_classification(); + classification->set_index(i); + classification->set_score(raw_scores[i]); + + if (label_map_loaded_) { + classification->set_label(label_map_[i]); + } } } diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto index a2b5dd224..c6c9d915d 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.proto @@ -32,4 +32,10 @@ message TfLiteTensorsToClassificationCalculatorOptions { optional int32 top_k = 2; // Path to a label map file for getting the actual name of class ids. optional string label_map_path = 3; + // Whether the input is a single float for binary classification. + // When true, only a single float is expected in the input tensor and the + // label map, if provided, is expected to have exactly two labels. + // The single score(float) represent the probability of first label, and + // 1 - score is the probabilility of the second label. + optional bool binary_classification = 4; } diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index b5df9ff04..3d2210ca4 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -998,6 +998,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", @@ -1015,6 +1016,7 @@ cc_library( deps = [ ":collection_has_min_size_calculator_cc_proto", "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -1022,6 +1024,18 @@ cc_library( alwayslink = 1, ) +cc_test( + name = "collection_has_min_size_calculator_test", + srcs = ["collection_has_min_size_calculator_test.cc"], + deps = [ + ":collection_has_min_size_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + ], +) + cc_library( name = "association_calculator", hdrs = ["association_calculator.h"], diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator.cc b/mediapipe/calculators/util/collection_has_min_size_calculator.cc index 5ff43c605..22bfb9c4c 100644 --- a/mediapipe/calculators/util/collection_has_min_size_calculator.cc +++ b/mediapipe/calculators/util/collection_has_min_size_calculator.cc @@ -15,6 +15,9 @@ #include "mediapipe/calculators/util/collection_has_min_size_calculator.h" +#include + +#include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" namespace mediapipe { @@ -23,4 +26,9 @@ typedef CollectionHasMinSizeCalculator> NormalizedRectVectorHasMinSizeCalculator; 
REGISTER_CALCULATOR(NormalizedRectVectorHasMinSizeCalculator); +typedef CollectionHasMinSizeCalculator< + std::vector<::mediapipe::NormalizedLandmarkList>> + NormalizedLandmarkListVectorHasMinSizeCalculator; +REGISTER_CALCULATOR(NormalizedLandmarkListVectorHasMinSizeCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator_test.cc b/mediapipe/calculators/util/collection_has_min_size_calculator_test.cc new file mode 100644 index 000000000..be3dc41e6 --- /dev/null +++ b/mediapipe/calculators/util/collection_has_min_size_calculator_test.cc @@ -0,0 +1,156 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/collection_has_min_size_calculator.h" + +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { + +typedef CollectionHasMinSizeCalculator> + TestIntCollectionHasMinSizeCalculator; +REGISTER_CALCULATOR(TestIntCollectionHasMinSizeCalculator); + +void AddInputVector(const std::vector& input, int64 timestamp, + CalculatorRunner* runner) { + runner->MutableInputs() + ->Tag("ITERABLE") + .packets.push_back( + MakePacket>(input).At(Timestamp(timestamp))); +} + +TEST(TestIntCollectionHasMinSizeCalculator, DoesHaveMinSize) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + output_stream: "output_vector" + options { + [mediapipe.CollectionHasMinSizeCalculatorOptions.ext] { min_size: 2 } + } + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + AddInputVector({1, 2}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_TRUE(outputs[0].Get()); + + AddInputVector({1, 2, 3}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_TRUE(outputs[1].Get()); +} + +TEST(TestIntCollectionHasMinSizeCalculator, + DoesHaveMinSize_MinSizeAsSidePacket) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + input_side_packet: "min_size" + output_stream: "output_vector" + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + runner.MutableSidePackets()->Index(0) = MakePacket(2); + + AddInputVector({1, 2}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), 
outputs[0].Timestamp()); + EXPECT_TRUE(outputs[0].Get()); + + AddInputVector({1, 2, 3}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_TRUE(outputs[1].Get()); +} + +TEST(TestIntCollectionHasMinSizeCalculator, DoesNotHaveMinSize) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + output_stream: "output_vector" + options { + [mediapipe.CollectionHasMinSizeCalculatorOptions.ext] { min_size: 3 } + } + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + AddInputVector({1}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_FALSE(outputs[0].Get()); + + AddInputVector({1, 2}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_FALSE(outputs[1].Get()); +} + +TEST(TestIntCollectionHasMinSizeCalculator, + DoesNotHaveMinSize_MinSizeAsSidePacket) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestIntCollectionHasMinSizeCalculator" + input_stream: "ITERABLE:input_vector" + input_side_packet: "min_size" + output_stream: "output_vector" + )"); + CalculatorRunner runner(node_config); + const std::vector& outputs = runner.Outputs().Index(0).packets; + + runner.MutableSidePackets()->Index(0) = MakePacket(3); + + AddInputVector({1}, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_FALSE(outputs[0].Get()); + + AddInputVector({1, 2}, /*timestamp=*/2, &runner); + MP_ASSERT_OK(runner.Run()); + + EXPECT_EQ(2, outputs.size()); + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + EXPECT_FALSE(outputs[1].Get()); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/filter_collection_calculator.cc b/mediapipe/calculators/util/filter_collection_calculator.cc index e110afe7d..356b03dd6 100644 --- a/mediapipe/calculators/util/filter_collection_calculator.cc +++ b/mediapipe/calculators/util/filter_collection_calculator.cc @@ -17,6 +17,7 @@ #include +#include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/formats/landmark.pb.h" #include "mediapipe/framework/formats/rect.pb.h" @@ -31,4 +32,8 @@ typedef FilterCollectionCalculator< FilterLandmarkListCollectionCalculator; REGISTER_CALCULATOR(FilterLandmarkListCollectionCalculator); +typedef FilterCollectionCalculator> + FilterClassificationListCollectionCalculator; +REGISTER_CALCULATOR(FilterClassificationListCollectionCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc index c2b318a3d..d83df435d 100644 --- a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc @@ -29,6 +29,7 @@ namespace { constexpr char kLandmarksTag[] = "LANDMARKS"; constexpr char kNormLandmarksTag[] = "NORM_LANDMARKS"; +constexpr char kRenderScaleTag[] = "RENDER_SCALE"; constexpr char kRenderDataTag[] = "RENDER_DATA"; constexpr char kLandmarkLabel[] = "KEYPOINT"; constexpr int kMaxLandmarkThickness = 18; @@ -71,6 +72,83 @@ void 
SetColorSizeValueFromZ(float z, float z_min, float z_max, render_annotation->set_thickness(thickness); } +template +void AddConnectionToRenderData(const LandmarkType& start, + const LandmarkType& end, int gray_val1, + int gray_val2, float thickness, bool normalized, + RenderData* render_data) { + auto* connection_annotation = render_data->add_render_annotations(); + RenderAnnotation::GradientLine* line = + connection_annotation->mutable_gradient_line(); + line->set_x_start(start.x()); + line->set_y_start(start.y()); + line->set_x_end(end.x()); + line->set_y_end(end.y()); + line->set_normalized(normalized); + line->mutable_color1()->set_r(gray_val1); + line->mutable_color1()->set_g(gray_val1); + line->mutable_color1()->set_b(gray_val1); + line->mutable_color2()->set_r(gray_val2); + line->mutable_color2()->set_g(gray_val2); + line->mutable_color2()->set_b(gray_val2); + connection_annotation->set_thickness(thickness); +} + +template +void AddConnectionsWithDepth(const LandmarkListType& landmarks, + const std::vector& landmark_connections, + float thickness, bool normalized, float min_z, + float max_z, RenderData* render_data) { + for (int i = 0; i < landmark_connections.size(); i += 2) { + const auto& ld0 = landmarks.landmark(landmark_connections[i]); + const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]); + const int gray_val1 = + 255 - static_cast(Remap(ld0.z(), min_z, max_z, 255)); + const int gray_val2 = + 255 - static_cast(Remap(ld1.z(), min_z, max_z, 255)); + AddConnectionToRenderData(ld0, ld1, gray_val1, gray_val2, + thickness, normalized, render_data); + } +} + +template +void AddConnectionToRenderData(const LandmarkType& start, + const LandmarkType& end, + const Color& connection_color, float thickness, + bool normalized, RenderData* render_data) { + auto* connection_annotation = render_data->add_render_annotations(); + RenderAnnotation::Line* line = connection_annotation->mutable_line(); + line->set_x_start(start.x()); + line->set_y_start(start.y()); + line->set_x_end(end.x()); + line->set_y_end(end.y()); + line->set_normalized(normalized); + SetColor(connection_annotation, connection_color); + connection_annotation->set_thickness(thickness); +} + +template +void AddConnections(const LandmarkListType& landmarks, + const std::vector& landmark_connections, + const Color& connection_color, float thickness, + bool normalized, RenderData* render_data) { + for (int i = 0; i < landmark_connections.size(); i += 2) { + const auto& ld0 = landmarks.landmark(landmark_connections[i]); + const auto& ld1 = landmarks.landmark(landmark_connections[i + 1]); + AddConnectionToRenderData(ld0, ld1, connection_color, + thickness, normalized, render_data); + } +} + +RenderAnnotation* AddPointRenderData(const Color& landmark_color, + float thickness, RenderData* render_data) { + auto* landmark_data_annotation = render_data->add_render_annotations(); + landmark_data_annotation->set_scene_tag(kLandmarkLabel); + SetColor(landmark_data_annotation, landmark_color); + landmark_data_annotation->set_thickness(thickness); + return landmark_data_annotation; +} + } // namespace // A calculator that converts Landmark proto to RenderData proto for @@ -107,29 +185,6 @@ class LandmarksToRenderDataCalculator : public CalculatorBase { ::mediapipe::Status Process(CalculatorContext* cc) override; private: - static void AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - RenderData* render_data); - static 
void SetRenderAnnotationColorThickness( - const LandmarksToRenderDataCalculatorOptions& options, - RenderAnnotation* render_annotation); - static RenderAnnotation* AddPointRenderData( - const LandmarksToRenderDataCalculatorOptions& options, - RenderData* render_data); - static void AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - int gray_val1, int gray_val2, RenderData* render_data); - - template - void AddConnections(const LandmarkListType& landmarks, bool normalized, - RenderData* render_data); - template - void AddConnectionsWithDepth(const LandmarkListType& landmarks, - bool normalized, float min_z, float max_z, - RenderData* render_data); - LandmarksToRenderDataCalculatorOptions options_; }; REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); @@ -150,6 +205,9 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); if (cc->Inputs().HasTag(kNormLandmarksTag)) { cc->Inputs().Tag(kNormLandmarksTag).Set(); } + if (cc->Inputs().HasTag(kRenderScaleTag)) { + cc->Inputs().Tag(kRenderScaleTag).Set(); + } cc->Outputs().Tag(kRenderDataTag).Set(); return ::mediapipe::OkStatus(); } @@ -169,11 +227,26 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); float z_min = 0.f; float z_max = 0.f; + // Apply scale to `thickness` of rendered landmarks and connections to make + // them bigger when object (e.g. pose, hand or face) is closer/bigger and + // snaller when object is further/smaller. + float thickness = options_.thickness(); + if (cc->Inputs().HasTag(kRenderScaleTag)) { + const float render_scale = cc->Inputs().Tag(kRenderScaleTag).Get(); + thickness *= render_scale; + } + + // Parse landmarks connections to a vector. + RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) + << "Number of entries in landmark connections must be a multiple of 2"; + std::vector landmark_connections; + for (int i = 0; i < options_.landmark_connections_size(); i += 1) { + landmark_connections.push_back(options_.landmark_connections(i)); + } + if (cc->Inputs().HasTag(kLandmarksTag)) { const LandmarkList& landmarks = cc->Inputs().Tag(kLandmarksTag).Get(); - RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) - << "Number of entries in landmark connections must be a multiple of 2"; if (visualize_depth) { GetMinMaxZ(landmarks, &z_min, &z_max); } @@ -181,8 +254,8 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); visualize_depth &= ((z_max - z_min) > 1e-3); for (int i = 0; i < landmarks.landmark_size(); ++i) { const Landmark& landmark = landmarks.landmark(i); - auto* landmark_data_render = - AddPointRenderData(options_, render_data.get()); + auto* landmark_data_render = AddPointRenderData( + options_.landmark_color(), thickness, render_data.get()); if (visualize_depth) { SetColorSizeValueFromZ(landmark.z(), z_min, z_max, landmark_data_render); @@ -193,19 +266,19 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); landmark_data->set_y(landmark.y()); } if (visualize_depth) { - AddConnectionsWithDepth(landmarks, /*normalized=*/false, - z_min, z_max, render_data.get()); + AddConnectionsWithDepth( + landmarks, landmark_connections, thickness, /*normalized=*/false, + z_min, z_max, render_data.get()); } else { - AddConnections(landmarks, /*normalized=*/false, - render_data.get()); + AddConnections( + landmarks, landmark_connections, options_.connection_color(), + thickness, /*normalized=*/false, render_data.get()); } } if (cc->Inputs().HasTag(kNormLandmarksTag)) { const 
NormalizedLandmarkList& landmarks = cc->Inputs().Tag(kNormLandmarksTag).Get(); - RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) - << "Number of entries in landmark connections must be a multiple of 2"; if (visualize_depth) { GetMinMaxZ(landmarks, &z_min, &z_max); @@ -214,8 +287,8 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); visualize_depth &= ((z_max - z_min) > 1e-3); for (int i = 0; i < landmarks.landmark_size(); ++i) { const NormalizedLandmark& landmark = landmarks.landmark(i); - auto* landmark_data_render = - AddPointRenderData(options_, render_data.get()); + auto* landmark_data_render = AddPointRenderData( + options_.landmark_color(), thickness, render_data.get()); if (visualize_depth) { SetColorSizeValueFromZ(landmark.z(), z_min, z_max, landmark_data_render); @@ -226,11 +299,13 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); landmark_data->set_y(landmark.y()); } if (visualize_depth) { - AddConnectionsWithDepth( - landmarks, /*normalized=*/true, z_min, z_max, render_data.get()); + AddConnectionsWithDepth( + landmarks, landmark_connections, thickness, /*normalized=*/true, + z_min, z_max, render_data.get()); } else { - AddConnections(landmarks, /*normalized=*/true, - render_data.get()); + AddConnections( + landmarks, landmark_connections, options_.connection_color(), + thickness, /*normalized=*/true, render_data.get()); } } @@ -240,84 +315,4 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); return ::mediapipe::OkStatus(); } -template -void LandmarksToRenderDataCalculator::AddConnectionsWithDepth( - const LandmarkListType& landmarks, bool normalized, float min_z, - float max_z, RenderData* render_data) { - for (int i = 0; i < options_.landmark_connections_size(); i += 2) { - const auto& ld0 = landmarks.landmark(options_.landmark_connections(i)); - const auto& ld1 = landmarks.landmark(options_.landmark_connections(i + 1)); - const int gray_val1 = - 255 - static_cast(Remap(ld0.z(), min_z, max_z, 255)); - const int gray_val2 = - 255 - static_cast(Remap(ld1.z(), min_z, max_z, 255)); - AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(), options_, - normalized, gray_val1, gray_val2, render_data); - } -} - -void LandmarksToRenderDataCalculator::AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - int gray_val1, int gray_val2, RenderData* render_data) { - auto* connection_annotation = render_data->add_render_annotations(); - RenderAnnotation::GradientLine* line = - connection_annotation->mutable_gradient_line(); - line->set_x_start(start_x); - line->set_y_start(start_y); - line->set_x_end(end_x); - line->set_y_end(end_y); - line->set_normalized(normalized); - line->mutable_color1()->set_r(gray_val1); - line->mutable_color1()->set_g(gray_val1); - line->mutable_color1()->set_b(gray_val1); - line->mutable_color2()->set_r(gray_val2); - line->mutable_color2()->set_g(gray_val2); - line->mutable_color2()->set_b(gray_val2); - connection_annotation->set_thickness(options.thickness()); -} - -template -void LandmarksToRenderDataCalculator::AddConnections( - const LandmarkListType& landmarks, bool normalized, - RenderData* render_data) { - for (int i = 0; i < options_.landmark_connections_size(); i += 2) { - const auto& ld0 = landmarks.landmark(options_.landmark_connections(i)); - const auto& ld1 = landmarks.landmark(options_.landmark_connections(i + 1)); - AddConnectionToRenderData(ld0.x(), ld0.y(), ld1.x(), ld1.y(), options_, - normalized, 
render_data); - } -} - -void LandmarksToRenderDataCalculator::AddConnectionToRenderData( - float start_x, float start_y, float end_x, float end_y, - const LandmarksToRenderDataCalculatorOptions& options, bool normalized, - RenderData* render_data) { - auto* connection_annotation = render_data->add_render_annotations(); - RenderAnnotation::Line* line = connection_annotation->mutable_line(); - line->set_x_start(start_x); - line->set_y_start(start_y); - line->set_x_end(end_x); - line->set_y_end(end_y); - line->set_normalized(normalized); - SetColor(connection_annotation, options.connection_color()); - connection_annotation->set_thickness(options.thickness()); -} - -RenderAnnotation* LandmarksToRenderDataCalculator::AddPointRenderData( - const LandmarksToRenderDataCalculatorOptions& options, - RenderData* render_data) { - auto* landmark_data_annotation = render_data->add_render_annotations(); - landmark_data_annotation->set_scene_tag(kLandmarkLabel); - SetRenderAnnotationColorThickness(options, landmark_data_annotation); - return landmark_data_annotation; -} - -void LandmarksToRenderDataCalculator::SetRenderAnnotationColorThickness( - const LandmarksToRenderDataCalculatorOptions& options, - RenderAnnotation* render_annotation) { - SetColor(render_annotation, options.landmark_color()); - render_annotation->set_thickness(options.thickness()); -} - } // namespace mediapipe diff --git a/mediapipe/calculators/util/packet_latency_calculator_test.cc b/mediapipe/calculators/util/packet_latency_calculator_test.cc index 25d28d061..9ba7f70bf 100644 --- a/mediapipe/calculators/util/packet_latency_calculator_test.cc +++ b/mediapipe/calculators/util/packet_latency_calculator_test.cc @@ -276,6 +276,7 @@ TEST_F(PacketLatencyCalculatorTest, DoesNotOutputUntilReferencePacketReceived) { "delayed_packet_0", Adopt(new double()).At(Timestamp(2)))); // Send a reference packet with timestamp 10 usec. 
+ simulation_clock_->Sleep(absl::Microseconds(1)); MP_ASSERT_OK(graph_.AddPacketToInputStream( "camera_frames", Adopt(new double()).At(Timestamp(10)))); simulation_clock_->Sleep(absl::Microseconds(1)); diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index da2bc7fbd..f4fca811a 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -138,7 +138,7 @@ cc_library( srcs = ["flow_to_image_calculator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/video:flow_to_image_calculator_cc_proto", + ":flow_to_image_calculator_cc_proto", "//mediapipe/calculators/video/tool:flow_quantizer_model", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_format_cc_proto", @@ -384,20 +384,18 @@ cc_test( ], ) -MEDIAPIPE_DEPS = [ - "//mediapipe/calculators/video:box_tracker_calculator", - "//mediapipe/calculators/video:flow_packager_calculator", - "//mediapipe/calculators/video:motion_analysis_calculator", - "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", - "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", -] - mediapipe_binary_graph( name = "parallel_tracker_binarypb", graph = "testdata/parallel_tracker_graph.pbtxt", output_name = "testdata/parallel_tracker.binarypb", visibility = ["//visibility:public"], - deps = MEDIAPIPE_DEPS, + deps = [ + ":box_tracker_calculator", + ":flow_packager_calculator", + ":motion_analysis_calculator", + "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + ], ) mediapipe_binary_graph( @@ -405,7 +403,13 @@ mediapipe_binary_graph( graph = "testdata/tracker_graph.pbtxt", output_name = "testdata/tracker.binarypb", visibility = ["//visibility:public"], - deps = MEDIAPIPE_DEPS, + deps = [ + ":box_tracker_calculator", + ":flow_packager_calculator", + ":motion_analysis_calculator", + "//mediapipe/framework/stream_handler:fixed_size_input_stream_handler", + "//mediapipe/framework/stream_handler:sync_set_input_stream_handler", + ], ) cc_test( diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA new file mode 100644 index 000000000..aee0b0fe7 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/METADATA @@ -0,0 +1,7 @@ +tricorder: { + options: { + builder: { + config: "android_arm64" + } + } +} diff --git a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc index fa9d4781a..b038b0f3c 100644 --- a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc +++ b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.cc @@ -95,7 +95,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem( const std::vector& focus_point_frames, const std::vector& prior_focus_point_frames, const int original_width, const int original_height, const int output_width, - const int output_height, std::vector* all_xforms) { + const int output_height, std::vector* all_transforms) { RET_CHECK_GE(original_width, output_width); RET_CHECK_GE(original_height, output_height); const bool should_solve_x_problem = original_width != output_width; @@ -138,9 +138,10 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem( Solver::Options options; 
options.linear_solver_type = ceres::DENSE_QR; - Solver::Summary summary; - Solve(options, &problem_x, &summary); - all_xforms->clear(); + Solver::Summary summary_x, summary_y; + Solve(options, &problem_x, &summary_x); + Solve(options, &problem_y, &summary_y); + all_transforms->clear(); for (int i = 0; i < focus_point_frames.size() + prior_focus_point_frames.size(); i++) { // Code below assigns values into an affine model, defined as: @@ -160,7 +161,7 @@ void PolynomialRegressionPathSolver::AddCostFunctionToProblem( yb_, yc_, yd_, yk_); transform.at(1, 2) = delta; } - all_xforms->push_back(transform); + all_transforms->push_back(transform); } return mediapipe::OkStatus(); } diff --git a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h index cbcb85858..514f8760d 100644 --- a/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h +++ b/mediapipe/examples/desktop/autoflip/quality/polynomial_regression_path_solver.h @@ -40,14 +40,14 @@ class PolynomialRegressionPathSolver { // Given a series of focus points on frames, uses polynomial regression to // compute a best guess of a 1D camera movement trajectory along x-axis and // y-axis, such that focus points can be preserved as much as possible. The - // returned |all_xforms| hold the camera location at each timestamp + // returned |all_transforms| hold the camera location at each timestamp // corresponding to each input frame. ::mediapipe::Status ComputeCameraPath( const std::vector& focus_point_frames, const std::vector& prior_focus_point_frames, const int original_width, const int original_height, const int output_width, const int output_height, - std::vector* all_xforms); + std::vector* all_transforms); private: // Adds a new cost function, constructed using |in| and |out|, into |problem|. diff --git a/mediapipe/examples/desktop/hair_segmentation/BUILD b/mediapipe/examples/desktop/hair_segmentation/BUILD index 0338feddf..69948e437 100644 --- a/mediapipe/examples/desktop/hair_segmentation/BUILD +++ b/mediapipe/examples/desktop/hair_segmentation/BUILD @@ -24,3 +24,17 @@ cc_binary( "//mediapipe/graphs/hair_segmentation:mobile_calculators", ], ) + +cc_binary( + name = "hair_segmentation_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + ] + select({ + "//mediapipe/gpu:disable_gpu": [ + "//mediapipe/graphs/hair_segmentation:desktop_calculators", + ], + "//conditions:default": [ + "//mediapipe/graphs/hair_segmentation:mobile_calculators", + ], + }), +) diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index 4176bcd9c..4ab28fbd2 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -361,6 +361,7 @@ cc_library( "//mediapipe/framework:mediapipe_options_cc_proto", "//mediapipe/framework:packet_generator_cc_proto", "//mediapipe/framework:status_handler_cc_proto", + "//mediapipe/framework:stream_handler_cc_proto", "//mediapipe/framework/port:any_proto", "//mediapipe/framework/port:status", "//mediapipe/framework/tool:options_util", diff --git a/mediapipe/framework/calculator_contract.h b/mediapipe/framework/calculator_contract.h index 2402c2525..a47632fc9 100644 --- a/mediapipe/framework/calculator_contract.h +++ b/mediapipe/framework/calculator_contract.h @@ -84,7 +84,7 @@ class CalculatorContract { return *output_side_packets_; } - // Set this Node's default InputStreamHandler. + // Specifies the preferred InputStreamHandler for this Node. 
// If there is an InputStreamHandler specified in the graph (.pbtxt) for this // Node, then the graph's InputStreamHandler will take priority. void SetInputStreamHandler(const std::string& name) { @@ -104,6 +104,29 @@ class CalculatorContract { return input_stream_handler_options_; } + // The next few methods are concerned with timestamp bound propagation + // (see scheduling_sync.md#input-policies). Every calculator that processes + // live inputs should specify either ProcessTimestampBounds or + // TimestampOffset. Calculators that produce output at the same timestamp as + // the input, or with a fixed offset, should declare this fact using + // SetTimestampOffset. Calculators that require custom timestamp bound + // calculations should use SetProcessTimestampBounds. + + // When true, Process is called for every new timestamp bound, with or without + // new packets. A call to Process with only an input timestamp bound is + // normally used to compute a new output timestamp bound. + void SetProcessTimestampBounds(bool process_timestamps) { + process_timestamps_ = process_timestamps; + } + bool GetProcessTimestampBounds() const { return process_timestamps_; } + + // Specifies the maximum difference between input and output timestamps. + // When specified, the mediapipe framework automatically computes output + // timestamp bounds based on input timestamps. The special value + // TimestampDiff::Unset disables the timestamp offset. + void SetTimestampOffset(TimestampDiff offset) { timestamp_offset_ = offset; } + TimestampDiff GetTimestampOffset() const { return timestamp_offset_; } + class GraphServiceRequest { public: // APIs that should be used by calculators. @@ -147,6 +170,8 @@ class CalculatorContract { MediaPipeOptions input_stream_handler_options_; std::string node_name_; std::map service_requests_; + bool process_timestamps_ = false; + TimestampDiff timestamp_offset_ = TimestampDiff::Unset(); }; } // namespace mediapipe diff --git a/mediapipe/framework/calculator_graph.h b/mediapipe/framework/calculator_graph.h index 63520b90d..50a4069a8 100644 --- a/mediapipe/framework/calculator_graph.h +++ b/mediapipe/framework/calculator_graph.h @@ -143,7 +143,7 @@ class CalculatorGraph { const std::string& graph_type = "", const Subgraph::SubgraphOptions* options = nullptr); - // Resturns the canonicalized CalculatorGraphConfig for this graph. + // Returns the canonicalized CalculatorGraphConfig for this graph. const CalculatorGraphConfig& Config() const { return validated_graph_->Config(); } diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc index 17998a1ff..4de8ffb7b 100644 --- a/mediapipe/framework/calculator_graph_bounds_test.cc +++ b/mediapipe/framework/calculator_graph_bounds_test.cc @@ -31,6 +31,17 @@ namespace { typedef std::function<::mediapipe::Status(CalculatorContext* cc)> CalculatorContextFunction; +// Returns the contents of a set of Packets. +// The contents must be copyable. +template +std::vector GetContents(const std::vector& packets) { + std::vector result; + for (Packet p : packets) { + result.push_back(p.Get()); + } + return result; +} + // A simple Semaphore for synchronizing test threads. class AtomicSemaphore { public: @@ -671,9 +682,9 @@ REGISTER_CALCULATOR(BoundToPacketCalculator); // A Calculator that produces packets at timestamps beyond the input timestamp. 
class FuturePacketCalculator : public CalculatorBase { + public: static constexpr int64 kOutputFutureMicros = 3; - public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { cc->Inputs().Index(0).Set(); cc->Outputs().Index(0).Set(); @@ -742,9 +753,8 @@ TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) { MP_ASSERT_OK(graph.WaitUntilDone()); } -// Shows that bounds changes alone do not invoke Process. -// Note: Bounds changes alone will invoke Process eventually -// when SetOffset is cleared, see: go/mediapipe-realtime-graph. +// Shows that timestamp bounds changes alone do not invoke Process, +// without SetProcessTimestampBounds(true). TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { // OffsetBoundCalculator produces only timestamp bounds. // The BoundToPacketCalculator delivers an output packet whenever the @@ -753,8 +763,13 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { ::mediapipe::ParseTextProtoOrDie(R"( input_stream: 'input' node { - calculator: 'OffsetBoundCalculator' + calculator: 'FuturePacketCalculator' input_stream: 'input' + output_stream: 'input_2' + } + node { + calculator: 'OffsetBoundCalculator' + input_stream: 'input_2' output_stream: 'bounds' } node { @@ -778,6 +793,7 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { for (int i = 0; i < kNumInputs; ++i) { Packet p = MakePacket(33).At(Timestamp(i)); MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); } // No packets arrive, because updated timestamp bounds do not invoke @@ -1104,5 +1120,254 @@ TEST(CalculatorGraphBoundsTest, BoundsForEmptyInputs_SyncSets) { )"); } +// A Calculator that produces a packet for each timestamp bounds update. +class ProcessBoundToPacketCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + cc->Inputs().Index(i).SetAny(); + } + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + cc->Outputs().Index(i).Set(); + } + cc->SetInputStreamHandler("ImmediateInputStreamHandler"); + cc->SetProcessTimestampBounds(true); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + Timestamp t = cc->Inputs().Index(i).Value().Timestamp(); + if (t == cc->InputTimestamp() && + t >= cc->Outputs().Index(i).NextTimestampBound()) { + cc->Outputs().Index(i).Add(new auto(t), t); + } + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(ProcessBoundToPacketCalculator); + +// A Calculator that passes through each packet and timestamp immediately. 
+class ImmediatePassthroughCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + for (int i = 0; i < cc->Inputs().NumEntries(); ++i) { + cc->Inputs().Index(i).SetAny(); + } + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + cc->Outputs().Index(i).SetSameAs(&cc->Inputs().Index(i)); + } + cc->SetInputStreamHandler("ImmediateInputStreamHandler"); + cc->SetProcessTimestampBounds(true); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + if (!cc->Inputs().Index(i).IsEmpty()) { + cc->Outputs().Index(i).AddPacket(cc->Inputs().Index(i).Value()); + } else { + Timestamp input_bound = + cc->Inputs().Index(i).Value().Timestamp().NextAllowedInStream(); + if (cc->Outputs().Index(i).NextTimestampBound() < input_bound) { + cc->Outputs().Index(i).SetNextTimestampBound(input_bound); + } + } + } + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(ImmediatePassthroughCalculator); + +// Shows that Process is called for input-sets without input packets. +void TestProcessForEmptyInputs(const std::string& input_stream_handler) { + // FuturePacketCalculator and OffsetBoundCalculator produce only ts bounds, + // The ProcessBoundToPacketCalculator has SetProcessTimestampBounds(true), + // and produces an output packet for every timestamp bound update. + std::string config_str = R"( + input_stream: 'input' + node { + calculator: 'FuturePacketCalculator' + input_stream: 'input' + output_stream: 'futures' + } + node { + calculator: 'OffsetBoundCalculator' + input_stream: 'futures' + output_stream: 'bounds' + } + node { + calculator: 'ProcessBoundToPacketCalculator' + input_stream: 'bounds' + output_stream: 'bounds_ts' + input_stream_handler { $input_stream_handler } + } + )"; + absl::StrReplaceAll({{"$input_stream_handler", input_stream_handler}}, + &config_str); + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(config_str); + CalculatorGraph graph; + std::vector input_ts_packets; + std::vector bounds_ts_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("bounds_ts", [&](const Packet& p) { + bounds_ts_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Add four packets into the graph. + constexpr int kFutureMicros = FuturePacketCalculator::kOutputFutureMicros; + Packet p; + p = MakePacket(33).At(Timestamp(0)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + p = MakePacket(33).At(Timestamp(10)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + p = MakePacket(33).At(Timestamp(20)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + p = MakePacket(33).At(Timestamp(30)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Packets arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(bounds_ts_packets.size(), 4); + + std::vector expected = { + Timestamp(0 + kFutureMicros), Timestamp(10 + kFutureMicros), + Timestamp(20 + kFutureMicros), Timestamp(30 + kFutureMicros)}; + EXPECT_EQ(GetContents(bounds_ts_packets), expected); + + // Shutdown the graph. 
+ MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +// Shows that Process is called for input-sets without input packets +// using a DefaultInputStreamHandler. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Default) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "DefaultInputStreamHandler")"); +} + +// Shows that Process is called for input-sets without input packets +// using an ImmediateInputStreamHandler. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Immediate) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "ImmediateInputStreamHandler")"); +} + +// Shows that Process is called for input-sets without input packets +// using a SyncSetInputStreamHandler with a single sync-set. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_SyncSet) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "SyncSetInputStreamHandler")"); +} + +// Shows that Process is called for input-sets without input packets +// using a SyncSetInputStreamHandler with multiple sync-sets. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_SyncSets) { + TestProcessForEmptyInputs(R"( + input_stream_handler: "SyncSetInputStreamHandler" + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { tag_index: ":0" } + } + } + )"); +} + +// Demonstrates the functionality of an "ImmediatePassthroughCalculator". +// The ImmediatePassthroughCalculator simply relays each input packet to +// the corresponding output stream. ProcessTimestampBounds is needed to +// relay timestamp bounds as well as packets. +TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Passthrough) { + // OffsetBoundCalculator produces timestamp bounds. + // ImmediatePassthroughCalculator relays packets and bounds. + // ProcessBoundToPacketCalculator reports packets and bounds as packets. + std::string config_str = R"( + input_stream: "input_0" + input_stream: "input_1" + node { + calculator: "OffsetBoundCalculator" + input_stream: "input_1" + output_stream: "bound_1" + } + node { + calculator: "ImmediatePassthroughCalculator" + input_stream: "input_0" + input_stream: "bound_1" + output_stream: "same_0" + output_stream: "same_1" + } + node { + calculator: "ProcessBoundToPacketCalculator" + input_stream: "same_0" + input_stream: "same_1" + output_stream: "output_0" + output_stream: "output_1" + } + )"; + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(config_str); + CalculatorGraph graph; + std::vector output_0_packets; + std::vector output_1_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("output_0", [&](const Packet& p) { + output_0_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.ObserveOutputStream("output_1", [&](const Packet& p) { + output_1_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Add four packets to input_0. + for (int i = 0; i < 4; ++i) { + Packet p = MakePacket(33).At(Timestamp(i * 10)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + } + + // Packets arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(output_0_packets.size(), 4); + EXPECT_EQ(output_1_packets.size(), 0); + std::vector expected = // + {Timestamp(0), Timestamp(10), Timestamp(20), Timestamp(30)}; + EXPECT_EQ(GetContents(output_0_packets), expected); + + // Add two timestamp bounds to bound_1.
+ for (int i = 0; i < 2; ++i) { + Packet p = MakePacket(33).At(Timestamp(10 + i * 10)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input_1", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + } + + // Bounds arrive. + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(output_0_packets.size(), 4); + EXPECT_EQ(output_1_packets.size(), 2); + expected = // + {Timestamp(10), Timestamp(20)}; + EXPECT_EQ(GetContents(output_1_packets), expected); + + // Shutdown the graph. + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/framework/calculator_node.cc b/mediapipe/framework/calculator_node.cc index 2834f011f..f69254db0 100644 --- a/mediapipe/framework/calculator_node.cc +++ b/mediapipe/framework/calculator_node.cc @@ -97,6 +97,7 @@ Timestamp CalculatorNode::SourceProcessOrder( const NodeTypeInfo& node_type_info = validated_graph_->CalculatorInfos()[node_id_]; + const CalculatorContract& contract = node_type_info.Contract(); uses_gpu_ = node_type_info.InputSidePacketTypes().HasTag(kGpuSharedTagName) || @@ -147,6 +148,14 @@ Timestamp CalculatorNode::SourceProcessOrder( use_calc_specified ? handler_config : node_config.input_stream_handler(), node_type_info.InputStreamTypes())); + for (auto& stream : output_stream_handler_->OutputStreams()) { + stream->Spec()->offset_enabled = + (contract.GetTimestampOffset() != TimestampDiff::Unset()); + stream->Spec()->offset = contract.GetTimestampOffset(); + } + input_stream_handler_->SetProcessTimestampBounds( + contract.GetProcessTimestampBounds()); + return InitializeInputStreams(input_stream_managers, output_stream_managers); } diff --git a/mediapipe/framework/deps/registration.cc b/mediapipe/framework/deps/registration.cc index f12a3834f..c467b700b 100644 --- a/mediapipe/framework/deps/registration.cc +++ b/mediapipe/framework/deps/registration.cc @@ -18,6 +18,10 @@ namespace mediapipe { namespace { +// List of namespaces that can register calculators inside the namespace +// and still refer to them using an unqualified name. This whitelist +// is meant to facilitate migration from unqualified to fully qualified +// calculator names. constexpr char const* kTopNamespaces[] = { "mediapipe", }; diff --git a/mediapipe/framework/formats/annotation/BUILD b/mediapipe/framework/formats/annotation/BUILD index 5ea495abe..d501901a7 100644 --- a/mediapipe/framework/formats/annotation/BUILD +++ b/mediapipe/framework/formats/annotation/BUILD @@ -49,3 +49,10 @@ mediapipe_cc_proto_library( visibility = ["//visibility:public"], deps = [":rasterization_proto"], ) + +# Expose the proto source files for building mediapipe AAR. +filegroup( + name = "protos_src", + srcs = glob(["*.proto"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/framework/formats/annotation/rasterization.proto b/mediapipe/framework/formats/annotation/rasterization.proto index 9aad7e88f..38414dff3 100644 --- a/mediapipe/framework/formats/annotation/rasterization.proto +++ b/mediapipe/framework/formats/annotation/rasterization.proto @@ -16,6 +16,9 @@ syntax = "proto2"; package mediapipe; +option java_package = "com.google.mediapipe.formats.annotation.proto"; +option java_outer_classname = "RasterizationProto"; + // A Region can be represented in each frame as a set of scanlines // (compressed RLE, similar to rasterization of polygons). 
// For each scanline with y-coordinate y, we save (possibly multiple) intervals diff --git a/mediapipe/framework/formats/location_data.proto b/mediapipe/framework/formats/location_data.proto index dbbf2909d..3edd54208 100644 --- a/mediapipe/framework/formats/location_data.proto +++ b/mediapipe/framework/formats/location_data.proto @@ -23,6 +23,9 @@ package mediapipe; import "mediapipe/framework/formats/annotation/rasterization.proto"; +option java_package = "com.google.mediapipe.formats.proto"; +option java_outer_classname = "LocationDataProto"; + message LocationData { // The supported formats for representing location data. A single location // must store its data in exactly one way. diff --git a/mediapipe/framework/input_stream_handler.cc b/mediapipe/framework/input_stream_handler.cc index 858360d25..0d6965056 100644 --- a/mediapipe/framework/input_stream_handler.cc +++ b/mediapipe/framework/input_stream_handler.cc @@ -22,6 +22,8 @@ namespace mediapipe { +using SyncSet = InputStreamHandler::SyncSet; + ::mediapipe::Status InputStreamHandler::InitializeInputStreamManagers( InputStreamManager* flat_input_stream_managers) { for (CollectionItemId id = input_stream_managers_.BeginId(); @@ -300,4 +302,92 @@ void InputStreamHandler::SetLatePreparation(bool late_preparation) { late_preparation_ = late_preparation; } +SyncSet::SyncSet(InputStreamHandler* input_stream_handler, + std::vector stream_ids) + : input_stream_handler_(input_stream_handler), + stream_ids_(std::move(stream_ids)) {} + +NodeReadiness SyncSet::GetReadiness(Timestamp* min_stream_timestamp) { + Timestamp min_bound = Timestamp::Done(); + Timestamp min_packet = Timestamp::Done(); + for (CollectionItemId id : stream_ids_) { + const auto& stream = input_stream_handler_->input_stream_managers_.Get(id); + bool empty; + Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); + if (empty) { + min_bound = std::min(min_bound, stream_timestamp); + } else { + min_packet = std::min(min_packet, stream_timestamp); + } + } + *min_stream_timestamp = std::min(min_packet, min_bound); + if (*min_stream_timestamp == Timestamp::Done()) { + last_processed_ts_ = Timestamp::Done().PreviousAllowedInStream(); + return NodeReadiness::kReadyForClose; + } + if (!input_stream_handler_->process_timestamps_) { + // Only an input_ts with packets can be processed. + // Note that (min_bound - 1) is the highest fully settled timestamp. + if (min_bound > min_packet) { + last_processed_ts_ = *min_stream_timestamp; + return NodeReadiness::kReadyForProcess; + } + } else { + // Any unprocessed input_ts can be processed. + // Note that (min_bound - 1) is the highest fully settled timestamp. 
+ Timestamp input_timestamp = + std::min(min_packet, min_bound.PreviousAllowedInStream()); + if (input_timestamp > + std::max(last_processed_ts_, Timestamp::Unstarted())) { + *min_stream_timestamp = input_timestamp; + last_processed_ts_ = input_timestamp; + return NodeReadiness::kReadyForProcess; + } + } + return NodeReadiness::kNotReady; +} + +Timestamp SyncSet::LastProcessed() const { return last_processed_ts_; } + +Timestamp SyncSet::MinPacketTimestamp() const { + Timestamp result = Timestamp::Done(); + for (CollectionItemId id : stream_ids_) { + const auto& stream = input_stream_handler_->input_stream_managers_.Get(id); + bool empty; + Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); + if (!empty) { + result = std::min(result, stream_timestamp); + } + } + return result; +} + +void SyncSet::FillInputSet(Timestamp input_timestamp, + InputStreamShardSet* input_set) { + CHECK(input_timestamp.IsAllowedInStream()); + CHECK(input_set); + for (CollectionItemId id : stream_ids_) { + const auto& stream = input_stream_handler_->input_stream_managers_.Get(id); + int num_packets_dropped = 0; + bool stream_is_done = false; + Packet current_packet = stream->PopPacketAtTimestamp( + input_timestamp, &num_packets_dropped, &stream_is_done); + CHECK_EQ(num_packets_dropped, 0) + << absl::Substitute("Dropped $0 packet(s) on input stream \"$1\".", + num_packets_dropped, stream->Name()); + input_stream_handler_->AddPacketToShard( + &input_set->Get(id), std::move(current_packet), stream_is_done); + } +} + +void SyncSet::FillInputBounds(InputStreamShardSet* input_set) { + for (CollectionItemId id : stream_ids_) { + const auto* stream = input_stream_handler_->input_stream_managers_.Get(id); + Timestamp bound = stream->MinTimestampOrBound(nullptr); + input_stream_handler_->AddPacketToShard( + &input_set->Get(id), Packet().At(bound.PreviousAllowedInStream()), + bound == Timestamp::Done()); + } +} + } // namespace mediapipe diff --git a/mediapipe/framework/input_stream_handler.h b/mediapipe/framework/input_stream_handler.h index 859610397..8d8b5a716 100644 --- a/mediapipe/framework/input_stream_handler.h +++ b/mediapipe/framework/input_stream_handler.h @@ -74,9 +74,7 @@ class InputStreamHandler { : input_stream_managers_(std::move(tag_map)), calculator_context_manager_(calculator_context_manager), options_(options), - calculator_run_in_parallel_(calculator_run_in_parallel), - late_preparation_(false), - batch_size_(1) {} + calculator_run_in_parallel_(calculator_run_in_parallel) {} virtual ~InputStreamHandler() = default; @@ -174,6 +172,57 @@ class InputStreamHandler { return unset_header_count_.load(std::memory_order_relaxed); } + // When true, Calculator::Process is called for any increase in the + // timestamp bound, whether or not any packets are available. + // Calculator::Process is called when the minimum timestamp bound + // increases for any synchronized set of input streams. + // DefaultInputStreamHandler groups all input streams into a single set. + // ImmediateInputStreamHandler treats each input stream as a separate set. + void SetProcessTimestampBounds(bool process_ts) { + process_timestamps_ = process_ts; + } + + // When true, Calculator::Process is called for every input timestamp bound. + bool ProcessTimestampBounds() { return process_timestamps_; } + + // A helper class to build input packet sets for a certain set of streams. + // + // ReadyForProcess requires all of the streams to be fully determined + // at the same input-timestamp. 
+ // This is the readiness policy for all streams in DefaultInputStreamHandler. + // It is also the policy for each sync-set in SyncSetInputStreamHandler. + // It is also the policy for each input-stream in ImmediateInputStreamHandler. + // + // If ProcessTimestampBounds() is set, then a fully determined input timestamp + // with only empty input packets will qualify as ReadyForProcess. + class SyncSet { + public: + // Creates a SyncSet for a certain set of streams, |stream_ids|. + SyncSet(InputStreamHandler* input_stream_handler, + std::vector stream_ids); + + // Answers whether this stream is ready for Process or Close. + NodeReadiness GetReadiness(Timestamp* min_stream_timestamp); + + // Returns the latest timestamp returned for processing. + Timestamp LastProcessed() const; + + // The earliest available packet timestamp, or Timestamp::Done. + Timestamp MinPacketTimestamp() const; + + // Moves packets from all input streams to the input_set. + void FillInputSet(Timestamp input_timestamp, + InputStreamShardSet* input_set); + + // Copies timestamp bounds from all input streams to the input_set. + void FillInputBounds(InputStreamShardSet* input_set); + + private: + InputStreamHandler* input_stream_handler_; + std::vector stream_ids_; + Timestamp last_processed_ts_ = Timestamp::Unset(); + }; + protected: typedef internal::Collection InputStreamManagerSet; @@ -240,11 +289,14 @@ class InputStreamHandler { // The variable is set to false by default. A subclass should set it to true // with SetLatePreparation(true) in the constructor if the input sets need to // be filled in ProcessNode(). - bool late_preparation_; + bool late_preparation_ = false; // Determines how many sets of input packets are collected before a // CalculatorNode is scheduled. - int batch_size_; + int batch_size_ = 1; + + // When true, any increase in timestamp bound invokes Calculator::Process. + bool process_timestamps_ = false; // A callback to notify the observer when all the input stream headers // (excluding headers of back edges) become available. diff --git a/mediapipe/framework/legacy_calculator_support.h b/mediapipe/framework/legacy_calculator_support.h index a78a21b91..019473e67 100644 --- a/mediapipe/framework/legacy_calculator_support.h +++ b/mediapipe/framework/legacy_calculator_support.h @@ -107,6 +107,9 @@ CalculatorContext* LegacyCalculatorSupport::Scoped::current_; template <> CalculatorContract* LegacyCalculatorSupport::Scoped::current_; +#elif _MSC_VER +// MSVC interprets these declarations as definitions and during linking it +// generates an error about multiple definitions of current_. #else template <> thread_local CalculatorContext* diff --git a/mediapipe/framework/output_stream_handler.h b/mediapipe/framework/output_stream_handler.h index db1d4089a..b9ec42b92 100644 --- a/mediapipe/framework/output_stream_handler.h +++ b/mediapipe/framework/output_stream_handler.h @@ -46,6 +46,7 @@ class OutputStreamHandler { // ids of upstream sources that affect it. typedef std::unordered_map> OutputStreamToSourcesMap; + typedef internal::Collection OutputStreamManagerSet; // The constructor of the OutputStreamHandler takes four arguments. // The tag_map argument holds the information needed for tag/index retrieval @@ -119,9 +120,11 @@ class OutputStreamHandler { // collection for debugging purpose. 
std::string FirstStreamName() const; - protected: - typedef internal::Collection OutputStreamManagerSet; + const OutputStreamManagerSet& OutputStreams() { + return output_stream_managers_; + } + protected: // Checks if the given input bound should be propagated or not. If any output // streams with OffsetEnabled() need to have the timestamp bounds updated, // then propagates the timestamp bounds of all output streams with diff --git a/mediapipe/framework/output_stream_poller.h b/mediapipe/framework/output_stream_poller.h index ff803317f..26c0e72b2 100644 --- a/mediapipe/framework/output_stream_poller.h +++ b/mediapipe/framework/output_stream_poller.h @@ -27,6 +27,9 @@ class OutputStreamPoller { OutputStreamPoller(const OutputStreamPoller&) = delete; OutputStreamPoller& operator=(const OutputStreamPoller&) = delete; OutputStreamPoller(OutputStreamPoller&&) = default; + // Move assignment needs to be explicitly defaulted to allow ASSIGN_OR_RETURN + // on `StatusOr`. + OutputStreamPoller& operator=(OutputStreamPoller&&) = default; // Resets OutputStramPollerImpl and cleans the internal packet queue. void Reset() { diff --git a/mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt b/mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt new file mode 100644 index 000000000..60e7a0e47 --- /dev/null +++ b/mediapipe/framework/profiler/testdata/profile_latency_test.pbtxt @@ -0,0 +1,97 @@ +graph_trace: { + calculator_name : ["ACalculator", "BCalculator"] + stream_name : [ "", "input1", "a_b"] + base_time : 0 + base_timestamp : 100 + + # Fire off three input packets and have them spend time in Calculator A. + # Drop the middle packet. + + calculator_trace: { + node_id: -1 + input_timestamp: 100 + event_type : PROCESS + finish_time : 1000 + output_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + calculator_trace: { + node_id: -1 + input_timestamp: 101 + event_type : PROCESS + finish_time : 2000 + output_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 0 + input_timestamp: 100 + event_type : PROCESS + start_time : 1200 # 200 after initial input (emits at 1000) + finish_time : 1500 # Speed to delivery is 500 (1500 - 1000) + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 0 + input_timestamp: 101 + event_type : PROCESS + start_time : 2100 # 100 after initial input (emits at 2000) + finish_time : 2500 # Speed to delivery is 500 (2500 - 2000) + input_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 1 + input_timestamp: 100 + event_type : PROCESS + start_time : 1600 # 600 after the initial input (emits at 1000) + finish_time : 2000 # Speed to delivery is 1000 (2000 - 1000) + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + + calculator_trace: { + node_id: 1 + input_timestamp: 101 + event_type : PROCESS + start_time : 2900 # 700 after the initial input (emits at 2000) + finish_time : 3100 # Speed to delivery is 1000 (3000 - 2000) + input_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } +} +config: { + node: { + name: "ACalculator" + calculator: "ACalculator" + input_stream: "input1" + output_stream: "a_b" + } + node: { + name: "BCalculator" + calculator: "BCalculator" + input_stream: "a_b" + } +} diff --git a/mediapipe/framework/profiler/testdata/profile_process_test.pbtxt 
b/mediapipe/framework/profiler/testdata/profile_process_test.pbtxt new file mode 100644 index 000000000..0f4bfab52 --- /dev/null +++ b/mediapipe/framework/profiler/testdata/profile_process_test.pbtxt @@ -0,0 +1,122 @@ +graph_trace: { + calculator_name : ["ACalculator", "BCalculator"] + stream_name : [ "", "input1"] + base_time : 0 + base_timestamp : 100 + + # Fire off three input packets and have them spend time in Calculator A. + # Drop the middle packet. + + calculator_trace: { + node_id: -1 + input_timestamp: 100 + event_type : PROCESS + finish_time : 1000 + output_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + calculator_trace: { + node_id: -1 + input_timestamp: 101 + event_type : PROCESS + finish_time : 2000 + output_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + calculator_trace: { + node_id: -1 + input_timestamp: 102 + event_type : PROCESS + finish_time : 3000 + output_trace: { + packet_timestamp: 102 + stream_id : 1 + } + thread_id : 1 + } + + # First event is disconnected. We'll see the output_trace later. + calculator_trace: { + node_id: 0 + input_timestamp: 100 + event_type : PROCESS + start_time : 1100 + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + + # # We're going to drop this packet. + calculator_trace: { + node_id: 0 + input_timestamp: 101 + event_type : PROCESS + start_time : 2100 + input_trace: { + packet_timestamp: 101 + stream_id : 1 + } + thread_id : 1 + } + # # Here's that matching output trace. + calculator_trace: { + node_id: 0 + input_timestamp: 100 + event_type : PROCESS + finish_time : 1500 + input_trace: { + packet_timestamp: 100 + stream_id : 1 + } + thread_id : 1 + } + # Third packet is processed all at the same time. + calculator_trace: { + node_id: 0 + input_timestamp: 102 + event_type : PROCESS + start_time : 3100 + finish_time : 3600 + input_trace: { + packet_timestamp: 102 + stream_id : 1 + } + thread_id : 1 + } + + # A second calculator will process an input in order to affect the + # time_percent. + + calculator_trace: { + node_id: 1 + input_timestamp: 102 + event_type : PROCESS + start_time : 3200 + finish_time : 3500 + input_trace: { + packet_timestamp: 102 + stream_id : 1 + } + thread_id : 1 + } +} +config: { + node: { + name: "ACalculator" + calculator: "ACalculator" + input_stream: "input1" + } + node: { + name: "BCalculator" + calculator: "BCalculator" + input_stream: "input1" + } +} diff --git a/mediapipe/framework/scheduler_queue.cc b/mediapipe/framework/scheduler_queue.cc index 06f8a50cd..1b491cc36 100644 --- a/mediapipe/framework/scheduler_queue.cc +++ b/mediapipe/framework/scheduler_queue.cc @@ -25,7 +25,11 @@ #include "mediapipe/framework/port/logging.h" #include "mediapipe/framework/port/status.h" +#ifdef __APPLE__ +#define AUTORELEASEPOOL @autoreleasepool +#else #define AUTORELEASEPOOL +#endif // __APPLE__ namespace mediapipe { namespace internal { diff --git a/mediapipe/framework/stream_handler/default_input_stream_handler.cc b/mediapipe/framework/stream_handler/default_input_stream_handler.cc index 4d825ce92..a5f6878b1 100644 --- a/mediapipe/framework/stream_handler/default_input_stream_handler.cc +++ b/mediapipe/framework/stream_handler/default_input_stream_handler.cc @@ -17,16 +17,28 @@ #include #include "absl/strings/substitute.h" +#include "mediapipe/framework/input_stream_handler.h" namespace mediapipe { REGISTER_INPUT_STREAM_HANDLER(DefaultInputStreamHandler); +// Returns all CollectionItemId's for a Collection TagMap. 
+std::vector GetIds( + const std::shared_ptr& tag_map) { + std::vector result; + for (auto id = tag_map->BeginId(); id < tag_map->EndId(); ++id) { + result.push_back(id); + } + return result; +} + DefaultInputStreamHandler::DefaultInputStreamHandler( std::shared_ptr tag_map, CalculatorContextManager* cc_manager, const MediaPipeOptions& options, bool calculator_run_in_parallel) : InputStreamHandler(std::move(tag_map), cc_manager, options, - calculator_run_in_parallel) { + calculator_run_in_parallel), + sync_set_(this, GetIds(input_stream_managers_.TagMap())) { if (options.HasExtension(DefaultInputStreamHandlerOptions::ext)) { SetBatchSize(options.GetExtension(DefaultInputStreamHandlerOptions::ext) .batch_size()); @@ -35,47 +47,12 @@ DefaultInputStreamHandler::DefaultInputStreamHandler( NodeReadiness DefaultInputStreamHandler::GetNodeReadiness( Timestamp* min_stream_timestamp) { - DCHECK(min_stream_timestamp); - *min_stream_timestamp = Timestamp::Done(); - Timestamp min_bound = Timestamp::Done(); - for (const auto& stream : input_stream_managers_) { - bool empty; - Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); - if (empty) { - min_bound = std::min(min_bound, stream_timestamp); - } - *min_stream_timestamp = std::min(*min_stream_timestamp, stream_timestamp); - } - - if (*min_stream_timestamp == Timestamp::Done()) { - return NodeReadiness::kReadyForClose; - } - - if (min_bound > *min_stream_timestamp) { - return NodeReadiness::kReadyForProcess; - } - - CHECK_EQ(min_bound, *min_stream_timestamp); - return NodeReadiness::kNotReady; + return sync_set_.GetReadiness(min_stream_timestamp); } void DefaultInputStreamHandler::FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) { - CHECK(input_timestamp.IsAllowedInStream()); - CHECK(input_set); - for (CollectionItemId id = input_stream_managers_.BeginId(); - id < input_stream_managers_.EndId(); ++id) { - auto& stream = input_stream_managers_.Get(id); - int num_packets_dropped = 0; - bool stream_is_done = false; - Packet current_packet = stream->PopPacketAtTimestamp( - input_timestamp, &num_packets_dropped, &stream_is_done); - CHECK_EQ(num_packets_dropped, 0) - << absl::Substitute("Dropped $0 packet(s) on input stream \"$1\".", - num_packets_dropped, stream->Name()); - AddPacketToShard(&input_set->Get(id), std::move(current_packet), - stream_is_done); - } + sync_set_.FillInputSet(input_timestamp, input_set); } } // namespace mediapipe diff --git a/mediapipe/framework/stream_handler/default_input_stream_handler.h b/mediapipe/framework/stream_handler/default_input_stream_handler.h index fc90b3b25..b83dc98b4 100644 --- a/mediapipe/framework/stream_handler/default_input_stream_handler.h +++ b/mediapipe/framework/stream_handler/default_input_stream_handler.h @@ -45,6 +45,9 @@ class DefaultInputStreamHandler : public InputStreamHandler { // Only invoked when associated GetNodeReadiness() returned kReadyForProcess. void FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) override; + + // The packet-set builder. 
+ SyncSet sync_set_; }; } // namespace mediapipe diff --git a/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc b/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc index 97b0ad782..b34d08498 100644 --- a/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc +++ b/mediapipe/framework/stream_handler/immediate_input_stream_handler.cc @@ -19,6 +19,8 @@ namespace mediapipe { +using SyncSet = InputStreamHandler::SyncSet; + // An input stream handler that delivers input packets to the Calculator // immediately, with no dependency between input streams. It also invokes // Calculator::Process when any input stream becomes done. @@ -47,8 +49,11 @@ class ImmediateInputStreamHandler : public InputStreamHandler { void FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) override; - // Record of the last reported timestamp bound for each input stream. - mediapipe::internal::Collection timestamp_bounds_; + absl::Mutex mutex_; + // The packet-set builder for each input stream. + std::vector sync_sets_ ABSL_GUARDED_BY(mutex_); + // The input timestamp for each kReadyForProcess input stream. + std::vector ready_timestamps_ ABSL_GUARDED_BY(mutex_); }; REGISTER_INPUT_STREAM_HANDLER(ImmediateInputStreamHandler); @@ -57,31 +62,47 @@ ImmediateInputStreamHandler::ImmediateInputStreamHandler( CalculatorContextManager* calculator_context_manager, const MediaPipeOptions& options, bool calculator_run_in_parallel) : InputStreamHandler(tag_map, calculator_context_manager, options, - calculator_run_in_parallel), - timestamp_bounds_(std::move(tag_map)) {} + calculator_run_in_parallel) { + for (auto id = tag_map->BeginId(); id < tag_map->EndId(); ++id) { + sync_sets_.emplace_back(this, std::vector{id}); + ready_timestamps_.push_back(Timestamp::Unset()); + } +} NodeReadiness ImmediateInputStreamHandler::GetNodeReadiness( Timestamp* min_stream_timestamp) { - *min_stream_timestamp = Timestamp::Done(); + absl::MutexLock lock(&mutex_); Timestamp input_timestamp = Timestamp::Done(); + Timestamp min_bound = Timestamp::Done(); bool stream_became_done = false; - - for (CollectionItemId i = input_stream_managers_.BeginId(); - i < input_stream_managers_.EndId(); ++i) { - const auto& stream = input_stream_managers_.Get(i); - bool empty; - Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); - if (!empty) { - input_timestamp = std::min(input_timestamp, stream_timestamp); + for (int i = 0; i < sync_sets_.size(); ++i) { + if (ready_timestamps_[i] > Timestamp::Unset()) { + min_bound = std::min(min_bound, ready_timestamps_[i]); + input_timestamp = std::min(input_timestamp, ready_timestamps_[i]); + continue; } - *min_stream_timestamp = std::min(*min_stream_timestamp, stream_timestamp); - if (stream_timestamp != timestamp_bounds_.Get(i)) { - if (stream_timestamp == Timestamp::Done()) { + Timestamp prev_ts = sync_sets_[i].LastProcessed(); + Timestamp stream_ts; + NodeReadiness readiness = sync_sets_[i].GetReadiness(&stream_ts); + min_bound = std::min(min_bound, stream_ts); + if (readiness == NodeReadiness::kReadyForProcess) { + ready_timestamps_[i] = stream_ts; + input_timestamp = std::min(input_timestamp, stream_ts); + } else if (readiness == NodeReadiness::kReadyForClose) { + CHECK_EQ(stream_ts, Timestamp::Done()); + if (ProcessTimestampBounds()) { + // With kReadyForClose, the timestamp-bound Done is returned. + // This bound is processed using the preceding input-timestamp. + // TODO: Make all InputStreamHandlers process Done() like this. 
+ ready_timestamps_[i] = stream_ts.PreviousAllowedInStream(); + input_timestamp = std::min(input_timestamp, ready_timestamps_[i]); + } else if (prev_ts < Timestamp::Done()) { stream_became_done = true; + ready_timestamps_[i] = Timestamp::Done(); } - timestamp_bounds_.Get(i) = stream_timestamp; } } + *min_stream_timestamp = min_bound; if (*min_stream_timestamp == Timestamp::Done()) { return NodeReadiness::kReadyForClose; @@ -94,6 +115,8 @@ NodeReadiness ImmediateInputStreamHandler::GetNodeReadiness( } if (stream_became_done) { + // The stream_became_done logic is kept for backward compatibility. + // Note that the minimum bound is returned in min_stream_timestamp. return NodeReadiness::kReadyForProcess; } @@ -102,23 +125,13 @@ NodeReadiness ImmediateInputStreamHandler::GetNodeReadiness( void ImmediateInputStreamHandler::FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) { - CHECK(input_timestamp.IsAllowedInStream()); - CHECK(input_set); - for (CollectionItemId id = input_stream_managers_.BeginId(); - id < input_stream_managers_.EndId(); ++id) { - auto& stream = input_stream_managers_.Get(id); - if (stream->QueueHead().Timestamp() == input_timestamp) { - int num_packets_dropped = 0; - bool stream_is_done = false; - Packet current_packet = stream->PopPacketAtTimestamp( - input_timestamp, &num_packets_dropped, &stream_is_done); - AddPacketToShard(&input_set->Get(id), std::move(current_packet), - stream_is_done); + absl::MutexLock lock(&mutex_); + for (int i = 0; i < sync_sets_.size(); ++i) { + if (ready_timestamps_[i] == input_timestamp) { + sync_sets_[i].FillInputSet(input_timestamp, input_set); + ready_timestamps_[i] = Timestamp::Unset(); } else { - Timestamp bound = stream->MinTimestampOrBound(nullptr); - AddPacketToShard(&input_set->Get(id), - Packet().At(bound.PreviousAllowedInStream()), - bound == Timestamp::Done()); + sync_sets_[i].FillInputBounds(input_set); } } } diff --git a/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc b/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc index d30040bbc..5217366a4 100644 --- a/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc +++ b/mediapipe/framework/stream_handler/sync_set_input_stream_handler.cc @@ -17,6 +17,7 @@ // TODO: Move protos in another CL after the C++ code migration. #include "absl/strings/substitute.h" #include "absl/synchronization/mutex.h" +#include "mediapipe/framework/collection_item_id.h" #include "mediapipe/framework/input_stream_handler.h" #include "mediapipe/framework/mediapipe_options.pb.h" #include "mediapipe/framework/packet_set.h" @@ -69,7 +70,7 @@ class SyncSetInputStreamHandler : public InputStreamHandler { private: absl::Mutex mutex_; // The ids of each set of inputs. - std::vector> sync_sets_ ABSL_GUARDED_BY(mutex_); + std::vector sync_sets_ ABSL_GUARDED_BY(mutex_); // The index of the ready sync set. A value of -1 indicates that no // sync sets are ready. 
int ready_sync_set_index_ ABSL_GUARDED_BY(mutex_) = -1; @@ -98,7 +99,7 @@ void SyncSetInputStreamHandler::PrepareForRun( sync_sets_.clear(); std::set used_ids; for (const auto& sync_set : handler_options.sync_set()) { - sync_sets_.emplace_back(); + std::vector stream_ids; CHECK_LT(0, sync_set.tag_index_size()); for (const auto& tag_index : sync_set.tag_index()) { std::string tag; @@ -109,8 +110,9 @@ void SyncSetInputStreamHandler::PrepareForRun( CHECK(!::mediapipe::ContainsKey(used_ids, id)) << "stream \"" << tag_index << "\" is in more than one sync set."; used_ids.insert(id); - sync_sets_.back().push_back(id); + stream_ids.push_back(id); } + sync_sets_.emplace_back(this, std::move(stream_ids)); } std::vector remaining_ids; for (CollectionItemId id = input_stream_managers_.BeginId(); @@ -120,7 +122,7 @@ void SyncSetInputStreamHandler::PrepareForRun( } } if (!remaining_ids.empty()) { - sync_sets_.push_back(std::move(remaining_ids)); + sync_sets_.emplace_back(this, std::move(remaining_ids)); } ready_sync_set_index_ = -1; ready_timestamp_ = Timestamp::Done(); @@ -137,24 +139,14 @@ NodeReadiness SyncSetInputStreamHandler::GetNodeReadiness( absl::MutexLock lock(&mutex_); if (ready_sync_set_index_ >= 0) { *min_stream_timestamp = ready_timestamp_; + // TODO: Return kNotReady unless a new ready syncset is found. return NodeReadiness::kReadyForProcess; } for (int sync_set_index = 0; sync_set_index < sync_sets_.size(); ++sync_set_index) { - const std::vector& sync_set = sync_sets_[sync_set_index]; - *min_stream_timestamp = Timestamp::Done(); - Timestamp min_bound = Timestamp::Done(); - for (CollectionItemId id : sync_set) { - const auto& stream = input_stream_managers_.Get(id); - bool empty; - Timestamp stream_timestamp = stream->MinTimestampOrBound(&empty); - if (empty) { - min_bound = std::min(min_bound, stream_timestamp); - } - *min_stream_timestamp = std::min(*min_stream_timestamp, stream_timestamp); - } - - if (*min_stream_timestamp == Timestamp::Done()) { + NodeReadiness readiness = + sync_sets_[sync_set_index].GetReadiness(min_stream_timestamp); + if (readiness == NodeReadiness::kReadyForClose) { // This sync set is done, remove it. Note that this invalidates // sync set indexes higher than sync_set_index. However, we are // guaranteed that we were not ready before entering the outer @@ -165,15 +157,14 @@ NodeReadiness SyncSetInputStreamHandler::GetNodeReadiness( continue; } - if (min_bound > *min_stream_timestamp) { + if (readiness == NodeReadiness::kReadyForProcess) { + // TODO: Prioritize sync-sets to avoid starvation. if (*min_stream_timestamp < ready_timestamp_) { // Store the timestamp and corresponding sync set index for the // sync set with the earliest arrival timestamp. ready_timestamp_ = *min_stream_timestamp; ready_sync_set_index_ = sync_set_index; } - } else { - CHECK_EQ(min_bound, *min_stream_timestamp); } } if (ready_sync_set_index_ >= 0) { @@ -188,44 +179,17 @@ NodeReadiness SyncSetInputStreamHandler::GetNodeReadiness( return NodeReadiness::kNotReady; } -void SyncSetInputStreamHandler::FillInputBounds( - Timestamp input_timestamp, InputStreamShardSet* input_set) { - for (int i = 0; i < sync_sets_.size(); ++i) { - if (i != ready_sync_set_index_) { - // Set the input streams for the not-ready sync sets. 
- for (CollectionItemId id : sync_sets_[i]) { - const auto stream = input_stream_managers_.Get(id); - Timestamp bound = stream->MinTimestampOrBound(nullptr); - AddPacketToShard(&input_set->Get(id), - Packet().At(bound.PreviousAllowedInStream()), - bound == Timestamp::Done()); - } - } - } -} - void SyncSetInputStreamHandler::FillInputSet(Timestamp input_timestamp, InputStreamShardSet* input_set) { // Assume that all current packets are already cleared. - CHECK(input_timestamp.IsAllowedInStream()); - CHECK(input_set); absl::MutexLock lock(&mutex_); CHECK_LE(0, ready_sync_set_index_); - CHECK_EQ(input_timestamp, ready_timestamp_); - // Set the input streams for the ready sync set. - for (CollectionItemId id : sync_sets_[ready_sync_set_index_]) { - const auto& stream = input_stream_managers_.Get(id); - int num_packets_dropped = 0; - bool stream_is_done = false; - Packet current_packet = stream->PopPacketAtTimestamp( - input_timestamp, &num_packets_dropped, &stream_is_done); - CHECK_EQ(num_packets_dropped, 0) - << absl::Substitute("Dropped $0 packet(s) on input stream \"$1\".", - num_packets_dropped, stream->Name()); - AddPacketToShard(&input_set->Get(id), std::move(current_packet), - stream_is_done); + sync_sets_[ready_sync_set_index_].FillInputSet(input_timestamp, input_set); + for (int i = 0; i < sync_sets_.size(); ++i) { + if (i != ready_sync_set_index_) { + sync_sets_[i].FillInputBounds(input_set); + } } - FillInputBounds(input_timestamp, input_set); ready_sync_set_index_ = -1; ready_timestamp_ = Timestamp::Done(); } diff --git a/mediapipe/framework/timestamp.cc b/mediapipe/framework/timestamp.cc index 99d48b14b..05b69747f 100644 --- a/mediapipe/framework/timestamp.cc +++ b/mediapipe/framework/timestamp.cc @@ -122,7 +122,6 @@ std::string TimestampDiff::DebugString() const { } Timestamp Timestamp::NextAllowedInStream() const { - CHECK(IsAllowedInStream()) << "Timestamp is: " << DebugString(); if (*this >= Max() || *this == PreStream()) { // Indicates that no further timestamps may occur. return OneOverPostStream(); } diff --git a/mediapipe/framework/timestamp.h b/mediapipe/framework/timestamp.h index dc574cbdc..179388942 100644 --- a/mediapipe/framework/timestamp.h +++ b/mediapipe/framework/timestamp.h @@ -247,6 +247,12 @@ class TimestampDiff { TimestampDiff operator-(const TimestampDiff other) const; Timestamp operator+(const Timestamp other) const; + // Special values. + + static TimestampDiff Unset() { + return TimestampDiff(Timestamp::Unset().Value()); + } + private: TimestampBaseType timestamp_; }; diff --git a/mediapipe/framework/validated_graph_config.cc b/mediapipe/framework/validated_graph_config.cc index 31ade5845..13d236560 100644 --- a/mediapipe/framework/validated_graph_config.cc +++ b/mediapipe/framework/validated_graph_config.cc @@ -815,16 +815,25 @@ NodeTypeInfo::NodeRef ValidatedGraphConfig::NodeForSorterIndex( sorted_nodes_.push_back(&tmp_calculators.back()); } } + if (cyclic) { + // This reads from partially altered config_ (by node Swap()) but we assume + // the nodes in the cycle are not altered, as TopologicalSorter reports + // cyclicity before processing any node in the cycle. + auto node_name_formatter = [this](std::string* out, int i) { + const auto& n = NodeForSorterIndex(i); + absl::StrAppend(out, n.type == NodeTypeInfo::NodeType::CALCULATOR + ?
tool::CanonicalNodeName(Config(), n.index) + : DebugName(Config(), n.type, n.index)); + }; + return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) + << "Generator side packet cycle or calculator stream cycle detected " + "in graph: [" + << absl::StrJoin(cycle_indexes, ", ", node_name_formatter) << "]"; + } generator_configs.Swap(config_.mutable_packet_generator()); tmp_generators.swap(generators_); node_configs.Swap(config_.mutable_node()); tmp_calculators.swap(calculators_); - if (cyclic) { - return ::mediapipe::UnknownErrorBuilder(MEDIAPIPE_LOC) - << "Generator side packet cycle or calculator stream cycle detected " - "in graph. Cycle indexes: " - << absl::StrJoin(cycle_indexes, ", "); - } #if !(defined(MEDIAPIPE_LITE) || defined(MEDIAPIPE_MOBILE)) VLOG(2) << "AFTER TOPOLOGICAL SORT:\n" << config_.DebugString(); #endif // !(MEDIAPIPE_LITE || MEDIAPIPE_MOBILE) diff --git a/mediapipe/gpu/gl_base.h b/mediapipe/gpu/gl_base.h index 3fd823388..9aa8b02dc 100644 --- a/mediapipe/gpu/gl_base.h +++ b/mediapipe/gpu/gl_base.h @@ -57,15 +57,14 @@ #include #include #include - -#ifdef __ANDROID__ +#if defined(__ANDROID__) // Weak-link all GL APIs included from this point on. // TODO: Annotate these with availability attributes for the // appropriate versions of Android, by including gl{3,31,31}.h and resetting // GL_APICALL for each. #undef GL_APICALL #define GL_APICALL __attribute__((weak_import)) KHRONOS_APICALL -#endif // __ANDROID__ +#endif // defined(__ANDROID__) #include diff --git a/mediapipe/gpu/gl_calculator_helper_impl.h b/mediapipe/gpu/gl_calculator_helper_impl.h index 3d92ca671..8f90eb5fd 100644 --- a/mediapipe/gpu/gl_calculator_helper_impl.h +++ b/mediapipe/gpu/gl_calculator_helper_impl.h @@ -83,6 +83,10 @@ class GlCalculatorHelperImpl { GLuint framebuffer_ = 0; GpuResources& gpu_resources_; + + // Necessary to compute for a given GlContext in order to properly enforce the + // SetStandardTextureParams. + bool can_linear_filter_float_textures_; }; } // namespace mediapipe diff --git a/mediapipe/gpu/gl_calculator_helper_impl_common.cc b/mediapipe/gpu/gl_calculator_helper_impl_common.cc index cf2dcf582..d26888ace 100644 --- a/mediapipe/gpu/gl_calculator_helper_impl_common.cc +++ b/mediapipe/gpu/gl_calculator_helper_impl_common.cc @@ -22,6 +22,17 @@ GlCalculatorHelperImpl::GlCalculatorHelperImpl(CalculatorContext* cc, GpuResources* gpu_resources) : gpu_resources_(*gpu_resources) { gl_context_ = gpu_resources_.gl_context(cc); +// GL_ES_VERSION_2_0 and up (at least through ES 3.2) may contain the extension. +// Checking against one also checks against higher ES versions. So this checks +// against GLES >= 2.0. +#if GL_ES_VERSION_2_0 + // No linear float filtering by default, check extensions. + can_linear_filter_float_textures_ = + gl_context_->HasGlExtension("OES_texture_float_linear"); +#else + // Any float32 texture we create should automatically have linear filtering. + can_linear_filter_float_textures_ = true; +#endif // GL_ES_VERSION_2_0 } GlCalculatorHelperImpl::~GlCalculatorHelperImpl() { @@ -89,13 +100,15 @@ void GlCalculatorHelperImpl::BindFramebuffer(const GlTexture& dst) { void GlCalculatorHelperImpl::SetStandardTextureParams(GLenum target, GLint internal_format) { + // Default to using linear filter everywhere. For float32 textures, fall back + // to GL_NEAREST if linear filtering unsupported. 
GLint filter; switch (internal_format) { case GL_R32F: case GL_RGBA32F: - // 32F (unlike 16f) textures do not support texture filtering + // 32F (unlike 16f) textures do not always support texture filtering // (According to OpenGL ES specification [TEXTURE IMAGE SPECIFICATION]) - filter = GL_NEAREST; + filter = can_linear_filter_float_textures_ ? GL_LINEAR : GL_NEAREST; break; default: filter = GL_LINEAR; diff --git a/mediapipe/gpu/gl_context.cc b/mediapipe/gpu/gl_context.cc index 497a28e1f..dd1b6fa21 100644 --- a/mediapipe/gpu/gl_context.cc +++ b/mediapipe/gpu/gl_context.cc @@ -203,6 +203,69 @@ bool GlContext::ParseGlVersion(absl::string_view version_string, GLint* major, return true; } +bool GlContext::HasGlExtension(absl::string_view extension) const { + return gl_extensions_.find(extension) != gl_extensions_.end(); +} + +// Function for GL3.0+ to query for and store all of our available GL extensions +// in an easily-accessible set. The glGetString call is actually *not* required +// to work with GL_EXTENSIONS for newer GL versions, so we must maintain both +// variations of this function. +::mediapipe::Status GlContext::GetGlExtensions() { + gl_extensions_.clear(); + // glGetStringi only introduced in GL 3.0+; so we exit out this function if + // we don't have that function defined, regardless of version number reported. + // The function itself is also fully stubbed out if we're linking against an + // API version without a glGetStringi declaration. Although Emscripten + // sometimes provides this function, its default library implementation + // appears to only provide glGetString, so we skip this for Emscripten + // platforms to avoid possible undefined symbol or runtime errors. +#if (GL_VERSION_3_0 || GL_ES_VERSION_3_0) && !defined(__EMSCRIPTEN__) + if (!SymbolAvailable(&glGetStringi)) { + LOG(ERROR) << "GL major version > 3.0 indicated, but glGetStringi not " + << "defined. Falling back to deprecated GL extensions querying " + << "method."; + return ::mediapipe::InternalError("glGetStringi not defined, but queried"); + } + int num_extensions = 0; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); + if (glGetError() != 0) { + return ::mediapipe::InternalError( + "Error querying for number of extensions"); + } + + for (int i = 0; i < num_extensions; ++i) { + const GLubyte* res = glGetStringi(GL_EXTENSIONS, i); + if (glGetError() != 0 || res == nullptr) { + return ::mediapipe::InternalError( + "Error querying for an extension by index"); + } + const char* signed_res = reinterpret_cast(res); + gl_extensions_.insert(signed_res); + } + + return ::mediapipe::OkStatus(); +#else + return ::mediapipe::InternalError("GL version mismatch in GlGetExtensions"); +#endif // (GL_VERSION_3_0 || GL_ES_VERSION_3_0) && !defined(__EMSCRIPTEN__) +} + +// Same as GetGlExtensions() above, but for pre-GL3.0, where glGetStringi did +// not exist. 
+::mediapipe::Status GlContext::GetGlExtensionsCompat() {
+  gl_extensions_.clear();
+
+  const GLubyte* res = glGetString(GL_EXTENSIONS);
+  if (glGetError() != 0 || res == nullptr) {
+    LOG(ERROR) << "Error querying for GL extensions";
+    return ::mediapipe::InternalError("Error querying for GL extensions");
+  }
+  const char* signed_res = reinterpret_cast<const char*>(res);
+  gl_extensions_ = absl::StrSplit(signed_res, ' ');
+
+  return ::mediapipe::OkStatus();
+}
+
 ::mediapipe::Status GlContext::FinishInitialization(bool create_thread) {
   if (create_thread) {
     thread_ = absl::make_unique<GlContext::DedicatedThread>();
@@ -232,8 +295,13 @@ bool GlContext::ParseGlVersion(absl::string_view version_string, GLint* major,
     LOG(INFO) << "GL version: " << gl_major_version_ << "." << gl_minor_version_
               << " (" << glGetString(GL_VERSION) << ")";
-
-    return ::mediapipe::OkStatus();
+    if (gl_major_version_ >= 3) {
+      auto status = GetGlExtensions();
+      if (status.ok()) {
+        return ::mediapipe::OkStatus();
+      }
+    }
+    return GetGlExtensionsCompat();
   });
 }
diff --git a/mediapipe/gpu/gl_context.h b/mediapipe/gpu/gl_context.h
index c28e310b6..455b232d7 100644
--- a/mediapipe/gpu/gl_context.h
+++ b/mediapipe/gpu/gl_context.h
@@ -237,6 +237,10 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
   static bool ParseGlVersion(absl::string_view version_string, GLint* major,
                              GLint* minor);
 
+  // Simple query for GL extension support; only valid after GlContext has
+  // finished its initialization successfully.
+  bool HasGlExtension(absl::string_view extension) const;
+
   int64_t gl_finish_count() { return gl_finish_count_; }
 
   // Used by GlFinishSyncPoint. The count_to_pass cannot exceed the current
@@ -346,6 +350,8 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
   bool HasContext() const;
   bool CheckForGlErrors();
   void LogUncheckedGlErrors(bool had_gl_errors);
+  ::mediapipe::Status GetGlExtensions();
+  ::mediapipe::Status GetGlExtensionsCompat();
 
   // The following ContextBinding functions have platform-specific
   // implementations.
@@ -366,6 +372,10 @@ class GlContext : public std::enable_shared_from_this<GlContext> {
   GLint gl_major_version_ = 0;
   GLint gl_minor_version_ = 0;
 
+  // glGetString and glGetStringi both return pointers to static strings,
+  // so we should be fine storing the extension pieces as string_view's.
+  std::set<absl::string_view> gl_extensions_;
+
   // Number of glFinish calls completed on the GL thread.
   // Changes should be guarded by mutex_. However, we use simple atomic
   // loads for efficiency on the fast path.
diff --git a/mediapipe/gpu/gl_simple_shaders.cc b/mediapipe/gpu/gl_simple_shaders.cc
index 6b56288a2..b038c88aa 100644
--- a/mediapipe/gpu/gl_simple_shaders.cc
+++ b/mediapipe/gpu/gl_simple_shaders.cc
@@ -24,6 +24,19 @@ namespace mediapipe {
 #define _STRINGIFY(_x) __STRINGIFY(_x)
 #endif
 
+// Our fragment shaders use DEFAULT_PRECISION to define the default precision
+// for a type. The macro strips out the precision declaration on desktop GL,
+// where it's not supported.
+//
+// Note: this does not use a raw std::string because some compilers don't handle
+// raw strings inside macros correctly. It uses a macro because we want to be
+// able to concatenate strings by juxtaposition. We want to concatenate strings
+// by juxtaposition so we can export const char* static data containing the
+// pre-expanded strings.
+//
+// TODO: this was written before we could rely on C++11 support.
+// Consider replacing it with constexpr std::string concatenation, or replacing
+// the static variables with functions.
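To make the new plumbing easier to follow, here is a minimal sketch of how the pieces above are meant to combine: the context caches its extension set once during FinishInitialization, callers ask HasGlExtension("OES_texture_float_linear") on GLES, and float32 textures fall back to GL_NEAREST when linear filtering is unavailable. The helper name ChooseTextureFilter and its exact shape are illustrative assumptions, not part of this patch or of the MediaPipe API; only HasGlExtension() and the GL enums come from the code above.

#include "mediapipe/gpu/gl_base.h"
#include "mediapipe/gpu/gl_context.h"

// Hypothetical helper: pick the min/mag filter for a texture of the given
// internal format, honoring the OES_texture_float_linear availability that
// GlContext now exposes via HasGlExtension().
GLint ChooseTextureFilter(const mediapipe::GlContext& gl_context,
                          GLint internal_format) {
#if GL_ES_VERSION_2_0
  // On GLES, float32 textures are linearly filterable only with the extension.
  const bool can_linear_filter_float =
      gl_context.HasGlExtension("OES_texture_float_linear");
#else
  // Desktop GL: float32 textures are filterable without an extension.
  const bool can_linear_filter_float = true;
#endif
  switch (internal_format) {
    case GL_R32F:
    case GL_RGBA32F:
      return can_linear_filter_float ? GL_LINEAR : GL_NEAREST;
    default:
      return GL_LINEAR;
  }
}

// Usage, mirroring SetStandardTextureParams() above (run on the GL thread):
//   GLint filter = ChooseTextureFilter(*gl_context, GL_RGBA32F);
//   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
//   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);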
#define PRECISION_COMPAT \ GLES_VERSION_COMPAT \ "#ifdef GL_ES \n" \ @@ -42,10 +55,15 @@ namespace mediapipe { "#define out varying\n" \ "#endif // __VERSION__ < 130\n" -#define FRAGMENT_PREAMBLE \ - PRECISION_COMPAT \ - "#if __VERSION__ < 130\n" \ - "#define in varying\n" \ +// Note: on systems where highp precision for floats is not supported (look up +// GL_FRAGMENT_PRECISION_HIGH), we replace it with mediump. +#define FRAGMENT_PREAMBLE \ + PRECISION_COMPAT \ + "#if __VERSION__ < 130\n" \ + "#define in varying\n" \ + "#if GL_ES && !GL_FRAGMENT_PRECISION_HIGH\n" \ + "#define highp mediump\n" \ + "#endif // GL_ES && !GL_FRAGMENT_PRECISION_HIGH\n" \ "#endif // __VERSION__ < 130\n" const GLchar* const kMediaPipeVertexShaderPreamble = VERTEX_PREAMBLE; diff --git a/mediapipe/graphs/hair_segmentation/BUILD b/mediapipe/graphs/hair_segmentation/BUILD index eec0732e3..52598175f 100644 --- a/mediapipe/graphs/hair_segmentation/BUILD +++ b/mediapipe/graphs/hair_segmentation/BUILD @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:public"]) @@ -33,9 +38,19 @@ cc_library( ], ) -load( - "//mediapipe/framework/tool:mediapipe_graph.bzl", - "mediapipe_binary_graph", +cc_library( + name = "desktop_calculators", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/image:recolor_calculator", + "//mediapipe/calculators/image:set_alpha_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_segmentation_calculator", + ], ) mediapipe_binary_graph( diff --git a/mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt b/mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt new file mode 100644 index 000000000..36c6970e1 --- /dev/null +++ b/mediapipe/graphs/hair_segmentation/hair_segmentation_desktop_live.pbtxt @@ -0,0 +1,152 @@ +# MediaPipe graph that performs hair segmentation with TensorFlow Lite on CPU. +# Used in the example in +# mediapipie/examples/desktop/hair_segmentation:hair_segmentation_cpu + +# Images on CPU coming into and out of the graph. +input_stream: "input_video" +output_stream: "output_video" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for +# TfLiteTensorsToSegmentationCalculator downstream in the graph to finish +# generating the corresponding hair mask before it passes through another +# image. All images that come in while waiting are dropped, limiting the number +# of in-flight images between this calculator and +# TfLiteTensorsToSegmentationCalculator to 1. This prevents the nodes in between +# from queuing up incoming images and data excessively, which leads to increased +# latency and memory usage, unwanted in real-time mobile applications. 
It also +# eliminates unnecessarily computation, e.g., a transformed image produced by +# ImageTransformationCalculator may get dropped downstream if the subsequent +# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy +# processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:hair_mask" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Transforms the input image on CPU to a 512x512 image. To scale the image, by +# default it uses the STRETCH scale mode that maps the entire input image to the +# entire transformed image. As a result, image aspect ratio may be changed and +# objects in the image may be deformed (stretched or squeezed), but the hair +# segmentation model used in this graph is agnostic to that deformation. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:throttled_input_video" + output_stream: "IMAGE:transformed_input_video" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 512 + output_height: 512 + } + } +} + +# Caches a mask fed back from the previous round of hair segmentation, and upon +# the arrival of the next input image sends out the cached mask with the +# timestamp replaced by that of the input image, essentially generating a packet +# that carries the previous mask. Note that upon the arrival of the very first +# input image, an empty packet is sent out to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:throttled_input_video" + input_stream: "LOOP:hair_mask" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:previous_hair_mask" +} + +# Embeds the hair mask generated from the previous round of hair segmentation +# as the alpha channel of the current input image. +node { + calculator: "SetAlphaCalculator" + input_stream: "IMAGE:transformed_input_video" + input_stream: "ALPHA:previous_hair_mask" + output_stream: "IMAGE:mask_embedded_input_video" +} + +# Converts the transformed input image on CPU into an image tensor stored in +# TfLiteTensor. The zero_center option is set to false to normalize the +# pixel values to [0.f, 1.f] as opposed to [-1.f, 1.f]. With the +# max_num_channels option set to 4, all 4 RGBA channels are contained in the +# image tensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:mask_embedded_input_video" + output_stream: "TENSORS:image_tensor" + node_options: { + [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] { + zero_center: false + max_num_channels: 4 + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "op_resolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] { + use_gpu: false + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# tensor representing the hair segmentation, which has the same width and height +# as the input image tensor. 
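For context on how a config like this is consumed, the sketch below is loosely modeled on MediaPipe's generic desktop runner: it parses the .pbtxt, polls "output_video", and pushes ImageFrame packets into "input_video" with increasing timestamps. The function name RunHairSegmentationCpu, the 640x480 frame size, and the fixed 100-frame loop are placeholders, not part of this patch; the remaining nodes of the graph continue right below the sketch.

#include <cstdint>
#include <string>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

// Hypothetical driver for hair_segmentation_desktop_live.pbtxt.
::mediapipe::Status RunHairSegmentationCpu(const std::string& graph_contents) {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          graph_contents);

  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("output_video"));
  MP_RETURN_IF_ERROR(graph.StartRun({}));

  for (int64_t ts = 0; ts < 100; ++ts) {  // One iteration per camera frame.
    // Fill this frame from a camera or decoder; SRGB, any resolution works.
    auto frame = absl::make_unique<mediapipe::ImageFrame>(
        mediapipe::ImageFormat::SRGB, /*width=*/640, /*height=*/480);
    MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
        "input_video",
        mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(ts))));

    mediapipe::Packet output;
    if (!poller.Next(&output)) break;
    const auto& recolored = output.Get<mediapipe::ImageFrame>();
    (void)recolored;  // Display or encode the recolored frame here.
  }

  MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));
  return graph.WaitUntilDone();
}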
+node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:segmentation_tensor" + input_side_packet: "CUSTOM_OP_RESOLVER:op_resolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/hair_segmentation.tflite" + use_gpu: false + } + } +} + +# Decodes the segmentation tensor generated by the TensorFlow Lite model into a +# mask of values in [0, 255], stored in a CPU buffer. It also +# takes the mask generated previously as another input to improve the temporal +# consistency. +node { + calculator: "TfLiteTensorsToSegmentationCalculator" + input_stream: "TENSORS:segmentation_tensor" + input_stream: "PREV_MASK:previous_hair_mask" + output_stream: "MASK:hair_mask" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToSegmentationCalculatorOptions] { + tensor_width: 512 + tensor_height: 512 + tensor_channels: 2 + combine_with_previous_ratio: 0.9 + output_layer_index: 1 + } + } +} + +# Colors the hair segmentation with the color specified in the option. +node { + calculator: "RecolorCalculator" + input_stream: "IMAGE:throttled_input_video" + input_stream: "MASK:hair_mask" + output_stream: "IMAGE:output_video" + node_options: { + [type.googleapis.com/mediapipe.RecolorCalculatorOptions] { + color { r: 0 g: 0 b: 255 } + mask_channel: RED + } + } +} diff --git a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl index 0c0d2aac4..6078318ac 100644 --- a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl +++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl @@ -78,6 +78,33 @@ cat > $(OUTS) < $(OUTS) <:d>") ++- ELSEIF(CPUINFO_RUNTIME_TYPE STREQUAL "static") ++- TARGET_COMPILE_OPTIONS(${target} PRIVATE ++- "/MT$<$:d>") ++- ENDIF() ++- ENDIF() ++-ENDMACRO() ++- ++-# ---[ Build flags ++-SET(CPUINFO_SUPPORTED_PLATFORM TRUE) ++-IF(NOT CMAKE_SYSTEM_PROCESSOR) ++- IF(NOT IOS) ++- MESSAGE(WARNING ++- "Target processor architecture is not specified. " ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++- ENDIF() ++-ELSEIF(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64)$") ++- MESSAGE(WARNING ++- "Target processor architecture \"${CMAKE_SYSTEM_PROCESSOR}\" is not supported in cpuinfo. " ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++-ENDIF() ++- ++-IF(NOT CMAKE_SYSTEM_NAME) ++- MESSAGE(WARNING ++- "Target operating system is not specified. " ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++-ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|Darwin|Linux|Android)$") ++- IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") ++- MESSAGE(WARNING ++- "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. 
" ++- "cpuinfo will compile, but cpuinfo_initialize() will always fail.") ++- SET(CPUINFO_SUPPORTED_PLATFORM FALSE) ++- ENDIF() ++-ENDIF() ++- ++-# ---[ Download deps ++-SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps ++- CACHE PATH "Confu-style dependencies source directory") ++-SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps ++- CACHE PATH "Confu-style dependencies binary directory") ++- ++-IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS) ++- IF(CPUINFO_SUPPORTED_PLATFORM AND NOT DEFINED GOOGLETEST_SOURCE_DIR) ++- MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)") ++- CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . ++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . ++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download") ++- SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory") ++- ENDIF() ++-ENDIF() ++- ++-IF(CPUINFO_BUILD_BENCHMARKS) ++- IF(CPUINFO_SUPPORTED_PLATFORM AND NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR) ++- MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)") ++- CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . ++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download") ++- EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build . 
++- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download") ++- SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory") ++- ENDIF() ++-ENDIF() ++- ++-# ---[ cpuinfo library ++-SET(CPUINFO_SRCS ++- src/init.c ++- src/api.c) ++- ++-IF(CPUINFO_SUPPORTED_PLATFORM) ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$") ++- LIST(APPEND CPUINFO_SRCS ++- src/x86/init.c ++- src/x86/info.c ++- src/x86/vendor.c ++- src/x86/uarch.c ++- src/x86/name.c ++- src/x86/topology.c ++- src/x86/isa.c ++- src/x86/cache/init.c ++- src/x86/cache/descriptor.c ++- src/x86/cache/deterministic.c) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/x86/linux/init.c ++- src/x86/linux/cpuinfo.c) ++- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") ++- LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c) ++- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows") ++- LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c) ++- ENDIF() ++- ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$") ++- LIST(APPEND CPUINFO_SRCS ++- src/arm/uarch.c ++- src/arm/cache.c) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/arm/linux/init.c ++- src/arm/linux/cpuinfo.c ++- src/arm/linux/clusters.c ++- src/arm/linux/chipset.c ++- src/arm/linux/midr.c ++- src/arm/linux/hwcap.c) ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") ++- LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi") ++- SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm) ++- ENDIF() ++- ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") ++- LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c) ++- ENDIF() ++- ELSEIF(IOS) ++- LIST(APPEND CPUINFO_SRCS src/arm/mach/init.c) ++- ENDIF() ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/arm/android/properties.c) ++- ENDIF() ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_SRCS ++- src/linux/smallfile.c ++- src/linux/multiline.c ++- src/linux/current.c ++- src/linux/cpulist.c ++- src/linux/processors.c) ++- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") ++- LIST(APPEND CPUINFO_SRCS src/mach/topology.c) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) ++- SET(THREADS_PREFER_PTHREAD_FLAG TRUE) ++- FIND_PACKAGE(Threads REQUIRED) ++- ENDIF() ++-ENDIF() ++- ++-IF(CPUINFO_LIBRARY_TYPE STREQUAL "default") ++- ADD_LIBRARY(cpuinfo ${CPUINFO_SRCS}) ++-ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "shared") ++- ADD_LIBRARY(cpuinfo SHARED ${CPUINFO_SRCS}) ++-ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "static") ++- ADD_LIBRARY(cpuinfo STATIC ${CPUINFO_SRCS}) ++-ELSE() ++- MESSAGE(FATAL_ERROR "Unsupported library type ${CPUINFO_LIBRARY_TYPE}") ++-ENDIF() ++-ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS}) ++-CPUINFO_TARGET_ENABLE_C99(cpuinfo) ++-CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals) ++-CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo) ++-SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h) ++-TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include) 
++-TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src) ++-TARGET_INCLUDE_DIRECTORIES(cpuinfo_internals BEFORE PUBLIC include src) ++-IF(CPUINFO_LOG_LEVEL STREQUAL "default") ++- # default logging level: error (subject to change) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "debug") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=5) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "info") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=4) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "warning") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=3) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "error") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "fatal") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=1) ++-ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "none") ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=0) ++-ELSE() ++- MESSAGE(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}") ++-ENDIF() ++-TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE CPUINFO_LOG_LEVEL=0) ++- ++-IF(CPUINFO_SUPPORTED_PLATFORM) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- TARGET_LINK_LIBRARIES(cpuinfo PUBLIC ${CMAKE_THREAD_LIBS_INIT}) ++- TARGET_LINK_LIBRARIES(cpuinfo_internals PUBLIC ${CMAKE_THREAD_LIBS_INIT}) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _GNU_SOURCE=1) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _GNU_SOURCE=1) ++- ENDIF() ++-ELSE() ++- TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0) ++-ENDIF() ++- ++-# ---[ cpuinfo dependencies: clog ++-IF(NOT DEFINED CLOG_SOURCE_DIR) ++- SET(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog") ++-ENDIF() ++-IF(NOT TARGET clog) ++- SET(CLOG_BUILD_TESTS OFF CACHE BOOL "") ++- SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "") ++- ADD_SUBDIRECTORY( ++- "${CLOG_SOURCE_DIR}") ++- # We build static version of clog but a dynamic library may indirectly depend on it ++- SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON) ++-ENDIF() ++-TARGET_LINK_LIBRARIES(cpuinfo PRIVATE clog) ++-TARGET_LINK_LIBRARIES(cpuinfo_internals PRIVATE clog) ++- ++-INSTALL(TARGETS cpuinfo ++- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ++- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ++- PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) ++- ++-# ---[ cpuinfo micro-benchmarks ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS) ++- # ---[ Build google benchmark ++- IF(NOT TARGET benchmark) ++- SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "") ++- ADD_SUBDIRECTORY( ++- "${GOOGLEBENCHMARK_SOURCE_DIR}" ++- "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark") ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME MATCHES "^(Linux|Android)$") ++- ADD_EXECUTABLE(get-current-bench bench/get-current.cc) ++- TARGET_LINK_LIBRARIES(get-current-bench cpuinfo benchmark) ++- ENDIF() ++- ++- ADD_EXECUTABLE(init-bench bench/init.cc) ++- TARGET_LINK_LIBRARIES(init-bench cpuinfo benchmark) ++-ENDIF() ++- ++-IF(CPUINFO_SUPPORTED_PLATFORM) ++- IF(CPUINFO_BUILD_MOCK_TESTS OR CPUINFO_BUILD_UNIT_TESTS) ++- # ---[ Build google test ++- IF(NOT TARGET gtest) ++- IF(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "static") ++- SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE) ++- ENDIF() ++- ADD_SUBDIRECTORY( ++- "${GOOGLETEST_SOURCE_DIR}" ++- 
"${CONFU_DEPENDENCIES_BINARY_DIR}/googletest") ++- ENDIF() ++- ENDIF() ++-ENDIF() ++- ++-# ---[ cpuinfo mock library and mock tests ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_MOCK_TESTS) ++- SET(CPUINFO_MOCK_SRCS "${CPUINFO_SRCS}") ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86_64)$") ++- LIST(APPEND CPUINFO_MOCK_SRCS src/x86/mockcpuid.c) ++- ENDIF() ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- LIST(APPEND CPUINFO_MOCK_SRCS src/linux/mockfile.c) ++- ENDIF() ++- ++- ADD_LIBRARY(cpuinfo_mock STATIC ${CPUINFO_MOCK_SRCS}) ++- CPUINFO_TARGET_ENABLE_C99(cpuinfo_mock) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo_mock) ++- SET_TARGET_PROPERTIES(cpuinfo_mock PROPERTIES PUBLIC_HEADER include/cpuinfo.h) ++- TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PUBLIC include) ++- TARGET_INCLUDE_DIRECTORIES(cpuinfo_mock BEFORE PRIVATE src) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PUBLIC CPUINFO_MOCK=1) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE CLOG_LOG_TO_STDIO=1) ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- TARGET_LINK_LIBRARIES(cpuinfo_mock PUBLIC ${CMAKE_THREAD_LIBS_INIT}) ++- TARGET_COMPILE_DEFINITIONS(cpuinfo_mock PRIVATE _GNU_SOURCE=1) ++- ENDIF() ++- TARGET_LINK_LIBRARIES(cpuinfo_mock PRIVATE clog) ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a)$") ++- ADD_EXECUTABLE(atm7029b-tablet-test test/mock/atm7029b-tablet.cc) ++- TARGET_INCLUDE_DIRECTORIES(atm7029b-tablet-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(atm7029b-tablet-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(atm7029b-tablet-test atm7029b-tablet-test) ++- ++- ADD_EXECUTABLE(blu-r1-hd-test test/mock/blu-r1-hd.cc) ++- TARGET_INCLUDE_DIRECTORIES(blu-r1-hd-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(blu-r1-hd-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(blu-r1-hd-test blu-r1-hd-test) ++- ++- ADD_EXECUTABLE(galaxy-a3-2016-eu-test test/mock/galaxy-a3-2016-eu.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-a3-2016-eu-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-a3-2016-eu-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-a3-2016-eu-test galaxy-a3-2016-eu-test) ++- ++- ADD_EXECUTABLE(galaxy-a8-2016-duos-test test/mock/galaxy-a8-2016-duos.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2016-duos-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-a8-2016-duos-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-a8-2016-duos-test galaxy-a8-2016-duos-test) ++- ++- ADD_EXECUTABLE(galaxy-grand-prime-value-edition-test test/mock/galaxy-grand-prime-value-edition.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-grand-prime-value-edition-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-grand-prime-value-edition-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-grand-prime-value-edition-test galaxy-grand-prime-value-edition-test) ++- ++- ADD_EXECUTABLE(galaxy-j1-2016-test test/mock/galaxy-j1-2016.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j1-2016-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j1-2016-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j1-2016-test galaxy-j1-2016-test) ++- ++- ADD_EXECUTABLE(galaxy-j5-test test/mock/galaxy-j5.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j5-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j5-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j5-test galaxy-j5-test) ++- ++- ADD_EXECUTABLE(galaxy-j7-prime-test test/mock/galaxy-j7-prime.cc) ++- 
TARGET_INCLUDE_DIRECTORIES(galaxy-j7-prime-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j7-prime-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j7-prime-test galaxy-j7-prime-test) ++- ++- ADD_EXECUTABLE(galaxy-j7-tmobile-test test/mock/galaxy-j7-tmobile.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j7-tmobile-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j7-tmobile-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j7-tmobile-test galaxy-j7-tmobile-test) ++- ++- ADD_EXECUTABLE(galaxy-j7-uae-test test/mock/galaxy-j7-uae.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-j7-uae-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-j7-uae-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-j7-uae-test galaxy-j7-uae-test) ++- ++- ADD_EXECUTABLE(galaxy-s3-us-test test/mock/galaxy-s3-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s3-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s3-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s3-us-test galaxy-s3-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s4-us-test test/mock/galaxy-s4-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s4-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s4-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s4-us-test galaxy-s4-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s5-global-test test/mock/galaxy-s5-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s5-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s5-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s5-global-test galaxy-s5-global-test) ++- ++- ADD_EXECUTABLE(galaxy-s5-us-test test/mock/galaxy-s5-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s5-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s5-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s5-us-test galaxy-s5-us-test) ++- ++- ADD_EXECUTABLE(galaxy-tab-3-7.0-test test/mock/galaxy-tab-3-7.0.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-7.0-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-tab-3-7.0-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-tab-3-7.0-test galaxy-tab-3-7.0-test) ++- ++- ADD_EXECUTABLE(galaxy-tab-3-lite-test test/mock/galaxy-tab-3-lite.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-tab-3-lite-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-tab-3-lite-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-tab-3-lite-test galaxy-tab-3-lite-test) ++- ++- ADD_EXECUTABLE(galaxy-win-duos-test test/mock/galaxy-win-duos.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-win-duos-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-win-duos-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-win-duos-test galaxy-win-duos-test) ++- ++- ADD_EXECUTABLE(huawei-ascend-p7-test test/mock/huawei-ascend-p7.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-ascend-p7-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-ascend-p7-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-ascend-p7-test huawei-ascend-p7-test) ++- ++- ADD_EXECUTABLE(huawei-honor-6-test test/mock/huawei-honor-6.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-honor-6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-honor-6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-honor-6-test huawei-honor-6-test) ++- ++- ADD_EXECUTABLE(lenovo-a6600-plus-test test/mock/lenovo-a6600-plus.cc) ++- TARGET_INCLUDE_DIRECTORIES(lenovo-a6600-plus-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lenovo-a6600-plus-test PRIVATE cpuinfo_mock gtest) ++- 
ADD_TEST(lenovo-a6600-plus-test lenovo-a6600-plus-test) ++- ++- ADD_EXECUTABLE(lenovo-vibe-x2-test test/mock/lenovo-vibe-x2.cc) ++- TARGET_INCLUDE_DIRECTORIES(lenovo-vibe-x2-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lenovo-vibe-x2-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(lenovo-vibe-x2-test lenovo-vibe-x2-test) ++- ++- ADD_EXECUTABLE(lg-k10-eu-test test/mock/lg-k10-eu.cc) ++- TARGET_INCLUDE_DIRECTORIES(lg-k10-eu-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lg-k10-eu-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(lg-k10-eu-test lg-k10-eu-test) ++- ++- ADD_EXECUTABLE(lg-optimus-g-pro-test test/mock/lg-optimus-g-pro.cc) ++- TARGET_INCLUDE_DIRECTORIES(lg-optimus-g-pro-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(lg-optimus-g-pro-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(lg-optimus-g-pro-test lg-optimus-g-pro-test) ++- ++- ADD_EXECUTABLE(moto-e-gen1-test test/mock/moto-e-gen1.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-e-gen1-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-e-gen1-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-e-gen1-test moto-e-gen1-test) ++- ++- ADD_EXECUTABLE(moto-g-gen1-test test/mock/moto-g-gen1.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen1-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen1-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen1-test moto-g-gen1-test) ++- ++- ADD_EXECUTABLE(moto-g-gen2-test test/mock/moto-g-gen2.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen2-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen2-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen2-test moto-g-gen2-test) ++- ++- ADD_EXECUTABLE(moto-g-gen3-test test/mock/moto-g-gen3.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen3-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen3-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen3-test moto-g-gen3-test) ++- ++- ADD_EXECUTABLE(moto-g-gen4-test test/mock/moto-g-gen4.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen4-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen4-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen4-test moto-g-gen4-test) ++- ++- ADD_EXECUTABLE(moto-g-gen5-test test/mock/moto-g-gen5.cc) ++- TARGET_INCLUDE_DIRECTORIES(moto-g-gen5-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(moto-g-gen5-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(moto-g-gen5-test moto-g-gen5-test) ++- ++- ADD_EXECUTABLE(nexus-s-test test/mock/nexus-s.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus-s-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus-s-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus-s-test nexus-s-test) ++- ++- ADD_EXECUTABLE(nexus4-test test/mock/nexus4.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus4-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus4-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus4-test nexus4-test) ++- ++- ADD_EXECUTABLE(nexus6-test test/mock/nexus6.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus6-test nexus6-test) ++- ++- ADD_EXECUTABLE(nexus10-test test/mock/nexus10.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus10-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus10-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus10-test nexus10-test) ++- ++- ADD_EXECUTABLE(padcod-10.1-test test/mock/padcod-10.1.cc) ++- TARGET_INCLUDE_DIRECTORIES(padcod-10.1-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(padcod-10.1-test PRIVATE cpuinfo_mock 
gtest) ++- ADD_TEST(padcod-10.1-test padcod-10.1-test) ++- ++- ADD_EXECUTABLE(xiaomi-redmi-2a-test test/mock/xiaomi-redmi-2a.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-2a-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-redmi-2a-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-redmi-2a-test xiaomi-redmi-2a-test) ++- ++- ADD_EXECUTABLE(xperia-sl-test test/mock/xperia-sl.cc) ++- TARGET_INCLUDE_DIRECTORIES(xperia-sl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xperia-sl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xperia-sl-test xperia-sl-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv5te|armv7-a|aarch64)$") ++- ADD_EXECUTABLE(alcatel-revvl-test test/mock/alcatel-revvl.cc) ++- TARGET_INCLUDE_DIRECTORIES(alcatel-revvl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(alcatel-revvl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(alcatel-revvl-test alcatel-revvl-test) ++- ++- ADD_EXECUTABLE(galaxy-a8-2018-test test/mock/galaxy-a8-2018.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-a8-2018-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-a8-2018-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-a8-2018-test galaxy-a8-2018-test) ++- ++- ADD_EXECUTABLE(galaxy-c9-pro-test test/mock/galaxy-c9-pro.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-c9-pro-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-c9-pro-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-c9-pro-test galaxy-c9-pro-test) ++- ++- ADD_EXECUTABLE(galaxy-s6-test test/mock/galaxy-s6.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s6-test galaxy-s6-test) ++- ++- ADD_EXECUTABLE(galaxy-s7-us-test test/mock/galaxy-s7-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s7-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s7-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s7-us-test galaxy-s7-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s7-global-test test/mock/galaxy-s7-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s7-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s7-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s7-global-test galaxy-s7-global-test) ++- ++- ADD_EXECUTABLE(galaxy-s8-us-test test/mock/galaxy-s8-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s8-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s8-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s8-us-test galaxy-s8-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s8-global-test test/mock/galaxy-s8-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s8-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s8-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s8-global-test galaxy-s8-global-test) ++- ++- ADD_EXECUTABLE(galaxy-s9-us-test test/mock/galaxy-s9-us.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s9-us-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s9-us-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s9-us-test galaxy-s9-us-test) ++- ++- ADD_EXECUTABLE(galaxy-s9-global-test test/mock/galaxy-s9-global.cc) ++- TARGET_INCLUDE_DIRECTORIES(galaxy-s9-global-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(galaxy-s9-global-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(galaxy-s9-global-test galaxy-s9-global-test) ++- ++- ADD_EXECUTABLE(huawei-mate-8-test test/mock/huawei-mate-8.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-8-test 
BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-8-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-8-test huawei-mate-8-test) ++- ++- ADD_EXECUTABLE(huawei-mate-9-test test/mock/huawei-mate-9.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-9-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-9-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-9-test huawei-mate-9-test) ++- ++- ADD_EXECUTABLE(huawei-mate-10-test test/mock/huawei-mate-10.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-10-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-10-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-10-test huawei-mate-10-test) ++- ++- ADD_EXECUTABLE(huawei-mate-20-test test/mock/huawei-mate-20.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-mate-20-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-mate-20-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-mate-20-test huawei-mate-20-test) ++- ++- ADD_EXECUTABLE(huawei-p8-lite-test test/mock/huawei-p8-lite.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-p8-lite-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-p8-lite-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-p8-lite-test huawei-p8-lite-test) ++- ++- ADD_EXECUTABLE(huawei-p9-lite-test test/mock/huawei-p9-lite.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-p9-lite-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-p9-lite-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-p9-lite-test huawei-p9-lite-test) ++- ++- ADD_EXECUTABLE(huawei-p20-pro-test test/mock/huawei-p20-pro.cc) ++- TARGET_INCLUDE_DIRECTORIES(huawei-p20-pro-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(huawei-p20-pro-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(huawei-p20-pro-test huawei-p20-pro-test) ++- ++- ADD_EXECUTABLE(iconia-one-10-test test/mock/iconia-one-10.cc) ++- TARGET_INCLUDE_DIRECTORIES(iconia-one-10-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(iconia-one-10-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(iconia-one-10-test iconia-one-10-test) ++- ++- ADD_EXECUTABLE(meizu-pro-6-test test/mock/meizu-pro-6.cc) ++- TARGET_INCLUDE_DIRECTORIES(meizu-pro-6-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(meizu-pro-6-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(meizu-pro-6-test meizu-pro-6-test) ++- ++- ADD_EXECUTABLE(meizu-pro-6s-test test/mock/meizu-pro-6s.cc) ++- TARGET_INCLUDE_DIRECTORIES(meizu-pro-6s-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(meizu-pro-6s-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(meizu-pro-6s-test meizu-pro-6s-test) ++- ++- ADD_EXECUTABLE(meizu-pro-7-plus-test test/mock/meizu-pro-7-plus.cc) ++- TARGET_INCLUDE_DIRECTORIES(meizu-pro-7-plus-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(meizu-pro-7-plus-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(meizu-pro-7-plus-test meizu-pro-7-plus-test) ++- ++- ADD_EXECUTABLE(nexus5x-test test/mock/nexus5x.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus5x-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus5x-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus5x-test nexus5x-test) ++- ++- ADD_EXECUTABLE(nexus6p-test test/mock/nexus6p.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus6p-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus6p-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(nexus6p-test nexus6p-test) ++- ++- ADD_EXECUTABLE(nexus9-test test/mock/nexus9.cc) ++- TARGET_INCLUDE_DIRECTORIES(nexus9-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(nexus9-test PRIVATE cpuinfo_mock gtest) 
++- ADD_TEST(nexus9-test nexus9-test) ++- ++- ADD_EXECUTABLE(oneplus-3t-test test/mock/oneplus-3t.cc) ++- TARGET_INCLUDE_DIRECTORIES(oneplus-3t-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oneplus-3t-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oneplus-3t-test oneplus-3t-test) ++- ++- ADD_EXECUTABLE(oneplus-5-test test/mock/oneplus-5.cc) ++- TARGET_INCLUDE_DIRECTORIES(oneplus-5-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oneplus-5-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oneplus-5-test oneplus-5-test) ++- ++- ADD_EXECUTABLE(oneplus-5t-test test/mock/oneplus-5t.cc) ++- TARGET_INCLUDE_DIRECTORIES(oneplus-5t-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oneplus-5t-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oneplus-5t-test oneplus-5t-test) ++- ++- ADD_EXECUTABLE(oppo-a37-test test/mock/oppo-a37.cc) ++- TARGET_INCLUDE_DIRECTORIES(oppo-a37-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oppo-a37-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oppo-a37-test oppo-a37-test) ++- ++- ADD_EXECUTABLE(oppo-r9-test test/mock/oppo-r9.cc) ++- TARGET_INCLUDE_DIRECTORIES(oppo-r9-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oppo-r9-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oppo-r9-test oppo-r9-test) ++- ++- ADD_EXECUTABLE(oppo-r15-test test/mock/oppo-r15.cc) ++- TARGET_INCLUDE_DIRECTORIES(oppo-r15-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(oppo-r15-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(oppo-r15-test oppo-r15-test) ++- ++- ADD_EXECUTABLE(pixel-test test/mock/pixel.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-test pixel-test) ++- ++- ADD_EXECUTABLE(pixel-c-test test/mock/pixel-c.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-c-test pixel-c-test) ++- ++- ADD_EXECUTABLE(pixel-xl-test test/mock/pixel-xl.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-xl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-xl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-xl-test pixel-xl-test) ++- ++- ADD_EXECUTABLE(pixel-2-xl-test test/mock/pixel-2-xl.cc) ++- TARGET_INCLUDE_DIRECTORIES(pixel-2-xl-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(pixel-2-xl-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(pixel-2-xl-test pixel-2-xl-test) ++- ++- ADD_EXECUTABLE(xiaomi-mi-5c-test test/mock/xiaomi-mi-5c.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-mi-5c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-mi-5c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-mi-5c-test xiaomi-mi-5c-test) ++- ++- ADD_EXECUTABLE(xiaomi-redmi-note-3-test test/mock/xiaomi-redmi-note-3.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-3-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-redmi-note-3-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-redmi-note-3-test xiaomi-redmi-note-3-test) ++- ++- ADD_EXECUTABLE(xiaomi-redmi-note-4-test test/mock/xiaomi-redmi-note-4.cc) ++- TARGET_INCLUDE_DIRECTORIES(xiaomi-redmi-note-4-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xiaomi-redmi-note-4-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(xiaomi-redmi-note-4-test xiaomi-redmi-note-4-test) ++- ++- ADD_EXECUTABLE(xperia-c4-dual-test test/mock/xperia-c4-dual.cc) ++- TARGET_INCLUDE_DIRECTORIES(xperia-c4-dual-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(xperia-c4-dual-test PRIVATE cpuinfo_mock 
gtest) ++- ADD_TEST(xperia-c4-dual-test xperia-c4-dual-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(i686|x86_64)$") ++- ADD_EXECUTABLE(alldocube-iwork8-test test/mock/alldocube-iwork8.cc) ++- TARGET_INCLUDE_DIRECTORIES(alldocube-iwork8-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(alldocube-iwork8-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(alldocube-iwork8-test alldocube-iwork8-test) ++- ++- ADD_EXECUTABLE(leagoo-t5c-test test/mock/leagoo-t5c.cc) ++- TARGET_INCLUDE_DIRECTORIES(leagoo-t5c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(leagoo-t5c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(leagoo-t5c-test leagoo-t5c-test) ++- ++- ADD_EXECUTABLE(memo-pad-7-test test/mock/memo-pad-7.cc) ++- TARGET_INCLUDE_DIRECTORIES(memo-pad-7-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(memo-pad-7-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(memo-pad-7-test memo-pad-7-test) ++- ++- ADD_EXECUTABLE(zenfone-c-test test/mock/zenfone-c.cc) ++- TARGET_INCLUDE_DIRECTORIES(zenfone-c-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(zenfone-c-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(zenfone-c-test zenfone-c-test) ++- ++- ADD_EXECUTABLE(zenfone-2-test test/mock/zenfone-2.cc) ++- TARGET_INCLUDE_DIRECTORIES(zenfone-2-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(zenfone-2-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(zenfone-2-test zenfone-2-test) ++- ++- ADD_EXECUTABLE(zenfone-2e-test test/mock/zenfone-2e.cc) ++- TARGET_INCLUDE_DIRECTORIES(zenfone-2e-test BEFORE PRIVATE test/mock) ++- TARGET_LINK_LIBRARIES(zenfone-2e-test PRIVATE cpuinfo_mock gtest) ++- ADD_TEST(zenfone-2e-test zenfone-2e-test) ++- ENDIF() ++-ENDIF() ++- ++-# ---[ cpuinfo unit tests ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_UNIT_TESTS) ++- ADD_EXECUTABLE(init-test test/init.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(init-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(init-test) ++- TARGET_LINK_LIBRARIES(init-test PRIVATE cpuinfo gtest gtest_main) ++- ADD_TEST(init-test init-test) ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") ++- ADD_EXECUTABLE(get-current-test test/get-current.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(get-current-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(get-current-test) ++- TARGET_LINK_LIBRARIES(get-current-test PRIVATE cpuinfo gtest gtest_main) ++- ADD_TEST(get-current-test get-current-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86_64)$") ++- ADD_EXECUTABLE(brand-string-test test/name/brand-string.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(brand-string-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(brand-string-test) ++- TARGET_LINK_LIBRARIES(brand-string-test PRIVATE cpuinfo_internals gtest gtest_main) ++- ADD_TEST(brand-string-test brand-string-test) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") ++- ADD_LIBRARY(android_properties_interface STATIC test/name/android-properties-interface.c) ++- CPUINFO_TARGET_ENABLE_C99(android_properties_interface) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(android_properties_interface) ++- TARGET_LINK_LIBRARIES(android_properties_interface PRIVATE cpuinfo_internals) ++- ++- ADD_EXECUTABLE(chipset-test ++- test/name/proc-cpuinfo-hardware.cc ++- test/name/ro-product-board.cc ++- test/name/ro-board-platform.cc ++- test/name/ro-mediatek-platform.cc ++- test/name/ro-arch.cc ++- test/name/ro-chipname.cc ++- test/name/android-properties.cc) ++- 
CPUINFO_TARGET_ENABLE_CXX11(chipset-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(chipset-test) ++- TARGET_LINK_LIBRARIES(chipset-test PRIVATE android_properties_interface gtest gtest_main) ++- ADD_TEST(chipset-test chipset-test) ++- ++- ADD_EXECUTABLE(cache-test test/arm-cache.cc) ++- CPUINFO_TARGET_ENABLE_CXX11(cache-test) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cache-test) ++- TARGET_COMPILE_DEFINITIONS(cache-test PRIVATE __STDC_LIMIT_MACROS=1 __STDC_CONSTANT_MACROS=1) ++- TARGET_LINK_LIBRARIES(cache-test PRIVATE cpuinfo_internals gtest gtest_main) ++- ADD_TEST(cache-test, cache-test) ++- ENDIF() ++-ENDIF() ++- ++-# ---[ Helper and debug tools ++-IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_TOOLS) ++- ADD_EXECUTABLE(isa-info tools/isa-info.c) ++- CPUINFO_TARGET_ENABLE_C99(isa-info) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(isa-info) ++- TARGET_LINK_LIBRARIES(isa-info PRIVATE cpuinfo) ++- INSTALL(TARGETS isa-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ++- ADD_EXECUTABLE(cpu-info tools/cpu-info.c) ++- CPUINFO_TARGET_ENABLE_C99(cpu-info) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpu-info) ++- TARGET_LINK_LIBRARIES(cpu-info PRIVATE cpuinfo) ++- INSTALL(TARGETS cpu-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ++- ADD_EXECUTABLE(cache-info tools/cache-info.c) ++- CPUINFO_TARGET_ENABLE_C99(cache-info) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cache-info) ++- TARGET_LINK_LIBRARIES(cache-info PRIVATE cpuinfo) ++- INSTALL(TARGETS cache-info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ++- IF(CMAKE_SYSTEM_NAME MATCHES "^(Android|Linux)$" AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv[5-8].*|aarch64)$") ++- ADD_EXECUTABLE(auxv-dump tools/auxv-dump.c) ++- CPUINFO_TARGET_ENABLE_C99(auxv-dump) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(auxv-dump) ++- TARGET_LINK_LIBRARIES(auxv-dump PRIVATE ${CMAKE_DL_LIBS} cpuinfo) ++- ++- ADD_EXECUTABLE(cpuinfo-dump tools/cpuinfo-dump.c) ++- CPUINFO_TARGET_ENABLE_C99(cpuinfo-dump) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo-dump) ++- ENDIF() ++- ++- IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86_64)$") ++- ADD_EXECUTABLE(cpuid-dump tools/cpuid-dump.c) ++- CPUINFO_TARGET_ENABLE_C99(cpuid-dump) ++- CPUINFO_TARGET_RUNTIME_LIBRARY(cpuid-dump) ++- TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE src) ++- TARGET_INCLUDE_DIRECTORIES(cpuid-dump BEFORE PRIVATE include) ++- INSTALL(TARGETS cpuid-dump RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) ++- ENDIF() ++-ENDIF() ++diff --git a/README.md b/README.md ++index 7d383ff..ee5fb82 100644 ++--- a/README.md +++++ b/README.md ++@@ -152,21 +152,20 @@ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set); ++ - [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android) ++ - [ ] Using kernel log (`dmesg`) on ARM Linux ++ - Vendor and microarchitecture detection ++- - [x] Intel-designed x86/x86-64 cores (up to Kaby Lake, Airmont, and Knights Mill) ++- - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen) +++ - [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill) +++ - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2) ++ - [ ] VIA-designed x86/x86-64 cores ++ - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise) ++- - [x] ARM-designed ARM cores (up to Cortex-A55 and Cortex-A75) ++- - [x] Qualcomm-designed ARM cores (up to Kryo, Kryo-280, and Kryo-385) ++- - [x] Nvidia-designed ARM cores (Denver) +++ - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1) +++ - [x] Qualcomm-designed 
ARM cores (Scorpion, Krait, and Kryo) +++ - [x] Nvidia-designed ARM cores (Denver and Carmel) ++ - [x] Samsung-designed ARM cores (Exynos) ++ - [x] Intel-designed ARM cores (XScale up to 3rd-gen) ++- - [x] Apple-designed ARM cores (up to Hurricane) +++ - [x] Apple-designed ARM cores (up to Lightning and Thunder) ++ - [x] Cavium-designed ARM cores (ThunderX) ++ - [x] AppliedMicro-designed ARM cores (X-Gene) ++ - Instruction set detection ++ - [x] Using CPUID (x86/x86-64) ++- - [x] Using dynamic code generation validator (Native Client/x86-64) ++ - [x] Using `/proc/cpuinfo` on 32-bit ARM EABI (Linux) ++ - [x] Using microarchitecture heuristics on (32-bit ARM) ++ - [x] Using `FPSID` and `WCID` registers (32-bit ARM) ++diff --git a/bench/get-current.cc b/bench/get-current.cc ++index 91b35a0..b547df0 100644 ++--- a/bench/get-current.cc +++++ b/bench/get-current.cc ++@@ -21,4 +21,13 @@ static void cpuinfo_get_current_core(benchmark::State& state) { ++ } ++ BENCHMARK(cpuinfo_get_current_core)->Unit(benchmark::kNanosecond); ++ +++static void cpuinfo_get_current_uarch_index(benchmark::State& state) { +++ cpuinfo_initialize(); +++ while (state.KeepRunning()) { +++ const uint32_t uarch_index = cpuinfo_get_current_uarch_index(); +++ benchmark::DoNotOptimize(uarch_index); +++ } +++} +++BENCHMARK(cpuinfo_get_current_uarch_index)->Unit(benchmark::kNanosecond); +++ ++ BENCHMARK_MAIN(); ++diff --git a/cmake/DownloadGoogleTest.cmake b/cmake/DownloadGoogleTest.cmake ++index d69d19a..dc86c9c 100644 ++--- a/cmake/DownloadGoogleTest.cmake +++++ b/cmake/DownloadGoogleTest.cmake ++@@ -4,8 +4,8 @@ PROJECT(googletest-download NONE) ++ ++ INCLUDE(ExternalProject) ++ ExternalProject_Add(googletest ++- URL https://github.com/google/googletest/archive/release-1.8.0.zip ++- URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf +++ URL https://github.com/google/googletest/archive/release-1.10.0.zip +++ URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91 ++ SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" ++ BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest" ++ CONFIGURE_COMMAND "" ++diff --git a/configure.py b/configure.py ++index a340c4c..0e58dba 100755 ++--- a/configure.py +++++ b/configure.py ++@@ -26,8 +26,8 @@ def main(args): ++ sources = ["init.c", "api.c"] ++ if build.target.is_x86 or build.target.is_x86_64: ++ sources += [ ++- "x86/init.c", "x86/info.c", "x86/vendor.c", "x86/uarch.c", "x86/name.c", ++- "x86/topology.c", +++ "x86/init.c", "x86/info.c", "x86/isa.c", "x86/vendor.c", +++ "x86/uarch.c", "x86/name.c", "x86/topology.c", ++ "x86/cache/init.c", "x86/cache/descriptor.c", "x86/cache/deterministic.c", ++ ] ++ if build.target.is_macos: ++@@ -37,7 +37,6 @@ def main(args): ++ "x86/linux/init.c", ++ "x86/linux/cpuinfo.c", ++ ] ++- sources.append("x86/isa.c" if not build.target.is_nacl else "x86/nacl/isa.c") ++ if build.target.is_arm or build.target.is_arm64: ++ sources += ["arm/uarch.c", "arm/cache.c"] ++ if build.target.is_linux or build.target.is_android: ++diff --git a/include/cpuinfo.h b/include/cpuinfo.h ++index 9938d2b..e4d2d0c 100644 ++--- a/include/cpuinfo.h +++++ b/include/cpuinfo.h ++@@ -34,10 +34,6 @@ ++ #define CPUINFO_ARCH_PPC64 1 ++ #endif ++ ++-#if defined(__pnacl__) ++- #define CPUINFO_ARCH_PNACL 1 ++-#endif ++- ++ #if defined(__asmjs__) ++ #define CPUINFO_ARCH_ASMJS 1 ++ #endif ++@@ -80,10 +76,6 @@ ++ #define CPUINFO_ARCH_PPC64 0 ++ #endif ++ ++-#ifndef CPUINFO_ARCH_PNACL ++- #define CPUINFO_ARCH_PNACL 0 ++-#endif ++- ++ 
#ifndef CPUINFO_ARCH_ASMJS ++ #define CPUINFO_ARCH_ASMJS 0 ++ #endif ++@@ -190,6 +182,12 @@ enum cpuinfo_vendor { ++ * Processors are designed by HiSilicon, a subsidiary of Huawei. ++ */ ++ cpuinfo_vendor_huawei = 15, +++ /** +++ * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor of x86-64 processor microarchitectures. +++ * +++ * Processors are variants of AMD cores. +++ */ +++ cpuinfo_vendor_hygon = 16, ++ ++ /* Active vendors of embedded CPUs */ ++ ++@@ -401,6 +399,8 @@ enum cpuinfo_uarch { ++ cpuinfo_uarch_cortex_a35 = 0x00300335, ++ /** ARM Cortex-A53. */ ++ cpuinfo_uarch_cortex_a53 = 0x00300353, +++ /** ARM Cortex-A55 revision 0 (restricted dual-issue capabilities compared to revision 1+). */ +++ cpuinfo_uarch_cortex_a55r0 = 0x00300354, ++ /** ARM Cortex-A55. */ ++ cpuinfo_uarch_cortex_a55 = 0x00300355, ++ /** ARM Cortex-A57. */ ++@@ -478,6 +478,10 @@ enum cpuinfo_uarch { ++ cpuinfo_uarch_vortex = 0x00700107, ++ /** Apple A12 processor (little cores). */ ++ cpuinfo_uarch_tempest = 0x00700108, +++ /** Apple A13 processor (big cores). */ +++ cpuinfo_uarch_lightning = 0x00700109, +++ /** Apple A13 processor (little cores). */ +++ cpuinfo_uarch_thunder = 0x0070010A, ++ ++ /** Cavium ThunderX. */ ++ cpuinfo_uarch_thunderx = 0x00800100, ++@@ -494,6 +498,9 @@ enum cpuinfo_uarch { ++ ++ /** Applied Micro X-Gene. */ ++ cpuinfo_uarch_xgene = 0x00B00100, +++ +++ /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ +++ cpuinfo_uarch_dhyana = 0x01000100, ++ }; ++ ++ struct cpuinfo_processor { ++@@ -613,6 +620,22 @@ struct cpuinfo_package { ++ uint32_t cluster_count; ++ }; ++ +++struct cpuinfo_uarch_info { +++ /** Type of CPU microarchitecture */ +++ enum cpuinfo_uarch uarch; +++#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 +++ /** Value of CPUID leaf 1 EAX register for the microarchitecture */ +++ uint32_t cpuid; +++#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ /** Value of Main ID Register (MIDR) for the microarchitecture */ +++ uint32_t midr; +++#endif +++ /** Number of logical processors with the microarchitecture */ +++ uint32_t processor_count; +++ /** Number of cores with the microarchitecture */ +++ uint32_t core_count; +++}; +++ ++ #ifdef __cplusplus ++ extern "C" { ++ #endif ++@@ -1721,6 +1744,7 @@ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); ++ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); ++ const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); ++ const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_packages(void); +++const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarchs(void); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void); ++@@ -1731,6 +1755,7 @@ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processor(uint32_t index ++ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_core(uint32_t index); ++ const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_cluster(uint32_t index); ++ const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_package(uint32_t index); +++const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarch(uint32_t index); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index); ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index); ++@@ -1741,6 +1766,7 @@ 
uint32_t CPUINFO_ABI cpuinfo_get_processors_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_cores_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_clusters_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_packages_count(void); +++uint32_t CPUINFO_ABI cpuinfo_get_uarchs_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void); ++ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); ++@@ -1752,9 +1778,31 @@ uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); ++ */ ++ uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); ++ +++/** +++ * Identify the logical processor that executes the current thread. +++ * +++ * There is no guarantee that the thread will stay on the same logical processor for any time. +++ * Callers should treat the result as only a hint, and be prepared to handle NULL return value. +++ */ ++ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void); +++ +++/** +++ * Identify the core that executes the current thread. +++ * +++ * There is no guarantee that the thread will stay on the same core for any time. +++ * Callers should treat the result as only a hint, and be prepared to handle NULL return value. +++ */ ++ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void); ++ +++/** +++ * Identify the microarchitecture index of the core that executes the current thread. +++ * If the system does not support such identification, the function return 0. +++ * +++ * There is no guarantee that the thread will stay on the same type of core for any time. +++ * Callers should treat the result as only a hint. +++ */ +++uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); +++ ++ #ifdef __cplusplus ++ } /* extern "C" */ ++ #endif ++diff --git a/src/api.c b/src/api.c ++index b180d80..0cc5d4e 100644 ++--- a/src/api.c +++++ b/src/api.c ++@@ -1,9 +1,16 @@ +++#include ++ #include ++ ++ #include ++ #include ++ #include ++ +++#ifdef __linux__ +++ #include +++ +++ #include +++ #include +++#endif ++ ++ bool cpuinfo_is_initialized = false; ++ ++@@ -20,235 +27,347 @@ uint32_t cpuinfo_packages_count = 0; ++ uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 }; ++ uint32_t cpuinfo_max_cache_size = 0; ++ +++#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL; +++ uint32_t cpuinfo_uarchs_count = 0; +++#else +++ struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown }; +++#endif +++ +++#ifdef __linux__ +++ uint32_t cpuinfo_linux_cpu_max = 0; +++ const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; +++ const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL; +++ #endif +++#endif +++ ++ ++ const struct cpuinfo_processor* cpuinfo_get_processors(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors"); ++ } ++ return cpuinfo_processors; ++ } ++ ++ const struct cpuinfo_core* cpuinfo_get_cores(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); ++ } ++ return cpuinfo_cores; ++ } ++ ++ const struct cpuinfo_cluster* cpuinfo_get_clusters(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { 
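As a usage sketch for the per-microarchitecture API declared above (struct cpuinfo_uarch_info, cpuinfo_get_uarchs, cpuinfo_get_uarch and cpuinfo_get_uarchs_count), the following standalone C program, illustrative only and not part of the patched sources, enumerates the core types reported after initialization:

    #include <inttypes.h>
    #include <stdio.h>
    #include <cpuinfo.h>

    int main(void) {
        if (!cpuinfo_initialize()) {
            fprintf(stderr, "cpuinfo_initialize() failed\n");
            return 1;
        }
        /* One cpuinfo_uarch_info entry per distinct core microarchitecture */
        const uint32_t uarchs = cpuinfo_get_uarchs_count();
        for (uint32_t i = 0; i < uarchs; i++) {
            const struct cpuinfo_uarch_info* info = cpuinfo_get_uarch(i);
            printf("uarch 0x%08" PRIX32 ": %" PRIu32 " cores, %" PRIu32 " logical processors\n",
                (uint32_t) info->uarch, info->core_count, info->processor_count);
        }
        cpuinfo_deinitialize();
        return 0;
    }

On x86/x86-64 this loop visits exactly one entry; on ARM/ARM64 it visits one entry per heterogeneous core type.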
++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters"); ++ } ++ return cpuinfo_clusters; ++ } ++ ++ const struct cpuinfo_package* cpuinfo_get_packages(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages"); ++ } ++ return cpuinfo_packages; ++ } ++ ++-const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { +++const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() { ++ if (!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ return cpuinfo_uarchs; +++ #else +++ return &cpuinfo_global_uarch; +++ #endif +++} +++ +++const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor"); ++ } ++- if (index < cpuinfo_processors_count) { ++- return cpuinfo_processors + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_processors_count) { ++ return NULL; ++ } +++ return &cpuinfo_processors[index]; ++ } ++ ++ const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); ++ } ++- if (index < cpuinfo_cores_count) { ++- return cpuinfo_cores + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) { ++ return NULL; ++ } +++ return &cpuinfo_cores[index]; ++ } ++ ++ const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster"); ++ } ++- if (index < cpuinfo_clusters_count) { ++- return cpuinfo_clusters + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) { ++ return NULL; ++ } +++ return &cpuinfo_clusters[index]; ++ } ++ ++ const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package"); ++ } ++- if (index < cpuinfo_packages_count) { ++- return cpuinfo_packages + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) { ++ return NULL; ++ } +++ return &cpuinfo_packages[index]; ++ } ++ ++-uint32_t cpuinfo_get_processors_count(void) { +++const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) { ++ if (!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) { +++ return NULL; +++ } +++ return &cpuinfo_uarchs[index]; +++ #else +++ if CPUINFO_UNLIKELY(index != 0) { +++ return NULL; +++ } +++ return &cpuinfo_global_uarch; +++ #endif +++} +++ +++uint32_t cpuinfo_get_processors_count(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count"); ++ } ++ return cpuinfo_processors_count; ++ } ++ ++ uint32_t cpuinfo_get_cores_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ 
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count"); ++ } ++ return cpuinfo_cores_count; ++ } ++ ++ uint32_t cpuinfo_get_clusters_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count"); ++ } ++ return cpuinfo_clusters_count; ++ } ++ ++ uint32_t cpuinfo_get_packages_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count"); ++ } ++ return cpuinfo_packages_count; ++ } ++ ++-const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { +++uint32_t cpuinfo_get_uarchs_count(void) { ++ if (!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ return cpuinfo_uarchs_count; +++ #else +++ return 1; +++ #endif +++} +++ +++const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_1i]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_1d]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_2]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_3]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches"); ++ } ++ return cpuinfo_cache[cpuinfo_cache_level_4]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_1i]) { ++- return cpuinfo_cache[cpuinfo_cache_level_1i] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_1i][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_1d]) { ++- return cpuinfo_cache[cpuinfo_cache_level_1d] + index; ++- } else { +++ if 
CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_1d][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_2]) { ++- return cpuinfo_cache[cpuinfo_cache_level_2] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_2][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_3]) { ++- return cpuinfo_cache[cpuinfo_cache_level_3] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_3][index]; ++ } ++ ++ const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache"); ++ } ++- if (index < cpuinfo_cache_count[cpuinfo_cache_level_4]) { ++- return cpuinfo_cache[cpuinfo_cache_level_4] + index; ++- } else { +++ if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) { ++ return NULL; ++ } +++ return &cpuinfo_cache[cpuinfo_cache_level_4][index]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_1i]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_1d]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_2]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_3]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count"); ++ } ++ return cpuinfo_cache_count[cpuinfo_cache_level_4]; ++ } ++ ++ uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) { ++- if (!cpuinfo_is_initialized) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { ++ 
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size"); ++ } ++ return cpuinfo_max_cache_size; ++ } +++ +++const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); +++ } +++ #ifdef __linux__ +++ unsigned cpu; +++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { +++ return 0; +++ } +++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { +++ return 0; +++ } +++ return cpuinfo_linux_cpu_to_processor_map[cpu]; +++ #else +++ return NULL; +++ #endif +++} +++ +++const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); +++ } +++ #ifdef __linux__ +++ unsigned cpu; +++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { +++ return 0; +++ } +++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { +++ return 0; +++ } +++ return cpuinfo_linux_cpu_to_core_map[cpu]; +++ #else +++ return NULL; +++ #endif +++} +++ +++uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { +++ if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { +++ cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index"); +++ } +++ #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ #ifdef __linux__ +++ if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { +++ /* Special case: avoid syscall on systems with only a single type of cores */ +++ return 0; +++ } +++ +++ /* General case */ +++ unsigned cpu; +++ if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { +++ return 0; +++ } +++ if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { +++ return 0; +++ } +++ return cpuinfo_linux_cpu_to_uarch_index_map[cpu]; +++ #else +++ /* Fallback: pretend to be on the big core. */ +++ return 0; +++ #endif +++ #else +++ /* Only ARM/ARM64 processors may include cores of different types in the same package. */ +++ return 0; +++ #endif +++} ++diff --git a/src/arm/cache.c b/src/arm/cache.c ++index ccadeb4..c2bc7d2 100644 ++--- a/src/arm/cache.c +++++ b/src/arm/cache.c ++@@ -659,6 +659,7 @@ void cpuinfo_arm_decode_cache( ++ }; ++ } ++ break; +++ case cpuinfo_uarch_cortex_a55r0: ++ case cpuinfo_uarch_cortex_a55: ++ /* ++ * ARM Cortex-A55 Core Technical Reference Manual ++diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h ++index 275d072..f99da66 100644 ++--- a/src/arm/linux/api.h +++++ b/src/arm/linux/api.h ++@@ -153,6 +153,7 @@ struct cpuinfo_arm_linux_processor { ++ uint32_t midr; ++ enum cpuinfo_vendor vendor; ++ enum cpuinfo_uarch uarch; +++ uint32_t uarch_index; ++ /** ++ * ID of the physical package which includes this logical processor. 
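The doc comment above positions cpuinfo_get_current_uarch_index() as a cheap hint for per-core-type dispatch. A minimal sketch of that pattern follows; kernel_fn, per_uarch_kernels and run_kernel are illustrative names, not part of cpuinfo:

    #include <cpuinfo.h>

    typedef void (*kernel_fn)(void);

    /* per_uarch_kernels is assumed to hold cpuinfo_get_uarchs_count() entries */
    static void run_kernel(const kernel_fn* per_uarch_kernels) {
        /* Returns 0 on single-uarch systems or when identification is unavailable */
        const uint32_t index = cpuinfo_get_current_uarch_index();
        per_uarch_kernels[index]();
    }

Because the result is only a hint and the thread may migrate at any time, every kernel in the table must still be correct on every core type; the index should only steer tuning choices.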
++ * The value is parsed from /sys/devices/system/cpu/cpu/topology/physical_package_id ++@@ -346,3 +347,6 @@ CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr( ++ uint32_t max_processors, ++ uint32_t usable_processors, ++ struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); +++ +++extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; +++extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; ++diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c ++index f0c432c..6272abf 100644 ++--- a/src/arm/linux/init.c +++++ b/src/arm/linux/init.c ++@@ -106,12 +106,14 @@ void cpuinfo_arm_linux_init(void) { ++ struct cpuinfo_processor* processors = NULL; ++ struct cpuinfo_core* cores = NULL; ++ struct cpuinfo_cluster* clusters = NULL; ++- const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; ++- const struct cpuinfo_core** linux_cpu_to_core_map = NULL; +++ struct cpuinfo_uarch_info* uarchs = NULL; ++ struct cpuinfo_cache* l1i = NULL; ++ struct cpuinfo_cache* l1d = NULL; ++ struct cpuinfo_cache* l2 = NULL; ++ struct cpuinfo_cache* l3 = NULL; +++ const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; +++ const struct cpuinfo_core** linux_cpu_to_core_map = NULL; +++ uint32_t* linux_cpu_to_uarch_index_map = NULL; ++ ++ const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); ++ cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); ++@@ -400,6 +402,18 @@ void cpuinfo_arm_linux_init(void) { ++ } ++ } ++ +++ uint32_t uarchs_count = 0; +++ enum cpuinfo_uarch last_uarch; +++ for (uint32_t i = 0; i < arm_linux_processors_count; i++) { +++ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { +++ if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) { +++ last_uarch = arm_linux_processors[i].uarch; +++ uarchs_count += 1; +++ } +++ arm_linux_processors[i].uarch_index = uarchs_count - 1; +++ } +++ } +++ ++ /* ++ * Assumptions: ++ * - No SMP (i.e. each core supports only one hardware thread). 
++@@ -432,6 +446,13 @@ void cpuinfo_arm_linux_init(void) { ++ goto cleanup; ++ } ++ +++ uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); +++ if (uarchs == NULL) { +++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", +++ uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count); +++ goto cleanup; +++ } +++ ++ linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*)); ++ if (linux_cpu_to_processor_map == NULL) { ++ cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries", ++@@ -446,6 +467,15 @@ void cpuinfo_arm_linux_init(void) { ++ goto cleanup; ++ } ++ +++ if (uarchs_count > 1) { +++ linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t)); +++ if (linux_cpu_to_uarch_index_map == NULL) { +++ cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries", +++ arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count); +++ goto cleanup; +++ } +++ } +++ ++ l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache)); ++ if (l1i == NULL) { ++ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", ++@@ -460,6 +490,22 @@ void cpuinfo_arm_linux_init(void) { ++ goto cleanup; ++ } ++ +++ uint32_t uarchs_index = 0; +++ for (uint32_t i = 0; i < arm_linux_processors_count; i++) { +++ if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { +++ if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) { +++ last_uarch = arm_linux_processors[i].uarch; +++ uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { +++ .uarch = arm_linux_processors[i].uarch, +++ .midr = arm_linux_processors[i].midr, +++ }; +++ uarchs_index += 1; +++ } +++ uarchs[uarchs_index - 1].processor_count += 1; +++ uarchs[uarchs_index - 1].core_count += 1; +++ } +++ } +++ ++ uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; ++ /* Indication whether L3 (if it exists) is shared between all cores */ ++ bool shared_l3 = true; ++@@ -499,6 +545,11 @@ void cpuinfo_arm_linux_init(void) { ++ cores[i].midr = arm_linux_processors[i].midr; ++ linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i]; ++ +++ if (linux_cpu_to_uarch_index_map != NULL) { +++ linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] = +++ arm_linux_processors[i].uarch_index; +++ } +++ ++ struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; ++ cpuinfo_arm_decode_cache( ++ arm_linux_processors[i].uarch, ++@@ -658,12 +709,11 @@ void cpuinfo_arm_linux_init(void) { ++ } ++ ++ /* Commit */ ++- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; ++- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; ++ cpuinfo_processors = processors; ++ cpuinfo_cores = cores; ++ cpuinfo_clusters = clusters; ++ cpuinfo_packages = &package; +++ cpuinfo_uarchs = uarchs; ++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; ++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; ++ cpuinfo_cache[cpuinfo_cache_level_2] = l2; ++@@ -673,33 +723,42 @@ void cpuinfo_arm_linux_init(void) { ++ cpuinfo_cores_count = valid_processors; ++ cpuinfo_clusters_count = cluster_count; ++ cpuinfo_packages_count = 1; +++ cpuinfo_uarchs_count = uarchs_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; ++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; ++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++ 
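The two loops added to src/arm/linux/init.c above group valid processors into runs that share a uarch: the first pass counts runs to size the uarchs array, the second fills it and accumulates per-uarch processor and core counts. A toy, standalone restatement of that counting rule (the uarch_of_cpu values are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        /* e.g. a big.LITTLE system listed as 4 little cores then 4 big cores */
        const uint32_t uarch_of_cpu[8] = { 1, 1, 1, 1, 2, 2, 2, 2 };
        uint32_t uarchs_count = 0, last_uarch = 0;
        for (uint32_t i = 0; i < 8; i++) {
            if (uarchs_count == 0 || uarch_of_cpu[i] != last_uarch) {
                last_uarch = uarch_of_cpu[i];
                uarchs_count += 1; /* a new run of identical core types starts here */
            }
        }
        printf("distinct core types: %u\n", (unsigned) uarchs_count); /* prints 2 */
        return 0;
    }

Note this counts runs rather than unique values, which relies on same-uarch processors appearing contiguously in the processor list.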
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++- ++ cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]); ++ +++ cpuinfo_linux_cpu_max = arm_linux_processors_count; +++ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; +++ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; +++ cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map; +++ ++ __sync_synchronize(); ++ ++ cpuinfo_is_initialized = true; ++ ++- linux_cpu_to_processor_map = NULL; ++- linux_cpu_to_core_map = NULL; ++ processors = NULL; ++ cores = NULL; ++ clusters = NULL; +++ uarchs = NULL; ++ l1i = l1d = l2 = l3 = NULL; +++ linux_cpu_to_processor_map = NULL; +++ linux_cpu_to_core_map = NULL; +++ linux_cpu_to_uarch_index_map = NULL; ++ ++ cleanup: ++ free(arm_linux_processors); ++- free(linux_cpu_to_processor_map); ++- free(linux_cpu_to_core_map); ++ free(processors); ++ free(cores); ++ free(clusters); +++ free(uarchs); ++ free(l1i); ++ free(l1d); ++ free(l2); ++ free(l3); +++ free(linux_cpu_to_processor_map); +++ free(linux_cpu_to_core_map); +++ free(linux_cpu_to_uarch_index_map); ++ } ++diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c ++index e64cc18..bd27259 100644 ++--- a/src/arm/mach/init.c +++++ b/src/arm/mach/init.c ++@@ -14,6 +14,16 @@ ++ #include ++ #include ++ +++/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ +++#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL +++ #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 +++#endif +++#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST +++ #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F +++#endif +++#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER +++ #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 +++#endif ++ ++ struct cpuinfo_arm_isa cpuinfo_isa = { ++ #if CPUINFO_ARCH_ARM ++@@ -82,37 +92,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype ++ return cpuinfo_uarch_twister; ++ case CPUFAMILY_ARM_HURRICANE: ++ return cpuinfo_uarch_hurricane; ++-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL ++ case CPUFAMILY_ARM_MONSOON_MISTRAL: ++-#else ++- case 0xe81e7ef6: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ ++-#endif ++ /* 2x Monsoon + 4x Mistral cores */ ++ return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral; ++-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST ++ case CPUFAMILY_ARM_VORTEX_TEMPEST: ++-#else ++- case 0x07d34b9f: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ ++-#endif ++ /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */ ++ return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest; +++ case CPUFAMILY_ARM_LIGHTNING_THUNDER: +++ /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */ +++ return core_index + 4 < core_count ? 
cpuinfo_uarch_lightning : cpuinfo_uarch_thunder; ++ default: ++ /* Use hw.cpusubtype for detection */ ++ break; ++ } ++ ++- switch (cpu_subtype) { ++- case CPU_SUBTYPE_ARM_V7: ++- return cpuinfo_uarch_cortex_a8; ++- case CPU_SUBTYPE_ARM_V7F: ++- return cpuinfo_uarch_cortex_a9; ++- case CPU_SUBTYPE_ARM_V7K: ++- return cpuinfo_uarch_cortex_a7; ++- default: ++- return cpuinfo_uarch_unknown; ++- } +++ #if CPUINFO_ARCH_ARM +++ switch (cpu_subtype) { +++ case CPU_SUBTYPE_ARM_V7: +++ return cpuinfo_uarch_cortex_a8; +++ case CPU_SUBTYPE_ARM_V7F: +++ return cpuinfo_uarch_cortex_a9; +++ case CPU_SUBTYPE_ARM_V7K: +++ return cpuinfo_uarch_cortex_a7; +++ default: +++ return cpuinfo_uarch_unknown; +++ } +++ #else +++ return cpuinfo_uarch_unknown; +++ #endif ++ } ++ ++ static void decode_package_name(char* package_name) { ++@@ -244,6 +251,7 @@ void cpuinfo_arm_mach_init(void) { ++ struct cpuinfo_core* cores = NULL; ++ struct cpuinfo_cluster* clusters = NULL; ++ struct cpuinfo_package* packages = NULL; +++ struct cpuinfo_uarch_info* uarchs = NULL; ++ struct cpuinfo_cache* l1i = NULL; ++ struct cpuinfo_cache* l1d = NULL; ++ struct cpuinfo_cache* l2 = NULL; ++@@ -330,21 +338,12 @@ void cpuinfo_arm_mach_init(void) { ++ * Thus, we whitelist CPUs known to support these instructions. ++ */ ++ switch (cpu_family) { ++-#ifdef CPUFAMILY_ARM_MONSOON_MISTRAL ++ case CPUFAMILY_ARM_MONSOON_MISTRAL: ++-#else ++- case 0xe81e7ef6: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_MONSOON_MISTRAL */ ++-#endif ++-#ifdef CPUFAMILY_ARM_VORTEX_TEMPEST ++ case CPUFAMILY_ARM_VORTEX_TEMPEST: ++-#else ++- case 0x07d34b9f: ++- /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ ++-#endif ++-#if CPUINFO_ARCH_ARM64 ++- cpuinfo_isa.atomics = true; ++-#endif +++ case CPUFAMILY_ARM_LIGHTNING_THUNDER: +++ #if CPUINFO_ARCH_ARM64 +++ cpuinfo_isa.atomics = true; +++ #endif ++ cpuinfo_isa.fp16arith = true; ++ } ++ ++@@ -379,10 +378,22 @@ void cpuinfo_arm_mach_init(void) { ++ num_clusters * sizeof(struct cpuinfo_cluster), num_clusters); ++ goto cleanup; ++ } +++ uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info)); +++ if (uarchs == NULL) { +++ cpuinfo_log_error( +++ "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs", +++ num_clusters * sizeof(enum cpuinfo_uarch), num_clusters); +++ goto cleanup; +++ } ++ uint32_t cluster_idx = UINT32_MAX; ++ for (uint32_t i = 0; i < mach_topology.cores; i++) { ++ if (i == 0 || cores[i].uarch != cores[i - 1].uarch) { ++ cluster_idx++; +++ uarchs[cluster_idx] = (struct cpuinfo_uarch_info) { +++ .uarch = cores[i].uarch, +++ .processor_count = 1, +++ .core_count = 1, +++ }; ++ clusters[cluster_idx] = (struct cpuinfo_cluster) { ++ .processor_start = i * threads_per_core, ++ .processor_count = 1, ++@@ -394,6 +405,8 @@ void cpuinfo_arm_mach_init(void) { ++ .uarch = cores[i].uarch, ++ }; ++ } else { +++ uarchs[cluster_idx].processor_count++; +++ uarchs[cluster_idx].core_count++; ++ clusters[cluster_idx].processor_count++; ++ clusters[cluster_idx].core_count++; ++ } ++@@ -542,26 +555,25 @@ void cpuinfo_arm_mach_init(void) { ++ } ++ ++ /* Commit changes */ ++- cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; ++- cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; ++- cpuinfo_cache[cpuinfo_cache_level_2] = l2; ++- cpuinfo_cache[cpuinfo_cache_level_3] = l3; ++- ++ cpuinfo_processors = processors; ++ cpuinfo_cores = cores; ++ cpuinfo_clusters = clusters; ++ cpuinfo_packages = packages; ++- ++- cpuinfo_cache_count[cpuinfo_cache_level_1i] = 
l1_count; ++- cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; ++- cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++- cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; +++ cpuinfo_uarchs = uarchs; +++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; +++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; +++ cpuinfo_cache[cpuinfo_cache_level_2] = l2; +++ cpuinfo_cache[cpuinfo_cache_level_3] = l3; ++ ++ cpuinfo_processors_count = mach_topology.threads; ++ cpuinfo_cores_count = mach_topology.cores; ++ cpuinfo_clusters_count = num_clusters; ++ cpuinfo_packages_count = mach_topology.packages; ++- +++ cpuinfo_uarchs_count = num_clusters; +++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; +++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; +++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; +++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); ++ ++ __sync_synchronize(); ++@@ -572,6 +584,7 @@ void cpuinfo_arm_mach_init(void) { ++ cores = NULL; ++ clusters = NULL; ++ packages = NULL; +++ uarchs = NULL; ++ l1i = l1d = l2 = l3 = NULL; ++ ++ cleanup: ++@@ -579,6 +592,7 @@ cleanup: ++ free(cores); ++ free(clusters); ++ free(packages); +++ free(uarchs); ++ free(l1i); ++ free(l1d); ++ free(l2); ++diff --git a/src/arm/uarch.c b/src/arm/uarch.c ++index a38250a..2aef9e7 100644 ++--- a/src/arm/uarch.c +++++ b/src/arm/uarch.c ++@@ -58,7 +58,9 @@ void cpuinfo_arm_decode_vendor_uarch( ++ *uarch = cpuinfo_uarch_cortex_a35; ++ break; ++ case 0xD05: ++- *uarch = cpuinfo_uarch_cortex_a55; +++ // Note: use Variant, not Revision, field +++ *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ? +++ cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55; ++ break; ++ case 0xD06: ++ *uarch = cpuinfo_uarch_cortex_a65; ++@@ -257,9 +259,9 @@ void cpuinfo_arm_decode_vendor_uarch( ++ *vendor = cpuinfo_vendor_arm; ++ *uarch = cpuinfo_uarch_cortex_a75; ++ break; ++- case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55 */ +++ case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */ ++ *vendor = cpuinfo_vendor_arm; ++- *uarch = cpuinfo_uarch_cortex_a55; +++ *uarch = cpuinfo_uarch_cortex_a55r0; ++ break; ++ case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */ ++ *vendor = cpuinfo_vendor_arm; ++diff --git a/src/cpuinfo/common.h b/src/cpuinfo/common.h ++index 6ba746e..b2b404d 100644 ++--- a/src/cpuinfo/common.h +++++ b/src/cpuinfo/common.h ++@@ -12,29 +12,29 @@ ++ #define CPUINFO_COUNT_OF(array) (sizeof(array) / sizeof(0[array])) ++ ++ #if defined(__GNUC__) ++- #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) ++- #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) +++ #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) +++ #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) ++ #else ++- #define CPUINFO_LIKELY(condition) (!!(condition)) ++- #define CPUINFO_UNLIKELY(condition) (!!(condition)) +++ #define CPUINFO_LIKELY(condition) (!!(condition)) +++ #define CPUINFO_UNLIKELY(condition) (!!(condition)) ++ #endif ++ ++ #ifndef CPUINFO_INTERNAL ++- #if defined(__ELF__) ++- #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) ++- #elif defined(__MACH__) ++- #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) ++- #else ++- #define CPUINFO_INTERNAL ++- #endif +++ #if defined(__ELF__) +++ #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) +++ #elif 
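The src/arm/uarch.c hunk above starts distinguishing Cortex-A55 revision 0 by the MIDR Variant field rather than the Revision field. In the ARM MIDR layout the Variant field occupies bits [23:20] and the primary part number bits [15:4], so the check reduces to the following sketch (is_cortex_a55_r0 is an illustrative helper, not a cpuinfo function; CPUINFO_ARM_MIDR_VARIANT_MASK is presumed to be 0x00F00000):

    #include <stdbool.h>
    #include <stdint.h>

    static bool is_cortex_a55_r0(uint32_t midr) {
        const uint32_t part    = (midr >> 4) & UINT32_C(0xFFF); /* MIDR bits [15:4] */
        const uint32_t variant = (midr >> 20) & UINT32_C(0xF);  /* MIDR bits [23:20] */
        return part == UINT32_C(0xD05) && variant == 0;         /* Cortex-A55, variant 0 */
    }

For example, MIDR 0x410FD051 (the Exynos 9810 little cores, as used in the updated tests later in this patch) has part 0xD05 and variant 0, so it now decodes as cpuinfo_uarch_cortex_a55r0.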
defined(__MACH__) +++ #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) +++ #else +++ #define CPUINFO_INTERNAL +++ #endif ++ #endif ++ ++ #ifndef CPUINFO_PRIVATE ++- #if defined(__ELF__) ++- #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) ++- #elif defined(__MACH__) ++- #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) ++- #else ++- #define CPUINFO_PRIVATE ++- #endif +++ #if defined(__ELF__) +++ #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) +++ #elif defined(__MACH__) +++ #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) +++ #else +++ #define CPUINFO_PRIVATE +++ #endif ++ #endif ++diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h ++index f12c48d..c6eed0b 100644 ++--- a/src/cpuinfo/internal-api.h +++++ b/src/cpuinfo/internal-api.h ++@@ -21,11 +21,13 @@ enum cpuinfo_cache_level { ++ }; ++ ++ extern CPUINFO_INTERNAL bool cpuinfo_is_initialized; +++ ++ extern CPUINFO_INTERNAL struct cpuinfo_processor* cpuinfo_processors; ++ extern CPUINFO_INTERNAL struct cpuinfo_core* cpuinfo_cores; ++ extern CPUINFO_INTERNAL struct cpuinfo_cluster* cpuinfo_clusters; ++ extern CPUINFO_INTERNAL struct cpuinfo_package* cpuinfo_packages; ++ extern CPUINFO_INTERNAL struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max]; +++ ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_processors_count; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count; ++@@ -33,6 +35,19 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; ++ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; ++ +++#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 +++ extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs; +++ extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count; +++#else +++ extern CPUINFO_INTERNAL struct cpuinfo_uarch_info cpuinfo_global_uarch; +++#endif +++ +++#ifdef __linux__ +++ extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_max; +++ extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map; +++ extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map; +++#endif +++ ++ CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); ++ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); ++ #ifdef _WIN32 ++diff --git a/src/linux/current.c b/src/linux/current.c ++deleted file mode 100644 ++index 472a4c9..0000000 ++--- a/src/linux/current.c +++++ /dev/null ++@@ -1,41 +0,0 @@ ++-#include ++-#include ++-#include ++-#include ++-#include ++- ++-#include ++- ++-#include ++-#include ++-#include ++-#include ++- ++- ++-const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; ++-const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; ++- ++- ++-const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { ++- if (!cpuinfo_is_initialized) { ++- cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); ++- } ++- const int cpu = sched_getcpu(); ++- if (cpu >= 0) { ++- return cpuinfo_linux_cpu_to_processor_map[cpu]; ++- } else { ++- return &cpuinfo_processors[0]; ++- } ++-} ++- ++-const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { ++- if (!cpuinfo_is_initialized) { ++- cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); ++- } ++- const int cpu = sched_getcpu(); ++- if (cpu >= 0) { ++- return 
cpuinfo_linux_cpu_to_core_map[cpu]; ++- } else { ++- return &cpuinfo_cores[0]; ++- } ++-} ++diff --git a/src/x86/api.h b/src/x86/api.h ++index 5f5e76d..213c2d8 100644 ++--- a/src/x86/api.h +++++ b/src/x86/api.h ++@@ -93,7 +93,6 @@ CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( ++ const struct cpuid_regs basic_info, const struct cpuid_regs extended_info, ++ uint32_t max_base_index, uint32_t max_extended_index, ++ enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch); ++-CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void); ++ ++ CPUINFO_INTERNAL void cpuinfo_x86_detect_topology( ++ uint32_t max_base_index, ++diff --git a/src/x86/cache/init.c b/src/x86/cache/init.c ++index d581016..dd1f1ea 100644 ++--- a/src/x86/cache/init.c +++++ b/src/x86/cache/init.c ++@@ -65,7 +65,7 @@ iterate_descriptors: ++ } ++ } ++ ++- if (vendor != cpuinfo_vendor_amd && max_base_index >= 4) { +++ if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) { ++ struct cpuid_regs leaf4; ++ uint32_t input_ecx = 0; ++ uint32_t package_cores_max = 0; ++diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h ++index 829ec21..9e9e013 100644 ++--- a/src/x86/cpuid.h +++++ b/src/x86/cpuid.h ++@@ -67,18 +67,13 @@ ++ } ++ #endif ++ ++-/* ++- * This instruction may be not supported by Native Client validator, ++- * make sure it doesn't appear in the binary ++- */ ++-#ifndef __native_client__ ++- static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { ++- #ifdef _MSC_VER ++- return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); ++- #else ++- uint32_t lo, hi; ++- __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); ++- return ((uint64_t) hi << 32) | (uint64_t) lo; ++- #endif ++- } ++-#endif +++static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { +++ #ifdef _MSC_VER +++ return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); +++ #else +++ uint32_t lo, hi; +++ __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); +++ return ((uint64_t) hi << 32) | (uint64_t) lo; +++ #endif +++} +++ ++diff --git a/src/x86/init.c b/src/x86/init.c ++index d736578..244359c 100644 ++--- a/src/x86/init.c +++++ b/src/x86/init.c ++@@ -61,12 +61,8 @@ void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) { ++ ++ cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology); ++ ++- #ifdef __native_client__ ++- cpuinfo_isa = cpuinfo_x86_nacl_detect_isa(); ++- #else ++- cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, ++- max_base_index, max_extended_index, vendor, uarch); ++- #endif +++ cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, +++ max_base_index, max_extended_index, vendor, uarch); ++ } ++ if (max_extended_index >= UINT32_C(0x80000004)) { ++ struct cpuid_regs brand_string[3]; ++diff --git a/src/x86/isa.c b/src/x86/isa.c ++index d27dbca..f2e5a28 100644 ++--- a/src/x86/isa.c +++++ b/src/x86/isa.c ++@@ -244,6 +244,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( ++ */ ++ break; ++ case cpuinfo_vendor_amd: +++ case cpuinfo_vendor_hygon: ++ isa.prefetch = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); ++ break; ++ default: ++@@ -265,6 +266,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( ++ */ ++ switch (vendor) { ++ case cpuinfo_vendor_amd: +++ case cpuinfo_vendor_hygon: ++ isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); ++ break; ++ default: ++diff --git 
a/src/x86/linux/init.c b/src/x86/linux/init.c ++index c096336..f565789 100644 ++--- a/src/x86/linux/init.c +++++ b/src/x86/linux/init.c ++@@ -569,9 +569,6 @@ void cpuinfo_x86_linux_init(void) { ++ } ++ ++ /* Commit changes */ ++- cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; ++- cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; ++- ++ cpuinfo_processors = processors; ++ cpuinfo_cores = cores; ++ cpuinfo_clusters = clusters; ++@@ -591,24 +588,32 @@ void cpuinfo_x86_linux_init(void) { ++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; ++- ++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); ++ +++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { +++ .uarch = x86_processor.uarch, +++ .cpuid = x86_processor.cpuid, +++ .processor_count = processors_count, +++ .core_count = cores_count, +++ }; +++ +++ cpuinfo_linux_cpu_max = x86_linux_processors_count; +++ cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; +++ cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; +++ ++ __sync_synchronize(); ++ ++ cpuinfo_is_initialized = true; ++ ++- linux_cpu_to_processor_map = NULL; ++- linux_cpu_to_core_map = NULL; ++ processors = NULL; ++ cores = NULL; ++ clusters = NULL; ++ packages = NULL; ++ l1i = l1d = l2 = l3 = l4 = NULL; +++ linux_cpu_to_processor_map = NULL; +++ linux_cpu_to_core_map = NULL; ++ ++ cleanup: ++- free(linux_cpu_to_processor_map); ++- free(linux_cpu_to_core_map); ++ free(x86_linux_processors); ++ free(processors); ++ free(cores); ++@@ -619,4 +624,6 @@ cleanup: ++ free(l2); ++ free(l3); ++ free(l4); +++ free(linux_cpu_to_processor_map); +++ free(linux_cpu_to_core_map); ++ } ++diff --git a/src/x86/mach/init.c b/src/x86/mach/init.c ++index ae2be33..b44d3ad 100644 ++--- a/src/x86/mach/init.c +++++ b/src/x86/mach/init.c ++@@ -305,30 +305,34 @@ void cpuinfo_x86_mach_init(void) { ++ } ++ ++ /* Commit changes */ +++ cpuinfo_processors = processors; +++ cpuinfo_cores = cores; +++ cpuinfo_clusters = clusters; +++ cpuinfo_packages = packages; ++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; ++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; ++ cpuinfo_cache[cpuinfo_cache_level_2] = l2; ++ cpuinfo_cache[cpuinfo_cache_level_3] = l3; ++ cpuinfo_cache[cpuinfo_cache_level_4] = l4; ++ ++- cpuinfo_processors = processors; ++- cpuinfo_cores = cores; ++- cpuinfo_clusters = clusters; ++- cpuinfo_packages = packages; ++- +++ cpuinfo_processors_count = mach_topology.threads; +++ cpuinfo_cores_count = mach_topology.cores; +++ cpuinfo_clusters_count = mach_topology.packages; +++ cpuinfo_packages_count = mach_topology.packages; ++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; ++ cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; ++- ++- cpuinfo_processors_count = mach_topology.threads; ++- cpuinfo_cores_count = mach_topology.cores; ++- cpuinfo_clusters_count = mach_topology.packages; ++- cpuinfo_packages_count = mach_topology.packages; ++- ++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); ++ +++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { +++ .uarch = x86_processor.uarch, +++ .cpuid = x86_processor.cpuid, +++ .processor_count = mach_topology.threads, +++ .core_count = mach_topology.cores, +++ }; +++ ++ 
__sync_synchronize(); ++ ++ cpuinfo_is_initialized = true; ++diff --git a/src/x86/nacl/isa.c b/src/x86/nacl/isa.c ++deleted file mode 100644 ++index 662be33..0000000 ++--- a/src/x86/nacl/isa.c +++++ /dev/null ++@@ -1,306 +0,0 @@ ++-#include ++-#include ++-#include ++- ++-#include ++- ++-#define NACL_CODE_BUNDLE_SIZE 32 ++-#include ++-#include ++- ++-static const uint8_t cmpxchg16b_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* MOV edi, edi */ ++- 0x89, 0xFF, ++- /* CMPXCHG16B [r15 + rdi * 1] */ ++- 0x49, 0x0F, 0xC7, 0x0C, 0x3F, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t lzcnt_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* LZCNT eax, ecx */ ++- 0xF3, 0x0F, 0xBD, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t popcnt_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* POPCNT eax, ecx */ ++- 0xF3, 0x0F, 0xB8, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t movbe_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* MOV ecx, ecx */ ++- 0x89, 0xC9, ++- /* MOVBE eax, [r15 + rcx * 1] */ ++- 0x41, 0x0F, 0x38, 0xF0, 0x04, 0x0F, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t bmi_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* ANDN eax, ecx, edx */ ++- 0xC4, 0xE2, 0x70, 0xF2, 0xC2, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t tbm_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* BLCS eax, ecx */ ++- 0x8F, 0xE9, 0x78, 0x01, 0xD9, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t three_d_now_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PFADD mm0, mm1 */ ++- 0x0F, 0x0F, 0xC1, 0x9E, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t three_d_now_plus_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PFNACC mm0, mm1 */ ++- 0x0F, 0x0F, 0xC1, 0x8A, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse3_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* HADDPS xmm0, xmm1 */ ++- 0xF2, 0x0F, 0x7C, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t ssse3_bundle[NACL_CODE_BUNDLE_SIZE] 
= { ++- /* PSHUFB xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0x00, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse4_1_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PMULLD xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0x40, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse4_2_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PCMPGTQ xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0x37, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t sse4a_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* EXTRQ xmm0, xmm1 */ ++- 0x66, 0x0F, 0x79, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t aes_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* AESENC xmm0, xmm1 */ ++- 0x66, 0x0F, 0x38, 0xDC, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t pclmulqdq_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* PCLMULQDQ xmm0, xmm1, 0 */ ++- 0x66, 0x0F, 0x3A, 0x44, 0xC1, 0x00, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t avx_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VPERMILPS ymm0, ymm1, 0xAA */ ++- 0xC4, 0xE3, 0x7D, 0x04, 0xC1, 0xAA, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t fma3_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VFMADDSUB213PS ymm0, ymm1, ymm2 */ ++- 0xC4, 0xE2, 0x75, 0xA6, 0xC2, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t fma4_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VFMADDPS ymm0, ymm1, ymm2, ymm3 */ ++- 0xC4, 0xE3, 0xF5, 0x68, 0xC3, 0x20, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t xop_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VPHADDBQ xmm0, xmm1 */ ++- 0x8F, 0xE9, 0x78, 0xC3, 0xC1, ++- /* Fill remainder with HLTs */ ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, ++-}; ++- ++-static const uint8_t f16c_bundle[NACL_CODE_BUNDLE_SIZE] = { ++- /* VCVTPH2PS ymm0, xmm1 */ ++- 0xC4, 0xE2, 0x7D, 0x13, 0xC1, ++- /* Fill remainder with HLTs 
*/
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++-};
++-
++-static const uint8_t avx2_bundle[NACL_CODE_BUNDLE_SIZE] = {
++- /* VPERMPS ymm0, ymm1, ymm2 */
++- 0xC4, 0xE2, 0x75, 0x16, 0xC2,
++- /* Fill remainder with HLTs */
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++- 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4, 0xF4,
++-};
++-
++-
++-struct cpuinfo_x86_isa cpuinfo_x86_nacl_detect_isa(void) {
++- /*
++- * Under Native Client sandbox we can't just ask the CPU:
++- * - First, some instructions (XGETBV) necessary to query AVX support are not white-listed in the validator.
++- * - Secondly, even if CPU supports some instruction, but validator doesn't know about it (e.g. due a bug in the
++- * ISA detection in the validator), all instructions from the "unsupported" ISA extensions will be replaced by
++- * HLTs when the module is loaded.
++- * Thus, instead of quering the CPU about supported ISA extensions, we query the validator: we pass bundles with
++- * instructions from ISA extensions to dynamic code generation APIs, and test if they are accepted.
++- */
++-
++- struct cpuinfo_x86_isa isa = { 0 };
++-
++- struct nacl_irt_code_data_alloc nacl_irt_code_data_alloc = { 0 };
++- struct nacl_irt_dyncode nacl_irt_dyncode = { 0 };
++- if (sizeof(nacl_irt_code_data_alloc) != nacl_interface_query(NACL_IRT_CODE_DATA_ALLOC_v0_1,
++- &nacl_irt_code_data_alloc,
++- sizeof(nacl_irt_code_data_alloc)))
++- {
++- goto finish;
++- }
++-
++- if (sizeof(nacl_irt_dyncode) != nacl_interface_query(NACL_IRT_DYNCODE_v0_1,
++- &nacl_irt_dyncode,
++- sizeof(nacl_irt_dyncode)))
++- {
++- goto finish;
++- }
++-
++- const size_t allocation_size = 65536;
++- uintptr_t code_segment = 0;
++- if (0 != nacl_irt_code_data_alloc.allocate_code_data(0, allocation_size, 0, 0, &code_segment))
++- {
++- goto finish;
++- }
++-
++- isa.cmpxchg16b = !nacl_irt_dyncode.dyncode_create((void*) code_segment, cmpxchg16b_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.lzcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, lzcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.popcnt = !nacl_irt_dyncode.dyncode_create((void*) code_segment, popcnt_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.movbe = !nacl_irt_dyncode.dyncode_create((void*) code_segment, movbe_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.bmi = !nacl_irt_dyncode.dyncode_create((void*) code_segment, bmi_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.tbm = !nacl_irt_dyncode.dyncode_create((void*) code_segment, tbm_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.three_d_now = !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.three_d_now_plus =
++- !nacl_irt_dyncode.dyncode_create((void*) code_segment, three_d_now_plus_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.ssse3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, ssse3_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse4_1 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_1_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse4_2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4_2_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.sse4a = !nacl_irt_dyncode.dyncode_create((void*) code_segment, sse4a_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.aes = !nacl_irt_dyncode.dyncode_create((void*) code_segment, aes_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.pclmulqdq = !nacl_irt_dyncode.dyncode_create((void*) code_segment, pclmulqdq_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.avx = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.fma3 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma3_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.fma4 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, fma4_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.xop = !nacl_irt_dyncode.dyncode_create((void*) code_segment, xop_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.f16c = !nacl_irt_dyncode.dyncode_create((void*) code_segment, f16c_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++- code_segment += NACL_CODE_BUNDLE_SIZE;
++-
++- isa.avx2 = !nacl_irt_dyncode.dyncode_create((void*) code_segment, avx2_bundle, NACL_CODE_BUNDLE_SIZE) &&
++- (*((const uint8_t*) code_segment) != 0xF4);
++-
++-finish:
++- return isa;
++-}
++diff --git a/src/x86/name.c b/src/x86/name.c
++index 708be1d..e0d5a5b 100644
++--- a/src/x86/name.c
+++++ b/src/x86/name.c
++@@ -671,6 +671,7 @@ static const char* vendor_string_map[] = {
++ [cpuinfo_vendor_intel] = "Intel",
++ [cpuinfo_vendor_amd] = "AMD",
++ [cpuinfo_vendor_via] = "VIA",
+++ [cpuinfo_vendor_hygon] = "Hygon",
++ [cpuinfo_vendor_rdc] = "RDC",
++ [cpuinfo_vendor_dmp] = "DM&P",
++ [cpuinfo_vendor_transmeta] = "Transmeta",
++diff --git a/src/x86/uarch.c b/src/x86/uarch.c
++index ba72d8a..ecaa762 100644
++--- a/src/x86/uarch.c
+++++ b/src/x86/uarch.c
++@@ -79,6 +79,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ case 0x5E: // Sky Lake Client DT/H/S
++ case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
++ case 0x9E: // Kaby/Coffee Lake DT/H/S
+++ case 0xA5: // Comet Lake H/S
+++ case 0xA6: // Comet Lake U/Y
++ return cpuinfo_uarch_sky_lake;
++ case 0x66: // Cannon Lake (Core i3-8121U)
++ return cpuinfo_uarch_palm_cove;
++@@ -94,7 +96,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ return cpuinfo_uarch_bonnell;
++ case 0x27: // Medfield
++ case 0x35: // Cloverview
++- case 0x36: // Cedarview, Centerton
+++ case 0x36: // Cedarview, Centerton
++ return cpuinfo_uarch_saltwell;
++ case 0x37: // Bay Trail
++ case 0x4A: // Merrifield
++@@ -110,6 +112,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ return cpuinfo_uarch_goldmont;
++ case 0x7A: // Gemini Lake
++ return cpuinfo_uarch_goldmont_plus;
+++
++ /* Knights-series cores */
++ case 0x57:
++ return cpuinfo_uarch_knights_landing;
++@@ -173,7 +176,7 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ case 0x38: // Godavari
++ case 0x30: // Kaveri
++ return cpuinfo_uarch_steamroller;
++- case 0x60: // Carrizo
+++ case 0x60: // Carrizo
++ case 0x65: // Bristol Ridge
++ case 0x70: // Stoney Ridge
++ return cpuinfo_uarch_excavator;
++@@ -201,14 +204,22 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
++ switch (model_info->model) {
++ case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
++ case 0x08: // 12 nm Pinnacle Ridge
++- case 0x11: // 14 nm Raven Ridge
+++ case 0x11: // 14 nm Raven Ridge, Great Horned Owl
++ case 0x18: // 12 nm Picasso
++ return cpuinfo_uarch_zen;
+++ case 0x31: // Rome, Castle Peak
+++ case 0x60: // Renoir
++ case 0x71: // Matisse
++ return cpuinfo_uarch_zen2;
++ }
++ }
++ break;
+++ case cpuinfo_vendor_hygon:
+++ switch (model_info->family) {
+++ case 0x00:
+++ return cpuinfo_uarch_dhyana;
+++ }
+++ break;
++ default:
++ break;
++ }
++diff --git a/src/x86/vendor.c b/src/x86/vendor.c
++index 3f3c753..2bba90d 100644
++--- a/src/x86/vendor.c
+++++ b/src/x86/vendor.c
++@@ -26,6 +26,11 @@
++ #define auls UINT32_C(0x736C7561)
++ #define VIA UINT32_C(0x20414956)
++
+++/* Hygon vendor string: "HygonGenuine" */
+++#define Hygo UINT32_C(0x6F677948)
+++#define nGen UINT32_C(0x6E65476E)
+++#define uine UINT32_C(0x656E6975)
+++
++ /* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */
++ #define ineT UINT32_C(0x54656E69)
++ #define Mx86 UINT32_C(0x3638784D)
++@@ -105,6 +110,12 @@ enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32
++ return cpuinfo_vendor_via;
++ }
++ break;
+++ case Hygo:
+++ if (edx == nGen && ecx == uine) {
+++ /* "HygonGenuine" */
+++ return cpuinfo_vendor_hygon;
+++ }
+++ break;
++ #if CPUINFO_ARCH_X86
++ case AMDi:
++ if (edx == sbet && ecx == ter) {
++diff --git a/src/x86/windows/init.c b/src/x86/windows/init.c
++index 7a2090e..2c7e3cd 100644
++--- a/src/x86/windows/init.c
+++++ b/src/x86/windows/init.c
++@@ -417,9 +417,6 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
++ for (uint32_t i = 0; i < processors_count; i++) {
++ const uint32_t apic_id = processors[i].apic_id;
++
++- //linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index;
++- //linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index;
++-
++ if (x86_processor.cache.l1i.size != 0) {
++ const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
++ processors[i].cache.l1i = &l1i[l1i_index];
++@@ -549,30 +546,34 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
++
++
++ /* Commit changes */
+++ cpuinfo_processors = processors;
+++ cpuinfo_cores = cores;
+++ cpuinfo_clusters = clusters;
+++ cpuinfo_packages = packages;
++ cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
++ cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
++ cpuinfo_cache[cpuinfo_cache_level_2] = l2;
++ cpuinfo_cache[cpuinfo_cache_level_3] = l3;
++ cpuinfo_cache[cpuinfo_cache_level_4] = l4;
++
++- cpuinfo_processors = processors;
++- cpuinfo_cores = cores;
++- cpuinfo_clusters = clusters;
++- cpuinfo_packages = packages;
++-
+++ cpuinfo_processors_count = processors_count;
+++ cpuinfo_cores_count = cores_count;
+++ cpuinfo_clusters_count = packages_count;
+++ cpuinfo_packages_count = packages_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
++ cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
++-
++- cpuinfo_processors_count = processors_count;
++- cpuinfo_cores_count = cores_count;
++- cpuinfo_clusters_count = packages_count;
++- cpuinfo_packages_count = packages_count;
++-
++ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+++ cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
+++ .uarch = x86_processor.uarch,
+++ .cpuid = x86_processor.cpuid,
+++ .processor_count = processors_count,
+++ .core_count = cores_count,
+++ };
+++
++ MemoryBarrier();
++
++ cpuinfo_is_initialized = true;
++diff --git a/test/arm-cache.cc b/test/arm-cache.cc
++index 8373f7c..7d2e4a4 100644
++--- a/test/arm-cache.cc
+++++ b/test/arm-cache.cc
++@@ -766,7 +766,7 @@ TEST(QUALCOMM, snapdragon_845) {
++ struct cpuinfo_cache little_l2 = { 0 };
++ struct cpuinfo_cache little_l3 = { 0 };
++ cpuinfo_arm_decode_cache(
++- cpuinfo_uarch_cortex_a55, 4, UINT32_C(0x518F803C),
+++ cpuinfo_uarch_cortex_a55r0, 4, UINT32_C(0x518F803C),
++ &chipset, 1, 8,
++ &little_l1i, &little_l1d, &little_l2, &little_l3);
++
++@@ -910,7 +910,7 @@ TEST(SAMSUNG, exynos_9810) {
++ struct cpuinfo_cache little_l2 = { 0 };
++ struct cpuinfo_cache little_l3 = { 0 };
++ cpuinfo_arm_decode_cache(
++- cpuinfo_uarch_cortex_a55, 4, UINT32_C(0x410FD051),
+++ cpuinfo_uarch_cortex_a55r0, 4, UINT32_C(0x410FD051),
++ &chipset, 1, 8,
++ &little_l1i, &little_l1d, &little_l2, &little_l3);
++
++diff --git a/test/get-current.cc b/test/get-current.cc
++index 4a80cab..f410b12 100644
++--- a/test/get-current.cc
+++++ b/test/get-current.cc
++@@ -3,34 +3,36 @@
++ #include
++
++
++-TEST(CURRENT_PROCESSOR, not_null) {
++- ASSERT_TRUE(cpuinfo_initialize());
++-
++- ASSERT_TRUE(cpuinfo_get_current_processor());
++-}
++-
++ TEST(CURRENT_PROCESSOR, within_bounds) {
++ ASSERT_TRUE(cpuinfo_initialize());
++
++ const struct cpuinfo_processor* current_processor = cpuinfo_get_current_processor();
+++ if (current_processor == nullptr) {
+++ GTEST_SKIP();
+++ }
+++
++ const struct cpuinfo_processor* processors_begin = cpuinfo_get_processors();
++ const struct cpuinfo_processor* processors_end = processors_begin + cpuinfo_get_processors_count();
++ ASSERT_GE(current_processor, processors_begin);
++ ASSERT_LT(current_processor, processors_end);
++ }
++
++-TEST(CURRENT_CORE, not_null) {
++- ASSERT_TRUE(cpuinfo_initialize());
++-
++- ASSERT_TRUE(cpuinfo_get_current_core());
++-}
++-
++ TEST(CURRENT_CORE, within_bounds) {
++ ASSERT_TRUE(cpuinfo_initialize());
++
++ const struct cpuinfo_core* current_core = cpuinfo_get_current_core();
+++ if (current_core == nullptr) {
+++ GTEST_SKIP();
+++ }
+++
++ const struct cpuinfo_core* cores_begin = cpuinfo_get_cores();
++ const struct cpuinfo_core* cores_end = cores_begin + cpuinfo_get_cores_count();
++ ASSERT_GE(current_core, cores_begin);
++ ASSERT_LT(current_core, cores_end);
++ }
+++
+++TEST(CURRENT_UARCH_INDEX, within_bounds) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++
+++ ASSERT_LT(cpuinfo_get_current_uarch_index(), cpuinfo_get_uarchs_count());
+++}
++diff --git a/test/init.cc b/test/init.cc
++index 941cb97..718eb96 100644
++--- a/test/init.cc
+++++ b/test/init.cc
++@@ -678,6 +678,72 @@ TEST(PACKAGE, consistent_cluster) {
++ cpuinfo_deinitialize();
++ }
++
+++TEST(UARCHS_COUNT, within_bounds) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ EXPECT_NE(0, cpuinfo_get_uarchs_count());
+++ EXPECT_LE(cpuinfo_get_packages_count(), cpuinfo_get_cores_count());
+++ EXPECT_LE(cpuinfo_get_packages_count(), cpuinfo_get_processors_count());
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCHS, non_null) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ EXPECT_TRUE(cpuinfo_get_uarchs());
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, non_null) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ EXPECT_TRUE(cpuinfo_get_uarch(i));
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, non_zero_processors) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_NE(0, uarch->processor_count);
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, valid_processors) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_LE(uarch->processor_count, cpuinfo_get_processors_count());
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, non_zero_cores) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_NE(0, uarch->core_count);
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
+++TEST(UARCH, valid_cores) {
+++ ASSERT_TRUE(cpuinfo_initialize());
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const cpuinfo_uarch_info* uarch = cpuinfo_get_uarch(i);
+++ ASSERT_TRUE(uarch);
+++
+++ EXPECT_LE(uarch->core_count, cpuinfo_get_cores_count());
+++ }
+++ cpuinfo_deinitialize();
+++}
+++
++ TEST(L1I_CACHES_COUNT, within_bounds) {
++ ASSERT_TRUE(cpuinfo_initialize());
++ EXPECT_NE(0, cpuinfo_get_l1i_caches_count());
++diff --git a/test/mock/galaxy-s9-global.cc b/test/mock/galaxy-s9-global.cc
++index 7a67129..6c72513 100644
++--- a/test/mock/galaxy-s9-global.cc
+++++ b/test/mock/galaxy-s9-global.cc
++@@ -207,7 +207,7 @@ TEST(CORES, uarch) {
++ case 5:
++ case 6:
++ case 7:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_core(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_core(i)->uarch);
++ break;
++ }
++ }
++@@ -329,7 +329,7 @@ TEST(CLUSTERS, uarch) {
++ ASSERT_EQ(cpuinfo_uarch_exynos_m3, cpuinfo_get_cluster(i)->uarch);
++ break;
++ case 1:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_cluster(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_cluster(i)->uarch);
++ break;
++ }
++ }
++diff --git a/test/mock/galaxy-s9-us.cc b/test/mock/galaxy-s9-us.cc
++index 6df7f3c..ceea969 100644
++--- a/test/mock/galaxy-s9-us.cc
+++++ b/test/mock/galaxy-s9-us.cc
++@@ -168,7 +168,7 @@ TEST(CORES, uarch) {
++ case 5:
++ case 6:
++ case 7:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_core(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_core(i)->uarch);
++ break;
++ }
++ }
++@@ -283,7 +283,7 @@ TEST(CLUSTERS, uarch) {
++ ASSERT_EQ(cpuinfo_uarch_cortex_a75, cpuinfo_get_cluster(i)->uarch);
++ break;
++ case 1:
++- ASSERT_EQ(cpuinfo_uarch_cortex_a55, cpuinfo_get_cluster(i)->uarch);
+++ ASSERT_EQ(cpuinfo_uarch_cortex_a55r0, cpuinfo_get_cluster(i)->uarch);
++ break;
++ }
++ }
++@@ -817,4 +817,4 @@ int main(int argc, char* argv[]) {
++ cpuinfo_initialize();
++ ::testing::InitGoogleTest(&argc, argv);
++ return RUN_ALL_TESTS();
++-}
++\ No newline at end of file
+++}
++diff --git a/tools/cpu-info.c b/tools/cpu-info.c
++index 7fa5187..7963c00 100644
++--- a/tools/cpu-info.c
+++++ b/tools/cpu-info.c
++@@ -14,6 +14,8 @@ static const char* vendor_to_string(enum cpuinfo_vendor vendor) {
++ return "Intel";
++ case cpuinfo_vendor_amd:
++ return "AMD";
+++ case cpuinfo_vendor_hygon:
+++ return "Hygon";
++ case cpuinfo_vendor_arm:
++ return "ARM";
++ case cpuinfo_vendor_qualcomm:
++@@ -161,6 +163,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
++ return "Cortex-A35";
++ case cpuinfo_uarch_cortex_a53:
++ return "Cortex-A53";
+++ case cpuinfo_uarch_cortex_a55r0:
+++ return "Cortex-A55r0";
++ case cpuinfo_uarch_cortex_a55:
++ return "Cortex-A55";
++ case cpuinfo_uarch_cortex_a57:
++@@ -223,6 +227,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
++ return "Vortex";
++ case cpuinfo_uarch_tempest:
++ return "Tempest";
+++ case cpuinfo_uarch_lightning:
+++ return "Lightning";
+++ case cpuinfo_uarch_thunder:
+++ return "Thunder";
++ case cpuinfo_uarch_thunderx:
++ return "ThunderX";
++ case cpuinfo_uarch_thunderx2:
++@@ -253,6 +261,17 @@ int main(int argc, char** argv) {
++ printf("\t%"PRIu32": %s\n", i, cpuinfo_get_package(i)->name);
++ }
++ #endif
+++ printf("Microarchitectures:\n");
+++ for (uint32_t i = 0; i < cpuinfo_get_uarchs_count(); i++) {
+++ const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
+++ const char* uarch_string = uarch_to_string(uarch_info->uarch);
+++ if (uarch_string == NULL) {
+++ printf("\t%"PRIu32"x Unknown (0x%08"PRIx32"\n",
+++ uarch_info->core_count, (uint32_t) uarch_info->uarch);
+++ } else {
+++ printf("\t%"PRIu32"x %s\n", uarch_info->core_count, uarch_string);
+++ }
+++ }
++ printf("Cores:\n");
++ for (uint32_t i = 0; i < cpuinfo_get_cores_count(); i++) {
++ const struct cpuinfo_core* core = cpuinfo_get_core(i);
++@@ -277,17 +296,17 @@ int main(int argc, char** argv) {
++ }
++ }
++ printf("Logical processors");
++- #if defined(__linux__)
++- printf(" (System ID)");
++- #endif
++- printf(":\n");
+++ #if defined(__linux__)
+++ printf(" (System ID)");
+++ #endif
+++ printf(":\n");
++ for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
++ const struct cpuinfo_processor* processor = cpuinfo_get_processor(i);
++- printf("\t%"PRIu32"", i);
+++ printf("\t%"PRIu32"", i);
++
++- #if defined(__linux__)
++- printf(" (%"PRId32")", processor->linux_id);
++- #endif
+++ #if defined(__linux__)
+++ printf(" (%"PRId32")", processor->linux_id);
+++ #endif
++
++ #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
++ printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id);
+diff --git a/third_party/cpuinfo/workspace.bzl b/third_party/cpuinfo/workspace.bzl
+index c2eeede8a0..77aecf5a9a 100644
+--- a/third_party/cpuinfo/workspace.bzl
++++ b/third_party/cpuinfo/workspace.bzl
+@@ -2,14 +2,20 @@
+
+ load("//third_party:repo.bzl", "third_party_http_archive")
+
++# Sanitize a dependency so that it works correctly from code that includes
++# TensorFlow as a submodule.
++def clean_dep(dep):
++ return str(Label(dep))
++
+ def repo():
+ third_party_http_archive(
+ name = "cpuinfo",
+- strip_prefix = "cpuinfo-e39a5790059b6b8274ed91f7b5b5b13641dff267",
+- sha256 = "e5caa8b7c58f1623eed88f4d5147e3753ff19cde821526bc9aa551b004f751fe",
++ strip_prefix = "cpuinfo-d6c0f915ee737f961915c9d17f1679b6777af207",
++ sha256 = "146fc61c3cf63d7d88db963876929a4d373f621fb65568b895efa0857f467770",
+ urls = [
+- "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pytorch/cpuinfo/archive/e39a5790059b6b8274ed91f7b5b5b13641dff267.tar.gz",
+- "https://github.com/pytorch/cpuinfo/archive/e39a5790059b6b8274ed91f7b5b5b13641dff267.tar.gz",
++ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pytorch/cpuinfo/archive/d6c0f915ee737f961915c9d17f1679b6777af207.tar.gz",
++ "https://github.com/pytorch/cpuinfo/archive/d6c0f915ee737f961915c9d17f1679b6777af207.tar.gz",
+ ],
+ build_file = "//third_party/cpuinfo:BUILD.bazel",
++ patch_file = clean_dep("//third_party/cpuinfo:cpuinfo.patch"),
+ )
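For reference, the clean_dep() helper added to workspace.bzl above follows the usual Bazel idiom: wrapping a workspace-relative string in Label() resolves it against the repository that owns the .bzl file (here TensorFlow), rather than against whichever workspace happens to load it, which is what keeps patch_file pointing at TensorFlow's own copy of cpuinfo.patch when TensorFlow is consumed as an external repository. A minimal Starlark sketch of the same pattern is shown below; the repository name, file paths, and example_repo() macro are illustrative assumptions, not part of this patch.

    # Sketch of the clean_dep()/Label() pattern (names below are hypothetical).
    def clean_dep(dep):
        # str(Label(...)) canonicalizes the label relative to the repository
        # that defines this .bzl file, e.g. "//third_party/foo:foo.patch"
        # becomes "@my_project//third_party/foo:foo.patch" instead of being
        # resolved against the calling workspace.
        return str(Label(dep))

    def example_repo():
        # Hypothetical repository rule invocation showing clean_dep() in use.
        native.new_local_repository(
            name = "example_dep",
            path = "third_party/example_dep",
            build_file = clean_dep("//third_party/example_dep:BUILD.bazel"),
        )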