diff --git a/.bazelrc b/.bazelrc index 7817ca081..4ffb73bd4 100644 --- a/.bazelrc +++ b/.bazelrc @@ -3,7 +3,7 @@ # Basic build settings build --jobs 128 build --define='absl=1' -build --cxxopt='-std=c++11' +build --cxxopt='-std=c++14' build --copt='-Wno-sign-compare' build --copt='-Wno-unused-function' build --copt='-Wno-uninitialized' diff --git a/README.md b/README.md index 97d727466..e7a9c10d9 100644 --- a/README.md +++ b/README.md @@ -37,10 +37,18 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de * [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe ## Publications +* [On-Device, Real-Time Hand Tracking with MediaPipe](https://ai.googleblog.com/2019/08/on-device-real-time-hand-tracking-with.html) * [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172) ## Events -[Open sourced at CVPR 2019](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) on June 17~20 in Long Beach, CA +* [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/) +* [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038) +* [ML Conference, Berlin, 11 Dec 2019](https://mlconference.ai/machine-learning-advanced-development/mediapipe-building-real-time-cross-platform-mobile-web-edge-desktop-video-audio-ml-pipelines/) +* [MediaPipe Berlin Meetup, Google Berlin, 11 Dec 2019](https://www.meetup.com/Berlin-AI-Tech-Talk/events/266328794/) +* [The 3rd Workshop on YouTube-8M Large Scale Video Understanding Workshop](https://research.google.com/youtube8m/workshop2019/index.html) Seoul, Korea ICCV 2019 +* [AI DevWorld 2019](https://aidevworld.com) on Oct 10 in San Jose, California +* [Google Industry Workshop at ICIP 2019](http://2019.ieeeicip.org/?action=page4&id=14#Google) [Presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5) on Sept 24 in Taipei, Taiwan +* [Open sourced at CVPR 2019](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) on June 17~20 in Long Beach, CA ## Alpha Disclaimer MediaPipe is currently in alpha for v0.6. We are still making breaking API changes and expect to get to stable API by v1.0. diff --git a/WORKSPACE b/WORKSPACE index 0aee35c67..ca87c83c9 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -103,9 +103,9 @@ http_archive( ], ) -# 2019-08-15 -_TENSORFLOW_GIT_COMMIT = "67def62936e28f97c16182dfcc467d8d1cae02b4" -_TENSORFLOW_SHA256= "ddd4e3c056e7c0ff2ef29133b30fa62781dfbf8a903e99efb91a02d292fa9562" +# 2019-11-12 +_TENSORFLOW_GIT_COMMIT = "a5f9bcd64453ff3d1f64cb4da4786db3d2da7f82" +_TENSORFLOW_SHA256= "f2b6f2ab2ffe63e86eccd3ce4bea6b7197383d726638dfeeebcdc1e7de73f075" http_archive( name = "org_tensorflow", urls = [ @@ -114,13 +114,6 @@ http_archive( ], strip_prefix = "tensorflow-%s" % _TENSORFLOW_GIT_COMMIT, sha256 = _TENSORFLOW_SHA256, - patches = [ - "@//third_party:tensorflow_065c20bf79253257c87bd4614bb9a7fdef015cbb.diff", - "@//third_party:tensorflow_f67fcbefce906cd419e4657f0d41e21019b71abd.diff", - ], - patch_args = [ - "-p1", - ], ) load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") @@ -254,18 +247,11 @@ android_sdk_repository( # iOS basic build deps. 
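Regarding the -std=c++14 bump at the top of this patch: calculator code built under the new flag can use C++14 library and language features directly. A minimal sketch of what that enables (illustrative only; the function name here is hypothetical, not part of this patch):

  #include <memory>
  #include <vector>

  // C++14 features now usable under --cxxopt='-std=c++14':
  // std::make_unique and auto return type deduction.
  auto MakeBuffer(int n) {
    return std::make_unique<std::vector<float>>(n, 0.0f);
  }
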
-load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository") - -git_repository( +http_archive( name = "build_bazel_rules_apple", - remote = "https://github.com/bazelbuild/rules_apple.git", - tag = "0.18.0", - patches = [ - "@//third_party:rules_apple_c0863d0596ae6b769a29fa3fb72ff036444fd249.diff", - ], - patch_args = [ - "-p1", - ], + sha256 = "bdc8e66e70b8a75da23b79f1f8c6207356df07d041d96d2189add7ee0780cf4e", + strip_prefix = "rules_apple-b869b0d3868d78a1d4ffd866ccb304fb68aa12c3", + url = "https://github.com/bazelbuild/rules_apple/archive/b869b0d3868d78a1d4ffd866ccb304fb68aa12c3.tar.gz", ) load( diff --git a/mediapipe/calculators/audio/spectrogram_calculator.cc b/mediapipe/calculators/audio/spectrogram_calculator.cc index 5f7f20c06..56a6338f9 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator.cc +++ b/mediapipe/calculators/audio/spectrogram_calculator.cc @@ -113,8 +113,15 @@ class SpectrogramCalculator : public CalculatorBase { ::mediapipe::Status Close(CalculatorContext* cc) override; private: - Timestamp CurrentOutputTimestamp() { - // Current output timestamp is the *center* of the next frame to be + Timestamp CurrentOutputTimestamp(CalculatorContext* cc) { + if (use_local_timestamp_) { + return cc->InputTimestamp(); + } + return CumulativeOutputTimestamp(); + } + + Timestamp CumulativeOutputTimestamp() { + // Cumulative output timestamp is the *center* of the next frame to be // emitted, hence delayed by half a window duration compared to relevant // input timestamp. return initial_input_timestamp_ + @@ -141,6 +148,7 @@ class SpectrogramCalculator : public CalculatorBase { const OutputMatrixType postprocess_output_fn(const OutputMatrixType&), CalculatorContext* cc); + bool use_local_timestamp_; double input_sample_rate_; bool pad_final_packet_; int frame_duration_samples_; @@ -173,6 +181,8 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518; SpectrogramCalculatorOptions spectrogram_options = cc->Options(); + use_local_timestamp_ = spectrogram_options.use_local_timestamp(); + if (spectrogram_options.frame_duration_seconds() <= 0.0) { ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) << "Invalid or missing frame_duration_seconds.\n" @@ -351,11 +361,11 @@ template << "Inconsistent number of spectrogram channels."; if (allow_multichannel_input_) { cc->Outputs().Index(0).Add(spectrogram_matrices.release(), - CurrentOutputTimestamp()); + CurrentOutputTimestamp(cc)); } else { cc->Outputs().Index(0).Add( new OutputMatrixType(spectrogram_matrices->at(0)), - CurrentOutputTimestamp()); + CurrentOutputTimestamp(cc)); } cumulative_completed_frames_ += output_vectors.size(); } diff --git a/mediapipe/calculators/audio/spectrogram_calculator.proto b/mediapipe/calculators/audio/spectrogram_calculator.proto index af3ad9d19..b721117d4 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator.proto +++ b/mediapipe/calculators/audio/spectrogram_calculator.proto @@ -66,4 +66,11 @@ message SpectrogramCalculatorOptions { // uniformly regardless of output type (i.e., even dBs are multiplied, not // offset). optional double output_scale = 7 [default = 1.0]; + + // If use_local_timestamp is true, the output packet's timestamp is based on + // the last sample of the packet and it's inferred from the latest input + // packet's timestamp. If false, the output packet's timestamp is based on + // the cumulative timestamping, which is inferred from the intial input + // timestamp and the cumulative number of samples. 
+ optional bool use_local_timestamp = 8 [default = false]; } diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 80205f90e..86cb28522 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -13,12 +13,12 @@ # limitations under the License. # +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "concatenate_vector_calculator_proto", srcs = ["concatenate_vector_calculator.proto"], @@ -26,6 +26,13 @@ proto_library( deps = ["//mediapipe/framework:calculator_proto"], ) +proto_library( + name = "dequantize_byte_array_calculator_proto", + srcs = ["dequantize_byte_array_calculator.proto"], + visibility = ["//visibility:public"], + deps = ["//mediapipe/framework:calculator_proto"], +) + proto_library( name = "packet_cloner_calculator_proto", srcs = ["packet_cloner_calculator.proto"], @@ -72,6 +79,13 @@ proto_library( ], ) +proto_library( + name = "clip_vector_size_calculator_proto", + srcs = ["clip_vector_size_calculator.proto"], + visibility = ["//visibility:public"], + deps = ["//mediapipe/framework:calculator_proto"], +) + mediapipe_cc_proto_library( name = "packet_cloner_calculator_cc_proto", srcs = ["packet_cloner_calculator.proto"], @@ -104,6 +118,22 @@ mediapipe_cc_proto_library( deps = [":concatenate_vector_calculator_proto"], ) +mediapipe_cc_proto_library( + name = "clip_vector_size_calculator_cc_proto", + srcs = ["clip_vector_size_calculator.proto"], + cc_deps = ["//mediapipe/framework:calculator_cc_proto"], + visibility = ["//visibility:public"], + deps = [":clip_vector_size_calculator_proto"], +) + +mediapipe_cc_proto_library( + name = "dequantize_byte_array_calculator_cc_proto", + srcs = ["dequantize_byte_array_calculator.proto"], + cc_deps = ["//mediapipe/framework:calculator_cc_proto"], + visibility = ["//visibility:public"], + deps = [":dequantize_byte_array_calculator_proto"], +) + mediapipe_cc_proto_library( name = "quantize_float_vector_calculator_cc_proto", srcs = ["quantize_float_vector_calculator.proto"], @@ -154,6 +184,66 @@ cc_test( ], ) +cc_library( + name = "begin_loop_calculator", + srcs = ["begin_loop_calculator.cc"], + hdrs = ["begin_loop_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_contract", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:collection_item_id", + "//mediapipe/framework:packet", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) + +cc_library( + name = "end_loop_calculator", + srcs = ["end_loop_calculator.cc"], + hdrs = ["end_loop_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_contract", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:collection_item_id", + "//mediapipe/framework:packet", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:ret_check", + 
"//mediapipe/framework/port:status", + "//mediapipe/util:render_data_cc_proto", + ], + alwayslink = 1, +) + +cc_test( + name = "begin_end_loop_calculator_graph_test", + srcs = ["begin_end_loop_calculator_graph_test.cc"], + deps = [ + ":begin_loop_calculator", + ":end_loop_calculator", + "//mediapipe/calculators/core:packet_cloner_calculator", + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_contract", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/memory", + ], +) + cc_library( name = "concatenate_vector_calculator", srcs = ["concatenate_vector_calculator.cc"], @@ -204,6 +294,50 @@ cc_test( ], ) +cc_library( + name = "clip_vector_size_calculator", + srcs = ["clip_vector_size_calculator.cc"], + hdrs = ["clip_vector_size_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + ":clip_vector_size_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/lite:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "clip_detection_vector_size_calculator", + srcs = ["clip_detection_vector_size_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":clip_vector_size_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", + ], + alwayslink = 1, +) + +cc_test( + name = "clip_vector_size_calculator_test", + srcs = ["clip_vector_size_calculator_test.cc"], + deps = [ + ":clip_vector_size_calculator", + "//mediapipe/calculators/core:packet_resampler_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "counting_source_calculator", srcs = ["counting_source_calculator.cc"], @@ -285,7 +419,7 @@ cc_library( "//visibility:public", ], deps = [ - "//mediapipe/calculators/core:packet_cloner_calculator_cc_proto", + ":packet_cloner_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "@com_google_absl//absl/strings", ], @@ -387,6 +521,32 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "string_to_int_calculator", + srcs = ["string_to_int_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "side_packet_to_stream_calculator", + srcs = ["side_packet_to_stream_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + cc_test( name = "immediate_mux_calculator_test", srcs = ["immediate_mux_calculator_test.cc"], @@ -558,6 +718,32 @@ cc_test( ], ) +cc_library( + name = "dequantize_byte_array_calculator", + srcs = ["dequantize_byte_array_calculator.cc"], + visibility = 
["//visibility:public"], + deps = [ + ":dequantize_byte_array_calculator_cc_proto", + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "dequantize_byte_array_calculator_test", + srcs = ["dequantize_byte_array_calculator_test.cc"], + deps = [ + ":dequantize_byte_array_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + ], +) + cc_library( name = "quantize_float_vector_calculator", srcs = ["quantize_float_vector_calculator.cc"], @@ -694,3 +880,29 @@ cc_test( "//mediapipe/framework/port:status", ], ) + +cc_library( + name = "stream_to_side_packet_calculator", + srcs = ["stream_to_side_packet_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "stream_to_side_packet_calculator_test", + srcs = ["stream_to_side_packet_calculator_test.cc"], + deps = [ + ":stream_to_side_packet_calculator", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework:packet", + "//mediapipe/framework:timestamp", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/memory", + ], +) diff --git a/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc b/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc new file mode 100644 index 000000000..03e1a4439 --- /dev/null +++ b/mediapipe/calculators/core/begin_end_loop_calculator_graph_test.cc @@ -0,0 +1,335 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "absl/memory/memory.h" +#include "mediapipe/calculators/core/begin_loop_calculator.h" +#include "mediapipe/calculators/core/end_loop_calculator.h" +#include "mediapipe/framework/calculator_contract.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { +namespace { + +typedef BeginLoopCalculator> BeginLoopIntegerCalculator; +REGISTER_CALCULATOR(BeginLoopIntegerCalculator); + +class IncrementCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).Set(); + cc->Outputs().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + const int& input_int = cc->Inputs().Index(0).Get(); + auto output_int = absl::make_unique(input_int + 1); + cc->Outputs().Index(0).Add(output_int.release(), cc->InputTimestamp()); + return ::mediapipe::OkStatus(); + } +}; + +REGISTER_CALCULATOR(IncrementCalculator); + +typedef EndLoopCalculator> EndLoopIntegersCalculator; +REGISTER_CALCULATOR(EndLoopIntegersCalculator); + +class BeginEndLoopCalculatorGraphTest : public ::testing::Test { + protected: + BeginEndLoopCalculatorGraphTest() { + graph_config_ = ParseTextProtoOrDie( + R"( + num_threads: 4 + input_stream: "ints" + node { + calculator: "BeginLoopIntegerCalculator" + input_stream: "ITERABLE:ints" + output_stream: "ITEM:int" + output_stream: "BATCH_END:timestamp" + } + node { + calculator: "IncrementCalculator" + input_stream: "int" + output_stream: "int_plus_one" + } + node { + calculator: "EndLoopIntegersCalculator" + input_stream: "ITEM:int_plus_one" + input_stream: "BATCH_END:timestamp" + output_stream: "ITERABLE:ints_plus_one" + } + )"); + tool::AddVectorSink("ints_plus_one", &graph_config_, &output_packets_); + } + + CalculatorGraphConfig graph_config_; + std::vector output_packets_; +}; + +TEST_F(BeginEndLoopCalculatorGraphTest, SingleEmptyVector) { + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config_)); + MP_EXPECT_OK(graph.StartRun({})); + auto input_vector = absl::make_unique>(); + Timestamp input_timestamp = Timestamp(0); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector.release()).At(input_timestamp))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // EndLoopCalc will forward the timestamp bound because there are no elements + // in collection to output. 
+ ASSERT_EQ(0, output_packets_.size()); + + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphTest, SingleNonEmptyVector) { + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config_)); + MP_EXPECT_OK(graph.StartRun({})); + auto input_vector = absl::make_unique>(); + input_vector->emplace_back(0); + input_vector->emplace_back(1); + input_vector->emplace_back(2); + Timestamp input_timestamp = Timestamp(0); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector.release()).At(input_timestamp))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + ASSERT_EQ(1, output_packets_.size()); + EXPECT_EQ(input_timestamp, output_packets_[0].Timestamp()); + std::vector expected_output_vector = {1, 2, 3}; + EXPECT_EQ(expected_output_vector, output_packets_[0].Get>()); + + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphTest, MultipleVectors) { + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config_)); + MP_EXPECT_OK(graph.StartRun({})); + + auto input_vector0 = absl::make_unique>(); + input_vector0->emplace_back(0); + input_vector0->emplace_back(1); + Timestamp input_timestamp0 = Timestamp(0); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector0.release()).At(input_timestamp0))); + + auto input_vector1 = absl::make_unique>(); + Timestamp input_timestamp1 = Timestamp(1); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector1.release()).At(input_timestamp1))); + + auto input_vector2 = absl::make_unique>(); + input_vector2->emplace_back(2); + input_vector2->emplace_back(3); + Timestamp input_timestamp2 = Timestamp(2); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector2.release()).At(input_timestamp2))); + + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); + + ASSERT_EQ(2, output_packets_.size()); + + EXPECT_EQ(input_timestamp0, output_packets_[0].Timestamp()); + std::vector expected_output_vector0 = {1, 2}; + EXPECT_EQ(expected_output_vector0, + output_packets_[0].Get>()); + + // At input_timestamp1, EndLoopCalc will forward timestamp bound as there are + // no elements in vector to process. 
+ + EXPECT_EQ(input_timestamp2, output_packets_[1].Timestamp()); + std::vector expected_output_vector2 = {3, 4}; + EXPECT_EQ(expected_output_vector2, + output_packets_[1].Get>()); +} + +class MultiplierCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).Set(); + cc->Inputs().Index(1).Set(); + cc->Outputs().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + const int& input_int = cc->Inputs().Index(0).Get(); + const int& multiplier_int = cc->Inputs().Index(1).Get(); + auto output_int = absl::make_unique(input_int * multiplier_int); + cc->Outputs().Index(0).Add(output_int.release(), cc->InputTimestamp()); + return ::mediapipe::OkStatus(); + } +}; + +REGISTER_CALCULATOR(MultiplierCalculator); + +class BeginEndLoopCalculatorGraphWithClonedInputsTest : public ::testing::Test { + protected: + BeginEndLoopCalculatorGraphWithClonedInputsTest() { + graph_config_ = ParseTextProtoOrDie( + R"( + num_threads: 4 + input_stream: "ints" + input_stream: "multiplier" + node { + calculator: "BeginLoopIntegerCalculator" + input_stream: "ITERABLE:ints" + input_stream: "CLONE:multiplier" + output_stream: "ITEM:int_at_loop" + output_stream: "CLONE:multiplier_cloned_at_loop" + output_stream: "BATCH_END:timestamp" + } + node { + calculator: "MultiplierCalculator" + input_stream: "int_at_loop" + input_stream: "multiplier_cloned_at_loop" + output_stream: "multiplied_int_at_loop" + } + node { + calculator: "EndLoopIntegersCalculator" + input_stream: "ITEM:multiplied_int_at_loop" + input_stream: "BATCH_END:timestamp" + output_stream: "ITERABLE:multiplied_ints" + } + )"); + tool::AddVectorSink("multiplied_ints", &graph_config_, &output_packets_); + } + + CalculatorGraphConfig graph_config_; + std::vector output_packets_; +}; + +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleEmptyVector) { + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config_)); + MP_EXPECT_OK(graph.StartRun({})); + auto input_vector = absl::make_unique>(); + Timestamp input_timestamp = Timestamp(42); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector.release()).At(input_timestamp))); + auto multiplier = absl::make_unique(2); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "multiplier", Adopt(multiplier.release()).At(input_timestamp))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // EndLoopCalc will forward the timestamp bound because there are no elements + // in collection to output. 
+ ASSERT_EQ(0, output_packets_.size()); + + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, SingleNonEmptyVector) { + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config_)); + MP_EXPECT_OK(graph.StartRun({})); + auto input_vector = absl::make_unique>(); + input_vector->emplace_back(0); + input_vector->emplace_back(1); + input_vector->emplace_back(2); + Timestamp input_timestamp = Timestamp(42); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector.release()).At(input_timestamp))); + auto multiplier = absl::make_unique(2); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "multiplier", Adopt(multiplier.release()).At(input_timestamp))); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + ASSERT_EQ(1, output_packets_.size()); + EXPECT_EQ(input_timestamp, output_packets_[0].Timestamp()); + std::vector expected_output_vector = {0, 2, 4}; + EXPECT_EQ(expected_output_vector, output_packets_[0].Get>()); + + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(BeginEndLoopCalculatorGraphWithClonedInputsTest, MultipleVectors) { + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config_)); + MP_EXPECT_OK(graph.StartRun({})); + + auto input_vector0 = absl::make_unique>(); + input_vector0->emplace_back(0); + input_vector0->emplace_back(1); + Timestamp input_timestamp0 = Timestamp(42); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector0.release()).At(input_timestamp0))); + auto multiplier0 = absl::make_unique(2); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "multiplier", Adopt(multiplier0.release()).At(input_timestamp0))); + + auto input_vector1 = absl::make_unique>(); + Timestamp input_timestamp1 = Timestamp(43); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector1.release()).At(input_timestamp1))); + auto multiplier1 = absl::make_unique(2); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "multiplier", Adopt(multiplier1.release()).At(input_timestamp1))); + + auto input_vector2 = absl::make_unique>(); + input_vector2->emplace_back(2); + input_vector2->emplace_back(3); + Timestamp input_timestamp2 = Timestamp(44); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "ints", Adopt(input_vector2.release()).At(input_timestamp2))); + auto multiplier2 = absl::make_unique(3); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "multiplier", Adopt(multiplier2.release()).At(input_timestamp2))); + + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilDone()); + + ASSERT_EQ(2, output_packets_.size()); + + EXPECT_EQ(input_timestamp0, output_packets_[0].Timestamp()); + std::vector expected_output_vector0 = {0, 2}; + EXPECT_EQ(expected_output_vector0, + output_packets_[0].Get>()); + + // At input_timestamp1, EndLoopCalc will forward timestamp bound as there are + // no elements in vector to process. + + EXPECT_EQ(input_timestamp2, output_packets_[1].Timestamp()); + std::vector expected_output_vector2 = {6, 9}; + EXPECT_EQ(expected_output_vector2, + output_packets_[1].Get>()); +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/core/begin_loop_calculator.cc b/mediapipe/calculators/core/begin_loop_calculator.cc new file mode 100644 index 000000000..cc7f6b85e --- /dev/null +++ b/mediapipe/calculators/core/begin_loop_calculator.cc @@ -0,0 +1,40 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/core/begin_loop_calculator.h" + +#include <vector> + +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" + +namespace mediapipe { + +// A calculator to process std::vector<NormalizedLandmark>. +typedef BeginLoopCalculator<std::vector<NormalizedLandmark>> + BeginLoopNormalizedLandmarkCalculator; +REGISTER_CALCULATOR(BeginLoopNormalizedLandmarkCalculator); + +// A calculator to process std::vector<std::vector<NormalizedLandmark>>. +typedef BeginLoopCalculator< + std::vector<std::vector<NormalizedLandmark>>> + BeginLoopNormalizedLandmarksVectorCalculator; +REGISTER_CALCULATOR(BeginLoopNormalizedLandmarksVectorCalculator); + +// A calculator to process std::vector<NormalizedRect>. +typedef BeginLoopCalculator<std::vector<NormalizedRect>> + BeginLoopNormalizedRectCalculator; +REGISTER_CALCULATOR(BeginLoopNormalizedRectCalculator); + +} // namespace mediapipe
diff --git a/mediapipe/calculators/core/begin_loop_calculator.h b/mediapipe/calculators/core/begin_loop_calculator.h new file mode 100644 index 000000000..7258b4bf7 --- /dev/null +++ b/mediapipe/calculators/core/begin_loop_calculator.h @@ -0,0 +1,157 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_CORE_BEGIN_LOOP_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_CORE_BEGIN_LOOP_CALCULATOR_H_ + +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_context.h" +#include "mediapipe/framework/calculator_contract.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/collection_item_id.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Calculator for implementing loops on iterable collections inside a MediaPipe +// graph.
+// +// It is designed to be used like: +// +// node { +// calculator: "BeginLoopWithIterableCalculator" +// input_stream: "ITERABLE:input_iterable" # IterableT @ext_ts +// output_stream: "ITEM:input_element" # ItemT @loop_internal_ts +// output_stream: "BATCH_END:ext_ts" # Timestamp @loop_internal_ts +// } +// +// node { +// calculator: "ElementToBlaConverterSubgraph" +// input_stream: "ITEM:input_to_loop_body" # ItemT @loop_internal_ts +// output_stream: "BLA:output_of_loop_body" # ItemU @loop_internal_ts +// } +// +// node { +// calculator: "EndLoopWithOutputCalculator" +// input_stream: "ITEM:output_of_loop_body" # ItemU @loop_internal_ts +// input_stream: "BATCH_END:ext_ts" # Timestamp @loop_internal_ts +// output_stream: "OUTPUT:aggregated_result" # IterableU @ext_ts +// } +// +// BeginLoopCalculator accepts an optional input stream tagged with "TICK" +// which, if non-empty, wakes up the calculator and calls +// BeginLoopCalculator::Process(). Input streams tagged with "CLONE" are cloned +// to the corresponding output streams at loop timestamps. This ensures that a +// MediaPipe graph or sub-graph can run multiple times, once per element in the +// "ITERABLE", with per-iteration clones of the packets in the "CLONE" input +// streams. +template <typename IterableT> +class BeginLoopCalculator : public CalculatorBase { + using ItemT = typename IterableT::value_type; + + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + // A non-empty packet in the optional "TICK" input stream wakes up the + // calculator. + if (cc->Inputs().HasTag("TICK")) { + cc->Inputs().Tag("TICK").SetAny(); + } + + // An iterable collection in the input stream. + RET_CHECK(cc->Inputs().HasTag("ITERABLE")); + cc->Inputs().Tag("ITERABLE").Set<IterableT>(); + + // An element from the collection. + RET_CHECK(cc->Outputs().HasTag("ITEM")); + cc->Outputs().Tag("ITEM").Set<ItemT>(); + + RET_CHECK(cc->Outputs().HasTag("BATCH_END")); + cc->Outputs() .Tag("BATCH_END") .Set<Timestamp>( + // A flush signal to the corresponding EndLoopCalculator for it to + // emit the aggregated result with the timestamp contained in this + // flush signal packet. + ); + + // Input streams tagged with "CLONE" are cloned to the corresponding + // "CLONE" output streams at loop timestamps. + RET_CHECK(cc->Inputs().NumEntries("CLONE") == cc->Outputs().NumEntries("CLONE")); + if (cc->Inputs().NumEntries("CLONE") > 0) { + for (int i = 0; i < cc->Inputs().NumEntries("CLONE"); ++i) { + cc->Inputs().Get("CLONE", i).SetAny(); + cc->Outputs().Get("CLONE", i).SetSameAs(&cc->Inputs().Get("CLONE", i)); + } + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + Timestamp last_timestamp = loop_internal_timestamp_; + if (!cc->Inputs().Tag("ITERABLE").IsEmpty()) { + const IterableT& collection = + cc->Inputs().Tag("ITERABLE").template Get<IterableT>(); + for (const auto& item : collection) { + cc->Outputs().Tag("ITEM").AddPacket( + MakePacket<ItemT>(item).At(loop_internal_timestamp_)); + ForwardClonePackets(cc, loop_internal_timestamp_); + ++loop_internal_timestamp_; + } + } + + // The collection was empty and nothing was processed. + if (last_timestamp == loop_internal_timestamp_) { + // Increment loop_internal_timestamp_ because it is used up now. + ++loop_internal_timestamp_; + for (auto it = cc->Outputs().begin(); it < cc->Outputs().end(); ++it) { + it->SetNextTimestampBound(loop_internal_timestamp_); + } + } + + // The for loop processing the input collection already incremented + // loop_internal_timestamp_. To emit the BATCH_END packet along with the + // last non-BATCH_END packet, decrement by one. + cc->Outputs() .Tag("BATCH_END") .AddPacket(MakePacket<Timestamp>(cc->InputTimestamp()) .At(Timestamp(loop_internal_timestamp_ - 1))); + + return ::mediapipe::OkStatus(); + } + + private: + void ForwardClonePackets(CalculatorContext* cc, Timestamp output_timestamp) { + if (cc->Inputs().NumEntries("CLONE") > 0) { + for (int i = 0; i < cc->Inputs().NumEntries("CLONE"); ++i) { + if (!cc->Inputs().Get("CLONE", i).IsEmpty()) { + auto input_packet = cc->Inputs().Get("CLONE", i).Value(); + cc->Outputs() .Get("CLONE", i) .AddPacket(std::move(input_packet).At(output_timestamp)); + } + } + } + } + + // Fake timestamps generated per element in collection. + Timestamp loop_internal_timestamp_ = Timestamp(0); +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_CORE_BEGIN_LOOP_CALCULATOR_H_
diff --git a/mediapipe/calculators/core/clip_detection_vector_size_calculator.cc b/mediapipe/calculators/core/clip_detection_vector_size_calculator.cc new file mode 100644 index 000000000..55bcf2feb --- /dev/null +++ b/mediapipe/calculators/core/clip_detection_vector_size_calculator.cc @@ -0,0 +1,26 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <vector> + +#include "mediapipe/calculators/core/clip_vector_size_calculator.h" +#include "mediapipe/framework/formats/detection.pb.h" + +namespace mediapipe { + +typedef ClipVectorSizeCalculator<::mediapipe::Detection> + ClipDetectionVectorSizeCalculator; +REGISTER_CALCULATOR(ClipDetectionVectorSizeCalculator); + +} // namespace mediapipe
diff --git a/mediapipe/calculators/core/clip_vector_size_calculator.cc b/mediapipe/calculators/core/clip_vector_size_calculator.cc new file mode 100644 index 000000000..388cc3a6a --- /dev/null +++ b/mediapipe/calculators/core/clip_vector_size_calculator.cc @@ -0,0 +1,28 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
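Stepping back to BeginLoopCalculator::Process in begin_loop_calculator.h above, a worked trace of its timestamp bookkeeping (assuming loop_internal_timestamp_ starts at Timestamp(0), as declared in the header):

  // ITERABLE packet {a, b, c} arrives at ext_ts = Timestamp(42):
  //   ITEM a -> internal ts 0, ITEM b -> internal ts 1, ITEM c -> internal ts 2
  //   loop_internal_timestamp_ is now 3
  //   BATCH_END -> internal ts 2 (= 3 - 1), payload Timestamp(42)
  // Empty ITERABLE packet arrives at ext_ts = Timestamp(43):
  //   no ITEM packets; loop_internal_timestamp_ advances 3 -> 4 and every
  //   output's next timestamp bound is set to 4
  //   BATCH_END -> internal ts 3, payload Timestamp(43)

This is why the tests above see no output vector for an empty input: EndLoopCalculator receives only the BATCH_END flush packet and forwards the timestamp bound.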
+ +#include "mediapipe/calculators/core/clip_vector_size_calculator.h" + +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/rect.pb.h" + +namespace mediapipe { + +typedef ClipVectorSizeCalculator<::mediapipe::NormalizedRect> + ClipNormalizedRectVectorSizeCalculator; +REGISTER_CALCULATOR(ClipNormalizedRectVectorSizeCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/clip_vector_size_calculator.h b/mediapipe/calculators/core/clip_vector_size_calculator.h new file mode 100644 index 000000000..f8e12ff28 --- /dev/null +++ b/mediapipe/calculators/core/clip_vector_size_calculator.h @@ -0,0 +1,137 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_CORE_CLIP_VECTOR_SIZE_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_CORE_CLIP_VECTOR_SIZE_CALCULATOR_H_ + +#include +#include + +#include "mediapipe/calculators/core/clip_vector_size_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Clips the size of the input vector of type T to a specified max_vec_size. 
+// In a graph it will be used as: +// node { +// calculator: "ClipIntVectorSizeCalculator" +// input_stream: "input_vector" +// output_stream: "output_vector" +// options { +// [mediapipe.ClipIntVectorSizeCalculatorOptions.ext] { +// max_vec_size: 5 +// } +// } +// } +template +class ClipVectorSizeCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().NumEntries() == 1); + RET_CHECK(cc->Outputs().NumEntries() == 1); + + if (cc->Options<::mediapipe::ClipVectorSizeCalculatorOptions>() + .max_vec_size() < 1) { + return ::mediapipe::InternalError( + "max_vec_size should be greater than or equal to 1."); + } + + cc->Inputs().Index(0).Set>(); + cc->Outputs().Index(0).Set>(); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + max_vec_size_ = cc->Options<::mediapipe::ClipVectorSizeCalculatorOptions>() + .max_vec_size(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (max_vec_size_ < 1) { + return ::mediapipe::InternalError( + "max_vec_size should be greater than or equal to 1."); + } + if (cc->Inputs().Index(0).IsEmpty()) { + return ::mediapipe::OkStatus(); + } + + return ClipVectorSize(std::is_copy_constructible(), cc); + } + + template + ::mediapipe::Status ClipVectorSize(std::true_type, CalculatorContext* cc) { + auto output = absl::make_unique>(); + const std::vector& input_vector = + cc->Inputs().Index(0).Get>(); + if (max_vec_size_ >= input_vector.size()) { + output->insert(output->end(), input_vector.begin(), input_vector.end()); + } else { + for (int i = 0; i < max_vec_size_; ++i) { + output->push_back(input_vector[i]); + } + } + cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp()); + return ::mediapipe::OkStatus(); + } + + template + ::mediapipe::Status ClipVectorSize(std::false_type, CalculatorContext* cc) { + return ConsumeAndClipVectorSize(std::is_move_constructible(), cc); + } + + template + ::mediapipe::Status ConsumeAndClipVectorSize(std::true_type, + CalculatorContext* cc) { + auto output = absl::make_unique>(); + ::mediapipe::StatusOr>> input_status = + cc->Inputs().Index(0).Value().Consume>(); + + if (input_status.ok()) { + std::unique_ptr> input_vector = + std::move(input_status).ValueOrDie(); + auto begin_it = input_vector->begin(); + auto end_it = input_vector->end(); + if (max_vec_size_ < input_vector->size()) { + end_it = input_vector->begin() + max_vec_size_; + } + output->insert(output->end(), std::make_move_iterator(begin_it), + std::make_move_iterator(end_it)); + } else { + return input_status.status(); + } + cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp()); + return ::mediapipe::OkStatus(); + } + + template + ::mediapipe::Status ConsumeAndClipVectorSize(std::false_type, + CalculatorContext* cc) { + return ::mediapipe::InternalError( + "Cannot copy or move input vectors and clip their size."); + } + + private: + int max_vec_size_ = 0; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_CORE_CLIP_VECTOR_SIZE_CALCULATOR_H_ diff --git a/mediapipe/calculators/core/clip_vector_size_calculator.proto b/mediapipe/calculators/core/clip_vector_size_calculator.proto new file mode 100644 index 000000000..5dea660d6 --- /dev/null +++ b/mediapipe/calculators/core/clip_vector_size_calculator.proto @@ -0,0 +1,28 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message ClipVectorSizeCalculatorOptions { + extend CalculatorOptions { + optional ClipVectorSizeCalculatorOptions ext = 274674998; + } + + // Maximum size of output vector. + optional int32 max_vec_size = 1 [default = 1]; +} diff --git a/mediapipe/calculators/core/clip_vector_size_calculator_test.cc b/mediapipe/calculators/core/clip_vector_size_calculator_test.cc new file mode 100644 index 000000000..60a42120e --- /dev/null +++ b/mediapipe/calculators/core/clip_vector_size_calculator_test.cc @@ -0,0 +1,179 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
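The Process path in clip_vector_size_calculator.h above dispatches on std::is_copy_constructible<T>: copyable element types are copied into the clipped output, while move-only types (e.g. the std::unique_ptr<int> exercised in the test below) are consumed from the input packet and moved. A standalone sketch of that tag-dispatch idiom, not the calculator's actual code:

  #include <algorithm>
  #include <cstddef>
  #include <iterator>
  #include <memory>
  #include <type_traits>
  #include <vector>

  // Copyable T: copy the first n elements.
  template <typename T>
  std::vector<T> ClipImpl(const std::vector<T>& in, std::size_t n,
                          std::true_type) {
    const auto k = static_cast<std::ptrdiff_t>(std::min(n, in.size()));
    return std::vector<T>(in.begin(), in.begin() + k);
  }

  // Move-only T: take the vector by value and move the first n elements out.
  template <typename T>
  std::vector<T> ClipImpl(std::vector<T> in, std::size_t n, std::false_type) {
    const auto k = static_cast<std::ptrdiff_t>(std::min(n, in.size()));
    std::vector<T> out;
    out.insert(out.end(), std::make_move_iterator(in.begin()),
               std::make_move_iterator(in.begin() + k));
    return out;
  }

  // Selects the overload at compile time via the trait's true_type/false_type.
  template <typename T>
  std::vector<T> Clip(std::vector<T> in, std::size_t n) {
    return ClipImpl(std::move(in), n,
                    typename std::is_copy_constructible<T>::type());
  }

  // Usage: Clip(ints, 3) copies; Clip(std::move(ptrs), 3) moves unique_ptrs.
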
+ +#include "mediapipe/calculators/core/clip_vector_size_calculator.h" + +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { + +typedef ClipVectorSizeCalculator TestClipIntVectorSizeCalculator; +REGISTER_CALCULATOR(TestClipIntVectorSizeCalculator); + +void AddInputVector(const std::vector& input, int64 timestamp, + CalculatorRunner* runner) { + runner->MutableInputs()->Index(0).packets.push_back( + MakePacket>(input).At(Timestamp(timestamp))); +} + +TEST(TestClipIntVectorSizeCalculatorTest, EmptyVectorInput) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestClipIntVectorSizeCalculator" + input_stream: "input_vector" + output_stream: "output_vector" + options { + [mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 1 } + } + )"); + CalculatorRunner runner(node_config); + + std::vector input = {}; + AddInputVector(input, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + EXPECT_TRUE(outputs[0].Get>().empty()); +} + +TEST(TestClipIntVectorSizeCalculatorTest, OneTimestamp) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestClipIntVectorSizeCalculator" + input_stream: "input_vector" + output_stream: "output_vector" + options { + [mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 2 } + } + )"); + CalculatorRunner runner(node_config); + + std::vector input = {0, 1, 2, 3}; + AddInputVector(input, /*timestamp=*/1, &runner); + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + const std::vector& output = outputs[0].Get>(); + EXPECT_EQ(2, output.size()); + std::vector expected_vector = {0, 1}; + EXPECT_EQ(expected_vector, output); +} + +TEST(TestClipIntVectorSizeCalculatorTest, TwoInputsAtTwoTimestamps) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "TestClipIntVectorSizeCalculator" + input_stream: "input_vector" + output_stream: "output_vector" + options { + [mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 3 } + } + )"); + CalculatorRunner runner(node_config); + + { + std::vector input = {0, 1, 2, 3}; + AddInputVector(input, /*timestamp=*/1, &runner); + } + { + std::vector input = {2, 3, 4, 5}; + AddInputVector(input, /*timestamp=*/2, &runner); + } + MP_ASSERT_OK(runner.Run()); + + const std::vector& outputs = runner.Outputs().Index(0).packets; + EXPECT_EQ(2, outputs.size()); + { + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + const std::vector& output = outputs[0].Get>(); + EXPECT_EQ(3, output.size()); + std::vector expected_vector = {0, 1, 2}; + EXPECT_EQ(expected_vector, output); + } + { + EXPECT_EQ(Timestamp(2), outputs[1].Timestamp()); + const std::vector& output = outputs[1].Get>(); + EXPECT_EQ(3, output.size()); + std::vector expected_vector = {2, 3, 4}; + EXPECT_EQ(expected_vector, output); + } +} + +typedef ClipVectorSizeCalculator> + TestClipUniqueIntPtrVectorSizeCalculator; 
+REGISTER_CALCULATOR(TestClipUniqueIntPtrVectorSizeCalculator); + +TEST(TestClipUniqueIntPtrVectorSizeCalculatorTest, ConsumeOneTimestamp) { + /* Note: We don't use CalculatorRunner for this test because it keeps copies + * of input packets, so packets sent to the graph don't have sole ownership. + * The test needs to send packets that own the data. + */ + CalculatorGraphConfig graph_config = + ParseTextProtoOrDie(R"( + input_stream: "input_vector" + node { + calculator: "TestClipUniqueIntPtrVectorSizeCalculator" + input_stream: "input_vector" + output_stream: "output_vector" + options { + [mediapipe.ClipVectorSizeCalculatorOptions.ext] { max_vec_size: 3 } + } + } + )"); + + std::vector outputs; + tool::AddVectorSink("output_vector", &graph_config, &outputs); + + CalculatorGraph graph; + MP_EXPECT_OK(graph.Initialize(graph_config)); + MP_EXPECT_OK(graph.StartRun({})); + + // input1 : {0, 1, 2, 3, 4, 5} + auto input_vector = absl::make_unique>>(6); + for (int i = 0; i < 6; ++i) { + input_vector->at(i) = absl::make_unique(i); + } + + MP_EXPECT_OK(graph.AddPacketToInputStream( + "input_vector", Adopt(input_vector.release()).At(Timestamp(1)))); + + MP_EXPECT_OK(graph.WaitUntilIdle()); + MP_EXPECT_OK(graph.CloseAllPacketSources()); + MP_EXPECT_OK(graph.WaitUntilDone()); + + EXPECT_EQ(1, outputs.size()); + EXPECT_EQ(Timestamp(1), outputs[0].Timestamp()); + const std::vector>& result = + outputs[0].Get>>(); + EXPECT_EQ(3, result.size()); + for (int i = 0; i < 3; ++i) { + const std::unique_ptr& v = result[i]; + EXPECT_EQ(i, *v); + } +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/dequantize_byte_array_calculator.cc b/mediapipe/calculators/core/dequantize_byte_array_calculator.cc new file mode 100644 index 000000000..4f1a3ed86 --- /dev/null +++ b/mediapipe/calculators/core/dequantize_byte_array_calculator.cc @@ -0,0 +1,90 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "mediapipe/calculators/core/dequantize_byte_array_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/status.h" + +// Dequantizes a byte array to a vector of floats. 
+// +// Example config: +// node { +// calculator: "DequantizeByteArrayCalculator" +// input_stream: "ENCODED:encoded" +// output_stream: "FLOAT_VECTOR:float_vector" +// options { +// [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: { +// max_quantized_value: 2 +// min_quantized_value: -2 +// } +// } +// } +namespace mediapipe { + +class DequantizeByteArrayCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Tag("ENCODED").Set(); + cc->Outputs().Tag("FLOAT_VECTOR").Set>(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) final { + const auto options = + cc->Options<::mediapipe::DequantizeByteArrayCalculatorOptions>(); + if (!options.has_max_quantized_value() || + !options.has_min_quantized_value()) { + return ::mediapipe::InvalidArgumentError( + "Both max_quantized_value and min_quantized_value must be provided " + "in DequantizeByteArrayCalculatorOptions."); + } + float max_quantized_value = options.max_quantized_value(); + float min_quantized_value = options.min_quantized_value(); + if (max_quantized_value < min_quantized_value + FLT_EPSILON) { + return ::mediapipe::InvalidArgumentError( + "max_quantized_value must be greater than min_quantized_value."); + } + float range = max_quantized_value - min_quantized_value; + scalar_ = range / 255.0; + bias_ = (range / 512.0) + min_quantized_value; + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + const std::string& encoded = + cc->Inputs().Tag("ENCODED").Value().Get(); + std::vector float_vector; + float_vector.reserve(encoded.length()); + for (int i = 0; i < encoded.length(); ++i) { + float_vector.push_back( + static_cast(encoded.at(i)) * scalar_ + bias_); + } + cc->Outputs() + .Tag("FLOAT_VECTOR") + .AddPacket(MakePacket>(float_vector) + .At(cc->InputTimestamp())); + return ::mediapipe::OkStatus(); + } + + private: + float scalar_; + float bias_; +}; + +REGISTER_CALCULATOR(DequantizeByteArrayCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/dequantize_byte_array_calculator.proto b/mediapipe/calculators/core/dequantize_byte_array_calculator.proto new file mode 100644 index 000000000..3032dbf48 --- /dev/null +++ b/mediapipe/calculators/core/dequantize_byte_array_calculator.proto @@ -0,0 +1,28 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
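Checking the Open() arithmetic in dequantize_byte_array_calculator.cc above: with max_quantized_value: 2 and min_quantized_value: -2, range = 4, so scalar_ = 4/255 ≈ 0.0157 and bias_ = 4/512 - 2 ≈ -1.9922, matching the TestDequantization expectations further below. A standalone check of those numbers (assuming bytes are read as unsigned 0..255, as the test's expected signs imply):

  #include <cassert>
  #include <cmath>

  int main() {
    const float max_q = 2.0f, min_q = -2.0f;
    const float range = max_q - min_q;          // 4
    const float scalar = range / 255.0f;        // ~0.01569
    const float bias = range / 512.0f + min_q;  // ~-1.99219
    // Bytes from TestDequantization: 0x7F, 0xFF, 0x00, 0x01 (unsigned).
    assert(std::abs(127.0f * scalar + bias - 0.0f) < 0.01f);
    assert(std::abs(255.0f * scalar + bias - 2.0f) < 0.01f);
    assert(std::abs(0.0f * scalar + bias - (-2.0f)) < 0.01f);
    assert(std::abs(1.0f * scalar + bias - (-1.976f)) < 0.01f);
    return 0;
  }
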
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message DequantizeByteArrayCalculatorOptions { + extend CalculatorOptions { + optional DequantizeByteArrayCalculatorOptions ext = 272316343; + } + + optional float max_quantized_value = 1; + optional float min_quantized_value = 2; +} diff --git a/mediapipe/calculators/core/dequantize_byte_array_calculator_test.cc b/mediapipe/calculators/core/dequantize_byte_array_calculator_test.cc new file mode 100644 index 000000000..a17fb6281 --- /dev/null +++ b/mediapipe/calculators/core/dequantize_byte_array_calculator_test.cc @@ -0,0 +1,137 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { + +TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:encoded" + output_stream: "FLOAT_VECTOR:float_vector" + options { + [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: { + max_quantized_value: 2 + } + } + )"); + CalculatorRunner runner(node_config); + std::string empty_string; + runner.MutableInputs()->Tag("ENCODED").packets.push_back( + MakePacket(empty_string).At(Timestamp(0))); + auto status = runner.Run(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT( + status.message(), + testing::HasSubstr( + "Both max_quantized_value and min_quantized_value must be provided")); +} + +TEST(QuantizeFloatVectorCalculatorTest, WrongConfig2) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:encoded" + output_stream: "FLOAT_VECTOR:float_vector" + options { + [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: { + max_quantized_value: -2 + min_quantized_value: 2 + } + } + )"); + CalculatorRunner runner(node_config); + std::string empty_string; + runner.MutableInputs()->Tag("ENCODED").packets.push_back( + MakePacket(empty_string).At(Timestamp(0))); + auto status = runner.Run(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT( + status.message(), + testing::HasSubstr( + "max_quantized_value must be greater than min_quantized_value")); +} + +TEST(QuantizeFloatVectorCalculatorTest, WrongConfig3) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:encoded" + output_stream: "FLOAT_VECTOR:float_vector" + options { + [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: { + max_quantized_value: 1 + min_quantized_value: 1 + } + } + )"); + CalculatorRunner 
+  std::string empty_string;
+  runner.MutableInputs()->Tag("ENCODED").packets.push_back(
+      MakePacket<std::string>(empty_string).At(Timestamp(0)));
+  auto status = runner.Run();
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(
+      status.message(),
+      testing::HasSubstr(
+          "max_quantized_value must be greater than min_quantized_value"));
+}
+
+TEST(DequantizeByteArrayCalculatorTest, TestDequantization) {
+  CalculatorGraphConfig::Node node_config =
+      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
+        calculator: "DequantizeByteArrayCalculator"
+        input_stream: "ENCODED:encoded"
+        output_stream: "FLOAT_VECTOR:float_vector"
+        options {
+          [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: {
+            max_quantized_value: 2
+            min_quantized_value: -2
+          }
+        }
+      )");
+  CalculatorRunner runner(node_config);
+  unsigned char input[4] = {0x7F, 0xFF, 0x00, 0x01};
+  runner.MutableInputs()->Tag("ENCODED").packets.push_back(
+      MakePacket<std::string>(
+          std::string(reinterpret_cast<char*>(input), 4))
+          .At(Timestamp(0)));
+  MP_ASSERT_OK(runner.Run());
+  const std::vector<Packet>& outputs =
+      runner.Outputs().Tag("FLOAT_VECTOR").packets;
+  EXPECT_EQ(1, outputs.size());
+  const std::vector<float>& result = outputs[0].Get<std::vector<float>>();
+  ASSERT_FALSE(result.empty());
+  EXPECT_EQ(4, result.size());
+  EXPECT_NEAR(0, result[0], 0.01);
+  EXPECT_NEAR(2, result[1], 0.01);
+  EXPECT_NEAR(-2, result[2], 0.01);
+  EXPECT_NEAR(-1.976, result[3], 0.01);
+
+  EXPECT_EQ(Timestamp(0), outputs[0].Timestamp());
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/core/end_loop_calculator.cc b/mediapipe/calculators/core/end_loop_calculator.cc
new file mode 100644
index 000000000..8991e97f1
--- /dev/null
+++ b/mediapipe/calculators/core/end_loop_calculator.cc
@@ -0,0 +1,45 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/calculators/core/end_loop_calculator.h"
+
+#include <vector>
+
+#include "mediapipe/framework/formats/landmark.pb.h"
+#include "mediapipe/framework/formats/rect.pb.h"
+#include "mediapipe/util/render_data.pb.h"
+
+namespace mediapipe {
+
+typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedRect>>
+    EndLoopNormalizedRectCalculator;
+REGISTER_CALCULATOR(EndLoopNormalizedRectCalculator);
+
+typedef EndLoopCalculator<std::vector<::mediapipe::NormalizedLandmark>>
+    EndLoopNormalizedLandmarkCalculator;
+REGISTER_CALCULATOR(EndLoopNormalizedLandmarkCalculator);
+
+typedef EndLoopCalculator<
+    std::vector<std::vector<::mediapipe::NormalizedLandmark>>>
+    EndLoopNormalizedLandmarksVectorCalculator;
+REGISTER_CALCULATOR(EndLoopNormalizedLandmarksVectorCalculator);
+
+typedef EndLoopCalculator<std::vector<bool>> EndLoopBooleanCalculator;
+REGISTER_CALCULATOR(EndLoopBooleanCalculator);
+
+typedef EndLoopCalculator<std::vector<::mediapipe::RenderData>>
+    EndLoopRenderDataCalculator;
+REGISTER_CALCULATOR(EndLoopRenderDataCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/core/end_loop_calculator.h b/mediapipe/calculators/core/end_loop_calculator.h
new file mode 100644
index 000000000..869cc29a2
--- /dev/null
+++ b/mediapipe/calculators/core/end_loop_calculator.h
@@ -0,0 +1,106 @@
+// Copyright 2019 The MediaPipe Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_CORE_END_LOOP_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_CORE_END_LOOP_CALCULATOR_H_ + +#include "mediapipe/framework/calculator_context.h" +#include "mediapipe/framework/calculator_contract.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/collection_item_id.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Calculator for completing the processing of loops on iterable collections +// inside a MediaPipe graph. The EndLoopCalculator collects all input packets +// from ITEM input_stream into a collection and upon receiving the flush signal +// from the "BATCH_END" tagged input stream, it emits the aggregated results +// at the original timestamp contained in the "BATCH_END" input stream. +// +// It is designed to be used like: +// +// node { +// calculator: "BeginLoopWithIterableCalculator" +// input_stream: "ITERABLE:input_iterable" # IterableT @ext_ts +// output_stream: "ITEM:input_element" # ItemT @loop_internal_ts +// output_stream: "BATCH_END:ext_ts" # Timestamp @loop_internal_ts +// } +// +// node { +// calculator: "ElementToBlaConverterSubgraph" +// input_stream: "ITEM:input_to_loop_body" # ItemT @loop_internal_ts +// output_stream: "BLA:output_of_loop_body" # ItemU @loop_internal_ts +// } +// +// node { +// calculator: "EndLoopWithOutputCalculator" +// input_stream: "ITEM:output_of_loop_body" # ItemU @loop_internal_ts +// input_stream: "BATCH_END:ext_ts" # Timestamp @loop_internal_ts +// output_stream: "OUTPUT:aggregated_result" # IterableU @ext_ts +// } +template +class EndLoopCalculator : public CalculatorBase { + using ItemT = typename IterableT::value_type; + + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().HasTag("BATCH_END")) + << "Missing BATCH_END tagged input_stream."; + cc->Inputs().Tag("BATCH_END").Set(); + + RET_CHECK(cc->Inputs().HasTag("ITEM")); + cc->Inputs().Tag("ITEM").Set(); + + RET_CHECK(cc->Outputs().HasTag("ITERABLE")); + cc->Outputs().Tag("ITERABLE").Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (!cc->Inputs().Tag("ITEM").IsEmpty()) { + if (!input_stream_collection_) { + input_stream_collection_.reset(new IterableT); + } + input_stream_collection_->push_back( + cc->Inputs().Tag("ITEM").template Get()); + } + + if (!cc->Inputs().Tag("BATCH_END").Value().IsEmpty()) { // flush signal + Timestamp loop_control_ts = + cc->Inputs().Tag("BATCH_END").template Get(); + if (input_stream_collection_) { + cc->Outputs() + .Tag("ITERABLE") + .Add(input_stream_collection_.release(), loop_control_ts); + } else { + // Since there is no collection, inform downstream calculators to not + // expect any packet by updating the timestamp bounds. 
+ cc->Outputs() + .Tag("ITERABLE") + .SetNextTimestampBound(Timestamp(loop_control_ts.Value() + 1)); + } + } + return ::mediapipe::OkStatus(); + } + + private: + std::unique_ptr input_stream_collection_; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_CORE_END_LOOP_CALCULATOR_H_ diff --git a/mediapipe/calculators/core/packet_resampler_calculator.cc b/mediapipe/calculators/core/packet_resampler_calculator.cc index 4271435fd..2966e4589 100644 --- a/mediapipe/calculators/core/packet_resampler_calculator.cc +++ b/mediapipe/calculators/core/packet_resampler_calculator.cc @@ -74,6 +74,12 @@ class PacketResamplerCalculator : public CalculatorBase { ::mediapipe::Status Process(CalculatorContext* cc) override; private: + // Calculates the first sampled timestamp that incorporates a jittering + // offset. + void InitializeNextOutputTimestampWithJitter(); + // Calculates the next sampled timestamp that incorporates a jittering offset. + void UpdateNextOutputTimestampWithJitter(); + // Logic for Process() when jitter_ != 0.0. ::mediapipe::Status ProcessWithJitter(CalculatorContext* cc); @@ -233,6 +239,7 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) { << Timestamp::kTimestampUnitsPerSecond; frame_time_usec_ = static_cast(1000000.0 / frame_rate_); + video_header_.frame_rate = frame_rate_; if (resampler_options.output_header() != @@ -295,6 +302,17 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) { return ::mediapipe::OkStatus(); } +void PacketResamplerCalculator::InitializeNextOutputTimestampWithJitter() { + next_output_timestamp_ = + first_timestamp_ + frame_time_usec_ * random_->RandFloat(); +} + +void PacketResamplerCalculator::UpdateNextOutputTimestampWithJitter() { + next_output_timestamp_ += + frame_time_usec_ * + ((1.0 - jitter_) + 2.0 * jitter_ * random_->RandFloat()); +} + ::mediapipe::Status PacketResamplerCalculator::ProcessWithJitter( CalculatorContext* cc) { RET_CHECK_GT(cc->InputTimestamp(), Timestamp::PreStream()); @@ -302,8 +320,13 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) { if (first_timestamp_ == Timestamp::Unset()) { first_timestamp_ = cc->InputTimestamp(); - next_output_timestamp_ = - first_timestamp_ + frame_time_usec_ * random_->RandFloat(); + InitializeNextOutputTimestampWithJitter(); + if (first_timestamp_ == next_output_timestamp_) { + OutputWithinLimits( + cc, + cc->Inputs().Get(input_data_id_).Value().At(next_output_timestamp_)); + UpdateNextOutputTimestampWithJitter(); + } return ::mediapipe::OkStatus(); } @@ -322,9 +345,7 @@ TimestampDiff TimestampDiffFromSeconds(double seconds) { ? 
last_packet_ : cc->Inputs().Get(input_data_id_).Value()) .At(next_output_timestamp_)); - next_output_timestamp_ += - frame_time_usec_ * - ((1.0 - jitter_) + 2.0 * jitter_ * random_->RandFloat()); + UpdateNextOutputTimestampWithJitter(); return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc index 6b23a0e70..8c470ef7d 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator.cc @@ -102,6 +102,12 @@ class PreviousLoopbackCalculator : public CalculatorBase { cc->Outputs().Get(loop_out_id_).AddPacket(std::move(previous_loopback)); } } + if (!main_ts_.empty()) { + cc->Outputs().Get(loop_out_id_).SetNextTimestampBound(main_ts_.front()); + } + if (cc->Inputs().Get(main_id_).IsDone() && main_ts_.empty()) { + cc->Outputs().Get(loop_out_id_).Close(); + } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/core/previous_loopback_calculator_test.cc b/mediapipe/calculators/core/previous_loopback_calculator_test.cc index 6ad569865..4ac38e9f0 100644 --- a/mediapipe/calculators/core/previous_loopback_calculator_test.cc +++ b/mediapipe/calculators/core/previous_loopback_calculator_test.cc @@ -107,5 +107,96 @@ TEST(PreviousLoopbackCalculator, CorrectTimestamps) { MP_EXPECT_OK(graph_.WaitUntilDone()); } +// A Calculator that outputs a summary packet in CalculatorBase::Close(). +class PacketOnCloseCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).Set(); + cc->Outputs().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) final { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + sum_ += cc->Inputs().Index(0).Value().Get(); + cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Close(CalculatorContext* cc) final { + cc->Outputs().Index(0).AddPacket( + MakePacket(sum_).At(Timestamp::Max())); + return ::mediapipe::OkStatus(); + } + + private: + int sum_ = 0; +}; +REGISTER_CALCULATOR(PacketOnCloseCalculator); + +// Demonstrates that all ouput and input streams in PreviousLoopbackCalculator +// will close as expected when all graph input streams are closed. +TEST(PreviousLoopbackCalculator, ClosesCorrectly) { + std::vector outputs; + CalculatorGraphConfig graph_config_ = + ParseTextProtoOrDie(R"( + input_stream: 'in' + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:in' + input_stream: 'LOOP:out' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:previous' + } + # This calculator synchronizes its inputs as normal, so it is used + # to check that both "in" and "previous" are ready. 
+ node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + input_stream: 'previous' + output_stream: 'out' + output_stream: 'previous2' + } + node { + calculator: 'PacketOnCloseCalculator' + input_stream: 'out' + output_stream: 'close_out' + } + )"); + tool::AddVectorSink("close_out", &graph_config_, &outputs); + + CalculatorGraph graph_; + MP_ASSERT_OK(graph_.Initialize(graph_config_, {})); + MP_ASSERT_OK(graph_.StartRun({})); + + auto send_packet = [&graph_](const std::string& input_name, int n) { + MP_EXPECT_OK(graph_.AddPacketToInputStream( + input_name, MakePacket(n).At(Timestamp(n)))); + }; + + send_packet("in", 1); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1})); + + send_packet("in", 5); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 5})); + + send_packet("in", 15); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 5, 15})); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), + (std::vector{1, 5, 15, Timestamp::Max().Value()})); + + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + } // anonymous namespace } // namespace mediapipe diff --git a/mediapipe/calculators/core/side_packet_to_stream_calculator.cc b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc new file mode 100644 index 000000000..043c91f32 --- /dev/null +++ b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc @@ -0,0 +1,83 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +using mediapipe::PacketTypeSet; +using mediapipe::Timestamp; + +namespace { +static std::map* kTimestampMap = []() { + auto* res = new std::map(); + res->emplace("AT_PRESTREAM", Timestamp::PreStream()); + res->emplace("AT_POSTSTREAM", Timestamp::PostStream()); + res->emplace("AT_ZERO", Timestamp(0)); + return res; +}(); + +} // namespace + +// Outputs the single input_side_packet at the timestamp specified in the +// output_stream tag. Valid tags are AT_PRESTREAM, AT_POSTSTREAM and AT_ZERO. 
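+//
+// Example config (the side packet and stream names here are illustrative):
+// node {
+//   calculator: "SidePacketToStreamCalculator"
+//   input_side_packet: "side_packet"
+//   output_stream: "AT_PRESTREAM:packet_at_prestream"
+// }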
+class SidePacketToStreamCalculator : public CalculatorBase { + public: + SidePacketToStreamCalculator() = default; + ~SidePacketToStreamCalculator() override = default; + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + ::mediapipe::Status Process(CalculatorContext* cc) override; + ::mediapipe::Status Close(CalculatorContext* cc) override; +}; +REGISTER_CALCULATOR(SidePacketToStreamCalculator); + +::mediapipe::Status SidePacketToStreamCalculator::GetContract( + CalculatorContract* cc) { + cc->InputSidePackets().Index(0).SetAny(); + + std::set tags = cc->Outputs().GetTags(); + RET_CHECK_EQ(tags.size(), 1); + + RET_CHECK_EQ(kTimestampMap->count(*tags.begin()), 1); + cc->Outputs().Tag(*tags.begin()).SetAny(); + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status SidePacketToStreamCalculator::Process( + CalculatorContext* cc) { + return mediapipe::tool::StatusStop(); +} + +::mediapipe::Status SidePacketToStreamCalculator::Close(CalculatorContext* cc) { + std::set tags = cc->Outputs().GetTags(); + RET_CHECK_EQ(tags.size(), 1); + const std::string& tag = *tags.begin(); + RET_CHECK_EQ(kTimestampMap->count(tag), 1); + cc->Outputs().Tag(tag).AddPacket( + cc->InputSidePackets().Index(0).At(kTimestampMap->at(tag))); + + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/split_vector_calculator.h b/mediapipe/calculators/core/split_vector_calculator.h index def156474..0eae022c7 100644 --- a/mediapipe/calculators/core/split_vector_calculator.h +++ b/mediapipe/calculators/core/split_vector_calculator.h @@ -34,7 +34,9 @@ namespace mediapipe { // SplitVectorCalculatorOptions. If the option "element_only" is set to true, // all ranges should be of size 1 and all outputs will be elements of type T. If // "element_only" is false, ranges can be non-zero in size and all outputs will -// be of type std::vector. +// be of type std::vector. If the option "combine_outputs" is set to true, +// only one output stream can be specified and all ranges of elements will be +// combined into one vector. // To use this class for a particular type T, register a calculator using // SplitVectorCalculator. template @@ -49,28 +51,47 @@ class SplitVectorCalculator : public CalculatorBase { const auto& options = cc->Options<::mediapipe::SplitVectorCalculatorOptions>(); - if (cc->Outputs().NumEntries() != options.ranges_size()) { - return ::mediapipe::InvalidArgumentError( - "The number of output streams should match the number of ranges " - "specified in the CalculatorOptions."); - } - - // Set the output types for each output stream. 
- for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { - if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 || - options.ranges(i).begin() >= options.ranges(i).end()) { - return ::mediapipe::InvalidArgumentError( - "Indices should be non-negative and begin index should be less " - "than the end index."); - } - if (options.element_only()) { - if (options.ranges(i).end() - options.ranges(i).begin() != 1) { - return ::mediapipe::InvalidArgumentError( - "Since element_only is true, all ranges should be of size 1."); + if (options.combine_outputs()) { + RET_CHECK_EQ(cc->Outputs().NumEntries(), 1); + cc->Outputs().Index(0).Set>(); + for (int i = 0; i < options.ranges_size() - 1; ++i) { + for (int j = i + 1; j < options.ranges_size(); ++j) { + const auto& range_0 = options.ranges(i); + const auto& range_1 = options.ranges(j); + if ((range_0.begin() >= range_1.begin() && + range_0.begin() < range_1.end()) || + (range_1.begin() >= range_0.begin() && + range_1.begin() < range_0.end())) { + return ::mediapipe::InvalidArgumentError( + "Ranges must be non-overlapping when using combine_outputs " + "option."); + } + } + } + } else { + if (cc->Outputs().NumEntries() != options.ranges_size()) { + return ::mediapipe::InvalidArgumentError( + "The number of output streams should match the number of ranges " + "specified in the CalculatorOptions."); + } + + // Set the output types for each output stream. + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 || + options.ranges(i).begin() >= options.ranges(i).end()) { + return ::mediapipe::InvalidArgumentError( + "Indices should be non-negative and begin index should be less " + "than the end index."); + } + if (options.element_only()) { + if (options.ranges(i).end() - options.ranges(i).begin() != 1) { + return ::mediapipe::InvalidArgumentError( + "Since element_only is true, all ranges should be of size 1."); + } + cc->Outputs().Index(i).Set(); + } else { + cc->Outputs().Index(i).Set>(); } - cc->Outputs().Index(i).Set(); - } else { - cc->Outputs().Index(i).Set>(); } } @@ -83,13 +104,15 @@ class SplitVectorCalculator : public CalculatorBase { const auto& options = cc->Options<::mediapipe::SplitVectorCalculatorOptions>(); + element_only_ = options.element_only(); + combine_outputs_ = options.combine_outputs(); + for (const auto& range : options.ranges()) { ranges_.push_back({range.begin(), range.end()}); max_range_end_ = std::max(max_range_end_, range.end()); + total_elements_ += range.end() - range.begin(); } - element_only_ = options.element_only(); - return ::mediapipe::OkStatus(); } @@ -97,17 +120,29 @@ class SplitVectorCalculator : public CalculatorBase { const auto& input = cc->Inputs().Index(0).Get>(); RET_CHECK_GE(input.size(), max_range_end_); - if (element_only_) { + if (combine_outputs_) { + auto output = absl::make_unique>(); + output->reserve(total_elements_); for (int i = 0; i < ranges_.size(); ++i) { - cc->Outputs().Index(i).AddPacket( - MakePacket(input[ranges_[i].first]).At(cc->InputTimestamp())); - } - } else { - for (int i = 0; i < ranges_.size(); ++i) { - auto output = absl::make_unique>( + auto elements = absl::make_unique>( input.begin() + ranges_[i].first, input.begin() + ranges_[i].second); - cc->Outputs().Index(i).Add(output.release(), cc->InputTimestamp()); + output->insert(output->end(), elements->begin(), elements->end()); + } + cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp()); + } else { + if (element_only_) { + for (int i = 0; i 
< ranges_.size(); ++i) { + cc->Outputs().Index(i).AddPacket( + MakePacket(input[ranges_[i].first]).At(cc->InputTimestamp())); + } + } else { + for (int i = 0; i < ranges_.size(); ++i) { + auto output = absl::make_unique>( + input.begin() + ranges_[i].first, + input.begin() + ranges_[i].second); + cc->Outputs().Index(i).Add(output.release(), cc->InputTimestamp()); + } } } @@ -117,7 +152,9 @@ class SplitVectorCalculator : public CalculatorBase { private: std::vector> ranges_; int32 max_range_end_ = -1; + int32 total_elements_ = 0; bool element_only_ = false; + bool combine_outputs_ = false; }; } // namespace mediapipe diff --git a/mediapipe/calculators/core/split_vector_calculator.proto b/mediapipe/calculators/core/split_vector_calculator.proto index 3ef31475b..53acbb7bf 100644 --- a/mediapipe/calculators/core/split_vector_calculator.proto +++ b/mediapipe/calculators/core/split_vector_calculator.proto @@ -37,4 +37,7 @@ message SplitVectorCalculatorOptions { // just element of type T. By default, if a range specifies only one element, // it is outputted as an std::vector. optional bool element_only = 2 [default = false]; + + // Combines output elements to one vector. + optional bool combine_outputs = 3 [default = false]; } diff --git a/mediapipe/calculators/core/split_vector_calculator_test.cc b/mediapipe/calculators/core/split_vector_calculator_test.cc index 4187e8aba..79243c149 100644 --- a/mediapipe/calculators/core/split_vector_calculator_test.cc +++ b/mediapipe/calculators/core/split_vector_calculator_test.cc @@ -105,6 +105,34 @@ class SplitTfLiteTensorVectorCalculatorTest : public ::testing::Test { } } + void ValidateCombinedVectorOutput(std::vector& output_packets, + int expected_elements, + std::vector& input_begin_indices, + std::vector& input_end_indices) { + ASSERT_EQ(1, output_packets.size()); + ASSERT_EQ(input_begin_indices.size(), input_end_indices.size()); + const std::vector& output_vec = + output_packets[0].Get>(); + ASSERT_EQ(expected_elements, output_vec.size()); + const int num_ranges = input_begin_indices.size(); + + int element_id = 0; + for (int range_id = 0; range_id < num_ranges; ++range_id) { + for (int i = input_begin_indices[range_id]; + i < input_end_indices[range_id]; ++i) { + const int expected_value = i; + const TfLiteTensor* result = &output_vec[element_id]; + float* result_buffer = result->data.f; + ASSERT_NE(result_buffer, nullptr); + ASSERT_EQ(result_buffer, input_buffers_[i]); + for (int j = 0; j < width * height * channels; ++j) { + ASSERT_EQ(expected_value, result_buffer[j]); + } + element_id++; + } + } + } + void ValidateElementOutput(std::vector& output_packets, int input_begin_index) { ASSERT_EQ(1, output_packets.size()); @@ -234,6 +262,65 @@ TEST_F(SplitTfLiteTensorVectorCalculatorTest, InvalidOutputStreamCountTest) { ASSERT_FALSE(graph.Initialize(graph_config).ok()); } +TEST_F(SplitTfLiteTensorVectorCalculatorTest, + InvalidCombineOutputsMultipleOutputsTest) { + ASSERT_NE(interpreter_, nullptr); + + // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + R"( + input_stream: "tensor_in" + node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "tensor_in" + output_stream: "range_0" + output_stream: "range_1" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + combine_outputs: true + } + } + } + )"); + + // Run the graph. 
+ CalculatorGraph graph; + // The graph should fail running because the number of output streams does not + // match the number of range elements in the options. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +TEST_F(SplitTfLiteTensorVectorCalculatorTest, InvalidOverlappingRangesTest) { + ASSERT_NE(interpreter_, nullptr); + + // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + R"( + input_stream: "tensor_in" + node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "tensor_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 3 } + ranges: { begin: 1 end: 4 } + combine_outputs: true + } + } + } + )"); + + // Run the graph. + CalculatorGraph graph; + // The graph should fail running because there are overlapping ranges. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + TEST_F(SplitTfLiteTensorVectorCalculatorTest, SmokeTestElementOnly) { ASSERT_NE(interpreter_, nullptr); @@ -289,6 +376,53 @@ TEST_F(SplitTfLiteTensorVectorCalculatorTest, SmokeTestElementOnly) { MP_ASSERT_OK(graph.WaitUntilDone()); } +TEST_F(SplitTfLiteTensorVectorCalculatorTest, SmokeTestCombiningOutputs) { + ASSERT_NE(interpreter_, nullptr); + + PrepareTfLiteTensorVector(/*vector_size=*/5); + ASSERT_NE(input_vec_, nullptr); + + // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + R"( + input_stream: "tensor_in" + node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "tensor_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 4 end: 5 } + combine_outputs: true + } + } + } + )"); + std::vector range_0_packets; + tool::AddVectorSink("range_0", &graph_config, &range_0_packets); + + // Run the graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "tensor_in", Adopt(input_vec_.release()).At(Timestamp(0)))); + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + + std::vector input_begin_indices = {0, 2, 4}; + std::vector input_end_indices = {1, 3, 5}; + ValidateCombinedVectorOutput(range_0_packets, /*expected_elements=*/3, + input_begin_indices, input_end_indices); + + // Fully close the graph at the end. + MP_ASSERT_OK(graph.CloseInputStream("tensor_in")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + TEST_F(SplitTfLiteTensorVectorCalculatorTest, ElementOnlyDisablesVectorOutputs) { // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. diff --git a/mediapipe/calculators/core/stream_to_side_packet_calculator.cc b/mediapipe/calculators/core/stream_to_side_packet_calculator.cc new file mode 100644 index 000000000..07bb8c852 --- /dev/null +++ b/mediapipe/calculators/core/stream_to_side_packet_calculator.cc @@ -0,0 +1,48 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/timestamp.h" + +namespace mediapipe { + +// A calculator that takes a packet of an input stream and converts it to an +// output side packet. This calculator only works under the assumption that the +// input stream only has a single packet passing through. +// +// Example config: +// node { +// calculator: "StreamToSidePacketCalculator" +// input_stream: "stream" +// output_side_packet: "side_packet" +// } +class StreamToSidePacketCalculator : public mediapipe::CalculatorBase { + public: + static mediapipe::Status GetContract(mediapipe::CalculatorContract* cc) { + cc->Inputs().Index(0).SetAny(); + cc->OutputSidePackets().Index(0).SetAny(); + return mediapipe::OkStatus(); + } + + mediapipe::Status Process(mediapipe::CalculatorContext* cc) override { + mediapipe::Packet& packet = cc->Inputs().Index(0).Value(); + cc->OutputSidePackets().Index(0).Set( + packet.At(mediapipe::Timestamp::Unset())); + return mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(StreamToSidePacketCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/stream_to_side_packet_calculator_test.cc b/mediapipe/calculators/core/stream_to_side_packet_calculator_test.cc new file mode 100644 index 000000000..12f417c58 --- /dev/null +++ b/mediapipe/calculators/core/stream_to_side_packet_calculator_test.cc @@ -0,0 +1,67 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
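+
+// Note on the calculator under test: as implemented above,
+// StreamToSidePacketCalculator re-stamps the packet with Timestamp::Unset()
+// before emitting it, because side packets, unlike stream packets, carry no
+// timestamp. The tests below also exercise the failure modes: an empty input
+// stream yields kUnavailable, and a second input packet yields kAlreadyExists
+// since the output side packet can only be set once.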
+ +#include +#include + +#include "absl/memory/memory.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_matchers.h" +#include "mediapipe/framework/timestamp.h" + +namespace mediapipe { + +using ::testing::Test; + +class StreamToSidePacketCalculatorTest : public Test { + protected: + StreamToSidePacketCalculatorTest() { + const char kConfig[] = R"( + calculator: "StreamToSidePacketCalculator" + input_stream: "stream" + output_side_packet: "side_packet" + )"; + runner_ = absl::make_unique(kConfig); + } + + std::unique_ptr runner_; +}; + +TEST_F(StreamToSidePacketCalculatorTest, + StreamToSidePacketCalculatorWithEmptyStreamFails) { + EXPECT_EQ(runner_->Run().code(), mediapipe::StatusCode::kUnavailable); +} + +TEST_F(StreamToSidePacketCalculatorTest, + StreamToSidePacketCalculatorWithSinglePacketCreatesSidePacket) { + runner_->MutableInputs()->Index(0).packets.push_back( + Adopt(new std::string("test")).At(Timestamp(1))); + MP_ASSERT_OK(runner_->Run()); + EXPECT_EQ(runner_->OutputSidePackets().Index(0).Get(), "test"); +} + +TEST_F(StreamToSidePacketCalculatorTest, + StreamToSidePacketCalculatorWithMultiplePacketsFails) { + runner_->MutableInputs()->Index(0).packets.push_back( + Adopt(new std::string("test1")).At(Timestamp(1))); + runner_->MutableInputs()->Index(0).packets.push_back( + Adopt(new std::string("test2")).At(Timestamp(2))); + EXPECT_EQ(runner_->Run().code(), mediapipe::StatusCode::kAlreadyExists); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/string_to_int_calculator.cc b/mediapipe/calculators/core/string_to_int_calculator.cc new file mode 100644 index 000000000..64600cde3 --- /dev/null +++ b/mediapipe/calculators/core/string_to_int_calculator.cc @@ -0,0 +1,79 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include + +#include "absl/strings/numbers.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Calculator that converts a std::string into an integer type, or fails if the +// conversion is not possible. 
+// +// Example config: +// node { +// calculator: "StringToIntCalculator" +// input_side_packet: "string" +// output_side_packet: "index" +// } +template +class StringToIntCalculatorTemplate : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets().Index(0).Set(); + cc->OutputSidePackets().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + IntType number; + if (!absl::SimpleAtoi(cc->InputSidePackets().Index(0).Get(), + &number)) { + return ::mediapipe::InvalidArgumentError( + "The std::string could not be parsed as an integer."); + } + cc->OutputSidePackets().Index(0).Set(MakePacket(number)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return ::mediapipe::OkStatus(); + } +}; + +using StringToIntCalculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToIntCalculator); + +using StringToUintCalculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToUintCalculator); + +using StringToInt32Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToInt32Calculator); + +using StringToUint32Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToUint32Calculator); + +using StringToInt64Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToInt64Calculator); + +using StringToUint64Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToUint64Calculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/image/BUILD b/mediapipe/calculators/image/BUILD index b8fdbdfae..5a0a75619 100644 --- a/mediapipe/calculators/image/BUILD +++ b/mediapipe/calculators/image/BUILD @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) exports_files(["LICENSE"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "opencv_image_encoder_calculator_proto", srcs = ["opencv_image_encoder_calculator.proto"], diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index 4231b899e..8c9d0df84 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -13,12 +13,12 @@ # limitations under the License. 
# +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "graph_tensors_packet_generator_proto", srcs = ["graph_tensors_packet_generator.proto"], @@ -104,6 +104,17 @@ proto_library( deps = ["//mediapipe/framework:calculator_proto"], ) +proto_library( + name = "unpack_media_sequence_calculator_proto", + srcs = ["unpack_media_sequence_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/core:packet_resampler_calculator_proto", + "//mediapipe/framework:calculator_proto", + "//mediapipe/util:audio_decoder_proto", + ], +) + proto_library( name = "vector_float_to_tensor_calculator_options_proto", srcs = ["vector_float_to_tensor_calculator_options.proto"], @@ -127,7 +138,7 @@ mediapipe_cc_proto_library( srcs = ["image_frame_to_tensor_calculator.proto"], cc_deps = [ "//mediapipe/framework:calculator_cc_proto", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], visibility = ["//visibility:public"], deps = [":image_frame_to_tensor_calculator_proto"], @@ -162,7 +173,7 @@ mediapipe_cc_proto_library( srcs = ["pack_media_sequence_calculator.proto"], cc_deps = [ "//mediapipe/framework:calculator_cc_proto", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], visibility = ["//visibility:public"], deps = [":pack_media_sequence_calculator_proto"], @@ -181,7 +192,7 @@ mediapipe_cc_proto_library( srcs = ["tensorflow_session_from_frozen_graph_generator.proto"], cc_deps = [ "//mediapipe/framework:packet_generator_cc_proto", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], visibility = ["//visibility:public"], deps = [":tensorflow_session_from_frozen_graph_generator_proto"], @@ -192,7 +203,7 @@ mediapipe_cc_proto_library( srcs = ["tensorflow_session_from_frozen_graph_calculator.proto"], cc_deps = [ "//mediapipe/framework:calculator_cc_proto", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], visibility = ["//visibility:public"], deps = [":tensorflow_session_from_frozen_graph_calculator_proto"], @@ -261,6 +272,17 @@ mediapipe_cc_proto_library( deps = [":unpack_media_sequence_calculator_proto"], ) +mediapipe_cc_proto_library( + name = "vector_int_to_tensor_calculator_options_cc_proto", + srcs = ["vector_int_to_tensor_calculator_options.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "@org_tensorflow//tensorflow/core:protos_all", + ], + visibility = ["//visibility:public"], + deps = [":vector_int_to_tensor_calculator_options_proto"], +) + mediapipe_cc_proto_library( name = "vector_float_to_tensor_calculator_options_cc_proto", srcs = ["vector_float_to_tensor_calculator_options.proto"], @@ -274,7 +296,7 @@ cc_library( srcs = ["graph_tensors_packet_generator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/tensorflow:graph_tensors_packet_generator_cc_proto", + ":graph_tensors_packet_generator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -289,7 +311,7 @@ cc_library( srcs = ["image_frame_to_tensor_calculator.cc"], visibility = ["//visibility:public"], deps = [ - 
"//mediapipe/calculators/tensorflow:image_frame_to_tensor_calculator_cc_proto", + ":image_frame_to_tensor_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/port:ret_check", @@ -311,7 +333,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//mediapipe/framework/formats:time_series_header_cc_proto", - "//mediapipe/calculators/tensorflow:matrix_to_tensor_calculator_options_cc_proto", + ":matrix_to_tensor_calculator_options_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:status", @@ -332,7 +354,7 @@ cc_library( srcs = ["lapped_tensor_buffer_calculator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator_cc_proto", + ":lapped_tensor_buffer_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -386,7 +408,7 @@ cc_library( "//mediapipe/util/sequence:media_sequence", "//mediapipe/util/sequence:media_sequence_util", "@com_google_absl//absl/strings", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], alwayslink = 1, ) @@ -401,7 +423,7 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], alwayslink = 1, ) @@ -414,7 +436,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":tensorflow_session", - "//mediapipe/calculators/tensorflow:tensorflow_inference_calculator_cc_proto", + ":tensorflow_inference_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/tool:status_util", "@com_google_absl//absl/strings", @@ -492,7 +514,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":tensorflow_session", - "//mediapipe/calculators/tensorflow:tensorflow_session_from_frozen_graph_generator_cc_proto", + ":tensorflow_session_from_frozen_graph_generator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/tool:status_util", "//mediapipe/framework/port:status", @@ -551,7 +573,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":tensorflow_session", - "//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_generator_cc_proto", + ":tensorflow_session_from_saved_model_generator_cc_proto", "//mediapipe/framework:packet_generator", "//mediapipe/framework:packet_type", "//mediapipe/framework/tool:status_util", @@ -575,7 +597,7 @@ cc_library( srcs = ["tensor_squeeze_dimensions_calculator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/tensorflow:tensor_squeeze_dimensions_calculator_cc_proto", + ":tensor_squeeze_dimensions_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -589,7 +611,7 @@ cc_library( srcs = ["tensor_to_image_frame_calculator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/tensorflow:tensor_to_image_frame_calculator_cc_proto", + ":tensor_to_image_frame_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/port:ret_check", @@ -605,7 +627,7 @@ cc_library( visibility = 
["//visibility:public"], deps = [ "//mediapipe/framework/formats:time_series_header_cc_proto", - "//mediapipe/calculators/tensorflow:tensor_to_matrix_calculator_cc_proto", + ":tensor_to_matrix_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/port:status", @@ -621,6 +643,22 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tfrecord_reader_calculator", + srcs = ["tfrecord_reader_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/core:lib", + "@org_tensorflow//tensorflow/core:protos_all", + ], + alwayslink = 1, +) + cc_library( name = "tensor_to_vector_float_calculator", srcs = ["tensor_to_vector_float_calculator.cc"], @@ -629,7 +667,7 @@ cc_library( "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:status", "//mediapipe/framework/port:ret_check", - "//mediapipe/calculators/tensorflow:tensor_to_vector_float_calculator_options_cc_proto", + ":tensor_to_vector_float_calculator_options_cc_proto", ] + select({ "//conditions:default": [ "@org_tensorflow//tensorflow/core:framework", @@ -657,7 +695,21 @@ cc_library( "//mediapipe/util:audio_decoder_cc_proto", "//mediapipe/util/sequence:media_sequence", "@com_google_absl//absl/strings", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", + ], + alwayslink = 1, +) + +cc_library( + name = "vector_int_to_tensor_calculator", + srcs = ["vector_int_to_tensor_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":vector_int_to_tensor_calculator_options_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/core:framework", ], alwayslink = 1, ) @@ -667,7 +719,7 @@ cc_library( srcs = ["vector_float_to_tensor_calculator.cc"], visibility = ["//visibility:public"], deps = [ - "//mediapipe/calculators/tensorflow:vector_float_to_tensor_calculator_options_cc_proto", + ":vector_float_to_tensor_calculator_options_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", @@ -676,12 +728,26 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "unpack_yt8m_sequence_example_calculator", + srcs = ["unpack_yt8m_sequence_example_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":lapped_tensor_buffer_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/core:protos_all", + ], + alwayslink = 1, +) + cc_test( name = "graph_tensors_packet_generator_test", srcs = ["graph_tensors_packet_generator_test.cc"], deps = [ ":graph_tensors_packet_generator", - "//mediapipe/calculators/tensorflow:graph_tensors_packet_generator_cc_proto", + ":graph_tensors_packet_generator_cc_proto", "//mediapipe/framework:packet", "//mediapipe/framework:packet_generator_cc_proto", "//mediapipe/framework:packet_set", @@ -713,7 +779,7 @@ cc_test( srcs = ["matrix_to_tensor_calculator_test.cc"], deps = [ ":matrix_to_tensor_calculator", - "//mediapipe/calculators/tensorflow:matrix_to_tensor_calculator_options_cc_proto", + 
":matrix_to_tensor_calculator_options_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/formats:matrix", @@ -729,13 +795,13 @@ cc_test( srcs = ["lapped_tensor_buffer_calculator_test.cc"], deps = [ ":lapped_tensor_buffer_calculator", - "//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator_cc_proto", + ":lapped_tensor_buffer_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/port:gtest_main", "@com_google_absl//absl/memory", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -774,7 +840,7 @@ cc_test( "//mediapipe/util/sequence:media_sequence", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -801,7 +867,7 @@ cc_test( "@com_google_absl//absl/strings", "@org_tensorflow//tensorflow/core:direct_session", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", "@org_tensorflow//tensorflow/core:testlib", "@org_tensorflow//tensorflow/core/kernels:conv_ops", "@org_tensorflow//tensorflow/core/kernels:math", @@ -817,7 +883,7 @@ cc_test( ":tensorflow_inference_calculator", ":tensorflow_session", ":tensorflow_session_from_frozen_graph_generator", - "//mediapipe/calculators/tensorflow:tensorflow_session_from_frozen_graph_generator_cc_proto", + ":tensorflow_session_from_frozen_graph_generator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:packet", "//mediapipe/framework:packet_generator_cc_proto", @@ -831,7 +897,7 @@ cc_test( "@com_google_absl//absl/strings", "@org_tensorflow//tensorflow/core:direct_session", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", "@org_tensorflow//tensorflow/core:testlib", "@org_tensorflow//tensorflow/core/kernels:conv_ops", "@org_tensorflow//tensorflow/core/kernels:math", @@ -847,7 +913,7 @@ cc_test( ":tensorflow_inference_calculator", ":tensorflow_session", ":tensorflow_session_from_saved_model_generator", - "//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_generator_cc_proto", + ":tensorflow_session_from_saved_model_generator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:packet", "//mediapipe/framework:packet_generator_cc_proto", @@ -857,14 +923,8 @@ cc_test( "//mediapipe/framework/tool:tag_map_helper", "//mediapipe/framework/tool:validate_type", "@com_google_absl//absl/strings", + "@org_tensorflow//tensorflow/core:all_kernels", "@org_tensorflow//tensorflow/core:direct_session", - "@org_tensorflow//tensorflow/core/kernels:array", - "@org_tensorflow//tensorflow/core/kernels:bitcast_op", - "@org_tensorflow//tensorflow/core/kernels:conv_ops", - "@org_tensorflow//tensorflow/core/kernels:io", - "@org_tensorflow//tensorflow/core/kernels:state", - "@org_tensorflow//tensorflow/core/kernels:string", - "@org_tensorflow//tensorflow/core/kernels/data:tensor_dataset_op", ], ) @@ -888,14 +948,8 @@ cc_test( "//mediapipe/framework/tool:tag_map_helper", "//mediapipe/framework/tool:validate_type", "@com_google_absl//absl/strings", + "@org_tensorflow//tensorflow/core:all_kernels", 
"@org_tensorflow//tensorflow/core:direct_session", - "@org_tensorflow//tensorflow/core/kernels:array", - "@org_tensorflow//tensorflow/core/kernels:bitcast_op", - "@org_tensorflow//tensorflow/core/kernels:conv_ops", - "@org_tensorflow//tensorflow/core/kernels:io", - "@org_tensorflow//tensorflow/core/kernels:state", - "@org_tensorflow//tensorflow/core/kernels:string", - "@org_tensorflow//tensorflow/core/kernels/data:tensor_dataset_op", ], ) @@ -904,12 +958,12 @@ cc_test( srcs = ["tensor_squeeze_dimensions_calculator_test.cc"], deps = [ ":tensor_squeeze_dimensions_calculator", - "//mediapipe/calculators/tensorflow:tensor_squeeze_dimensions_calculator_cc_proto", + ":tensor_squeeze_dimensions_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/port:gtest_main", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -919,13 +973,13 @@ cc_test( srcs = ["tensor_to_image_frame_calculator_test.cc"], deps = [ ":tensor_to_image_frame_calculator", - "//mediapipe/calculators/tensorflow:tensor_to_image_frame_calculator_cc_proto", + ":tensor_to_image_frame_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/port:gtest_main", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -935,14 +989,14 @@ cc_test( srcs = ["tensor_to_matrix_calculator_test.cc"], deps = [ ":tensor_to_matrix_calculator", - "//mediapipe/calculators/tensorflow:tensor_to_matrix_calculator_cc_proto", + ":tensor_to_matrix_calculator_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/formats:matrix", "//mediapipe/framework/formats:time_series_header_cc_proto", "//mediapipe/framework/port:gtest_main", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -951,12 +1005,12 @@ cc_test( srcs = ["tensor_to_vector_float_calculator_test.cc"], deps = [ ":tensor_to_vector_float_calculator", - "//mediapipe/calculators/tensorflow:tensor_to_vector_float_calculator_options_cc_proto", + ":tensor_to_vector_float_calculator_options_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/port:gtest_main", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -976,7 +1030,21 @@ cc_test( "//mediapipe/util/sequence:media_sequence", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", + ], +) + +cc_test( + name = "vector_int_to_tensor_calculator_test", + srcs = ["vector_int_to_tensor_calculator_test.cc"], + deps = [ + ":vector_int_to_tensor_calculator", + ":vector_int_to_tensor_calculator_options_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "@org_tensorflow//tensorflow/core:framework", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -985,12 +1053,12 @@ cc_test( srcs = 
["vector_float_to_tensor_calculator_test.cc"], deps = [ ":vector_float_to_tensor_calculator", - "//mediapipe/calculators/tensorflow:vector_float_to_tensor_calculator_options_cc_proto", + ":vector_float_to_tensor_calculator_options_cc_proto", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", "//mediapipe/framework/port:gtest_main", "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -1014,7 +1082,7 @@ cc_test( ":tensorflow_session", ":tensorflow_inference_calculator", ":tensorflow_session_from_frozen_graph_generator", - "//mediapipe/calculators/tensorflow:tensorflow_session_from_frozen_graph_generator_cc_proto", + ":tensorflow_session_from_frozen_graph_generator_cc_proto", "//mediapipe/framework/deps:file_path", "//mediapipe/framework:calculator_framework", "//mediapipe/framework:calculator_runner", diff --git a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc index 78ee50871..5ad8e853c 100644 --- a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc +++ b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc @@ -29,6 +29,11 @@ namespace mediapipe { +const char kBufferSize[] = "BUFFER_SIZE"; +const char kOverlap[] = "OVERLAP"; +const char kTimestampOffset[] = "TIMESTAMP_OFFSET"; +const char kCalculatorOptions[] = "CALCULATOR_OPTIONS"; + namespace tf = tensorflow; // Given an input stream of tensors, concatenates the tensors over timesteps. @@ -72,6 +77,9 @@ class LappedTensorBufferCalculator : public CalculatorBase { ::mediapipe::Status AddBatchDimension(tf::Tensor* input_tensor); int steps_until_output_; + int buffer_size_; + int overlap_; + int timestamp_offset_; std::unique_ptr> timestamp_buffer_; std::unique_ptr> buffer_; LappedTensorBufferCalculatorOptions options_; @@ -87,6 +95,21 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); ); RET_CHECK_EQ(cc->Inputs().NumEntries(), 1) << "Only one output stream is supported."; + + if (cc->InputSidePackets().HasTag(kBufferSize)) { + cc->InputSidePackets().Tag(kBufferSize).Set(); + } + if (cc->InputSidePackets().HasTag(kOverlap)) { + cc->InputSidePackets().Tag(kOverlap).Set(); + } + if (cc->InputSidePackets().HasTag(kTimestampOffset)) { + cc->InputSidePackets().Tag(kTimestampOffset).Set(); + } + if (cc->InputSidePackets().HasTag(kCalculatorOptions)) { + cc->InputSidePackets() + .Tag(kCalculatorOptions) + .Set(); + } cc->Outputs().Index(0).Set( // Output tensorflow::Tensor stream with possibly overlapping steps. 
); @@ -95,16 +118,33 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); ::mediapipe::Status LappedTensorBufferCalculator::Open(CalculatorContext* cc) { options_ = cc->Options(); - RET_CHECK_LT(options_.overlap(), options_.buffer_size()); - RET_CHECK_GE(options_.timestamp_offset(), 0) + if (cc->InputSidePackets().HasTag(kCalculatorOptions)) { + options_ = cc->InputSidePackets() + .Tag(kCalculatorOptions) + .Get(); + } + buffer_size_ = options_.buffer_size(); + if (cc->InputSidePackets().HasTag(kBufferSize)) { + buffer_size_ = cc->InputSidePackets().Tag(kBufferSize).Get(); + } + overlap_ = options_.overlap(); + if (cc->InputSidePackets().HasTag(kOverlap)) { + overlap_ = cc->InputSidePackets().Tag(kOverlap).Get(); + } + timestamp_offset_ = options_.timestamp_offset(); + if (cc->InputSidePackets().HasTag(kTimestampOffset)) { + timestamp_offset_ = cc->InputSidePackets().Tag(kTimestampOffset).Get(); + } + + RET_CHECK_LT(overlap_, buffer_size_); + RET_CHECK_GE(timestamp_offset_, 0) << "Negative timestamp_offset is not allowed."; - RET_CHECK_LT(options_.timestamp_offset(), options_.buffer_size()) + RET_CHECK_LT(timestamp_offset_, buffer_size_) << "output_frame_num_offset has to be less than buffer_size."; timestamp_buffer_ = - absl::make_unique>(options_.buffer_size()); - buffer_ = - absl::make_unique>(options_.buffer_size()); - steps_until_output_ = options_.buffer_size(); + absl::make_unique>(buffer_size_); + buffer_ = absl::make_unique>(buffer_size_); + steps_until_output_ = buffer_size_; return ::mediapipe::OkStatus(); } @@ -128,11 +168,10 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); concatenated.get()); RET_CHECK(concat_status.ok()) << concat_status.ToString(); - cc->Outputs().Index(0).Add( - concatenated.release(), - timestamp_buffer_->Get(options_.timestamp_offset())); + cc->Outputs().Index(0).Add(concatenated.release(), + timestamp_buffer_->Get(timestamp_offset_)); - steps_until_output_ = options_.buffer_size() - options_.overlap(); + steps_until_output_ = buffer_size_ - overlap_; } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/tensorflow/tfrecord_reader_calculator.cc b/mediapipe/calculators/tensorflow/tfrecord_reader_calculator.cc new file mode 100644 index 000000000..5de7b0c0d --- /dev/null +++ b/mediapipe/calculators/tensorflow/tfrecord_reader_calculator.cc @@ -0,0 +1,126 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
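+
+// Note on the LappedTensorBufferCalculator hunk above: buffer_size, overlap
+// and timestamp_offset can now be supplied as input side packets, overriding
+// the values in the calculator options. An illustrative node config (the side
+// packet and stream names are arbitrary):
+// node {
+//   calculator: "LappedTensorBufferCalculator"
+//   input_stream: "input_tensor"
+//   output_stream: "lapped_tensor"
+//   input_side_packet: "BUFFER_SIZE:buffer_size"
+//   input_side_packet: "OVERLAP:overlap"
+//   input_side_packet: "TIMESTAMP_OFFSET:timestamp_offset"
+// }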
+ +#include <memory> +#include <string> +#include <vector> + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/file_system.h" + +namespace mediapipe { + +const char kTFRecordPath[] = "TFRECORD_PATH"; +const char kRecordIndex[] = "RECORD_INDEX"; +const char kExampleTag[] = "EXAMPLE"; +const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE"; + +// Reads a tensorflow example/sequence example from a tfrecord file. +// If the "RECORD_INDEX" input side packet is provided, the calculator fetches +// the example/sequence example at that record index. Otherwise, it always +// reads the first example/sequence example in the tfrecord file. +// +// Example config: +// node { +// calculator: "TFRecordReaderCalculator" +// input_side_packet: "TFRECORD_PATH:tfrecord_path" +// input_side_packet: "RECORD_INDEX:record_index" +// output_side_packet: "SEQUENCE_EXAMPLE:sequence_example" +// } +class TFRecordReaderCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; +}; + +::mediapipe::Status TFRecordReaderCalculator::GetContract( + CalculatorContract* cc) { + cc->InputSidePackets().Tag(kTFRecordPath).Set<std::string>(); + if (cc->InputSidePackets().HasTag(kRecordIndex)) { + cc->InputSidePackets().Tag(kRecordIndex).Set<int>(); + } + + RET_CHECK(cc->OutputSidePackets().HasTag(kExampleTag) || + cc->OutputSidePackets().HasTag(kSequenceExampleTag)) + << "TFRecordReaderCalculator must output either Tensorflow example or " + "sequence example."; + if (cc->OutputSidePackets().HasTag(kExampleTag)) { + cc->OutputSidePackets().Tag(kExampleTag).Set<tensorflow::Example>(); + } else { + cc->OutputSidePackets() + .Tag(kSequenceExampleTag) + .Set<tensorflow::SequenceExample>(); + } + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status TFRecordReaderCalculator::Open(CalculatorContext* cc) { + std::unique_ptr<tensorflow::RandomAccessFile> file; + auto tf_status = tensorflow::Env::Default()->NewRandomAccessFile( + cc->InputSidePackets().Tag(kTFRecordPath).Get<std::string>(), &file); + RET_CHECK(tf_status.ok()) + << "Failed to open tfrecord file: " << tf_status.error_message(); + tensorflow::io::RecordReader reader(file.get(), + tensorflow::io::RecordReaderOptions()); + tensorflow::uint64 offset = 0; + std::string example_str; + const int target_idx = + cc->InputSidePackets().HasTag(kRecordIndex) + ?
cc->InputSidePackets().Tag(kRecordIndex).Get<int>() + : 0; + int current_idx = 0; + while (current_idx <= target_idx) { + tf_status = reader.ReadRecord(&offset, &example_str); + RET_CHECK(tf_status.ok()) + << "Failed to read tfrecord: " << tf_status.error_message(); + if (current_idx == target_idx) { + if (cc->OutputSidePackets().HasTag(kExampleTag)) { + tensorflow::Example tf_example; + tf_example.ParseFromString(example_str); + cc->OutputSidePackets() + .Tag(kExampleTag) + .Set(MakePacket<tensorflow::Example>(std::move(tf_example))); + } else { + tensorflow::SequenceExample tf_sequence_example; + tf_sequence_example.ParseFromString(example_str); + cc->OutputSidePackets() + .Tag(kSequenceExampleTag) + .Set(MakePacket<tensorflow::SequenceExample>( + std::move(tf_sequence_example))); + } + } + ++current_idx; + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status TFRecordReaderCalculator::Process(CalculatorContext* cc) { + return ::mediapipe::OkStatus(); +} + +REGISTER_CALCULATOR(TFRecordReaderCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/unpack_yt8m_sequence_example_calculator.cc b/mediapipe/calculators/tensorflow/unpack_yt8m_sequence_example_calculator.cc new file mode 100644 index 000000000..daf7f1117 --- /dev/null +++ b/mediapipe/calculators/tensorflow/unpack_yt8m_sequence_example_calculator.cc @@ -0,0 +1,192 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string> + +#include "mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/example/feature.pb.h" + +namespace mediapipe { +namespace { + +const char kId[] = "id"; +const char kRgb[] = "rgb"; +const char kAudio[] = "audio"; +const char kDesiredSegmentSize[] = "DESIRED_SEGMENT_SIZE"; +const char kYt8mId[] = "YT8M_ID"; +const char kYt8mSequenceExample[] = "YT8M_SEQUENCE_EXAMPLE"; +const char kQuantizedRgbFeature[] = "QUANTIZED_RGB_FEATURE"; +const char kQuantizedAudioFeature[] = "QUANTIZED_AUDIO_FEATURE"; +const char kSegmentSize[] = "SEGMENT_SIZE"; +const char kLappedTensorBufferCalculatorOptions[] = + "LAPPED_TENSOR_BUFFER_CALCULATOR_OPTIONS"; + +std::string GetQuantizedFeature( + const tensorflow::SequenceExample& sequence_example, const std::string& key, + int index) { + const auto& bytes_list = sequence_example.feature_lists() + .feature_list() + .at(key) + .feature() + .Get(index) + .bytes_list() + .value(); + CHECK_EQ(1, bytes_list.size()); + return bytes_list.Get(0); +} +} // namespace + +// Unpacks YT8M Sequence Example. Note that the audio and rgb feature outputs +// are quantized. DequantizeByteArrayCalculator can do the dequantization for +// you.
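// One plausible downstream wiring (a sketch; DequantizeByteArrayCalculator's exact tags and options live in its own file and are assumed here, not defined by this calculator): // node { // calculator: "DequantizeByteArrayCalculator" // input_stream: "ENCODED:quantized_rgb_feature" // output_stream: "FLOAT_VECTOR:rgb_feature" // }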
+// +// Example config: +// node { +// calculator: "UnpackYt8mSequenceExampleCalculator" +// input_side_packet: "YT8M_SEQUENCE_EXAMPLE:yt8m_sequence_example" +// output_stream: "QUANTIZED_RGB_FEATURE:quantized_rgb_feature" +// output_stream: "QUANTIZED_AUDIO_FEATURE:quantized_audio_feature" +// } +class UnpackYt8mSequenceExampleCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets() + .Tag(kYt8mSequenceExample) + .Set<tensorflow::SequenceExample>(); + if (cc->InputSidePackets().HasTag(kDesiredSegmentSize)) { + cc->InputSidePackets().Tag(kDesiredSegmentSize).Set<int>(); + } + cc->Outputs().Tag(kQuantizedRgbFeature).Set<std::string>(); + cc->Outputs().Tag(kQuantizedAudioFeature).Set<std::string>(); + if (cc->OutputSidePackets().HasTag(kYt8mId)) { + cc->OutputSidePackets().Tag(kYt8mId).Set<std::string>(); + } + if (cc->OutputSidePackets().HasTag(kLappedTensorBufferCalculatorOptions)) { + cc->OutputSidePackets() + .Tag(kLappedTensorBufferCalculatorOptions) + .Set<::mediapipe::LappedTensorBufferCalculatorOptions>(); + } + if (cc->OutputSidePackets().HasTag(kSegmentSize)) { + cc->OutputSidePackets().Tag(kSegmentSize).Set<int>(); + } + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + const tensorflow::SequenceExample& sequence_example = + cc->InputSidePackets() + .Tag(kYt8mSequenceExample) + .Get<tensorflow::SequenceExample>(); + const std::string& yt8m_id = + sequence_example.context().feature().at(kId).bytes_list().value().Get( + 0); + if (cc->OutputSidePackets().HasTag(kYt8mId)) { + cc->OutputSidePackets().Tag(kYt8mId).Set( + MakePacket<std::string>(yt8m_id)); + } + + int rgb_feature_list_length = + sequence_example.feature_lists().feature_list().at(kRgb).feature_size(); + int audio_feature_list_length = sequence_example.feature_lists() + .feature_list() + .at(kAudio) + .feature_size(); + + if (rgb_feature_list_length != audio_feature_list_length) { + return ::mediapipe::FailedPreconditionError(absl::StrCat( + "Data corruption: the lengths of the audio features and rgb features " + "are not equal. Please check the sequence example that contains yt8m " + "id: ", + yt8m_id)); + } + feature_list_length_ = rgb_feature_list_length; + if (cc->OutputSidePackets().HasTag(kLappedTensorBufferCalculatorOptions) || + cc->OutputSidePackets().HasTag(kSegmentSize)) { + // If the desired segment size is specified, take the min of the feature + // list length and the desired size as the output segment size.
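// For instance (illustrative numbers): with a feature list of length 120, DESIRED_SEGMENT_SIZE == 300 yields segment_size == min(120, 300) == 120, while DESIRED_SEGMENT_SIZE == 5 yields segment_size == 5.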
+ int segment_size = feature_list_length_; + if (cc->InputSidePackets().HasTag(kDesiredSegmentSize)) { + int desired_segment_size = + cc->InputSidePackets().Tag(kDesiredSegmentSize).Get<int>(); + RET_CHECK(desired_segment_size > 0) + << "The desired segment size must be greater than zero."; + segment_size = std::min(feature_list_length_, desired_segment_size); + } + if (cc->OutputSidePackets().HasTag( + kLappedTensorBufferCalculatorOptions)) { + auto lapped_tensor_buffer_calculator_options = absl::make_unique< + ::mediapipe::LappedTensorBufferCalculatorOptions>(); + lapped_tensor_buffer_calculator_options->set_add_batch_dim_to_tensors( + true); + lapped_tensor_buffer_calculator_options->set_buffer_size(segment_size); + lapped_tensor_buffer_calculator_options->set_overlap(segment_size - 1); + lapped_tensor_buffer_calculator_options->set_timestamp_offset( + segment_size - 1); + cc->OutputSidePackets() + .Tag(kLappedTensorBufferCalculatorOptions) + .Set(Adopt(lapped_tensor_buffer_calculator_options.release())); + } + if (cc->OutputSidePackets().HasTag(kSegmentSize)) { + cc->OutputSidePackets() + .Tag(kSegmentSize) + .Set(MakePacket<int>(segment_size)); + } + } + LOG(INFO) << "Reading the sequence example that contains yt8m id: " + << yt8m_id << ". Feature list length: " << feature_list_length_; + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (current_index_ >= feature_list_length_) { + return ::mediapipe::tool::StatusStop(); + } + const tensorflow::SequenceExample& sequence_example = + cc->InputSidePackets() + .Tag(kYt8mSequenceExample) + .Get<tensorflow::SequenceExample>(); + + // Uses microsecond as the unit of time. In the YT8M dataset, each feature + // represents a second. + const Timestamp timestamp = Timestamp(current_index_ * 1000000); + cc->Outputs() + .Tag(kQuantizedRgbFeature) + .AddPacket( + MakePacket<std::string>( + GetQuantizedFeature(sequence_example, kRgb, current_index_)) + .At(timestamp)); + cc->Outputs() + .Tag(kQuantizedAudioFeature) + .AddPacket( + MakePacket<std::string>( + GetQuantizedFeature(sequence_example, kAudio, current_index_)) + .At(timestamp)); + ++current_index_; + return ::mediapipe::OkStatus(); + } + + private: + int current_index_ = 0; + int feature_list_length_ = 0; +}; + +REGISTER_CALCULATOR(UnpackYt8mSequenceExampleCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc b/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc index a96e39918..068be5714 100644 --- a/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc @@ -23,10 +23,12 @@ namespace mediapipe { -namespace tf = ::tensorflow; - +namespace { auto& INPUT_1D = VectorFloatToTensorCalculatorOptions::INPUT_1D; auto& INPUT_2D = VectorFloatToTensorCalculatorOptions::INPUT_2D; +} // namespace + +namespace tf = ::tensorflow; // The calculator expects one input (a packet containing a vector<float> or // vector<vector<float>>) and generates one output (a packet containing a diff --git a/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator.cc b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator.cc new file mode 100644 index 000000000..1269e2761 --- /dev/null +++ b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator.cc @@ -0,0 +1,203 @@ +// Copyright 2019 The MediaPipe Authors.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Converts a single int or vector<int> or vector<vector<int>> to 1D (or 2D) +// tf::Tensor. + +#include "mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" + +namespace mediapipe { + +const char kVectorInt[] = "VECTOR_INT"; +const char kSingleInt[] = "SINGLE_INT"; +const char kTensorOut[] = "TENSOR_OUT"; + +namespace { +auto& INPUT_1D = VectorIntToTensorCalculatorOptions::INPUT_1D; +auto& INPUT_2D = VectorIntToTensorCalculatorOptions::INPUT_2D; +} // namespace + +namespace tf = ::tensorflow; + +template <typename T> +void AssignMatrixValue(int r, int c, int value, tf::Tensor* output_tensor) { + output_tensor->tensor<T, 2>()(r, c) = value; +} + +// The calculator expects one input (a packet containing a single int or +// vector<int> or vector<vector<int>>) and generates one output (a packet +// containing a tf::Tensor containing the same data). The output tensor will be +// either 1D or 2D with dimensions corresponding to the input vector(s). It +// will hold DT_INT32 or DT_UINT8 or DT_INT64 values. +// +// Example config: +// node { +// calculator: "VectorIntToTensorCalculator" +// input_stream: "SINGLE_INT:segment_size_int_stream" +// output_stream: "TENSOR_OUT:segment_size_tensor" +// } +// +// or +// +// node { +// calculator: "VectorIntToTensorCalculator" +// input_stream: "VECTOR_INT:vector_int_features" +// output_stream: "TENSOR_OUT:tensor_features" +// } +class VectorIntToTensorCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + VectorIntToTensorCalculatorOptions options_; +}; +REGISTER_CALCULATOR(VectorIntToTensorCalculator); + +::mediapipe::Status VectorIntToTensorCalculator::GetContract( + CalculatorContract* cc) { + const auto& options = cc->Options<VectorIntToTensorCalculatorOptions>(); + // Start with only one input packet.
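// (Illustration of the contract enforced below: with INPUT_1D the single entry may be tagged either SINGLE_INT or VECTOR_INT; with INPUT_2D it must be VECTOR_INT carrying a vector<vector<int>>.)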
+ RET_CHECK_EQ(cc->Inputs().NumEntries(), 1) + << "Only one input stream is supported."; + if (options.input_size() == INPUT_2D) { + cc->Inputs().Tag(kVectorInt).Set<std::vector<std::vector<int>>>(); + } else if (options.input_size() == INPUT_1D) { + if (cc->Inputs().HasTag(kSingleInt)) { + cc->Inputs().Tag(kSingleInt).Set<int>(); + } else { + cc->Inputs().Tag(kVectorInt).Set<std::vector<int>>(); + } + } else { + LOG(FATAL) << "input size not supported"; + } + RET_CHECK_EQ(cc->Outputs().NumEntries(), 1) + << "Only one output stream is supported."; + cc->Outputs().Tag(kTensorOut).Set<tf::Tensor>(); + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status VectorIntToTensorCalculator::Open(CalculatorContext* cc) { + options_ = cc->Options<VectorIntToTensorCalculatorOptions>(); + RET_CHECK(options_.tensor_data_type() == tf::DT_UINT8 || + options_.tensor_data_type() == tf::DT_INT32 || + options_.tensor_data_type() == tf::DT_INT64) + << "Output tensor data type is not supported."; + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status VectorIntToTensorCalculator::Process( + CalculatorContext* cc) { + tf::TensorShape tensor_shape; + if (options_.input_size() == INPUT_2D) { + const std::vector<std::vector<int>>& input = + cc->Inputs() + .Tag(kVectorInt) + .Value() + .Get<std::vector<std::vector<int>>>(); + + const int32 rows = input.size(); + CHECK_GE(rows, 1); + const int32 cols = input[0].size(); + CHECK_GE(cols, 1); + for (int i = 1; i < rows; ++i) { + CHECK_EQ(input[i].size(), cols); + } + if (options_.transpose()) { + tensor_shape = tf::TensorShape({cols, rows}); + } else { + tensor_shape = tf::TensorShape({rows, cols}); + } + auto output = ::absl::make_unique<tf::Tensor>(options_.tensor_data_type(), + tensor_shape); + if (options_.transpose()) { + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + switch (options_.tensor_data_type()) { + case tf::DT_INT64: + AssignMatrixValue<tf::int64>(c, r, input[r][c], output.get()); + break; + case tf::DT_UINT8: + AssignMatrixValue<tf::uint8>(c, r, input[r][c], output.get()); + break; + case tf::DT_INT32: + AssignMatrixValue<int>(c, r, input[r][c], output.get()); + break; + default: + LOG(FATAL) << "tensor data type is not supported."; + } + } + } + } else { + for (int r = 0; r < rows; ++r) { + for (int c = 0; c < cols; ++c) { + switch (options_.tensor_data_type()) { + case tf::DT_INT64: + AssignMatrixValue<tf::int64>(r, c, input[r][c], output.get()); + break; + case tf::DT_UINT8: + AssignMatrixValue<tf::uint8>(r, c, input[r][c], output.get()); + break; + case tf::DT_INT32: + AssignMatrixValue<int>(r, c, input[r][c], output.get()); + break; + default: + LOG(FATAL) << "tensor data type is not supported."; + } + } + } + } + cc->Outputs().Tag(kTensorOut).Add(output.release(), cc->InputTimestamp()); + } else if (options_.input_size() == INPUT_1D) { + std::vector<int> input; + if (cc->Inputs().HasTag(kSingleInt)) { + input.push_back(cc->Inputs().Tag(kSingleInt).Get<int>()); + } else { + input = cc->Inputs().Tag(kVectorInt).Value().Get<std::vector<int>>(); + } + CHECK_GE(input.size(), 1); + const int32 length = input.size(); + tensor_shape = tf::TensorShape({length}); + auto output = ::absl::make_unique<tf::Tensor>(options_.tensor_data_type(), + tensor_shape); + for (int i = 0; i < length; ++i) { + switch (options_.tensor_data_type()) { + case tf::DT_INT64: + output->tensor<tf::int64, 1>()(i) = input.at(i); + break; + case tf::DT_UINT8: + output->tensor<tf::uint8, 1>()(i) = input.at(i); + break; + case tf::DT_INT32: + output->tensor<int, 1>()(i) = input.at(i); + break; + default: + LOG(FATAL) << "tensor data type is not supported."; + } + } + cc->Outputs().Tag(kTensorOut).Add(output.release(), cc->InputTimestamp()); + } else { + LOG(FATAL) << "input size not supported"; + } + return ::mediapipe::OkStatus(); +} +
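// Worked example (illustrative): the INPUT_2D input {{0, 1}, {2, 3}} produces the tensor [[0, 1], [2, 3]] with transpose == false and [[0, 2], [1, 3]] with transpose == true, i.e. element (r, c) of the input lands at (c, r) of the output.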
+} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.proto b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.proto new file mode 100644 index 000000000..65554bb14 --- /dev/null +++ b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.proto @@ -0,0 +1,43 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "tensorflow/core/framework/types.proto"; + +message VectorIntToTensorCalculatorOptions { + extend mediapipe.CalculatorOptions { + optional VectorIntToTensorCalculatorOptions ext = 275364184; + } + enum InputSize { + UNKNOWN = 0; + INPUT_1D = 1; + INPUT_2D = 2; + } + + // If input_size is INPUT_2D, unpack a vector> to a + // 2d tensor (matrix). If INPUT_1D, convert a single int or vector + // into a 1d tensor (vector). + optional InputSize input_size = 1 [default = INPUT_1D]; + + // If true, the output tensor is transposed. + // Otherwise, the output tensor is not transposed. + // It will be ignored if tensor_is_2d is INPUT_1D. + optional bool transpose = 2 [default = false]; + + optional tensorflow.DataType tensor_data_type = 3 [default = DT_INT32]; +} diff --git a/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_test.cc b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_test.cc new file mode 100644 index 000000000..052a78516 --- /dev/null +++ b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_test.cc @@ -0,0 +1,202 @@ +// Copyright 2018 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" + +namespace mediapipe { + +namespace { + +namespace tf = ::tensorflow; + +class VectorIntToTensorCalculatorTest : public ::testing::Test { + protected: + void SetUpRunner( + const VectorIntToTensorCalculatorOptions::InputSize input_size, + const tensorflow::DataType tensor_data_type, const bool transpose, + const bool single_value) { + CalculatorGraphConfig::Node config; + config.set_calculator("VectorIntToTensorCalculator"); + if (single_value) { + config.add_input_stream("SINGLE_INT:input_int"); + } else { + config.add_input_stream("VECTOR_INT:input_int"); + } + config.add_output_stream("TENSOR_OUT:output_tensor"); + auto options = config.mutable_options()->MutableExtension( + VectorIntToTensorCalculatorOptions::ext); + options->set_input_size(input_size); + options->set_transpose(transpose); + options->set_tensor_data_type(tensor_data_type); + runner_ = ::absl::make_unique(config); + } + + void TestConvertFromVectoVectorInt(const bool transpose) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_2D, + tensorflow::DT_INT32, transpose, false); + auto input = ::absl::make_unique>>( + 2, std::vector(2)); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + input->at(i).at(j) = i * 2 + j; + } + } + + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("VECTOR_INT") + .packets.push_back(Adopt(input.release()).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(2, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT32, output_tensor.dtype()); + const auto matrix = output_tensor.matrix(); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + if (!transpose) { + EXPECT_EQ(i * 2 + j, matrix(i, j)); + } else { + EXPECT_EQ(j * 2 + i, matrix(i, j)); + } + } + } + } + + std::unique_ptr runner_; +}; + +TEST_F(VectorIntToTensorCalculatorTest, TestSingleValue) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_INT32, false, true); + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("SINGLE_INT") + .packets.push_back(MakePacket(1).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT32, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + EXPECT_EQ(1, vec(0)); +} + +TEST_F(VectorIntToTensorCalculatorTest, TesOneDim) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_INT32, false, false); + auto input = ::absl::make_unique>(5); + for (int i = 0; i < 5; ++i) { + input->at(i) = i; + } + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("VECTOR_INT") + .packets.push_back(Adopt(input.release()).At(Timestamp(time))); + + 
EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT32, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i, vec(i)); + } +} + +TEST_F(VectorIntToTensorCalculatorTest, TestTwoDims) { + for (bool transpose : {false, true}) { + TestConvertFromVectoVectorInt(transpose); + } +} + +TEST_F(VectorIntToTensorCalculatorTest, TestInt64) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_INT64, false, true); + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("SINGLE_INT") + .packets.push_back(MakePacket(2 ^ 31).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT64, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + EXPECT_EQ(2 ^ 31, vec(0)); +} + +TEST_F(VectorIntToTensorCalculatorTest, TestUint8) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_UINT8, false, false); + auto input = ::absl::make_unique>(5); + for (int i = 0; i < 5; ++i) { + input->at(i) = i; + } + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("VECTOR_INT") + .packets.push_back(Adopt(input.release()).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_UINT8, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i, vec(i)); + } +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/tflite/BUILD b/mediapipe/calculators/tflite/BUILD index a0b1fc0b6..89b4d980b 100644 --- a/mediapipe/calculators/tflite/BUILD +++ b/mediapipe/calculators/tflite/BUILD @@ -238,6 +238,7 @@ cc_library( "@org_tensorflow//tensorflow/lite/delegates/gpu/common:shape", "@org_tensorflow//tensorflow/lite/delegates/gpu/metal:buffer_convert", "@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate", + "@org_tensorflow//tensorflow/lite/delegates/gpu:metal_delegate_internal", ], "//conditions:default": [ "//mediapipe/gpu:gl_calculator_helper", diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.cc b/mediapipe/calculators/tflite/tflite_converter_calculator.cc index 598ae4965..a9dccaed8 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.cc @@ -25,7 +25,8 @@ #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" 
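// The same tri-condition now guards every GL-compute call site in this file. A minimal consolidation sketch (macro name hypothetical, not part of this patch) that would keep the call sites readable: // #if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ // !defined(__APPLE__) // #define MEDIAPIPE_HAS_GL_COMPUTE 1 // #endif // ...after which each call site reduces to #if defined(MEDIAPIPE_HAS_GL_COMPUTE).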
@@ -45,7 +46,8 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate.h" #endif // iOS -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS typedef id GpuTensor; @@ -67,7 +69,8 @@ typedef Eigen::Matrix namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlProgram; using ::tflite::gpu::gl::GlShader; @@ -146,7 +149,8 @@ class TfLiteConverterCalculator : public CalculatorBase { std::unique_ptr interpreter_ = nullptr; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_out_; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -181,7 +185,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Inputs().HasTag("IMAGE")) cc->Inputs().Tag("IMAGE").Set(); if (cc->Inputs().HasTag("MATRIX")) cc->Inputs().Tag("MATRIX").Set(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag("IMAGE_GPU")) { cc->Inputs().Tag("IMAGE_GPU").Set(); use_gpu |= true; @@ -190,7 +194,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Outputs().HasTag("TENSORS")) cc->Outputs().Tag("TENSORS").Set>(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Outputs().HasTag("TENSORS_GPU")) { cc->Outputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -198,7 +202,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); #endif // !MEDIAPIPE_DISABLE_GPU if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); @@ -218,7 +223,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Inputs().HasTag("IMAGE_GPU") || cc->Outputs().HasTag("IMAGE_OUT_GPU")) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) use_gpu_ = true; #else RET_CHECK_FAIL() << "GPU processing not enabled."; @@ -231,7 +236,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); cc->Outputs().HasTag("TENSORS_GPU")); // Cannot use quantization. 
use_quantized_tensors_ = false; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; @@ -264,7 +270,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); } ::mediapipe::Status TfLiteConverterCalculator::Close(CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) gpu_helper_.RunInGlContext([this] { gpu_data_out_.reset(); }); #endif #if defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -383,7 +390,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); ::mediapipe::Status TfLiteConverterCalculator::ProcessGPU( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // GpuBuffer to tflite::gpu::GlBuffer conversion. const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get(); MP_RETURN_IF_ERROR( @@ -468,7 +476,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); } ::mediapipe::Status TfLiteConverterCalculator::InitGpu(CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) // Get input image sizes. const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get(); mediapipe::ImageFormat::Format format = @@ -485,7 +493,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); RET_CHECK_FAIL() << "Num input channels is less than desired output."; #endif // !MEDIAPIPE_DISABLE_GPU -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &include_alpha, &input, &single_channel]() -> ::mediapipe::Status { // Device memory. 
diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 9bc02b48c..0abb48329 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -27,7 +27,7 @@ #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/model.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -48,11 +48,13 @@ #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/metal/buffer_convert.h" #include "tensorflow/lite/delegates/gpu/metal_delegate.h" +#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h" #endif // iOS namespace { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS typedef id GpuTensor; @@ -68,13 +70,14 @@ size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT // * Aux namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CopyBuffer; using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlBuffer; #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) struct GPUData { int elements = 1; GpuTensor buffer; @@ -147,7 +150,8 @@ class TfLiteInferenceCalculator : public CalculatorBase { std::unique_ptr model_; TfLiteDelegate* delegate_ = nullptr; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_in_; std::vector> gpu_data_out_; @@ -179,7 +183,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); if (cc->Inputs().HasTag("TENSORS")) cc->Inputs().Tag("TENSORS").Set>(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -188,7 +192,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); if (cc->Outputs().HasTag("TENSORS")) cc->Outputs().Tag("TENSORS").Set>(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Outputs().HasTag("TENSORS_GPU")) { cc->Outputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -206,7 +210,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); use_gpu |= options.use_gpu(); if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); @@ -225,7 +230,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); MP_RETURN_IF_ERROR(LoadOptions(cc)); if (cc->Inputs().HasTag("TENSORS_GPU")) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if 
!defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) gpu_input_ = true; gpu_inference_ = true; // Inference must be on GPU also. #else @@ -235,7 +240,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } if (cc->Outputs().HasTag("TENSORS_GPU")) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) gpu_output_ = true; RET_CHECK(cc->Inputs().HasTag("TENSORS_GPU")) << "GPU output must also have GPU Input."; @@ -248,13 +253,15 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); MP_RETURN_IF_ERROR(LoadModel(cc)); if (gpu_inference_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; RET_CHECK(gpu_helper_); #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &cc]() -> ::mediapipe::Status { return LoadDelegate(cc); })); #else @@ -262,6 +269,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); #endif } +#if defined(__EMSCRIPTEN__) + MP_RETURN_IF_ERROR(LoadDelegate(cc)); +#endif // __EMSCRIPTEN__ + return ::mediapipe::OkStatus(); } @@ -269,7 +280,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 1. Receive pre-processed tensor inputs. if (gpu_input_) { // Read GPU input into SSBO. -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_EQ(input_tensors.size(), 1); @@ -315,7 +327,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 2. Run inference. if (gpu_inference_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk); @@ -330,7 +343,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 3. Output processed tensors. if (gpu_output_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // Output result tensors (GPU). 
auto output_tensors = absl::make_unique>(); MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( @@ -392,7 +406,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); ::mediapipe::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) { if (delegate_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { TfLiteGpuDelegateDelete(delegate_); gpu_data_in_.reset(); @@ -456,6 +471,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); RET_CHECK(interpreter_); +#if defined(__EMSCRIPTEN__) + interpreter_->SetNumThreads(1); +#endif // __EMSCRIPTEN__ + if (gpu_output_) { use_quantized_tensors_ = false; } else { @@ -471,7 +490,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); ::mediapipe::Status TfLiteInferenceCalculator::LoadDelegate( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // Configure and create the delegate. TfLiteGpuDelegateOptions options = TfLiteGpuDelegateOptionsDefault(); options.compile_options.precision_loss_allowed = 1; @@ -533,9 +553,9 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); #if defined(__APPLE__) && !TARGET_OS_OSX // iOS // Configure and create the delegate. - GpuDelegateOptions options; + TFLGpuDelegateOptions options; options.allow_precision_loss = false; // Must match converter, F=float/T=half - options.wait_type = GpuDelegateOptions::WaitType::kPassive; + options.wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive; if (!delegate_) delegate_ = TFLGpuDelegateCreate(&options); id device = gpu_helper_.mtlDevice; diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc index 5e9e9988e..906b4242f 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc @@ -24,7 +24,8 @@ #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/interpreter.h" -#if defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(__EMSCRIPTEN__) || defined(__ANDROID__) || \ + (defined(__APPLE__) && !TARGET_OS_OSX) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else @@ -66,8 +67,8 @@ class TfLiteTensorsToClassificationCalculator : public CalculatorBase { ::mediapipe::Status Close(CalculatorContext* cc) override; private: + ::mediapipe::TfLiteTensorsToClassificationCalculatorOptions options_; int top_k_ = 0; - double min_score_threshold_ = 0; std::unordered_map label_map_; bool label_map_loaded_ = false; }; @@ -93,15 +94,14 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); CalculatorContext* cc) { cc->SetOffset(TimestampDiff(0)); - auto options = cc->Options< + options_ = cc->Options< ::mediapipe::TfLiteTensorsToClassificationCalculatorOptions>(); - top_k_ = options.top_k(); - min_score_threshold_ = options.min_score_threshold(); - if (options.has_label_map_path()) { + top_k_ = options_.top_k(); + if (options_.has_label_map_path()) { std::string string_path; ASSIGN_OR_RETURN(string_path, - PathToResourceAsFile(options.label_map_path())); + PathToResourceAsFile(options_.label_map_path())); std::string label_map_string; 
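// (Label map layout assumed here, since the parsing loop is outside this hunk: conventionally one label per line, where line i names class id i; e.g. a file containing "background", "person", "car" would yield label_map_[2] == "car".)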
MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string)); @@ -125,9 +125,11 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); RET_CHECK_EQ(input_tensors.size(), 1); const TfLiteTensor* raw_score_tensor = &input_tensors[0]; - RET_CHECK_EQ(raw_score_tensor->dims->size, 2); - RET_CHECK_EQ(raw_score_tensor->dims->data[0], 1); - int num_classes = raw_score_tensor->dims->data[1]; + int num_classes = 1; + for (int i = 0; i < raw_score_tensor->dims->size; ++i) { + num_classes *= raw_score_tensor->dims->data[i]; + } + if (label_map_loaded_) { RET_CHECK_EQ(num_classes, label_map_.size()); } @@ -135,7 +137,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); auto classification_list = absl::make_unique(); for (int i = 0; i < num_classes; ++i) { - if (raw_scores[i] < min_score_threshold_) { + if (options_.has_min_score_threshold() && + raw_scores[i] < options_.min_score_threshold()) { continue; } Classification* classification = classification_list->add_classification(); @@ -148,6 +151,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); // Note that partial_sort will raise error when top_k_ > // classification_list->classification_size(). + CHECK_GE(classification_list->classification_size(), top_k_); auto raw_classification_list = classification_list->mutable_classification(); if (top_k_ > 0 && classification_list->classification_size() >= top_k_) { std::partial_sort(raw_classification_list->begin(), diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc index 8e790b00a..bac852f44 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc @@ -27,7 +27,8 @@ #include "mediapipe/framework/port/ret_check.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" @@ -55,12 +56,14 @@ constexpr int kNumCoordsPerBox = 4; namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlShader; #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; typedef ::tflite::gpu::gl::GlProgram GpuProgram; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -70,7 +73,7 @@ typedef id GpuProgram; namespace { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) struct GPUData { GpuProgram decode_program; GpuProgram score_program; @@ -180,7 +183,8 @@ class TfLiteTensorsToDetectionsCalculator : public CalculatorBase { std::vector anchors_; bool side_packet_anchors_{}; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -204,7 +208,7 @@ 
REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); cc->Inputs().Tag("TENSORS").Set>(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -222,7 +226,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); } if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); @@ -238,7 +243,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); if (cc->Inputs().HasTag("TENSORS_GPU")) { gpu_input_ = true; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; @@ -400,7 +406,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); } ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::ProcessGPU( CalculatorContext* cc, std::vector* output_detections) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_GE(input_tensors.size(), 2); @@ -562,7 +569,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::Close( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) gpu_helper_.RunInGlContext([this] { gpu_data_.reset(); }); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_data_.reset(); @@ -715,7 +723,8 @@ Detection TfLiteTensorsToDetectionsCalculator::ConvertToDetection( ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::GpuInit( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { gpu_data_ = absl::make_unique(); diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc index 1d646e4a3..996b1fa35 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc @@ -21,7 +21,8 @@ namespace mediapipe { // A calculator for converting TFLite tensors from regression models into -// landmarks. +// landmarks. Note that if the landmarks in the tensor has more than 3 +// dimensions, only the first 3 dimensions will be converted to x,y,z. // // Input: // TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. Only the first @@ -122,9 +123,6 @@ REGISTER_CALCULATOR(TfLiteTensorsToLandmarksCalculator); num_values *= raw_tensor->dims->data[i]; } const int num_dimensions = num_values / num_landmarks_; - // Landmarks must have less than 3 dimensions. Otherwise please consider - // using matrix. 
- CHECK_LE(num_dimensions, 3); CHECK_GT(num_dimensions, 0); const float* raw_landmarks = raw_tensor->data.f; diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc index 16805a066..55279308a 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc @@ -28,7 +28,8 @@ #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gl_simple_shaders.h" #include "mediapipe/gpu/shader_util.h" @@ -53,7 +54,8 @@ float Clamp(float val, float min, float max) { namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CopyBuffer; using ::tflite::gpu::gl::CreateReadWriteRgbaImageTexture; using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; @@ -129,7 +131,8 @@ class TfLiteTensorsToSegmentationCalculator : public CalculatorBase { int tensor_channels_ = 0; bool use_gpu_ = false; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr mask_program_with_prev_; std::unique_ptr mask_program_no_prev_; @@ -159,7 +162,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); } // Inputs GPU. -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -178,7 +182,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); if (cc->Outputs().HasTag("MASK")) { cc->Outputs().Tag("MASK").Set(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) if (cc->Outputs().HasTag("MASK_GPU")) { cc->Outputs().Tag("MASK_GPU").Set(); use_gpu |= true; @@ -186,7 +191,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); #endif // !MEDIAPIPE_DISABLE_GPU if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #endif // !MEDIAPIPE_DISABLE_GPU } @@ -199,7 +205,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); if (cc->Inputs().HasTag("TENSORS_GPU")) { use_gpu_ = true; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #endif // !MEDIAPIPE_DISABLE_GPU } @@ -207,7 +214,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); MP_RETURN_IF_ERROR(LoadOptions(cc)); if (use_gpu_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status { 
MP_RETURN_IF_ERROR(InitGpu(cc)); @@ -224,7 +232,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Process( CalculatorContext* cc) { if (use_gpu_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status { MP_RETURN_IF_ERROR(ProcessGpu(cc)); @@ -240,7 +249,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Close( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) gpu_helper_.RunInGlContext([this] { if (upsample_program_) glDeleteProgram(upsample_program_); upsample_program_ = 0; @@ -367,7 +377,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); if (cc->Inputs().Tag("TENSORS_GPU").IsEmpty()) { return ::mediapipe::OkStatus(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // Get input streams. const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); @@ -453,7 +464,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); } void TfLiteTensorsToSegmentationCalculator::GlRender() { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) static const GLfloat square_vertices[] = { -1.0f, -1.0f, // bottom left 1.0f, -1.0f, // bottom right @@ -525,7 +537,8 @@ void TfLiteTensorsToSegmentationCalculator::GlRender() { ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::InitGpu( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { // A shader to process a segmentation tensor into an output mask, diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index 7bd06fe97..d3a2e341d 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + licenses(["notice"]) # Apache 2.0 -package(default_visibility = ["//visibility:private"]) +package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "annotation_overlay_calculator_proto", srcs = ["annotation_overlay_calculator.proto"], @@ -72,6 +72,24 @@ proto_library( ], ) +proto_library( + name = "collection_has_min_size_calculator_proto", + srcs = ["collection_has_min_size_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + ], +) + +proto_library( + name = "association_calculator_proto", + srcs = ["association_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + ], +) + mediapipe_cc_proto_library( name = "annotation_overlay_calculator_cc_proto", srcs = ["annotation_overlay_calculator.proto"], @@ -141,6 +159,26 @@ mediapipe_cc_proto_library( ], ) +mediapipe_cc_proto_library( + name = "collection_has_min_size_calculator_cc_proto", + srcs = ["collection_has_min_size_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + ], + visibility = ["//mediapipe:__subpackages__"], + deps = [":collection_has_min_size_calculator_proto"], +) + +mediapipe_cc_proto_library( + name = "association_calculator_cc_proto", + srcs = ["association_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + ], + visibility = ["//mediapipe:__subpackages__"], + deps = [":association_calculator_proto"], +) + cc_library( name = "packet_frequency_calculator", srcs = ["packet_frequency_calculator.cc"], @@ -234,6 +272,7 @@ cc_library( "//mediapipe/framework/port:status", "//mediapipe/framework/port:vector", "//mediapipe/util:annotation_renderer", + "//mediapipe/util:render_data_cc_proto", ] + select({ "//mediapipe/gpu:disable_gpu": [], "//conditions:default": [ @@ -360,6 +399,16 @@ mediapipe_cc_proto_library( deps = [":landmark_projection_calculator_proto"], ) +mediapipe_cc_proto_library( + name = "landmarks_to_floats_calculator_cc_proto", + srcs = ["landmarks_to_floats_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":landmarks_to_floats_calculator_proto"], +) + mediapipe_cc_proto_library( name = "rect_transformation_calculator_cc_proto", srcs = ["rect_transformation_calculator.proto"], @@ -372,7 +421,12 @@ mediapipe_cc_proto_library( cc_library( name = "detections_to_rects_calculator", - srcs = ["detections_to_rects_calculator.cc"], + srcs = [ + "detections_to_rects_calculator.cc", + ], + hdrs = [ + "detections_to_rects_calculator.h", + ], visibility = ["//visibility:public"], deps = [ ":detections_to_rects_calculator_cc_proto", @@ -454,6 +508,17 @@ proto_library( ], ) +proto_library( + name = "labels_to_render_data_calculator_proto", + srcs = ["labels_to_render_data_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + "//mediapipe/util:color_proto", + "//mediapipe/util:render_data_proto", + ], +) + proto_library( name = "thresholding_calculator_proto", srcs = ["thresholding_calculator.proto"], @@ -483,6 +548,15 @@ proto_library( ], ) +proto_library( + name = "landmarks_to_floats_calculator_proto", + srcs = ["landmarks_to_floats_calculator.proto"], + visibility = 
["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + ], +) + proto_library( name = "rect_transformation_calculator_proto", srcs = ["rect_transformation_calculator.proto"], @@ -577,6 +651,26 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "labels_to_render_data_calculator", + srcs = ["labels_to_render_data_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":labels_to_render_data_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:video_stream_header", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/util:color_cc_proto", + "//mediapipe/util:render_data_cc_proto", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + cc_library( name = "rect_to_render_data_calculator", srcs = ["rect_to_render_data_calculator.cc"], @@ -658,6 +752,22 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "landmarks_to_floats_calculator", + srcs = ["landmarks_to_floats_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":landmarks_to_floats_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@eigen_archive//:eigen", + ], + alwayslink = 1, +) + cc_test( name = "detection_letterbox_removal_calculator_test", srcs = ["detection_letterbox_removal_calculator_test.cc"], @@ -714,6 +824,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":top_k_scores_calculator_cc_proto", + "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", "//mediapipe/framework/port:statusor", @@ -750,3 +861,125 @@ cc_test( "//mediapipe/framework/port:status", ], ) + +mediapipe_cc_proto_library( + name = "labels_to_render_data_calculator_cc_proto", + srcs = ["labels_to_render_data_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/util:color_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":labels_to_render_data_calculator_proto"], +) + +cc_library( + name = "local_file_contents_calculator", + srcs = ["local_file_contents_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_library( + name = "filter_collection_calculator", + srcs = ["filter_collection_calculator.cc"], + hdrs = ["filter_collection_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "collection_has_min_size_calculator", + srcs = ["collection_has_min_size_calculator.cc"], + hdrs = ["collection_has_min_size_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + ":collection_has_min_size_calculator_cc_proto", + 
"//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_library( + name = "association_calculator", + hdrs = ["association_calculator.h"], + visibility = ["//visibility:public"], + deps = [ + ":association_calculator_cc_proto", + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:collection_item_id", + "//mediapipe/framework/port:rectangle", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/memory", + ], + alwayslink = 1, +) + +cc_library( + name = "association_norm_rect_calculator", + srcs = ["association_norm_rect_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":association_calculator", + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:rectangle", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_library( + name = "association_detection_calculator", + srcs = ["association_detection_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":association_calculator", + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location", + "//mediapipe/framework/port:rectangle", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "association_calculator_test", + srcs = ["association_calculator_test.cc"], + deps = [ + ":association_detection_calculator", + ":association_norm_rect_calculator", + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework:collection_item_id", + "//mediapipe/framework:packet", + "//mediapipe/framework/deps:message_matchers", + "//mediapipe/framework/formats:detection_cc_proto", + "//mediapipe/framework/formats:location_data_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + ], +) diff --git a/mediapipe/calculators/util/annotation_overlay_calculator.cc b/mediapipe/calculators/util/annotation_overlay_calculator.cc index 5f5c53582..35341656f 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.cc +++ b/mediapipe/calculators/util/annotation_overlay_calculator.cc @@ -26,6 +26,7 @@ #include "mediapipe/framework/port/vector.h" #include "mediapipe/util/annotation_renderer.h" #include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" #if !defined(MEDIAPIPE_DISABLE_GPU) #include "mediapipe/gpu/gl_calculator_helper.h" @@ -41,6 +42,8 @@ namespace { constexpr char kInputFrameTag[] = "INPUT_FRAME"; constexpr char kOutputFrameTag[] = "OUTPUT_FRAME"; +constexpr char kInputVectorTag[] = "VECTOR"; + constexpr char kInputFrameTagGpu[] = "INPUT_FRAME_GPU"; constexpr char kOutputFrameTagGpu[] = "OUTPUT_FRAME_GPU"; @@ -65,6 +68,9 @@ constexpr int kAnnotationBackgroundColor[] = {100, 101, 102}; // 2. RenderData proto on variable number of input streams. All the RenderData // at a particular timestamp is drawn on the image in the order of their // input streams. No tags required. +// 3. std::vector on variable number of input streams. 
RenderData +// objects at a particular timestamp are drawn on the image in order of the +// input vector items. These input streams are tagged with "VECTOR". // // Output: // 1. OUTPUT_FRAME or OUTPUT_FRAME_GPU: A rendered ImageFrame (or GpuBuffer). // @@ -85,6 +91,8 @@ constexpr int kAnnotationBackgroundColor[] = {100, 101, 102}; // input_stream: "render_data_1" // input_stream: "render_data_2" // input_stream: "render_data_3" +// input_stream: "VECTOR:0:render_data_vec_0" +// input_stream: "VECTOR:1:render_data_vec_1" // output_stream: "OUTPUT_FRAME:decorated_frames" // options { // [mediapipe.AnnotationOverlayCalculatorOptions.ext] { @@ -99,6 +107,8 @@ constexpr int kAnnotationBackgroundColor[] = {100, 101, 102}; // input_stream: "render_data_1" // input_stream: "render_data_2" // input_stream: "render_data_3" +// input_stream: "VECTOR:0:render_data_vec_0" +// input_stream: "VECTOR:1:render_data_vec_1" // output_stream: "OUTPUT_FRAME_GPU:decorated_frames" // options { // [mediapipe.AnnotationOverlayCalculatorOptions.ext] { @@ -138,9 +148,6 @@ class AnnotationOverlayCalculator : public CalculatorBase { // Underlying helper renderer library. std::unique_ptr<AnnotationRenderer> renderer_; - // Number of input streams with render data. - int num_render_streams_; - // Indicates if image frame is available as input. bool image_frame_available_ = false; @@ -171,25 +178,28 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); return ::mediapipe::InternalError("GPU output must have GPU input."); } - // Assume all inputs are render streams; adjust below. - int num_render_streams = cc->Inputs().NumEntries(); - // Input image to render onto copy of. #if !defined(MEDIAPIPE_DISABLE_GPU) if (cc->Inputs().HasTag(kInputFrameTagGpu)) { cc->Inputs().Tag(kInputFrameTagGpu).Set<mediapipe::GpuBuffer>(); - num_render_streams = cc->Inputs().NumEntries() - 1; use_gpu |= true; } #endif // !MEDIAPIPE_DISABLE_GPU if (cc->Inputs().HasTag(kInputFrameTag)) { cc->Inputs().Tag(kInputFrameTag).Set<ImageFrame>(); - num_render_streams = cc->Inputs().NumEntries() - 1; } // Data streams to render. - for (int i = 0; i < num_render_streams; ++i) { - cc->Inputs().Index(i).Set<RenderData>(); + for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); + ++id) { + auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); + std::string tag = tag_and_index.first; + if (tag == kInputVectorTag) { + cc->Inputs().Get(id).Set<std::vector<RenderData>>(); + } else if (tag.empty()) { + // Empty tag defaults to accepting a single object of RenderData type. + cc->Inputs().Get(id).Set<RenderData>(); + } } // Rendered image. @@ -228,12 +238,10 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); if (cc->Inputs().HasTag(kInputFrameTagGpu) || cc->Inputs().HasTag(kInputFrameTag)) { image_frame_available_ = true; - num_render_streams_ = cc->Inputs().NumEntries() - 1; } else { image_frame_available_ = false; RET_CHECK(options_.has_canvas_width_px()); RET_CHECK(options_.has_canvas_height_px()); - num_render_streams_ = cc->Inputs().NumEntries(); } // Initialize the helper renderer library. @@ -285,12 +293,28 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); renderer_->AdoptImage(image_mat.get()); // Render streams onto render target.
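+  // Note (editor's addition, inferred from the loop below): input streams are
+  // visited in ascending stream-id (declaration) order, so RenderData from
+  // later streams is drawn on top of the output of earlier ones.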
- for (int i = 0; i < num_render_streams_; ++i) { - if (cc->Inputs().Index(i).IsEmpty()) { + for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); + ++id) { + auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); + std::string tag = tag_and_index.first; + if (!tag.empty() && tag != kInputVectorTag) { continue; } - const RenderData& render_data = cc->Inputs().Index(i).Get<RenderData>(); - renderer_->RenderDataOnImage(render_data); + if (cc->Inputs().Get(id).IsEmpty()) { + continue; + } + if (tag.empty()) { + // Empty tag defaults to accepting a single object of RenderData type. + const RenderData& render_data = cc->Inputs().Get(id).Get<RenderData>(); + renderer_->RenderDataOnImage(render_data); + } else { + RET_CHECK_EQ(kInputVectorTag, tag); + const std::vector<RenderData>& render_data_vec = + cc->Inputs().Get(id).Get<std::vector<RenderData>>(); + for (const RenderData& render_data : render_data_vec) { + renderer_->RenderDataOnImage(render_data); + } + } } if (use_gpu_) { diff --git a/mediapipe/calculators/util/association_calculator.h b/mediapipe/calculators/util/association_calculator.h new file mode 100644 index 000000000..a16de4977 --- /dev/null +++ b/mediapipe/calculators/util/association_calculator.h @@ -0,0 +1,259 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_ASSOCIATION_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_ASSOCIATION_CALCULATOR_H_ + +#include <memory> +#include <vector> + +#include "absl/memory/memory.h" +#include "mediapipe/calculators/util/association_calculator.pb.h" +#include "mediapipe/framework/calculator_context.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/collection_item_id.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/rectangle.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Computes the overlap similarity based on Intersection over Union (IoU) of +// two rectangles. +inline float OverlapSimilarity(const Rectangle_f& rect1, + const Rectangle_f& rect2) { + if (!rect1.Intersects(rect2)) return 0.0f; + // Compute IoU similarity score. + const float intersection_area = Rectangle_f(rect1).Intersect(rect2).Area(); + const float normalization = rect1.Area() + rect2.Area() - intersection_area; + return normalization > 0.0f ? intersection_area / normalization : 0.0f; +} + +// AssociationCalculator accepts multiple inputs of vectors of type T that can +// be converted to Rectangle_f. The output is a vector of type T that contains +// elements from the input vectors that don't overlap with each other. When +// two elements overlap, the element that comes in from a later input stream +// is kept in the output. This association operation is useful for multiple +// instance inference pipelines in MediaPipe. +// If an input stream is tagged with "PREV" tag, IDs of overlapping elements +// from the "PREV" input stream are propagated to the output. Elements in the +// "PREV" input stream that don't overlap with other elements are not added to +// the output. This stream is designed to take detections from the previous +// timestamp, e.g. the output of PreviousLoopbackCalculator, to provide +// temporal association. See AssociationDetectionCalculator and +// AssociationNormRectCalculator for example uses.
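+// Example config (editor's addition; stream names are illustrative and the
+// config mirrors the ones used in association_calculator_test.cc, using the
+// AssociationNormRectCalculator subclass registered below):
+// node {
+//   calculator: "AssociationNormRectCalculator"
+//   input_stream: "PREV:prev_rects"
+//   input_stream: "current_rects"
+//   output_stream: "output_rects"
+//   options {
+//     [mediapipe.AssociationCalculatorOptions.ext] {
+//       min_similarity_threshold: 0.1
+//     }
+//   }
+// }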
Elements in the "PREV" +// input stream that don't overlap with other elements are not added to the +// output. This stream is designed to take detections from previous timestamp, +// e.g. output of PreviousLoopbackCalculator to provide temporal association. +// See AssociationDetectionCalculator and AssociationNormRectCalculator for +// example uses. +template +class AssociationCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + // Atmost one input stream can be tagged with "PREV". + RET_CHECK_LE(cc->Inputs().NumEntries("PREV"), 1); + + if (cc->Inputs().HasTag("PREV")) { + RET_CHECK_GE(cc->Inputs().NumEntries(), 2); + } + + for (CollectionItemId id = cc->Inputs().BeginId(); + id < cc->Inputs().EndId(); ++id) { + cc->Inputs().Get(id).Set>(); + } + + cc->Outputs().Index(0).Set>(); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + + has_prev_input_stream_ = cc->Inputs().HasTag("PREV"); + if (has_prev_input_stream_) { + prev_input_stream_id_ = cc->Inputs().GetId("PREV", 0); + } + options_ = cc->Options<::mediapipe::AssociationCalculatorOptions>(); + CHECK_GE(options_.min_similarity_threshold(), 0); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + auto get_non_overlapping_elements = GetNonOverlappingElements(cc); + if (!get_non_overlapping_elements.ok()) { + return get_non_overlapping_elements.status(); + } + std::list result = get_non_overlapping_elements.ValueOrDie(); + + if (has_prev_input_stream_ && + !cc->Inputs().Get(prev_input_stream_id_).IsEmpty()) { + // Processed all regular input streams. Now compare the result list + // elements with those in the PREV input stream, and propagate IDs from + // PREV input stream as appropriate. + const std::vector& prev_input_vec = + cc->Inputs() + .Get(prev_input_stream_id_) + .template Get>(); + + MP_RETURN_IF_ERROR( + PropagateIdsFromPreviousToCurrent(prev_input_vec, &result)); + } + + auto output = absl::make_unique>(); + for (auto it = result.begin(); it != result.end(); ++it) { + output->push_back(*it); + } + cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp()); + + return ::mediapipe::OkStatus(); + } + + protected: + ::mediapipe::AssociationCalculatorOptions options_; + + bool has_prev_input_stream_; + CollectionItemId prev_input_stream_id_; + + virtual ::mediapipe::StatusOr GetRectangle(const T& input) { + return ::mediapipe::OkStatus(); + } + + virtual std::pair GetId(const T& input) { return {false, -1}; } + + virtual void SetId(T* input, int id) {} + + private: + // Get a list of non-overlapping elements from all input streams, with + // increasing order of priority based on input stream index. + mediapipe::StatusOr> GetNonOverlappingElements( + CalculatorContext* cc) { + std::list result; + + // Initialize result with the first non-empty input vector. 
+ CollectionItemId non_empty_id = cc->Inputs().BeginId(); + for (CollectionItemId id = cc->Inputs().BeginId(); + id < cc->Inputs().EndId(); ++id) { + if (id == prev_input_stream_id_ || cc->Inputs().Get(id).IsEmpty()) { + continue; + } + const std::vector<T>& input_vec = + cc->Inputs().Get(id).Get<std::vector<T>>(); + if (!input_vec.empty()) { + non_empty_id = id; + result.push_back(input_vec[0]); + for (int j = 1; j < input_vec.size(); ++j) { + MP_RETURN_IF_ERROR(AddElementToList(input_vec[j], &result)); + } + break; + } + } + + // Compare remaining input vectors with the non-empty result vector, + // remove lower-priority overlapping elements from the result vector and + // add corresponding higher-priority elements as necessary. + for (CollectionItemId id = non_empty_id + 1; id < cc->Inputs().EndId(); + ++id) { + if (id == prev_input_stream_id_ || cc->Inputs().Get(id).IsEmpty()) { + continue; + } + const std::vector<T>& input_vec = + cc->Inputs().Get(id).Get<std::vector<T>>(); + + for (int vi = 0; vi < input_vec.size(); ++vi) { + MP_RETURN_IF_ERROR(AddElementToList(input_vec[vi], &result)); + } + } + + return result; + } + + ::mediapipe::Status AddElementToList(T element, std::list<T>* current) { + // Compare this element with elements of the input collection. If this + // element has high overlap with elements of the collection, remove + // those elements from the collection and add this element. + ASSIGN_OR_RETURN(auto cur_rect, GetRectangle(element)); + + bool change_id = false; + int new_elem_id = -1; + + for (auto uit = current->begin(); uit != current->end();) { + ASSIGN_OR_RETURN(auto prev_rect, GetRectangle(*uit)); + if (OverlapSimilarity(cur_rect, prev_rect) > + options_.min_similarity_threshold()) { + std::pair<bool, int> prev_id = GetId(*uit); + // If prev_id.first is false (i.e. the element has no ID), change_id + // and new_elem_id are left unchanged. + if (prev_id.first) { + change_id = prev_id.first; + new_elem_id = prev_id.second; + } + uit = current->erase(uit); + } else { + ++uit; + } + } + + if (change_id) { + SetId(&element, new_elem_id); + } + current->push_back(element); + + return ::mediapipe::OkStatus(); + } + + // Compare elements of the current list with elements of the collection from + // the previous input stream, and propagate IDs from the previous input + // stream as appropriate. + ::mediapipe::Status PropagateIdsFromPreviousToCurrent( + const std::vector<T>& prev_input_vec, std::list<T>* current) { + for (auto vit = current->begin(); vit != current->end(); ++vit) { + auto get_cur_rectangle = GetRectangle(*vit); + if (!get_cur_rectangle.ok()) { + return get_cur_rectangle.status(); + } + const Rectangle_f& cur_rect = get_cur_rectangle.ValueOrDie(); + + bool change_id = false; + int id_for_vi = -1; + + for (int ui = 0; ui < prev_input_vec.size(); ++ui) { + auto get_prev_rectangle = GetRectangle(prev_input_vec[ui]); + if (!get_prev_rectangle.ok()) { + return get_prev_rectangle.status(); + } + const Rectangle_f& prev_rect = get_prev_rectangle.ValueOrDie(); + + if (OverlapSimilarity(cur_rect, prev_rect) > + options_.min_similarity_threshold()) { + std::pair<bool, int> prev_id = GetId(prev_input_vec[ui]); + // If prev_id.first is false (i.e. the element has no ID), change_id + // and id_for_vi are left unchanged.
+ if (prev_id.first) { + change_id = prev_id.first; + id_for_vi = prev_id.second; + } + } + } + + if (change_id) { + T element = *vit; + SetId(&element, id_for_vi); + *vit = element; + } + } + return ::mediapipe::OkStatus(); + } +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_UTIL_ASSOCIATION_CALCULATOR_H_ diff --git a/mediapipe/calculators/util/association_calculator.proto b/mediapipe/calculators/util/association_calculator.proto new file mode 100644 index 000000000..ca66f80b8 --- /dev/null +++ b/mediapipe/calculators/util/association_calculator.proto @@ -0,0 +1,27 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message AssociationCalculatorOptions { + extend CalculatorOptions { + optional AssociationCalculatorOptions ext = 275124847; + } + + optional float min_similarity_threshold = 1 [default = 1.0]; +} diff --git a/mediapipe/calculators/util/association_calculator_test.cc b/mediapipe/calculators/util/association_calculator_test.cc new file mode 100644 index 000000000..67fee7e82 --- /dev/null +++ b/mediapipe/calculators/util/association_calculator_test.cc @@ -0,0 +1,476 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
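+
+// Tests for AssociationDetectionCalculator and AssociationNormRectCalculator:
+// they exercise priority-based de-duplication across input streams and ID
+// propagation from the "PREV" stream.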
+ +#include "mediapipe/framework/calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/collection_item_id.h" +#include "mediapipe/framework/deps/message_matchers.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" + +namespace mediapipe { + +namespace { + +::mediapipe::Detection DetectionWithRelativeLocationData(double xmin, + double ymin, + double width, + double height) { + ::mediapipe::Detection detection; + ::mediapipe::LocationData* location_data = detection.mutable_location_data(); + location_data->set_format(::mediapipe::LocationData::RELATIVE_BOUNDING_BOX); + location_data->mutable_relative_bounding_box()->set_xmin(xmin); + location_data->mutable_relative_bounding_box()->set_ymin(ymin); + location_data->mutable_relative_bounding_box()->set_width(width); + location_data->mutable_relative_bounding_box()->set_height(height); + return detection; +} + +} // namespace + +class AssociationDetectionCalculatorTest : public ::testing::Test { + protected: + AssociationDetectionCalculatorTest() { + // 0.4 ================ + // | | | | + // 0.3 ===================== | DET2 | | + // | | | DET1 | | | DET4 | + // 0.2 | DET0 | =========== ================ + // | | | | | | + // 0.1 =====|=============== | + // | DET3 | | | + // 0.0 ================ | + // | DET5 | + // -0.1 =========== + // 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 + + // Detection det_0. + det_0 = DetectionWithRelativeLocationData(/*xmin=*/0.1, /*ymin=*/0.1, + /*width=*/0.2, /*height=*/0.2); + det_0.set_detection_id(0); + + // Detection det_1. + det_1 = DetectionWithRelativeLocationData(/*xmin=*/0.3, /*ymin=*/0.1, + /*width=*/0.2, /*height=*/0.2); + det_1.set_detection_id(1); + + // Detection det_2. + det_2 = DetectionWithRelativeLocationData(/*xmin=*/0.9, /*ymin=*/0.2, + /*width=*/0.2, /*height=*/0.2); + det_2.set_detection_id(2); + + // Detection det_3. + det_3 = DetectionWithRelativeLocationData(/*xmin=*/0.2, /*ymin=*/0.0, + /*width=*/0.3, /*height=*/0.3); + det_3.set_detection_id(3); + + // Detection det_4. + det_4 = DetectionWithRelativeLocationData(/*xmin=*/1.0, /*ymin=*/0.2, + /*width=*/0.2, /*height=*/0.2); + det_4.set_detection_id(4); + + // Detection det_5. + det_5 = DetectionWithRelativeLocationData(/*xmin=*/0.3, /*ymin=*/-0.1, + /*width=*/0.3, /*height=*/0.3); + det_5.set_detection_id(5); + } + + ::mediapipe::Detection det_0, det_1, det_2, det_3, det_4, det_5; +}; + +TEST_F(AssociationDetectionCalculatorTest, DetectionAssocTest) { + CalculatorRunner runner(ParseTextProtoOrDie(R"( + calculator: "AssociationDetectionCalculator" + input_stream: "input_vec_0" + input_stream: "input_vec_1" + input_stream: "input_vec_2" + output_stream: "output_vec" + options { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.1 + } + } + )")); + + // Input Stream 0: det_0, det_1, det_2. 
+ auto input_vec_0 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_0->push_back(det_0); + input_vec_0->push_back(det_1); + input_vec_0->push_back(det_2); + runner.MutableInputs()->Index(0).packets.push_back( + Adopt(input_vec_0.release()).At(Timestamp(1))); + + // Input Stream 1: det_3, det_4. + auto input_vec_1 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_1->push_back(det_3); + input_vec_1->push_back(det_4); + runner.MutableInputs()->Index(1).packets.push_back( + Adopt(input_vec_1.release()).At(Timestamp(1))); + + // Input Stream 2: det_5. + auto input_vec_2 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_2->push_back(det_5); + runner.MutableInputs()->Index(2).packets.push_back( + Adopt(input_vec_2.release()).At(Timestamp(1))); + + MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; + const std::vector<Packet>& output = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, output.size()); + const auto& assoc_rects = + output[0].Get<std::vector<::mediapipe::Detection>>(); + + // det_3 overlaps with det_0, det_1 and det_5 overlaps with det_3. Since + // det_5 has the highest priority, the other rects are removed. det_4 + // overlaps with det_2, and det_4 has higher priority, so we keep it. The + // final output therefore contains 2 elements. + EXPECT_EQ(2, assoc_rects.size()); + // Outputs are in order of inputs, so det_4 is before det_5 in output vector. + + // det_4 overlaps with det_2, so new id for det_4 is 2. + EXPECT_TRUE(assoc_rects[0].has_detection_id()); + EXPECT_EQ(2, assoc_rects[0].detection_id()); + det_4.set_detection_id(2); + EXPECT_THAT(assoc_rects[0], EqualsProto(det_4)); + + // det_3 overlaps with det_0, so new id for det_3 is 0. + // det_3 overlaps with det_1, so new id for det_3 is 1. + // det_5 overlaps with det_3, so new id for det_5 is 1. + EXPECT_TRUE(assoc_rects[1].has_detection_id()); + EXPECT_EQ(1, assoc_rects[1].detection_id()); + det_5.set_detection_id(1); + EXPECT_THAT(assoc_rects[1], EqualsProto(det_5)); +} + +TEST_F(AssociationDetectionCalculatorTest, DetectionAssocTestWithPrev) { + CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"( + calculator: "AssociationDetectionCalculator" + input_stream: "PREV:input_vec_0" + input_stream: "input_vec_1" + output_stream: "output_vec" + options { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.1 + } + } + )")); + + // Input Stream 0: det_3, det_4. + auto input_vec_0 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_0->push_back(det_3); + input_vec_0->push_back(det_4); + CollectionItemId prev_input_stream_id = + runner.MutableInputs()->GetId("PREV", 0); + runner.MutableInputs() + ->Get(prev_input_stream_id) + .packets.push_back(Adopt(input_vec_0.release()).At(Timestamp(1))); + + // Input Stream 1: det_5. + auto input_vec_1 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_1->push_back(det_5); + CollectionItemId input_stream_id = runner.MutableInputs()->GetId("", 0); + runner.MutableInputs() + ->Get(input_stream_id) + .packets.push_back(Adopt(input_vec_1.release()).At(Timestamp(1))); + + MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; + const std::vector<Packet>& output = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, output.size()); + const auto& assoc_rects = + output[0].Get<std::vector<::mediapipe::Detection>>(); + + // det_5 overlaps with det_3 and doesn't overlap with det_4. Since det_4 is + // in the PREV input stream, it doesn't get copied to the output, so the final + // output contains 1 element. + EXPECT_EQ(1, assoc_rects.size()); + + // det_5 overlaps with det_3, det_3 is in PREV, so new id for det_5 is 3.
+ EXPECT_TRUE(assoc_rects[0].has_detection_id()); + EXPECT_EQ(3, assoc_rects[0].detection_id()); + det_5.set_detection_id(3); + EXPECT_THAT(assoc_rects[0], EqualsProto(det_5)); +} + +TEST_F(AssociationDetectionCalculatorTest, DetectionAssocTestReverse) { + CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"( + calculator: "AssociationDetectionCalculator" + input_stream: "input_vec_0" + input_stream: "input_vec_1" + input_stream: "input_vec_2" + output_stream: "output_vec" + options { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.1 + } + } + )")); + + // Input Stream 0: det_5. + auto input_vec_0 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_0->push_back(det_5); + runner.MutableInputs()->Index(0).packets.push_back( + Adopt(input_vec_0.release()).At(Timestamp(1))); + + // Input Stream 1: det_3, det_4. + auto input_vec_1 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_1->push_back(det_3); + input_vec_1->push_back(det_4); + runner.MutableInputs()->Index(1).packets.push_back( + Adopt(input_vec_1.release()).At(Timestamp(1))); + + // Input Stream 2: det_0, det_1, det_2. + auto input_vec_2 = absl::make_unique<std::vector<::mediapipe::Detection>>(); + input_vec_2->push_back(det_0); + input_vec_2->push_back(det_1); + input_vec_2->push_back(det_2); + runner.MutableInputs()->Index(2).packets.push_back( + Adopt(input_vec_2.release()).At(Timestamp(1))); + + MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; + const std::vector<Packet>& output = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, output.size()); + const auto& assoc_rects = + output[0].Get<std::vector<::mediapipe::Detection>>(); + + // det_3 overlaps with det_5, so det_5 is removed. det_0 overlaps with det_3, + // so det_3 is removed as det_0 has higher priority for keeping. det_2 + // overlaps with det_4 so det_4 is removed as det_2 has higher priority for + // keeping. The final output therefore contains 3 elements. + EXPECT_EQ(3, assoc_rects.size()); + // Outputs are in same order as inputs. + + // det_3 overlaps with det_5, so new id for det_3 is 5. + // det_0 overlaps with det_3, so new id for det_0 is 5. + EXPECT_TRUE(assoc_rects[0].has_detection_id()); + EXPECT_EQ(5, assoc_rects[0].detection_id()); + det_0.set_detection_id(5); + EXPECT_THAT(assoc_rects[0], EqualsProto(det_0)); + + // det_1 stays with id 1. + EXPECT_TRUE(assoc_rects[1].has_detection_id()); + EXPECT_EQ(1, assoc_rects[1].detection_id()); + EXPECT_THAT(assoc_rects[1], EqualsProto(det_1)); + + // det_2 overlaps with det_4, so new id for det_2 is 4. + EXPECT_TRUE(assoc_rects[2].has_detection_id()); + EXPECT_EQ(4, assoc_rects[2].detection_id()); + det_2.set_detection_id(4); + EXPECT_THAT(assoc_rects[2], EqualsProto(det_2)); +} + +class AssociationNormRectCalculatorTest : public ::testing::Test { + protected: + AssociationNormRectCalculatorTest() { + // 0.4 ================ + // | | | | + // 0.3 ===================== | NR2 | | + // | | | NR1 | | | NR4 | + // 0.2 | NR0 | =========== ================ + // | | | | | | + // 0.1 =====|=============== | + // | NR3 | | | + // 0.0 ================ | + // | NR5 | + // -0.1 =========== + // 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 + + // NormalizedRect nr_0. + nr_0.set_x_center(0.2); + nr_0.set_y_center(0.2); + nr_0.set_width(0.2); + nr_0.set_height(0.2); + + // NormalizedRect nr_1. + nr_1.set_x_center(0.4); + nr_1.set_y_center(0.2); + nr_1.set_width(0.2); + nr_1.set_height(0.2); + + // NormalizedRect nr_2. + nr_2.set_x_center(1.0); + nr_2.set_y_center(0.3); + nr_2.set_width(0.2); + nr_2.set_height(0.2); + + // NormalizedRect nr_3.
+ nr_3.set_x_center(0.35); + nr_3.set_y_center(0.15); + nr_3.set_width(0.3); + nr_3.set_height(0.3); + + // NormalizedRect nr_4. + nr_4.set_x_center(1.1); + nr_4.set_y_center(0.3); + nr_4.set_width(0.2); + nr_4.set_height(0.2); + + // NormalizedRect nr_5. + nr_5.set_x_center(0.45); + nr_5.set_y_center(0.05); + nr_5.set_width(0.3); + nr_5.set_height(0.3); + } + + ::mediapipe::NormalizedRect nr_0, nr_1, nr_2, nr_3, nr_4, nr_5; +}; + +TEST_F(AssociationNormRectCalculatorTest, NormRectAssocTest) { + CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"( + calculator: "AssociationNormRectCalculator" + input_stream: "input_vec_0" + input_stream: "input_vec_1" + input_stream: "input_vec_2" + output_stream: "output_vec" + options { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.1 + } + } + )")); + + // Input Stream 0: nr_0, nr_1, nr_2. + auto input_vec_0 = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec_0->push_back(nr_0); + input_vec_0->push_back(nr_1); + input_vec_0->push_back(nr_2); + runner.MutableInputs()->Index(0).packets.push_back( + Adopt(input_vec_0.release()).At(Timestamp(1))); + + // Input Stream 1: nr_3, nr_4. + auto input_vec_1 = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec_1->push_back(nr_3); + input_vec_1->push_back(nr_4); + runner.MutableInputs()->Index(1).packets.push_back( + Adopt(input_vec_1.release()).At(Timestamp(1))); + + // Input Stream 2: nr_5. + auto input_vec_2 = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec_2->push_back(nr_5); + runner.MutableInputs()->Index(2).packets.push_back( + Adopt(input_vec_2.release()).At(Timestamp(1))); + + MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; + const std::vector<Packet>& output = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, output.size()); + const auto& assoc_rects = + output[0].Get<std::vector<::mediapipe::NormalizedRect>>(); + + // nr_3 overlaps with nr_0, nr_1 and nr_5 overlaps with nr_3. Since nr_5 has + // the highest priority, the other rects are removed. + // nr_4 overlaps with nr_2, and nr_4 has higher priority, so we keep it. + // The final output therefore contains 2 elements. + EXPECT_EQ(2, assoc_rects.size()); + // Outputs are in order of inputs, so nr_4 is before nr_5 in output vector. + EXPECT_THAT(assoc_rects[0], EqualsProto(nr_4)); + EXPECT_THAT(assoc_rects[1], EqualsProto(nr_5)); +} + +TEST_F(AssociationNormRectCalculatorTest, NormRectAssocTestReverse) { + CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"( + calculator: "AssociationNormRectCalculator" + input_stream: "input_vec_0" + input_stream: "input_vec_1" + input_stream: "input_vec_2" + output_stream: "output_vec" + options { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.1 + } + } + )")); + + // Input Stream 0: nr_5. + auto input_vec_0 = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec_0->push_back(nr_5); + runner.MutableInputs()->Index(0).packets.push_back( + Adopt(input_vec_0.release()).At(Timestamp(1))); + + // Input Stream 1: nr_3, nr_4. + auto input_vec_1 = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec_1->push_back(nr_3); + input_vec_1->push_back(nr_4); + runner.MutableInputs()->Index(1).packets.push_back( + Adopt(input_vec_1.release()).At(Timestamp(1))); + + // Input Stream 2: nr_0, nr_1, nr_2.
+ auto input_vec_2 = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec_2->push_back(nr_0); + input_vec_2->push_back(nr_1); + input_vec_2->push_back(nr_2); + runner.MutableInputs()->Index(2).packets.push_back( + Adopt(input_vec_2.release()).At(Timestamp(1))); + + MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; + const std::vector<Packet>& output = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, output.size()); + const auto& assoc_rects = + output[0].Get<std::vector<::mediapipe::NormalizedRect>>(); + + // nr_3 overlaps with nr_5, so nr_5 is removed. nr_0 overlaps with nr_3, so + // nr_3 is removed as nr_0 has higher priority for keeping. nr_2 overlaps + // with nr_4 so nr_4 is removed as nr_2 has higher priority for keeping. + // The final output therefore contains 3 elements. + EXPECT_EQ(3, assoc_rects.size()); + // Outputs are in same order as inputs. + EXPECT_THAT(assoc_rects[0], EqualsProto(nr_0)); + EXPECT_THAT(assoc_rects[1], EqualsProto(nr_1)); + EXPECT_THAT(assoc_rects[2], EqualsProto(nr_2)); +} + +TEST_F(AssociationNormRectCalculatorTest, NormRectAssocSingleInputStream) { + CalculatorRunner runner(ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"( + calculator: "AssociationNormRectCalculator" + input_stream: "input_vec" + output_stream: "output_vec" + options { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.1 + } + } + )")); + + // Input Stream : nr_3, nr_5. + auto input_vec = + absl::make_unique<std::vector<::mediapipe::NormalizedRect>>(); + input_vec->push_back(nr_3); + input_vec->push_back(nr_5); + runner.MutableInputs()->Index(0).packets.push_back( + Adopt(input_vec.release()).At(Timestamp(1))); + + MP_ASSERT_OK(runner.Run()) << "Calculator execution failed."; + const std::vector<Packet>& output = runner.Outputs().Index(0).packets; + EXPECT_EQ(1, output.size()); + const auto& assoc_rects = + output[0].Get<std::vector<::mediapipe::NormalizedRect>>(); + + // nr_5 overlaps with nr_3. Since nr_5 is after nr_3 in the same input stream + // we remove nr_3 and keep nr_5. + // The final output therefore contains 1 element. + EXPECT_EQ(1, assoc_rects.size()); + EXPECT_THAT(assoc_rects[0], EqualsProto(nr_5)); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/association_detection_calculator.cc b/mediapipe/calculators/util/association_detection_calculator.cc new file mode 100644 index 000000000..125e8c4ff --- /dev/null +++ b/mediapipe/calculators/util/association_detection_calculator.cc @@ -0,0 +1,77 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/association_calculator.h" +#include "mediapipe/framework/calculator_context.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location.h" +#include "mediapipe/framework/port/rectangle.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// A subclass of AssociationCalculator<T> for Detection.
Example: +// node { +// calculator: "AssociationDetectionCalculator" +// input_stream: "PREV:input_vec_0" +// input_stream: "input_vec_1" +// input_stream: "input_vec_2" +// output_stream: "output_vec" +// options { +// [mediapipe.AssociationCalculatorOptions.ext] { +// min_similarity_threshold: 0.1 +// } +// } +// } +class AssociationDetectionCalculator + : public AssociationCalculator<::mediapipe::Detection> { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + return AssociationCalculator<::mediapipe::Detection>::GetContract(cc); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + return AssociationCalculator<::mediapipe::Detection>::Open(cc); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return AssociationCalculator<::mediapipe::Detection>::Process(cc); + } + + ::mediapipe::Status Close(CalculatorContext* cc) override { + return AssociationCalculator<::mediapipe::Detection>::Close(cc); + } + + protected: + ::mediapipe::StatusOr<Rectangle_f> GetRectangle( + const ::mediapipe::Detection& input) override { + if (!input.has_location_data()) { + return ::mediapipe::InternalError("Missing location_data in Detection"); + } + const Location location(input.location_data()); + return location.GetRelativeBBox(); + } + + std::pair<bool, int> GetId(const ::mediapipe::Detection& input) override { + return {input.has_detection_id(), input.detection_id()}; + } + + void SetId(::mediapipe::Detection* input, int id) override { + input->set_detection_id(id); + } +}; + +REGISTER_CALCULATOR(AssociationDetectionCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/association_norm_rect_calculator.cc b/mediapipe/calculators/util/association_norm_rect_calculator.cc new file mode 100644 index 000000000..4069eda60 --- /dev/null +++ b/mediapipe/calculators/util/association_norm_rect_calculator.cc @@ -0,0 +1,72 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/association_calculator.h" +#include "mediapipe/framework/calculator_context.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/rectangle.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// A subclass of AssociationCalculator<T> for NormalizedRect.
Example use case: +// node { +// calculator: "AssociationNormRectCalculator" +// input_stream: "input_vec_0" +// input_stream: "input_vec_1" +// input_stream: "input_vec_2" +// output_stream: "output_vec" +// options { +// [mediapipe.AssociationCalculatorOptions.ext] { +// min_similarity_threshold: 0.1 +// } +// } +// } +class AssociationNormRectCalculator + : public AssociationCalculator<::mediapipe::NormalizedRect> { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + return AssociationCalculator<::mediapipe::NormalizedRect>::GetContract(cc); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + return AssociationCalculator<::mediapipe::NormalizedRect>::Open(cc); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return AssociationCalculator<::mediapipe::NormalizedRect>::Process(cc); + } + + ::mediapipe::Status Close(CalculatorContext* cc) override { + return AssociationCalculator<::mediapipe::NormalizedRect>::Close(cc); + } + + protected: + ::mediapipe::StatusOr<Rectangle_f> GetRectangle( + const ::mediapipe::NormalizedRect& input) override { + if (!input.has_x_center() || !input.has_y_center() || !input.has_width() || + !input.has_height()) { + return ::mediapipe::InternalError( + "Missing dimensions in NormalizedRect."); + } + const float xmin = input.x_center() - input.width() / 2.0; + const float ymin = input.y_center() - input.height() / 2.0; + // TODO: Support rotation for rectangle. + return Rectangle_f(xmin, ymin, input.width(), input.height()); + } +}; + +REGISTER_CALCULATOR(AssociationNormRectCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator.cc b/mediapipe/calculators/util/collection_has_min_size_calculator.cc new file mode 100644 index 000000000..5ff43c605 --- /dev/null +++ b/mediapipe/calculators/util/collection_has_min_size_calculator.cc @@ -0,0 +1,26 @@ + +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/collection_has_min_size_calculator.h" + +#include "mediapipe/framework/formats/rect.pb.h" + +namespace mediapipe { + +typedef CollectionHasMinSizeCalculator<std::vector<::mediapipe::NormalizedRect>> + NormalizedRectVectorHasMinSizeCalculator; +REGISTER_CALCULATOR(NormalizedRectVectorHasMinSizeCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator.h b/mediapipe/calculators/util/collection_has_min_size_calculator.h new file mode 100644 index 000000000..d8f0a2828 --- /dev/null +++ b/mediapipe/calculators/util/collection_has_min_size_calculator.h @@ -0,0 +1,84 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_COLLECTION_HAS_MIN_SIZE_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_COLLECTION_HAS_MIN_SIZE_CALCULATOR_H_ + +#include <vector> + +#include "mediapipe/calculators/util/collection_has_min_size_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Determines if an input iterable collection has a minimum size, specified +// in CollectionHasMinSizeCalculatorOptions. Example usage: +// node { +// calculator: "IntVectorHasMinSizeCalculator" +// input_stream: "ITERABLE:input_int_vector" +// output_stream: "has_min_ints" +// options { +// [mediapipe.CollectionHasMinSizeCalculatorOptions.ext] { +// min_size: 2 +// } +// } +// } +template <typename IterableT> +class CollectionHasMinSizeCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().HasTag("ITERABLE")); + RET_CHECK_EQ(1, cc->Inputs().NumEntries()); + + RET_CHECK_EQ(1, cc->Outputs().NumEntries()); + + RET_CHECK_GE( + cc->Options<::mediapipe::CollectionHasMinSizeCalculatorOptions>() + .min_size(), + 0); + + cc->Inputs().Tag("ITERABLE").Set<IterableT>(); + cc->Outputs().Index(0).Set<bool>(); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + min_size_ = + cc->Options<::mediapipe::CollectionHasMinSizeCalculatorOptions>() + .min_size(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + const IterableT& input = cc->Inputs().Tag("ITERABLE").Get<IterableT>(); + bool has_min_size = input.size() >= min_size_; + + cc->Outputs().Index(0).AddPacket( + MakePacket<bool>(has_min_size).At(cc->InputTimestamp())); + + return ::mediapipe::OkStatus(); + } + + private: + int min_size_ = 0; +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_UTIL_COLLECTION_HAS_MIN_SIZE_CALCULATOR_H_ diff --git a/mediapipe/calculators/util/collection_has_min_size_calculator.proto b/mediapipe/calculators/util/collection_has_min_size_calculator.proto new file mode 100644 index 000000000..f482277c9 --- /dev/null +++ b/mediapipe/calculators/util/collection_has_min_size_calculator.proto @@ -0,0 +1,29 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message CollectionHasMinSizeCalculatorOptions { + extend CalculatorOptions { + optional CollectionHasMinSizeCalculatorOptions ext = 259397840; + } + + // The minimum size an input iterable collection should have for the + // calculator to output true. + optional int32 min_size = 1 [default = 0]; +} diff --git a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc index 0fb2d30b8..7e8beadf1 100644 --- a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc +++ b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc @@ -19,8 +19,8 @@ #include "mediapipe/framework/port/status.h" #include "mediapipe/util/resource_util.h" -#if defined(MEDIAPIPE_LITE) || defined(__ANDROID__) || \ - (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(MEDIAPIPE_LITE) || defined(__EMSCRIPTEN__) || \ + defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else diff --git a/mediapipe/calculators/util/detections_to_rects_calculator.cc b/mediapipe/calculators/util/detections_to_rects_calculator.cc index bb5ba6d4d..91a400ca1 100644 --- a/mediapipe/calculators/util/detections_to_rects_calculator.cc +++ b/mediapipe/calculators/util/detections_to_rects_calculator.cc @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include "mediapipe/calculators/util/detections_to_rects_calculator.h" + +#include <cmath> + +#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h" @@ -24,8 +26,6 @@ namespace mediapipe { -using mediapipe::DetectionsToRectsCalculatorOptions; - namespace { constexpr char kDetectionTag[] = "DETECTION"; @@ -36,7 +36,10 @@ constexpr char kNormRectTag[] = "NORM_RECT"; constexpr char kRectsTag[] = "RECTS"; constexpr char kNormRectsTag[] = "NORM_RECTS"; -::mediapipe::Status DetectionToRect(const Detection& detection, Rect* rect) { +} // namespace + +::mediapipe::Status DetectionsToRectsCalculator::DetectionToRect( + const Detection& detection, Rect* rect) { const LocationData location_data = detection.location_data(); RET_CHECK(location_data.format() == LocationData::BOUNDING_BOX) << "Only Detection with formats of BOUNDING_BOX can be converted to Rect"; @@ -48,8 +51,8 @@ constexpr char kNormRectsTag[] = "NORM_RECTS"; return ::mediapipe::OkStatus(); } -::mediapipe::Status DetectionToNormalizedRect(const Detection& detection, - NormalizedRect* rect) { +::mediapipe::Status DetectionsToRectsCalculator::DetectionToNormalizedRect( + const Detection& detection, NormalizedRect* rect) { const LocationData location_data = detection.location_data(); RET_CHECK(location_data.format() == LocationData::RELATIVE_BOUNDING_BOX) << "Only Detection with formats of RELATIVE_BOUNDING_BOX can be " @@ -63,79 +66,6 @@ constexpr char kNormRectsTag[] = "NORM_RECTS"; return ::mediapipe::OkStatus(); } -// Wraps around an angle in radians to within -M_PI and M_PI. -inline float NormalizeRadians(float angle) { - return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI)); -} - -} // namespace - -// A calculator that converts Detection proto to Rect proto. -// -// Detection is the format for encoding one or more detections in an image.
-// The input can be a single Detection or std::vector<Detection>. The output can -// be either a single Rect or NormalizedRect, or std::vector<Rect> or -// std::vector<NormalizedRect>. If Rect is used, the LocationData format is -// expected to be BOUNDING_BOX, and if NormalizedRect is used it is expected to -// be RELATIVE_BOUNDING_BOX. -// -// When the input is std::vector<Detection> and the output is a Rect or -// NormalizedRect, only the first detection is converted. When the input is a -// single Detection and the output is a std::vector<Rect> or -// std::vector<NormalizedRect>, the output is a vector of size 1. -// -// Inputs: -// -// One of the following: -// DETECTION: A Detection proto. -// DETECTIONS: An std::vector<Detection>. -// -// IMAGE_SIZE (optional): A std::pair<int, int> represention image width and -// height. This is required only when rotation needs to be computed (see -// calculator options). -// -// Output: -// One of the following: -// RECT: A Rect proto. -// NORM_RECT: A NormalizedRect proto. -// RECTS: An std::vector<Rect>. -// NORM_RECTS: An std::vector<NormalizedRect>. -// -// Example config: -// node { -// calculator: "DetectionsToRectsCalculator" -// input_stream: "DETECTIONS:detections" -// input_stream: "IMAGE_SIZE:image_size" -// output_stream: "NORM_RECT:rect" -// options: { -// [mediapipe.DetectionsToRectCalculatorOptions.ext] { -// rotation_vector_start_keypoint_index: 0 -// rotation_vector_end_keypoint_index: 2 -// rotation_vector_target_angle_degrees: 90 -// output_zero_rect_for_empty_detections: true -// } -// } -// } -class DetectionsToRectsCalculator : public CalculatorBase { - public: - static ::mediapipe::Status GetContract(CalculatorContract* cc); - - ::mediapipe::Status Open(CalculatorContext* cc) override; - ::mediapipe::Status Process(CalculatorContext* cc) override; - - private: - float ComputeRotation(const Detection& detection, - const std::pair<int, int> image_size); - - DetectionsToRectsCalculatorOptions options_; - int start_keypoint_index_; - int end_keypoint_index_; - float target_angle_; // In radians. - bool rotate_; - bool output_zero_rect_for_empty_detections_; -}; -REGISTER_CALCULATOR(DetectionsToRectsCalculator); - ::mediapipe::Status DetectionsToRectsCalculator::GetContract( CalculatorContract* cc) { RET_CHECK(cc->Inputs().HasTag(kDetectionTag) ^ @@ -232,6 +162,13 @@ REGISTER_CALCULATOR(DetectionsToRectsCalculator); .Tag(kNormRectTag) .AddPacket(MakePacket<NormalizedRect>().At(cc->InputTimestamp())); } + if (cc->Outputs().HasTag(kNormRectsTag)) { + auto rect_vector = absl::make_unique<std::vector<NormalizedRect>>(); + rect_vector->emplace_back(NormalizedRect()); + cc->Outputs() + .Tag(kNormRectsTag) + .Add(rect_vector.release(), cc->InputTimestamp()); + } } return ::mediapipe::OkStatus(); } @@ -312,4 +249,6 @@ float DetectionsToRectsCalculator::ComputeRotation( return NormalizeRadians(rotation); } +REGISTER_CALCULATOR(DetectionsToRectsCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/util/detections_to_rects_calculator.h b/mediapipe/calculators/util/detections_to_rects_calculator.h new file mode 100644 index 000000000..82b9f7bcc --- /dev/null +++ b/mediapipe/calculators/util/detections_to_rects_calculator.h @@ -0,0 +1,105 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef MEDIAPIPE_CALCULATORS_UTIL_DETECTIONS_TO_RECTS_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_DETECTIONS_TO_RECTS_CALCULATOR_H_ + +#include <cmath> + +#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// A calculator that converts Detection proto to Rect proto. +// +// Detection is the format for encoding one or more detections in an image. +// The input can be a single Detection or std::vector<Detection>. The output can +// be either a single Rect or NormalizedRect, or std::vector<Rect> or +// std::vector<NormalizedRect>. If Rect is used, the LocationData format is +// expected to be BOUNDING_BOX, and if NormalizedRect is used it is expected to +// be RELATIVE_BOUNDING_BOX. +// +// When the input is std::vector<Detection> and the output is a Rect or +// NormalizedRect, only the first detection is converted. When the input is a +// single Detection and the output is a std::vector<Rect> or +// std::vector<NormalizedRect>, the output is a vector of size 1. +// +// Inputs: +// +// One of the following: +// DETECTION: A Detection proto. +// DETECTIONS: An std::vector<Detection>. +// +// IMAGE_SIZE (optional): A std::pair<int, int> representing image width and +// height. This is required only when rotation needs to be computed (see +// calculator options). +// +// Output: +// One of the following: +// RECT: A Rect proto. +// NORM_RECT: A NormalizedRect proto. +// RECTS: An std::vector<Rect>. +// NORM_RECTS: An std::vector<NormalizedRect>.
+// +// Example config: +// node { +// calculator: "DetectionsToRectsCalculator" +// input_stream: "DETECTIONS:detections" +// input_stream: "IMAGE_SIZE:image_size" +// output_stream: "NORM_RECT:rect" +// options: { +// [mediapipe.DetectionsToRectsCalculatorOptions.ext] { +// rotation_vector_start_keypoint_index: 0 +// rotation_vector_end_keypoint_index: 2 +// rotation_vector_target_angle_degrees: 90 +// output_zero_rect_for_empty_detections: true +// } +// } +// } +class DetectionsToRectsCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + protected: + virtual float ComputeRotation(const ::mediapipe::Detection& detection, + const std::pair<int, int> image_size); + virtual ::mediapipe::Status DetectionToRect( + const ::mediapipe::Detection& detection, ::mediapipe::Rect* rect); + virtual ::mediapipe::Status DetectionToNormalizedRect( + const ::mediapipe::Detection& detection, + ::mediapipe::NormalizedRect* rect); + + static inline float NormalizeRadians(float angle) { + return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI)); + } + + ::mediapipe::DetectionsToRectsCalculatorOptions options_; + int start_keypoint_index_; + int end_keypoint_index_; + float target_angle_ = 0.0f; // In radians. + bool rotate_; + bool output_zero_rect_for_empty_detections_; +}; + +} // namespace mediapipe +#endif // MEDIAPIPE_CALCULATORS_UTIL_DETECTIONS_TO_RECTS_CALCULATOR_H_
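A note on the rotation math in this new header: NormalizeRadians (taken verbatim from the class above) wraps an arbitrary angle into [-M_PI, M_PI). The standalone sketch below shows how the example config's 90-degree target angle plays out; the keypoint-to-angle step is an assumption for illustration (only the `return NormalizeRadians(rotation);` tail of ComputeRotation is visible in this patch), and the keypoint values are invented.

```cpp
#include <cmath>
#include <cstdio>

// Copied from DetectionsToRectsCalculator: wraps angle into [-M_PI, M_PI).
static inline float NormalizeRadians(float angle) {
  return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI));
}

int main() {
  // target_angle_ of 90 degrees, as in the example config above.
  const float target_angle = M_PI * 90.0f / 180.0f;
  // Hypothetical normalized keypoints: start (0.5, 0.8), end (0.5, 0.3).
  const float x0 = 0.5f, y0 = 0.8f, x1 = 0.5f, y1 = 0.3f;
  // Assumed rotation formula: rotate so the start->end keypoint axis points
  // at the target angle (y grows downward in image coordinates).
  const float rotation =
      NormalizeRadians(target_angle - std::atan2(-(y1 - y0), x1 - x0));
  std::printf("rotation = %.3f rad\n", rotation);  // ~0 for an upright axis
  return 0;
}
```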
diff --git a/mediapipe/calculators/util/filter_collection_calculator.cc b/mediapipe/calculators/util/filter_collection_calculator.cc new file mode 100644 index 000000000..f86de04f0 --- /dev/null +++ b/mediapipe/calculators/util/filter_collection_calculator.cc @@ -0,0 +1,34 @@ + +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/calculators/util/filter_collection_calculator.h" + +#include <vector> + +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" + +namespace mediapipe { + +typedef FilterCollectionCalculator<std::vector<::mediapipe::NormalizedRect>> + FilterNormalizedRectCollectionCalculator; +REGISTER_CALCULATOR(FilterNormalizedRectCollectionCalculator); + +typedef FilterCollectionCalculator< + std::vector<std::vector<::mediapipe::NormalizedLandmark>>> + FilterLandmarksCollectionCalculator; +REGISTER_CALCULATOR(FilterLandmarksCollectionCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/filter_collection_calculator.h b/mediapipe/calculators/util/filter_collection_calculator.h new file mode 100644 index 000000000..5f08dd982 --- /dev/null +++ b/mediapipe/calculators/util/filter_collection_calculator.h @@ -0,0 +1,109 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_FILTER_COLLECTION_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_FILTER_COLLECTION_CALCULATOR_H_ + +#include <vector> + +#include "absl/strings/str_cat.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// A calculator that gates elements of an input collection based on +// corresponding boolean values of the "CONDITION" vector. If there is no input +// collection or "CONDITION" vector, the calculator forwards timestamp bounds +// for downstream calculators. If the "CONDITION" vector has false values for +// all elements of the input collection, the calculator outputs a packet +// containing an empty collection. +// Example usage: +// node { +// calculator: "FilterCollectionCalculator" +// input_stream: "ITERABLE:input_collection" +// input_stream: "CONDITION:condition_vector" +// output_stream: "ITERABLE:output_collection" +// } +// This calculator is able to handle collections of copyable types T. +template <typename IterableT> +class FilterCollectionCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().HasTag("ITERABLE")); + RET_CHECK(cc->Inputs().HasTag("CONDITION")); + RET_CHECK(cc->Outputs().HasTag("ITERABLE")); + + cc->Inputs().Tag("ITERABLE").Set<IterableT>(); + cc->Inputs().Tag("CONDITION").Set<std::vector<bool>>(); + + cc->Outputs().Tag("ITERABLE").Set<IterableT>(); + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (cc->Inputs().Tag("ITERABLE").IsEmpty()) { + return ::mediapipe::OkStatus(); + } + if (cc->Inputs().Tag("CONDITION").IsEmpty()) { + return ::mediapipe::OkStatus(); + } + + const std::vector<bool>& filter_by = + cc->Inputs().Tag("CONDITION").Get<std::vector<bool>>(); + + return FilterCollection<IterableT>( + std::is_copy_constructible<typename IterableT::value_type>(), cc, + filter_by); + } + + template <typename IterableU> + ::mediapipe::Status FilterCollection(std::true_type, CalculatorContext* cc, + const std::vector<bool>& filter_by) { + const IterableU& input = cc->Inputs().Tag("ITERABLE").Get<IterableU>(); + if (input.size() != filter_by.size()) { + return ::mediapipe::InternalError(absl::StrCat( + "Input vector size: ", input.size(), + " doesn't match condition vector size: ", filter_by.size())); + } + + auto output = absl::make_unique<IterableU>(); + for (int i = 0; i < input.size(); ++i) { + if (filter_by[i]) { + output->push_back(input[i]); + } + } + cc->Outputs().Tag("ITERABLE").Add(output.release(), cc->InputTimestamp()); + return ::mediapipe::OkStatus(); + } + + template <typename IterableU> + ::mediapipe::Status FilterCollection(std::false_type, CalculatorContext* cc, + const std::vector<bool>& filter_by) { + return ::mediapipe::InternalError( + "Cannot copy input collection to filter it."); + } +}; + +} // namespace mediapipe + +#endif // MEDIAPIPE_CALCULATORS_UTIL_FILTER_COLLECTION_CALCULATOR_H_
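To make the CONDITION-gating contract of FilterCollectionCalculator concrete, here is a minimal standalone sketch (plain C++, no MediaPipe dependencies; all names and values are illustrative) of the same keep-where-true semantics:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Mirrors FilterCollectionCalculator::FilterCollection: keep input[i]
// exactly when filter_by[i] is true; the two sizes must match.
template <typename IterableT>
IterableT FilterByCondition(const IterableT& input,
                            const std::vector<bool>& filter_by) {
  assert(input.size() == filter_by.size());
  IterableT output;
  for (size_t i = 0; i < input.size(); ++i) {
    if (filter_by[i]) output.push_back(input[i]);
  }
  return output;
}

int main() {
  const std::vector<std::string> rects = {"hand_0", "hand_1", "hand_2"};
  const std::vector<bool> condition = {true, false, true};
  const auto kept = FilterByCondition(rects, condition);
  assert(kept.size() == 2);  // "hand_0" and "hand_2" survive
  return 0;
}
```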
diff --git a/mediapipe/calculators/util/labels_to_render_data_calculator.cc b/mediapipe/calculators/util/labels_to_render_data_calculator.cc new file mode 100644 index 000000000..a7f517291 --- /dev/null +++ b/mediapipe/calculators/util/labels_to_render_data_calculator.cc @@ -0,0 +1,181 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cmath> + +#include <algorithm> +#include <memory> +#include <string> +#include <vector> + +#include "absl/strings/str_cat.h" +#include "mediapipe/calculators/util/labels_to_render_data_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/classification.pb.h" +#include "mediapipe/framework/formats/video_stream_header.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/statusor.h" +#include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" + +namespace mediapipe { + +constexpr float kFontHeightScale = 1.25f; + +// A calculator that takes in pairs of labels and scores, or classifications, +// and generates render data. Either both "LABELS" and "SCORES" or +// "CLASSIFICATIONS" must be present. +// +// Usage example: +// node { +// calculator: "LabelsToRenderDataCalculator" +// input_stream: "LABELS:labels" +// input_stream: "SCORES:scores" +// output_stream: "RENDER_DATA:render_data" +// options { +// [LabelsToRenderDataCalculatorOptions.ext] { +// color { r: 255 g: 0 b: 0 } +// color { r: 0 g: 255 b: 0 } +// color { r: 0 g: 0 b: 255 } +// thickness: 2.0 +// font_height_px: 20 +// max_num_labels: 3 +// font_face: 1 +// location: TOP_LEFT +// } +// } +// } +class LabelsToRenderDataCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + LabelsToRenderDataCalculatorOptions options_; + int num_colors_ = 0; + int video_width_ = 0; + int video_height_ = 0; + int label_height_px_ = 0; + int label_left_px_ = 0; +}; +REGISTER_CALCULATOR(LabelsToRenderDataCalculator); + +::mediapipe::Status LabelsToRenderDataCalculator::GetContract( + CalculatorContract* cc) { + if (cc->Inputs().HasTag("CLASSIFICATIONS")) { + cc->Inputs().Tag("CLASSIFICATIONS").Set<ClassificationList>(); + } else { + RET_CHECK(cc->Inputs().HasTag("LABELS")) + << "Must provide input stream \"LABELS\""; + cc->Inputs().Tag("LABELS").Set<std::vector<std::string>>(); + if (cc->Inputs().HasTag("SCORES")) { + cc->Inputs().Tag("SCORES").Set<std::vector<float>>(); + } + } + if (cc->Inputs().HasTag("VIDEO_PRESTREAM")) { + cc->Inputs().Tag("VIDEO_PRESTREAM").Set<VideoHeader>(); + } + cc->Outputs().Tag("RENDER_DATA").Set<RenderData>(); + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status LabelsToRenderDataCalculator::Open(CalculatorContext* cc) { + options_ = cc->Options<LabelsToRenderDataCalculatorOptions>(); + num_colors_ = options_.color_size(); + label_height_px_ = std::ceil(options_.font_height_px() * kFontHeightScale); + return ::mediapipe::OkStatus(); +} +
+::mediapipe::Status LabelsToRenderDataCalculator::Process( + CalculatorContext* cc) { + if (cc->Inputs().HasTag("VIDEO_PRESTREAM") && + cc->InputTimestamp() == Timestamp::PreStream()) { + const VideoHeader& video_header = + cc->Inputs().Tag("VIDEO_PRESTREAM").Get<VideoHeader>(); + video_width_ = video_header.width; + video_height_ = video_header.height; + return ::mediapipe::OkStatus(); + } else { + CHECK_EQ(options_.location(), LabelsToRenderDataCalculatorOptions::TOP_LEFT) + << "Only TOP_LEFT is supported without VIDEO_PRESTREAM."; + } + + std::vector<std::string> labels; + std::vector<float> scores; + if (cc->Inputs().HasTag("CLASSIFICATIONS")) { + const ClassificationList& classifications = + cc->Inputs().Tag("CLASSIFICATIONS").Get<ClassificationList>(); + labels.resize(classifications.classification_size()); + scores.resize(classifications.classification_size()); + for (int i = 0; i < classifications.classification_size(); ++i) { + labels[i] = classifications.classification(i).label(); + scores[i] = classifications.classification(i).score(); + } + } else { + const std::vector<std::string>& label_vector = + cc->Inputs().Tag("LABELS").Get<std::vector<std::string>>(); + std::vector<float> score_vector; + if (cc->Inputs().HasTag("SCORES")) { + score_vector = cc->Inputs().Tag("SCORES").Get<std::vector<float>>(); + CHECK_EQ(label_vector.size(), score_vector.size()); + } + labels.resize(label_vector.size()); + scores.resize(label_vector.size()); + for (int i = 0; i < label_vector.size(); ++i) { + labels[i] = label_vector[i]; + if (!score_vector.empty()) scores[i] = score_vector[i]; + } + } + + RenderData render_data; + int num_label = std::min((int)labels.size(), options_.max_num_labels()); + int label_baseline_px = options_.vertical_offset_px(); + if (options_.location() == LabelsToRenderDataCalculatorOptions::TOP_LEFT) { + label_baseline_px += label_height_px_; + } else if (options_.location() == + LabelsToRenderDataCalculatorOptions::BOTTOM_LEFT) { + label_baseline_px += video_height_ - label_height_px_ * (num_label - 1); + } + label_left_px_ = options_.horizontal_offset_px(); + for (int i = 0; i < num_label; ++i) { + auto* label_annotation = render_data.add_render_annotations(); + label_annotation->set_thickness(options_.thickness()); + if (num_colors_ > 0) { + *(label_annotation->mutable_color()) = options_.color(i % num_colors_); + } else { + label_annotation->mutable_color()->set_r(255); + label_annotation->mutable_color()->set_g(0); + label_annotation->mutable_color()->set_b(0); + } + + auto* text = label_annotation->mutable_text(); + std::string display_text = labels[i]; + if (cc->Inputs().HasTag("SCORES")) { + absl::StrAppend(&display_text, ":", scores[i]); + } + text->set_display_text(display_text); + text->set_font_height(options_.font_height_px()); + text->set_left(label_left_px_); + text->set_baseline(label_baseline_px + i * label_height_px_); + text->set_font_face(options_.font_face()); + } + cc->Outputs() + .Tag("RENDER_DATA") + .AddPacket(MakePacket<RenderData>(render_data).At(cc->InputTimestamp())); + + return ::mediapipe::OkStatus(); +} +} // namespace mediapipe
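The baseline arithmetic in Process above is easy to check by hand. A standalone sketch, assuming font_height_px = 20 and three labels with TOP_LEFT placement (both values invented for illustration):

```cpp
#include <cmath>
#include <cstdio>

int main() {
  // Mirrors LabelsToRenderDataCalculator's layout math; values are assumed.
  const float kFontHeightScale = 1.25f;
  const int font_height_px = 20;
  const int vertical_offset_px = 0;
  const int num_label = 3;
  const int label_height_px =
      static_cast<int>(std::ceil(font_height_px * kFontHeightScale));  // 25

  // TOP_LEFT: the first baseline sits one label height below the offset,
  // and each subsequent label is stacked another label height down.
  const int first_baseline = vertical_offset_px + label_height_px;
  for (int i = 0; i < num_label; ++i) {
    std::printf("label %d baseline: %d px\n", i,
                first_baseline + i * label_height_px);  // 25, 50, 75
  }
  return 0;
}
```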
diff --git a/mediapipe/calculators/util/labels_to_render_data_calculator.proto b/mediapipe/calculators/util/labels_to_render_data_calculator.proto new file mode 100644 index 000000000..cd98934a5 --- /dev/null +++ b/mediapipe/calculators/util/labels_to_render_data_calculator.proto @@ -0,0 +1,62 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/util/color.proto"; + +message LabelsToRenderDataCalculatorOptions { + extend CalculatorOptions { + optional LabelsToRenderDataCalculatorOptions ext = 271660364; + } + + // Colors for drawing the label(s). + repeated Color color = 1; + + // Thickness for drawing the label(s). + optional double thickness = 2 [default = 2]; + + // The font height in absolute pixels. + optional int32 font_height_px = 3 [default = 50]; + + // The offset of the starting text in the horizontal direction in absolute + // pixels. + optional int32 horizontal_offset_px = 7 [default = 0]; + // The offset of the starting text in the vertical direction in absolute + // pixels. + optional int32 vertical_offset_px = 8 [default = 0]; + + // The maximum number of labels to display. + optional int32 max_num_labels = 4 [default = 1]; + + // Specifies the font for the text. Font must be one of the following from + // OpenCV: + // cv::FONT_HERSHEY_SIMPLEX (0) + // cv::FONT_HERSHEY_PLAIN (1) + // cv::FONT_HERSHEY_DUPLEX (2) + // cv::FONT_HERSHEY_COMPLEX (3) + // cv::FONT_HERSHEY_TRIPLEX (4) + // cv::FONT_HERSHEY_COMPLEX_SMALL (5) + // cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6) + // cv::FONT_HERSHEY_SCRIPT_COMPLEX (7) + optional int32 font_face = 5 [default = 0]; + + // Label location. + enum Location { + TOP_LEFT = 0; + BOTTOM_LEFT = 1; + } + optional Location location = 6 [default = TOP_LEFT]; +} diff --git a/mediapipe/calculators/util/landmarks_to_floats_calculator.cc b/mediapipe/calculators/util/landmarks_to_floats_calculator.cc new file mode 100644 index 000000000..09ab4b575 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_to_floats_calculator.cc @@ -0,0 +1,138 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include <memory> +#include <vector> + +#include "Eigen/Core" +#include "mediapipe/calculators/util/landmarks_to_floats_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/port/ret_check.h" + +namespace mediapipe { + +namespace { + +constexpr char kLandmarksTag[] = "NORM_LANDMARKS"; +constexpr char kFloatsTag[] = "FLOATS"; +constexpr char kMatrixTag[] = "MATRIX"; + +} // namespace + +// Converts a vector of landmarks to a vector of floats or a matrix. +// Input: +// NORM_LANDMARKS: An std::vector<NormalizedLandmark>. +// +// Output: +// FLOATS (optional): A vector of floats from flattened landmarks. +// MATRIX (optional): A matrix of floats of the landmarks. +// +// Usage example: +// node { +// calculator: "LandmarksToFloatsCalculator" +// input_stream: "NORM_LANDMARKS:landmarks" +// output_stream: "MATRIX:landmark_matrix" +// } +class LandmarksToFloatsCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Tag(kLandmarksTag).Set<std::vector<NormalizedLandmark>>(); + RET_CHECK(cc->Outputs().HasTag(kFloatsTag) || + cc->Outputs().HasTag(kMatrixTag)); + if (cc->Outputs().HasTag(kFloatsTag)) { + cc->Outputs().Tag(kFloatsTag).Set<std::vector<float>>(); + } + if (cc->Outputs().HasTag(kMatrixTag)) { + cc->Outputs().Tag(kMatrixTag).Set<Matrix>(); + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + const auto& options = + cc->Options<::mediapipe::LandmarksToFloatsCalculatorOptions>(); + num_dimensions_ = options.num_dimensions(); + // Currently the number of dimensions must be within [1, 3]. + RET_CHECK_GE(num_dimensions_, 1); + RET_CHECK_LE(num_dimensions_, 3); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + // Only process if there are input landmarks. + if (cc->Inputs().Tag(kLandmarksTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } + + const auto& input_landmarks = + cc->Inputs().Tag(kLandmarksTag).Get<std::vector<NormalizedLandmark>>(); + + if (cc->Outputs().HasTag(kFloatsTag)) { + auto output_floats = absl::make_unique<std::vector<float>>(); + for (const auto& landmark : input_landmarks) { + output_floats->emplace_back(landmark.x()); + if (num_dimensions_ > 1) { + output_floats->emplace_back(landmark.y()); + } + if (num_dimensions_ > 2) { + output_floats->emplace_back(landmark.z()); + } + } + + cc->Outputs() + .Tag(kFloatsTag) + .Add(output_floats.release(), cc->InputTimestamp()); + } else { + auto output_matrix = absl::make_unique<Matrix>(); + output_matrix->setZero(num_dimensions_, input_landmarks.size()); + for (int i = 0; i < input_landmarks.size(); ++i) { + (*output_matrix)(0, i) = input_landmarks[i].x(); + if (num_dimensions_ > 1) { + (*output_matrix)(1, i) = input_landmarks[i].y(); + } + if (num_dimensions_ > 2) { + (*output_matrix)(2, i) = input_landmarks[i].z(); + } + } + cc->Outputs() + .Tag(kMatrixTag) + .Add(output_matrix.release(), cc->InputTimestamp()); + } + return ::mediapipe::OkStatus(); + } + + private: + int num_dimensions_ = 0; +}; +REGISTER_CALCULATOR(LandmarksToFloatsCalculator); + +} // namespace mediapipe
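For the FLOATS output path above, the flattening order is per-landmark interleaving. A self-contained sketch with made-up coordinates, assuming num_dimensions = 2 (the default in the proto that follows):

```cpp
#include <cassert>
#include <vector>

struct Landmark { float x, y, z; };  // stand-in for NormalizedLandmark

int main() {
  const std::vector<Landmark> landmarks = {
      {0.1f, 0.2f, 0.0f}, {0.3f, 0.4f, 0.0f}, {0.5f, 0.6f, 0.0f}};
  const int num_dimensions = 2;  // x and y only

  // Mirrors the FLOATS branch: output is [x0, y0, x1, y1, x2, y2].
  std::vector<float> floats;
  for (const auto& lm : landmarks) {
    floats.push_back(lm.x);
    if (num_dimensions > 1) floats.push_back(lm.y);
    if (num_dimensions > 2) floats.push_back(lm.z);
  }
  assert(floats.size() == landmarks.size() * num_dimensions);
  assert(floats[2] == 0.3f && floats[3] == 0.4f);  // second landmark's x, y
  return 0;
}
```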
diff --git a/mediapipe/calculators/util/landmarks_to_floats_calculator.proto b/mediapipe/calculators/util/landmarks_to_floats_calculator.proto new file mode 100644 index 000000000..310251e75 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_to_floats_calculator.proto @@ -0,0 +1,28 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message LandmarksToFloatsCalculatorOptions { + extend CalculatorOptions { + optional LandmarksToFloatsCalculatorOptions ext = 274035660; + } + + // Number of dimensions to convert. Must be within [1, 3]. + optional int32 num_dimensions = 1 [default = 2]; +} diff --git a/mediapipe/calculators/util/local_file_contents_calculator.cc b/mediapipe/calculators/util/local_file_contents_calculator.cc new file mode 100644 index 000000000..9f8d17724 --- /dev/null +++ b/mediapipe/calculators/util/local_file_contents_calculator.cc @@ -0,0 +1,57 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string> +#include <utility> + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { +// The calculator takes the path to the local file as an input side packet and +// outputs the contents of that file.
+// +// Example config: +// node { +// calculator: "LocalFileContentsCalculator" +// input_side_packet: "FILE_PATH:file_path" +// output_side_packet: "CONTENTS:contents" +// } +class LocalFileContentsCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets().Tag("FILE_PATH").Set<std::string>(); + cc->OutputSidePackets().Tag("CONTENTS").Set<std::string>(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + std::string contents; + MP_RETURN_IF_ERROR(mediapipe::file::GetContents( + cc->InputSidePackets().Tag("FILE_PATH").Get<std::string>(), &contents)); + cc->OutputSidePackets() + .Tag("CONTENTS") + .Set(MakePacket<std::string>(std::move(contents))); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return ::mediapipe::OkStatus(); + } +}; + +REGISTER_CALCULATOR(LocalFileContentsCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/rect_transformation_calculator.cc b/mediapipe/calculators/util/rect_transformation_calculator.cc index 98a7da301..5b42a3499 100644 --- a/mediapipe/calculators/util/rect_transformation_calculator.cc +++ b/mediapipe/calculators/util/rect_transformation_calculator.cc @@ -23,7 +23,9 @@ namespace mediapipe { namespace { constexpr char kNormRectTag[] = "NORM_RECT"; +constexpr char kNormRectsTag[] = "NORM_RECTS"; constexpr char kRectTag[] = "RECT"; +constexpr char kRectsTag[] = "RECTS"; constexpr char kImageSizeTag[] = "IMAGE_SIZE"; // Wraps around an angle in radians to within -M_PI and M_PI. @@ -72,17 +74,31 @@ REGISTER_CALCULATOR(RectTransformationCalculator); ::mediapipe::Status RectTransformationCalculator::GetContract( CalculatorContract* cc) { - RET_CHECK(cc->Inputs().HasTag(kNormRectTag) ^ cc->Inputs().HasTag(kRectTag)); + RET_CHECK_EQ((cc->Inputs().HasTag(kNormRectTag) ? 1 : 0) + + (cc->Inputs().HasTag(kNormRectsTag) ? 1 : 0) + + (cc->Inputs().HasTag(kRectTag) ? 1 : 0) + + (cc->Inputs().HasTag(kRectsTag) ?
1 : 0), + 1); if (cc->Inputs().HasTag(kRectTag)) { cc->Inputs().Tag(kRectTag).Set<Rect>(); cc->Outputs().Index(0).Set<Rect>(); } + if (cc->Inputs().HasTag(kRectsTag)) { + cc->Inputs().Tag(kRectsTag).Set<std::vector<Rect>>(); + cc->Outputs().Index(0).Set<std::vector<Rect>>(); + } if (cc->Inputs().HasTag(kNormRectTag)) { RET_CHECK(cc->Inputs().HasTag(kImageSizeTag)); cc->Inputs().Tag(kNormRectTag).Set<NormalizedRect>(); cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>(); cc->Outputs().Index(0).Set<NormalizedRect>(); } + if (cc->Inputs().HasTag(kNormRectsTag)) { + RET_CHECK(cc->Inputs().HasTag(kImageSizeTag)); + cc->Inputs().Tag(kNormRectsTag).Set<std::vector<NormalizedRect>>(); + cc->Inputs().Tag(kImageSizeTag).Set<std::pair<int, int>>(); + cc->Outputs().Index(0).Set<std::vector<NormalizedRect>>(); + } return ::mediapipe::OkStatus(); } @@ -105,7 +121,17 @@ REGISTER_CALCULATOR(RectTransformationCalculator); cc->Outputs().Index(0).AddPacket( MakePacket<Rect>(rect).At(cc->InputTimestamp())); } - + if (cc->Inputs().HasTag(kRectsTag) && + !cc->Inputs().Tag(kRectsTag).IsEmpty()) { + auto rects = cc->Inputs().Tag(kRectsTag).Get<std::vector<Rect>>(); + auto output_rects = absl::make_unique<std::vector<Rect>>(rects.size()); + for (int i = 0; i < rects.size(); ++i) { + output_rects->at(i) = rects[i]; + auto it = output_rects->begin() + i; + TransformRect(&(*it)); + } + cc->Outputs().Index(0).Add(output_rects.release(), cc->InputTimestamp()); + } if (cc->Inputs().HasTag(kNormRectTag) && !cc->Inputs().Tag(kNormRectTag).IsEmpty()) { auto rect = cc->Inputs().Tag(kNormRectTag).Get<NormalizedRect>(); @@ -115,6 +141,21 @@ REGISTER_CALCULATOR(RectTransformationCalculator); cc->Outputs().Index(0).AddPacket( MakePacket<NormalizedRect>(rect).At(cc->InputTimestamp())); } + if (cc->Inputs().HasTag(kNormRectsTag) && + !cc->Inputs().Tag(kNormRectsTag).IsEmpty()) { + auto rects = + cc->Inputs().Tag(kNormRectsTag).Get<std::vector<NormalizedRect>>(); + const auto& image_size = + cc->Inputs().Tag(kImageSizeTag).Get<std::pair<int, int>>(); + auto output_rects = + absl::make_unique<std::vector<NormalizedRect>>(rects.size()); + for (int i = 0; i < rects.size(); ++i) { + output_rects->at(i) = rects[i]; + auto it = output_rects->begin() + i; + TransformNormalizedRect(&(*it), image_size.first, image_size.second); + } + cc->Outputs().Index(0).Add(output_rects.release(), cc->InputTimestamp()); + } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/util/top_k_scores_calculator.cc b/mediapipe/calculators/util/top_k_scores_calculator.cc index 18f2eec62..bc8d30f87 100644 --- a/mediapipe/calculators/util/top_k_scores_calculator.cc +++ b/mediapipe/calculators/util/top_k_scores_calculator.cc @@ -23,13 +23,14 @@ #include "mediapipe/calculators/util/top_k_scores_calculator.pb.h" #include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/classification.pb.h" #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" #include "mediapipe/framework/port/statusor.h" #include "mediapipe/util/resource_util.h" -#if defined(MEDIAPIPE_LITE) || defined(__ANDROID__) || \ - (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(MEDIAPIPE_LITE) || defined(__EMSCRIPTEN__) || \ + defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else @@ -37,8 +38,10 @@ #endif namespace mediapipe { + // A calculator that takes a vector of scores and returns the indexes, scores, -// labels of the top k elements. +// labels of the top k elements, classification protos, and summary std::string +// (in CSV format).
// // Usage example: // node { // calculator: "TopKScoresCalculator" // input_stream: "SCORES:score_vector" // output_stream: "TOP_K_INDEXES:top_k_indexes" // output_stream: "TOP_K_SCORES:top_k_scores" // output_stream: "TOP_K_LABELS:top_k_labels" +// output_stream: "CLASSIFICATIONS:top_k_classes" +// output_stream: "SUMMARY:summary" // options: { // [mediapipe.TopKScoresCalculatorOptions.ext] { // top_k: 5 @@ -69,6 +74,7 @@ class TopKScoresCalculator : public CalculatorBase { int top_k_ = -1; float threshold_ = 0.0; std::unordered_map<int, std::string> label_map_; + bool label_map_loaded_ = false; }; REGISTER_CALCULATOR(TopKScoresCalculator); @@ -84,6 +90,12 @@ REGISTER_CALCULATOR(TopKScoresCalculator); if (cc->Outputs().HasTag("TOP_K_LABELS")) { cc->Outputs().Tag("TOP_K_LABELS").Set<std::vector<std::string>>(); } + if (cc->Outputs().HasTag("CLASSIFICATIONS")) { + cc->Outputs().Tag("CLASSIFICATIONS").Set<ClassificationList>(); + } + if (cc->Outputs().HasTag("SUMMARY")) { + cc->Outputs().Tag("SUMMARY").Set<std::string>(); + } return ::mediapipe::OkStatus(); } @@ -149,7 +161,7 @@ REGISTER_CALCULATOR(TopKScoresCalculator); reverse(top_k_indexes.begin(), top_k_indexes.end()); reverse(top_k_scores.begin(), top_k_scores.end()); - if (cc->Outputs().HasTag("TOP_K_LABELS")) { + if (label_map_loaded_) { for (int index : top_k_indexes) { top_k_labels.push_back(label_map_[index]); } @@ -172,6 +184,35 @@ REGISTER_CALCULATOR(TopKScoresCalculator); .AddPacket(MakePacket<std::vector<std::string>>(top_k_labels) .At(cc->InputTimestamp())); } + + if (cc->Outputs().HasTag("SUMMARY")) { + std::vector<std::string> results; + for (int index = 0; index < top_k_indexes.size(); ++index) { + if (label_map_loaded_) { + results.push_back( + absl::StrCat(top_k_labels[index], ":", top_k_scores[index])); + } else { + results.push_back( + absl::StrCat(top_k_indexes[index], ":", top_k_scores[index])); + } + } + cc->Outputs().Tag("SUMMARY").AddPacket( + MakePacket<std::string>(absl::StrJoin(results, ",")) + .At(cc->InputTimestamp())); + } + + if (cc->Outputs().HasTag("CLASSIFICATIONS")) { + auto classification_list = absl::make_unique<ClassificationList>(); + for (int index = 0; index < top_k_indexes.size(); ++index) { + Classification* classification = + classification_list->add_classification(); + classification->set_index(top_k_indexes[index]); + classification->set_score(top_k_scores[index]); + if (label_map_loaded_) { + classification->set_label(top_k_labels[index]); + } + } + cc->Outputs() + .Tag("CLASSIFICATIONS") + .Add(classification_list.release(), cc->InputTimestamp()); + } return ::mediapipe::OkStatus(); } @@ -188,6 +229,7 @@ REGISTER_CALCULATOR(TopKScoresCalculator); while (std::getline(stream, line)) { label_map_[i++] = line; } + label_map_loaded_ = true; return ::mediapipe::OkStatus(); }
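For reference, a standalone sketch of the top-k selection and the comma-separated "label:score" SUMMARY format described above; it uses a simple partial sort rather than the calculator's internal selection logic, and all scores and labels are invented:

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  // Invented scores for indexes 0..4.
  const std::vector<float> scores = {0.1f, 0.7f, 0.05f, 0.9f, 0.3f};
  const std::vector<std::string> labels = {"cat", "dog", "car", "tree", "sky"};
  const int top_k = 2;

  // Order indexes by descending score and keep the first top_k.
  std::vector<int> idx(scores.size());
  for (int i = 0; i < static_cast<int>(idx.size()); ++i) idx[i] = i;
  std::partial_sort(idx.begin(), idx.begin() + top_k, idx.end(),
                    [&](int a, int b) { return scores[a] > scores[b]; });
  idx.resize(top_k);

  // SUMMARY-style CSV: "label:score,label:score".
  std::string summary;
  for (int i = 0; i < top_k; ++i) {
    if (i > 0) summary += ",";
    summary += labels[idx[i]] + ":" + std::to_string(scores[idx[i]]);
  }
  std::printf("%s\n", summary.c_str());  // tree:0.900000,dog:0.700000
  return 0;
}
```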
diff --git a/mediapipe/calculators/video/BUILD b/mediapipe/calculators/video/BUILD index 63781a8b4..d0f93a104 100644 --- a/mediapipe/calculators/video/BUILD +++ b/mediapipe/calculators/video/BUILD @@ -13,12 +13,16 @@ # limitations under the License. # +load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "flow_to_image_calculator_proto", srcs = ["flow_to_image_calculator.proto"], @@ -52,9 +56,7 @@ mediapipe_cc_proto_library( cc_library( name = "flow_to_image_calculator", srcs = ["flow_to_image_calculator.cc"], - visibility = [ - "//visibility:public", - ], + visibility = ["//visibility:public"], deps = [ "//mediapipe/calculators/video:flow_to_image_calculator_cc_proto", "//mediapipe/calculators/video/tool:flow_quantizer_model", @@ -129,10 +131,20 @@ cc_library( alwayslink = 1, ) +filegroup( + name = "test_videos", + srcs = [ + "testdata/format_FLV_H264_AAC.video", + "testdata/format_MKV_VP8_VORBIS.video", + "testdata/format_MP4_AVC720P_AAC.video", + ], + visibility = ["//visibility:public"], +) + cc_test( name = "opencv_video_decoder_calculator_test", srcs = ["opencv_video_decoder_calculator_test.cc"], - data = ["//mediapipe/calculators/video/testdata:test_videos"], + data = [":test_videos"], deps = [ ":opencv_video_decoder_calculator", "//mediapipe/framework:calculator_runner", @@ -151,7 +163,7 @@ cc_test( cc_test( name = "opencv_video_encoder_calculator_test", srcs = ["opencv_video_encoder_calculator_test.cc"], - data = ["//mediapipe/calculators/video/testdata:test_videos"], + data = [":test_videos"], deps = [ ":opencv_video_decoder_calculator", ":opencv_video_encoder_calculator", @@ -175,7 +187,6 @@ cc_test( cc_test( name = "tvl1_optical_flow_calculator_test", srcs = ["tvl1_optical_flow_calculator_test.cc"], - data = ["//mediapipe/calculators/image/testdata:test_images"], deps = [ ":tvl1_optical_flow_calculator", "//mediapipe/framework:calculator_framework", diff --git a/mediapipe/calculators/video/opencv_video_decoder_calculator.cc b/mediapipe/calculators/video/opencv_video_decoder_calculator.cc index b333e3c7e..49c7f0556 100644 --- a/mediapipe/calculators/video/opencv_video_decoder_calculator.cc +++ b/mediapipe/calculators/video/opencv_video_decoder_calculator.cc @@ -123,6 +123,7 @@ class OpenCvVideoDecoderCalculator : public CalculatorBase { cc->Outputs() .Tag("VIDEO_PRESTREAM") .Add(header.release(), Timestamp::PreStream()); + cc->Outputs().Tag("VIDEO_PRESTREAM").Close(); } // Rewind to the very first frame. cap_->set(cv::CAP_PROP_POS_AVI_RATIO, 0); diff --git a/mediapipe/calculators/video/tool/BUILD b/mediapipe/calculators/video/tool/BUILD index 3d561c3a7..96bc35669 100644 --- a/mediapipe/calculators/video/tool/BUILD +++ b/mediapipe/calculators/video/tool/BUILD @@ -13,24 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License.
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") + licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//mediapipe/calculators/video:__subpackages__"]) exports_files(["LICENSE"]) -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") - proto_library( name = "flow_quantizer_model_proto", srcs = ["flow_quantizer_model.proto"], - visibility = ["//mediapipe:__subpackages__"], + visibility = ["//visibility:public"], ) mediapipe_cc_proto_library( name = "flow_quantizer_model_cc_proto", srcs = ["flow_quantizer_model.proto"], - visibility = ["//mediapipe:__subpackages__"], + visibility = ["//visibility:public"], deps = [":flow_quantizer_model_proto"], ) diff --git a/mediapipe/docs/android_archive_library.md b/mediapipe/docs/android_archive_library.md new file mode 100644 index 000000000..5351e58f9 --- /dev/null +++ b/mediapipe/docs/android_archive_library.md @@ -0,0 +1,131 @@ +## MediaPipe Android Archive Library + +***Experimental Only*** + +The MediaPipe Android archive library is a convenient way to use MediaPipe with +Android Studio and Gradle. MediaPipe doesn't publish a general AAR that can be +used by all projects. Instead, developers need to add a mediapipe_aar() target +to generate a custom AAR file for their own projects. This is necessary in order +to include specific resources such as MediaPipe calculators needed for each +project. + +### Steps to build a MediaPipe AAR + +1. Create a mediapipe_aar() target. + + In the MediaPipe directory, create a new mediapipe_aar() target in a BUILD + file. You need to figure out what calculators are used in the graph and + provide the calculator dependencies to the mediapipe_aar(). For example, to + build an AAR for [face detection gpu](./face_detection_mobile_gpu.md), you + can put the following code into + mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/BUILD. + + ``` + load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar") + + mediapipe_aar( + name = "mp_face_detection_aar", + calculators = ["//mediapipe/graphs/face_detection:mobile_calculators"], + ) + ``` + +2. Run the Bazel build command to generate the AAR. + + ```bash + bazel build -c opt --fat_apk_cpu=arm64-v8a,armeabi-v7a //path/to/the/aar/build/file:aar_name + ``` + + For the face detection AAR target we made in the step 1, run: + + ```bash + bazel build -c opt --fat_apk_cpu=arm64-v8a,armeabi-v7a \ + //mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mp_face_detection_aar + + # It should print: + # Target //mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mp_face_detection_aar up-to-date: + # bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/mp_face_detection_aar.aar + ``` + +3. (Optional) Save the AAR to your preferred location. + + ```bash + cp bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/mp_face_detection_aar.aar + /absolute/path/to/your/preferred/location + ``` + +### Steps to use a MediaPipe AAR in Android Studio with Gradle + +1. Start Android Studio and go to your project. + +2. Copy the AAR into app/libs. + + ```bash + cp bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/mp_face_detection_aar.aar + /path/to/your/app/libs/ + ``` + + ![Screenshot](images/mobile/aar_location.png) + +3. Make app/src/main/assets and copy assets (graph, model, and etc) into + app/src/main/assets. 
+ + Build the MediaPipe binary graph and copy the assets into + app/src/main/assets. For example, for the face detection graph, you need to + build and copy + [the binary graph](https://github.com/google/mediapipe/blob/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD#L41), + [the tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite), + and + [the label map](https://github.com/google/mediapipe/blob/master/mediapipe/models/face_detection_front_labelmap.txt). + + ```bash + bazel build -c opt mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu:binary_graph + cp bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/facedetectiongpu.binarypb /path/to/your/app/src/main/assets/ + cp mediapipe/models/face_detection_front.tflite /path/to/your/app/src/main/assets/ + cp mediapipe/models/face_detection_front_labelmap.txt /path/to/your/app/src/main/assets/ + ``` + + ![Screenshot](images/mobile/assets_location.png) + +4. Make app/src/main/jniLibs and copy OpenCV JNI libraries into + app/src/main/jniLibs. + + MediaPipe depends on OpenCV; you will need to copy the precompiled OpenCV .so + files into app/src/main/jniLibs. You can download the official OpenCV + Android SDK from + [here](https://github.com/opencv/opencv/releases/download/4.1.0/opencv-4.1.0-android-sdk.zip) + and run: + + ```bash + cp -R ~/Downloads/OpenCV-android-sdk/sdk/native/libs/arm* /path/to/your/app/src/main/jniLibs/ + ``` + + ![Screenshot](images/mobile/android_studio_opencv_location.png) + +5. Modify app/build.gradle to add MediaPipe dependencies and the MediaPipe AAR. + + ``` + dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar']) + implementation 'androidx.appcompat:appcompat:1.0.2' + implementation 'androidx.constraintlayout:constraintlayout:1.1.3' + testImplementation 'junit:junit:4.12' + androidTestImplementation 'androidx.test.ext:junit:1.1.0' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.1.1' + // MediaPipe deps + implementation 'com.google.flogger:flogger:0.3.1' + implementation 'com.google.flogger:flogger-system-backend:0.3.1' + implementation 'com.google.code.findbugs:jsr305:3.0.2' + implementation 'com.google.guava:guava:27.0.1-android' + implementation 'com.google.protobuf:protobuf-lite:3.0.0' + // CameraX core library + def camerax_version = "1.0.0-alpha06" + implementation "androidx.camera:camera-core:$camerax_version" + implementation "androidx.camera:camera-camera2:$camerax_version" + } + ``` + +6. Follow our Android app examples to use MediaPipe in Android Studio for your + use case. If you are looking for an example, a working face detection + example can be found + [here](https://github.com/jiuqiant/mediapipe_aar_example). diff --git a/mediapipe/docs/examples.md b/mediapipe/docs/examples.md index 404024b5f..4e279e935 100644 --- a/mediapipe/docs/examples.md +++ b/mediapipe/docs/examples.md @@ -76,6 +76,15 @@ MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline. * [Android](./hand_tracking_mobile_gpu.md) * [iOS](./hand_tracking_mobile_gpu.md) +### Multi-Hand Tracking with GPU + +[Multi-Hand Tracking with GPU](./multi_hand_tracking_mobile_gpu.md) illustrates +how to use MediaPipe with a TFLite model for multi-hand tracking in a +GPU-accelerated pipeline.
+ +* [Android](./multi_hand_tracking_mobile_gpu.md) +* [iOS](./multi_hand_tracking_mobile_gpu.md) + ### Hair Segmentation with GPU [Hair Segmentation on GPU](./hair_segmentation_mobile_gpu.md) illustrates how to @@ -96,8 +105,9 @@ using the MediaPipe C++ APIs. ### Feature Extration for YouTube-8M Challenge -[Feature Extration for YouTube-8M Challenge](./youtube_8m.md) shows how to use -MediaPipe to prepare training data for the YouTube-8M Challenge. +[Feature Extraction and Model Inference for YouTube-8M Challenge](./youtube_8m.md) +shows how to use MediaPipe to prepare training data for the YouTube-8M Challenge +and run model inference with the baseline model. ### Preparing Data Sets with MediaSequence @@ -131,6 +141,15 @@ with live video from a webcam. * [Desktop GPU](./hand_tracking_desktop.md) * [Desktop CPU](./hand_tracking_desktop.md) +### Multi-Hand Tracking on Desktop with Webcam + +[Multi-Hand Tracking on Desktop with Webcam](./multi_hand_tracking_desktop.md) +shows how to use MediaPipe with a TFLite model for multi-hand tracking on +desktop using CPU or GPU with live video from a webcam. + +* [Desktop GPU](./multi_hand_tracking_desktop.md) +* [Desktop CPU](./multi_hand_tracking_desktop.md) + ### Hair Segmentation on Desktop with Webcam [Hair Segmentation on Desktop with Webcam](./hair_segmentation_desktop.md) shows diff --git a/mediapipe/docs/face_detection_desktop.md b/mediapipe/docs/face_detection_desktop.md index b95705262..25caa2b79 100644 --- a/mediapipe/docs/face_detection_desktop.md +++ b/mediapipe/docs/face_detection_desktop.md @@ -36,10 +36,9 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ # INFO: 711 processes: 710 linux-sandbox, 1 local. # INFO: Build completed successfully, 734 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible -$ bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_cpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_cpu \ --calculator_graph_config_file=mediapipe/graphs/face_detection/face_detection_desktop_live.pbtxt ``` @@ -60,11 +59,10 @@ $ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \ # INFO: 711 processes: 710 linux-sandbox, 1 local. # INFO: Build completed successfully, 734 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible, # or GPU drivers not setup properly. -$ bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_gpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_gpu \ --calculator_graph_config_file=mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt ``` @@ -79,7 +77,7 @@ below and paste it into ```bash # MediaPipe graph that performs face detection with TensorFlow Lite on CPU & GPU. # Used in the examples in -# mediapipie/examples/desktop/face_detection:face_detection_cpu. +# mediapipe/examples/desktop/face_detection:face_detection_cpu. # Images on CPU coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/docs/hair_segmentation_desktop.md b/mediapipe/docs/hair_segmentation_desktop.md index 058902363..50f92f8d3 100644 --- a/mediapipe/docs/hair_segmentation_desktop.md +++ b/mediapipe/docs/hair_segmentation_desktop.md @@ -31,15 +31,12 @@ $ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \ #INFO: Found 1 target...
#Target //mediapipe/examples/desktop/hair_segmentation:hair_segmentation_gpu up-to-date: # bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair_segmentation_gpu -#INFO: Elapsed time: 18.209s, Forge stats: 13026/13057 actions cached, 20.8s CPU used, 0.0s queue time, 89.3 MB ObjFS output (novel bytes: 87.4 MB), 0.0 MB local output, Critical Path: 11.88s, Remote (86.01% of the time): [queue: 0.00%, network: 16.83%, setup: 4.59%, process: 38.92%] -#INFO: Streaming build results to: http://sponge2/37d5a184-293b-4e98-a43e-b22084db3142 #INFO: Build completed successfully, 12210 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible, # or GPU drivers not setup properly. -$ bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair_segmentation_gpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair_segmentation_gpu \ --calculator_graph_config_file=mediapipe/graphs/hair_segmentation/hair_segmentation_mobile_gpu.pbtxt ``` @@ -54,7 +51,7 @@ below and paste it into ```bash # MediaPipe graph that performs hair segmentation with TensorFlow Lite on GPU. # Used in the example in -# mediapipie/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu. # Images on GPU coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/docs/hair_segmentation_mobile_gpu.md b/mediapipe/docs/hair_segmentation_mobile_gpu.md index 262d7a0c6..602a27847 100644 --- a/mediapipe/docs/hair_segmentation_mobile_gpu.md +++ b/mediapipe/docs/hair_segmentation_mobile_gpu.md @@ -29,7 +29,7 @@ below and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev/). ```bash # MediaPipe graph that performs hair segmentation with TensorFlow Lite on GPU. # Used in the example in -# mediapipie/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/hairsegmentationgpu. # Images on GPU coming into and out of the graph. 
input_stream: "input_video" diff --git a/mediapipe/docs/hand_tracking_desktop.md b/mediapipe/docs/hand_tracking_desktop.md index 6776a4710..f76b1dd99 100644 --- a/mediapipe/docs/hand_tracking_desktop.md +++ b/mediapipe/docs/hand_tracking_desktop.md @@ -31,14 +31,11 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ # It should print: #Target //mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu up-to-date: # bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu -#INFO: Elapsed time: 22.645s, Forge stats: 13356/13463 actions cached, 1.5m CPU used, 0.0s queue time, 819.8 MB ObjFS output (novel bytes: 85.6 MB), 0.0 MB local output, Critical Path: 14.43s, Remote (87.25% of the time): [queue: 0.00%, network: 14.88%, setup: 4.80%, process: 39.80%, fetch: 18.15%] -#INFO: Streaming build results to: http://sponge2/360196b9-33ab-44b1-84a7-1022b5043307 #INFO: Build completed successfully, 12517 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible -$ bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \ --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt ``` @@ -55,15 +52,12 @@ $ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \ # It should print: # Target //mediapipe/examples/desktop/hand_tracking:hand_tracking_gpu up-to-date: # bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu -#INFO: Elapsed time: 84.055s, Forge stats: 6858/19343 actions cached, 1.6h CPU used, 0.9s queue time, 1.68 GB ObjFS output (novel bytes: 485.1 MB), 0.0 MB local output, Critical Path: 48.14s, Remote (99.40% of the time): [queue: 0.00%, setup: 5.59%, process: 74.44%] -#INFO: Streaming build results to: http://sponge2/00c7f95f-6fbc-432d-8978-f5d361efca3b #INFO: Build completed successfully, 22455 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible, # or GPU drivers not setup properly. -$ bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt ``` @@ -79,7 +73,7 @@ below and paste it into # MediaPipe graph that performs hand tracking on desktop with TensorFlow Lite # on CPU & GPU. # Used in the example in -# mediapipie/examples/desktop/hand_tracking:hand_tracking_cpu. +# mediapipe/examples/desktop/hand_tracking:hand_tracking_cpu. # Images coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/docs/hand_tracking_mobile_gpu.md b/mediapipe/docs/hand_tracking_mobile_gpu.md index be9cdd264..e097dc9d5 100644 --- a/mediapipe/docs/hand_tracking_mobile_gpu.md +++ b/mediapipe/docs/hand_tracking_mobile_gpu.md @@ -100,8 +100,8 @@ see the Visualizing Subgraphs section in the ```bash # MediaPipe graph that performs hand tracking with TensorFlow Lite on GPU. # Used in the examples in -# mediapipie/examples/android/src/java/com/mediapipe/apps/handtrackinggpu and -# mediapipie/examples/ios/handtrackinggpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu and +# mediapipe/examples/ios/handtrackinggpu. # Images coming into and out of the graph. 
input_stream: "input_video" diff --git a/mediapipe/docs/images/mobile/aar_location.png b/mediapipe/docs/images/mobile/aar_location.png new file mode 100644 index 000000000..f85e8219e Binary files /dev/null and b/mediapipe/docs/images/mobile/aar_location.png differ diff --git a/mediapipe/docs/images/mobile/android_studio_opencv_location.png b/mediapipe/docs/images/mobile/android_studio_opencv_location.png new file mode 100644 index 000000000..dbb26af1a Binary files /dev/null and b/mediapipe/docs/images/mobile/android_studio_opencv_location.png differ diff --git a/mediapipe/docs/images/mobile/assets_location.png b/mediapipe/docs/images/mobile/assets_location.png new file mode 100644 index 000000000..573b77f04 Binary files /dev/null and b/mediapipe/docs/images/mobile/assets_location.png differ diff --git a/mediapipe/docs/images/mobile/multi_hand_detection_gpu_subgraph.png b/mediapipe/docs/images/mobile/multi_hand_detection_gpu_subgraph.png new file mode 100644 index 000000000..6105283b2 Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_detection_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/multi_hand_landmark_subgraph.png b/mediapipe/docs/images/mobile/multi_hand_landmark_subgraph.png new file mode 100644 index 000000000..93f02bc42 Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_landmark_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/multi_hand_renderer_gpu_subgraph.png b/mediapipe/docs/images/mobile/multi_hand_renderer_gpu_subgraph.png new file mode 100644 index 000000000..7da438e3f Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_renderer_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif b/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif new file mode 100644 index 000000000..fbdd8f573 Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif b/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif new file mode 100644 index 000000000..2cc920c86 Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_mobile.png b/mediapipe/docs/images/mobile/multi_hand_tracking_mobile.png new file mode 100644 index 000000000..b9eb410f3 Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_tracking_mobile.png differ diff --git a/mediapipe/docs/images/multi_hand_tracking_desktop.png b/mediapipe/docs/images/multi_hand_tracking_desktop.png new file mode 100644 index 000000000..5f84ab2f8 Binary files /dev/null and b/mediapipe/docs/images/multi_hand_tracking_desktop.png differ diff --git a/mediapipe/docs/images/web_effect.gif b/mediapipe/docs/images/web_effect.gif new file mode 100644 index 000000000..dac8e236b Binary files /dev/null and b/mediapipe/docs/images/web_effect.gif differ diff --git a/mediapipe/docs/images/web_segmentation.gif b/mediapipe/docs/images/web_segmentation.gif new file mode 100644 index 000000000..516a07d6c Binary files /dev/null and b/mediapipe/docs/images/web_segmentation.gif differ diff --git a/mediapipe/docs/install.md b/mediapipe/docs/install.md index 99473811e..02b6fc149 100644 --- a/mediapipe/docs/install.md +++ b/mediapipe/docs/install.md @@ -7,11 +7,8 @@ future. 
Note: If you plan to use TensorFlow calculators and example apps, there is a known issue with gcc and g++ version 6.3 and 7.3. Please use other versions. -Note: While Mediapipe configures TensorFlow, if you see the -following error: -`"...git_configure.bzl", line 14, in _fail fail(("%sGit Configuration -Error:%s %...)))`, -please install the python future library using: `$ pip install --user future`. +Note: To make MediaPipe work with TensorFlow, please install the Python "future" +library and the Python "six" library using `pip install --user future six`. Choose your operating system: @@ -24,7 +21,8 @@ Choose your operating system: To build and run Android apps: - [Setting up Android SDK and NDK](#setting-up-android-sdk-and-ndk) -- [Setting up Android Studio with MediaPipe](#setting-up-android-studio-with-mediapipe) +- [Using MediaPipe with Gradle](#using-mediapipe-with-gradle) +- [Using MediaPipe with Bazel](#using-mediapipe-with-bazel) To build and run iOS apps: @@ -51,8 +49,8 @@ To build and run iOS apps: # Run 'bazel version' to check version of bazel installed ``` - Option 2. Follow Bazel's - [documentation](https://docs.bazel.build/versions/master/install-ubuntu.html) + Option 2. Follow the official + [Bazel documentation](https://docs.bazel.build/versions/master/install-ubuntu.html) to install any version of Bazel manually. 3. Install OpenCV and FFmpeg. @@ -75,10 +73,10 @@ To build and run iOS apps: [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) to manually build OpenCV from source code. - Note: You may need to modify [`WORKSAPCE`] and [`opencv_linux.BUILD`] to + Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSAPCE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] + rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] like the following: ```bash @@ -161,8 +159,8 @@ To build and run iOS apps: 2. Install Bazel (0.24.1 and above required). - Follow Bazel's - [documentation](https://docs.bazel.build/versions/master/install-redhat.html) + Follow the official + [Bazel documentation](https://docs.bazel.build/versions/master/install-redhat.html) to install Bazel manually. 3. Install OpenCV. @@ -178,10 +176,10 @@ To build and run iOS apps: Option 2. Build OpenCV from source code. - Note: You may need to modify [`WORKSAPCE`] and [`opencv_linux.BUILD`] to + Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSAPCE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] + rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] like the following: ```bash @@ -237,7 +235,7 @@ To build and run iOS apps: * Install [Homebrew](https://brew.sh). * Install [Xcode](https://developer.apple.com/xcode/) and its Command Line - Tools. + Tools by running `xcode-select --install`. 2. Checkout MediaPipe repository. @@ -257,8 +255,8 @@ To build and run iOS apps: # Run 'bazel version' to check version of bazel installed ``` - Option 2. Follow Bazel's - [documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x) + Option 2.
Follow the official + [Bazel documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x) to install any version of Bazel manually. 4. Install OpenCV and FFmpeg. @@ -281,7 +279,7 @@ To build and run iOS apps: $ port install opencv ``` - Note: when using MacPorts, please edit the [`WORKSAPCE`], + Note: when using MacPorts, please edit the [`WORKSPACE`], [`opencv_macos.BUILD`], and [`ffmpeg_macos.BUILD`] files like the following: ```bash @@ -419,10 +417,10 @@ To build and run iOS apps: [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) to manually build OpenCV from source code. - Note: You may need to modify [`WORKSAPCE`] and [`opencv_linux.BUILD`] to + Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSAPCE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] + rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] like the following: ```bash @@ -581,6 +579,11 @@ export ANDROID_HOME= export ANDROID_NDK_HOME= ``` +In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch +to a lower Android API level. You can achieve this by specifying `api_level = +` in android_ndk_repository() and/or android_sdk_repository() +in the [`WORKSPACE`] file. + Please verify all the necessary packages are installed. * Android SDK Platform API Level 28 or 29 @@ -589,10 +592,20 @@ Please verify all the necessary packages are installed. * Android SDK Tools 26.1.1 * Android NDK 17c or above -### Setting up Android Studio with MediaPipe +### Using MediaPipe with Gradle -The steps below use Android Studio 3.5 to build and install a MediaPipe example -app. +MediaPipe can be used within an existing project, such as a Gradle project, +using the MediaPipe AAR target defined in mediapipe_aar.bzl. Please see the +separate [MediaPipe Android Archive Library](./android_archive_library.md) +documentation. + +### Using MediaPipe with Bazel + +The MediaPipe project can be imported to Android Studio using the Bazel plugins. +This allows the MediaPipe examples and demos to be built and modified in Android +Studio. To incorporate MediaPipe into an existing Android Studio project, see: +"Using MediaPipe with Gradle". The steps below use Android Studio 3.5 to build +and install a MediaPipe example app. 1. Install and launch Android Studio 3.5. @@ -682,7 +695,7 @@ app. * Press the `[+]` button to add the new configuration. * Select `Run` to run the example app on the connected Android device. 
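To make the API-level note above concrete, here is a minimal, hypothetical
WORKSPACE sketch. The `api_level` attribute is the real one accepted by Bazel's
android_sdk_repository()/android_ndk_repository() rules, but the value `21` is
purely illustrative; substitute the minimum API level your app targets:

```bash
# Hypothetical WORKSPACE excerpt: pin both repositories to a lower API level.
android_sdk_repository(
    name = "androidsdk",
    api_level = 21,
)

android_ndk_repository(
    name = "androidndk",
    api_level = 21,
)
```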
-[`WORKSAPCE`]: https://github.com/google/mediapipe/tree/master/WORKSPACE
+[`WORKSPACE`]: https://github.com/google/mediapipe/tree/master/WORKSPACE
[`opencv_linux.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_linux.BUILD
[`opencv_macos.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_macos.BUILD
[`ffmpeg_macos.BUILD`]:https://github.com/google/mediapipe/tree/master/third_party/ffmpeg_macos.BUILD
diff --git a/mediapipe/docs/multi_hand_tracking_desktop.md b/mediapipe/docs/multi_hand_tracking_desktop.md
new file mode 100644
index 000000000..d80707f9b
--- /dev/null
+++ b/mediapipe/docs/multi_hand_tracking_desktop.md
@@ -0,0 +1,177 @@
+## Multi-Hand Tracking on Desktop
+
+This is an example of using MediaPipe to run hand tracking models (TensorFlow
+Lite) and render bounding boxes on the detected hand instances (for multiple
+hands). To learn more about the hand tracking models, please refer to the model
+[`README file`]. Moreover, if you are interested in running the same TensorFlow
+Lite model on Android/iOS, please see
+[Multi-Hand Tracking on GPU on Android/iOS](multi_hand_tracking_mobile_gpu.md).
+
+We show the multi-hand tracking demos with the TensorFlow Lite model using a
+webcam:
+
+- [TensorFlow Lite Multi-Hand Tracking Demo with Webcam (CPU)](#tensorflow-lite-multi-hand-tracking-demo-with-webcam-cpu)
+
+- [TensorFlow Lite Multi-Hand Tracking Demo with Webcam (GPU)](#tensorflow-lite-multi-hand-tracking-demo-with-webcam-gpu)
+
+Note: Desktop GPU works only on Linux. Mesa drivers need to be installed. Please
+see
+[step 4 of "Installing on Debian and Ubuntu" in the installation guide](./install.md).
+
+Note: If MediaPipe depends on OpenCV 2, please see the
+[known issues with OpenCV 2](#known-issues-with-opencv-2) section.
+
+### TensorFlow Lite Multi-Hand Tracking Demo with Webcam (CPU)
+
+To build and run the TensorFlow Lite example on desktop (CPU) with Webcam, run:
+
+```bash
+# Video from webcam running on desktop CPU
+$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
+    mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu
+
+# It should print:
+#Target //mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu up-to-date:
+#  bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/multi_hand_tracking_cpu
+
+# This will open up your webcam as long as it is connected and on
+# Any errors are likely due to your webcam not being accessible
+$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/multi_hand_tracking_cpu \
+    --calculator_graph_config_file=mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt
+```
+
+### TensorFlow Lite Multi-Hand Tracking Demo with Webcam (GPU)
+
+To build and run the TensorFlow Lite example on desktop (GPU) with Webcam, run:
+
+```bash
+# Video from webcam running on desktop GPU
+# This works only for linux currently
+$ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \
+    mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_gpu
+
+# It should print:
+# Target //mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_gpu up-to-date:
+#  bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/multi_hand_tracking_gpu
+
+# This will open up your webcam as long as it is connected and on
+# Any errors are likely due to your webcam not being accessible,
+# or GPU drivers not being set up properly.
+$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/multi_hand_tracking/multi_hand_tracking_gpu \
+    --calculator_graph_config_file=mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt
+```
+
+#### Graph
+
+![graph visualization](images/multi_hand_tracking_desktop.png)
+
+To visualize the graph as shown above, copy the text specification of the graph
+below and paste it into [MediaPipe Visualizer](https://viz.mediapipe.dev).
+
+```bash
+# MediaPipe graph that performs multi-hand tracking on desktop with TensorFlow
+# Lite on CPU.
+# Used in the example in
+# mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu.
+
+# Images coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided min_size.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:prev_multi_hand_rects_from_landmarks"
+  output_stream: "prev_has_enough_hands"
+  node_options: {
+    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify max_vec_size in
+      # ClipVectorSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
+      min_size: 2
+    }
+  }
+}
+
+# Drops the incoming image if the previous frame had at least N hands.
+# Otherwise, passes the incoming image through to trigger a new round of hand
+# detection in MultiHandDetectionSubgraph.
+node {
+  calculator: "GateCalculator"
+  input_stream: "input_video"
+  input_stream: "DISALLOW:prev_has_enough_hands"
+  output_stream: "multi_hand_detection_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Subgraph that detects hands (see multi_hand_detection_cpu.pbtxt).
+node {
+  calculator: "MultiHandDetectionSubgraph"
+  input_stream: "multi_hand_detection_input_video"
+  output_stream: "DETECTIONS:multi_palm_detections"
+  output_stream: "NORM_RECTS:multi_palm_rects"
+}
+
+# Subgraph that localizes hand landmarks for multiple hands (see
+# multi_hand_landmark.pbtxt).
+node {
+  calculator: "MultiHandLandmarkSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "LANDMARKS:multi_hand_landmarks"
+  output_stream: "NORM_RECTS:multi_hand_rects_from_landmarks"
+}
+
+# Caches a hand rectangle fed back from MultiHandLandmarkSubgraph, and upon the
+# arrival of the next input image sends out the cached rectangle with the
+# timestamp replaced by that of the input image, essentially generating a packet
+# that carries the previous hand rectangle. Note that upon the arrival of the
+# very first input image, an empty packet is sent out to jump start the
+# feedback loop.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:input_video"
+  input_stream: "LOOP:multi_hand_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_multi_hand_rects_from_landmarks"
+}
+
+# Performs association between NormalizedRect vector elements from previous
+# frame and those from the current frame if MultiHandDetectionSubgraph runs.
+# This calculator ensures that the output multi_hand_rects vector doesn't
+# contain overlapping regions based on the specified min_similarity_threshold.
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "prev_multi_hand_rects_from_landmarks"
+  input_stream: "multi_palm_rects"
+  output_stream: "multi_hand_rects"
+  node_options: {
+    [type.googleapis.com/mediapipe.AssociationCalculatorOptions] {
+      min_similarity_threshold: 0.1
+    }
+  }
+}
+
+# Subgraph that renders annotations and overlays them on top of the input
+# images (see multi_hand_renderer_cpu.pbtxt).
+node {
+  calculator: "MultiHandRendererSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "DETECTIONS:multi_palm_detections"
+  input_stream: "LANDMARKS:multi_hand_landmarks"
+  input_stream: "NORM_RECTS:0:multi_palm_rects"
+  input_stream: "NORM_RECTS:1:multi_hand_rects"
+  output_stream: "IMAGE:output_video"
+}
+```
+
+[`README file`]:https://github.com/google/mediapipe/tree/master/mediapipe/README.md
diff --git a/mediapipe/docs/multi_hand_tracking_mobile_gpu.md b/mediapipe/docs/multi_hand_tracking_mobile_gpu.md
new file mode 100644
index 000000000..b57a6631d
--- /dev/null
+++ b/mediapipe/docs/multi_hand_tracking_mobile_gpu.md
@@ -0,0 +1,755 @@
+# Multi-Hand Tracking (GPU)
+
+This doc focuses on the
+[example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
+that performs multi-hand tracking with TensorFlow Lite on GPU. It is related to
+the [hand_tracking_example](./hand_tracking_mobile_gpu.md), and we recommend
+reviewing the (single) hand tracking example first.
+
+![multi_hand_tracking_android_gpu.gif](images/mobile/multi_hand_tracking_android_gpu.gif)
+
+In the visualization above, the red dots represent the hand landmarks and the
+green lines are simply connections between selected landmark pairs for
+visualization of the hand skeleton. When there are fewer than `N` hands (`N=2`
+in the graphs here), the purple box represents a hand rectangle that covers the
+entire hand, derived from hand detection (see
+[hand_detection_example](./hand_detection_mobile_gpu.md)). When there are `N`
+hands (i.e. 2 hands for the graphs here), the red boxes represent hand
+rectangles for each of the hands, derived from the previous round of hand
+landmark localization using an ML model (see also
+[model card](https://mediapipe.page.link/handmc)). Hand landmark localization
+for each hand is performed only within the hand rectangle for computational
+efficiency and accuracy. Hand detection is invoked only when there are fewer
+than `N` hands in the previous iteration.
+
+This example can also run a model that localizes hand landmarks in 3D (i.e.,
+estimating an extra z coordinate):
+
+![multi_hand_tracking_3d_android_gpu.gif](images/mobile/multi_hand_tracking_3d_android_gpu.gif)
+
+In the visualization above, the localized hand landmarks are represented by dots
+in different shades, with the brighter ones denoting landmarks closer to the
+camera.
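+Note: the graphs in this doc fix `N=2`. Per the comments embedded in the graph
+files themselves, tracking a different number of hands means changing two
+options in lockstep. The following fragments (with an illustrative value of 4)
+sketch the two places to edit:
+
+```bash
+# In multi_hand_tracking_mobile.pbtxt (or multi_hand_tracking_desktop_live.pbtxt),
+# inside the NormalizedRectVectorHasMinSizeCalculator node:
+[type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
+  min_size: 4
+}
+
+# In subgraphs/multi_hand_detection_gpu.pbtxt, inside the
+# ClipNormalizedRectVectorSizeCalculator node, keep max_vec_size in sync:
+[type.googleapis.com/mediapipe.ClipVectorSizeCalculatorOptions] {
+  max_vec_size: 4
+}
+```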
+
+## Android
+
+[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu)
+
+To build the app yourself, run:
+
+```bash
+bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu
+```
+
+To build for the 3D mode, run:
+
+```bash
+bazel build -c opt --config=android_arm64 --define 3D=true mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu
+```
+
+Once the app is built, install it on an Android device with:
+
+```bash
+adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/multihandtrackinggpu.apk
+```
+
+## iOS
+
+[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/multihandtrackinggpu).
+
+See the general [instructions](./mediapipe_ios_setup.md) for building iOS
+examples and generating an Xcode project. This will be the
+MultiHandTrackingGpuApp target.
+
+To build on the command line:
+
+```bash
+bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp
+```
+
+To build for the 3D mode, run:
+
+```bash
+bazel build -c opt --config=ios_arm64 --define 3D=true mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp
+```
+
+## Graph
+
+The multi-hand tracking [main graph](#main-graph) internally utilizes a
+[multi_hand_detection_subgraph](#multi-hand-detection-subgraph), a
+[multi_hand_landmark_subgraph](#multi-hand-landmark-subgraph), and a
+[multi_hand_renderer_subgraph](#multi-hand-renderer-subgraph).
+
+The subgraphs show up in the main graph visualization as nodes colored in
+purple, and each subgraph itself can also be visualized just like a regular
+graph. For more information on how to visualize a graph that includes
+subgraphs, see the Visualizing Subgraphs section in the
+[visualizer documentation](./visualizer.md).
+
+### Main Graph
+
+![multi_hand_tracking_mobile_graph](images/mobile/multi_hand_tracking_mobile.png)
+
+There are two key differences between this graph and the
+[single_hand_tracking_mobile_graph](./hand_tracking_mobile_gpu.md).
+
+1. There is a `NormalizedRectVectorHasMinSize` calculator that checks if an
+   input vector of `NormalizedRect` objects has a minimum size equal to `N`. In
+   this graph, if the vector contains fewer than `N` objects, the
+   `MultiHandDetection` subgraph runs. Otherwise, the `GateCalculator` doesn't
+   send any image packets to the `MultiHandDetection` subgraph. This way, the
+   main graph is efficient in that it avoids running the costly hand detection
+   step when there are already `N` hands in the frame.
+2. The `MergeCalculator` has been replaced by the `AssociationNormRect`
+   calculator. This calculator takes as input a vector of
+   `NormalizedRect` objects from the `MultiHandDetection` subgraph on the
+   current frame, and a vector of `NormalizedRect` objects from the
+   `MultiHandLandmark` subgraph from the previous frame, and performs an
+   association operation between these objects. This calculator ensures that
+   the output vector doesn't contain overlapping regions based on the specified
+   `min_similarity_threshold`.
+
+[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt)
+
+```bash
+# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
+# Used in the examples in
+# mediapipe/examples/android/src/java/com/mediapipe/apps/multihandtrackinggpu.
+
+# Images coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph to
+# 1. This prevents the downstream nodes from queuing up incoming images and data
+# excessively, which leads to increased latency and memory usage, unwanted in
+# real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:multi_hand_rects"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided min_size.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:prev_multi_hand_rects_from_landmarks"
+  output_stream: "prev_has_enough_hands"
+  node_options: {
+    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify max_vec_size in
+      # ClipVectorSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
+      min_size: 2
+    }
+  }
+}
+
+# Drops the incoming image if the previous frame had at least N hands.
+# Otherwise, passes the incoming image through to trigger a new round of hand
+# detection in MultiHandDetectionSubgraph.
+node {
+  calculator: "GateCalculator"
+  input_stream: "throttled_input_video"
+  input_stream: "DISALLOW:prev_has_enough_hands"
+  output_stream: "multi_hand_detection_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Subgraph that detects hands (see multi_hand_detection_gpu.pbtxt).
+node {
+  calculator: "MultiHandDetectionSubgraph"
+  input_stream: "multi_hand_detection_input_video"
+  output_stream: "DETECTIONS:multi_palm_detections"
+  output_stream: "NORM_RECTS:multi_palm_rects"
+}
+
+# Subgraph that localizes hand landmarks for multiple hands (see
+# multi_hand_landmark.pbtxt).
+node {
+  calculator: "MultiHandLandmarkSubgraph"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "LANDMARKS:multi_hand_landmarks"
+  output_stream: "NORM_RECTS:multi_hand_rects_from_landmarks"
+}
+
+# Caches a hand rectangle fed back from MultiHandLandmarkSubgraph, and upon the
+# arrival of the next input image sends out the cached rectangle with the
+# timestamp replaced by that of the input image, essentially generating a packet
+# that carries the previous hand rectangle. Note that upon the arrival of the
+# very first input image, an empty packet is sent out to jump start the
+# feedback loop.
+node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:throttled_input_video" + input_stream: "LOOP:multi_hand_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_multi_hand_rects_from_landmarks" +} + +# Performs association between NormalizedRect vector elements from previous +# frame and those from the current frame if MultiHandDetectionSubgraph runs. +# This calculator ensures that the output multi_hand_rects vector doesn't +# contain overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "prev_multi_hand_rects_from_landmarks" + input_stream: "multi_palm_rects" + output_stream: "multi_hand_rects" + node_options: { + [type.googleapis.com/mediapipe.AssociationCalculatorOptions] { + min_similarity_threshold: 0.1 + } + } +} + +# Subgraph that renders annotations and overlays them on top of the input +# images (see multi_hand_renderer_gpu.pbtxt). +node { + calculator: "MultiHandRendererSubgraph" + input_stream: "IMAGE:throttled_input_video" + input_stream: "DETECTIONS:multi_palm_detections" + input_stream: "LANDMARKS:multi_hand_landmarks" + input_stream: "NORM_RECTS:0:multi_palm_rects" + input_stream: "NORM_RECTS:1:multi_hand_rects" + output_stream: "IMAGE:output_video" +} +``` + +### Multi-Hand Detection Subgraph + +![multi_hand_detection_gpu_subgraph](images/mobile/multi_hand_detection_gpu_subgraph.png) + +This graph outputs a vector of `NormalizedRect` objects corresponding to each of +the hand instances visible in the frame. Note that at the end of this graph, +there is a `ClipNormalizedRectVectorSizeCalculator`. This calculator clips the +size of the input vector to a maximum size `N`. This implies that the +`MultiHandDetection` subgraph outputs a vector of maximum `N` hand instance +locations. + +[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt) + +```bash +# MediaPipe multi-hand detection subgraph. + +type: "MultiHandDetectionSubgraph" + +input_stream: "input_video" +output_stream: "DETECTIONS:palm_detections" +output_stream: "NORM_RECTS:clipped_hand_rects_from_palm_detections" + +# Transforms the input image on GPU to a 256x256 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 256 + output_height: 256 + scale_mode: FIT + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "opresolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] { + use_gpu: true + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. 
+node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "TENSORS_GPU:image_tensor" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:image_tensor" + output_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/palm_detection.tflite" + use_gpu: true + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + node_options: { + [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] { + num_layers: 5 + min_scale: 0.1171875 + max_scale: 0.75 + input_size_height: 256 + input_size_width: 256 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 32 + strides: 32 + strides: 32 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { + num_classes: 1 + num_boxes: 2944 + num_coords: 18 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 7 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + + x_scale: 256.0 + y_scale: 256.0 + h_scale: 256.0 + w_scale: 256.0 + min_score_thresh: 0.7 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "detections" + output_stream: "filtered_detections" + node_options: { + [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + return_empty_detections: true + } + } +} + +# Maps detection label IDs to the corresponding label text ("Palm"). The label +# map is provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "filtered_detections" + output_stream: "labeled_detections" + node_options: { + [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] { + label_map_path: "mediapipe/models/palm_detection_labelmap.txt" + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:labeled_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:palm_detections" +} + +# Extracts image size from the input images. 
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE_GPU:input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts each palm detection into a rectangle (normalized by image size)
+# that encloses the palm and is rotated such that the line connecting center of
+# the wrist and MCP of the middle finger is aligned with the Y-axis of the
+# rectangle.
+node {
+  calculator: "DetectionsToRectsCalculator"
+  input_stream: "DETECTIONS:palm_detections"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "NORM_RECTS:palm_rects"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] {
+      rotation_vector_start_keypoint_index: 0  # Center of wrist.
+      rotation_vector_end_keypoint_index: 2  # MCP of middle finger.
+      rotation_vector_target_angle_degrees: 90
+      output_zero_rect_for_empty_detections: true
+    }
+  }
+}
+
+# Expands and shifts the rectangle that contains the palm so that it's likely
+# to cover the entire hand.
+node {
+  calculator: "RectTransformationCalculator"
+  input_stream: "NORM_RECTS:palm_rects"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "hand_rects_from_palm_detections"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] {
+      scale_x: 2.6
+      scale_y: 2.6
+      shift_y: -0.5
+      square_long: true
+    }
+  }
+}
+
+# Clips the size of the input vector to the provided max_vec_size. This
+# determines the maximum number of hand instances this graph outputs.
+# Note that the performance gain of clipping detections earlier in this graph is
+# minimal because NMS will minimize overlapping detections and the number of
+# detections isn't expected to exceed 5-10.
+node {
+  calculator: "ClipNormalizedRectVectorSizeCalculator"
+  input_stream: "hand_rects_from_palm_detections"
+  output_stream: "clipped_hand_rects_from_palm_detections"
+  node_options: {
+    [type.googleapis.com/mediapipe.ClipVectorSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify min_size in
+      # CollectionHasMinSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt and
+      # mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt.
+      max_vec_size: 2
+    }
+  }
+}
+```
+
+### Multi-Hand Landmark Subgraph
+
+![multi_hand_landmark_subgraph.pbtxt](images/mobile/multi_hand_landmark_subgraph.png)
+
+This graph accepts as input a vector of `NormalizedRect` objects, corresponding
+to the region of each hand instance in the input image. For each
+`NormalizedRect` object, the graph runs the existing `HandLandmark` subgraph and
+collects the outputs of this subgraph into vectors. This is enabled by the
+`BeginLoop` and `EndLoop` calculators.
+
+The `BeginLoop` calculator accepts as input a packet containing an iterable
+collection of elements. This calculator is templatized (see
+[begin_loop_calculator.h](https://github.com/google/mediapipe/tree/master/mediapipe/calculators/core/begin_loop_calculator.h)).
+If the input packet arrives at a timestamp `ts`, this calculator outputs each
+element in the collection at a fake timestamp `internal_ts`. At the end of the
+collection, the calculator outputs the arrival timestamp `ts` in the output
+stream tagged with `BATCH_END`.
+
+The nodes between the `BeginLoop` calculator and the corresponding `EndLoop`
+calculator process individual packets at the fake timestamps `internal_ts`.
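+As an illustrative sketch of this pattern (the stream names here are
+hypothetical, not taken from the subgraph below), the two calculators pair up
+as follows:
+
+```bash
+node {
+  calculator: "BeginLoopNormalizedRectCalculator"
+  input_stream: "ITERABLE:input_vector"    # arrives at real timestamp ts
+  output_stream: "ITEM:element"            # one packet per element, at fake timestamps
+  output_stream: "BATCH_END:loop_end_ts"   # emits ts once the vector is exhausted
+}
+
+# ... per-element processing of "element" happens here ...
+
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:processed_element"
+  input_stream: "BATCH_END:loop_end_ts"
+  output_stream: "ITERABLE:output_vector"  # collected results, emitted at ts
+}
+```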
+After each element is processed, it is sent to the `EndLoop` calculator (see
+[end_loop_calculator.h](https://github.com/google/mediapipe/tree/master/mediapipe/calculators/core/end_loop_calculator.h)),
+which collects these elements in an output collection. The `EndLoop` calculator
+listens for packets from the `BATCH_END` output stream of the `BeginLoop`
+calculator. When the `BATCH_END` packet containing the real timestamp `ts`
+arrives at the `EndLoop` calculator, the `EndLoop` calculator outputs a packet
+containing the collection of processed elements at the real timestamp `ts`.
+
+In the multi-hand landmark subgraph, the `EndLoop` calculators collect into
+vectors: the hand landmarks for each hand instance, the boolean values
+indicating the presence of each hand, and the `NormalizedRect` objects
+corresponding to the regions surrounding each hand.
+
+Finally, based on the hand presence boolean value, the graph filters the
+collections of hand landmarks and `NormalizedRect` objects corresponding to each
+hand instance.
+
+[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt)
+
+```bash
+# MediaPipe hand landmark localization subgraph.
+
+type: "MultiHandLandmarkSubgraph"
+
+input_stream: "IMAGE:input_video"
+# A vector of NormalizedRect, one per each hand detected.
+input_stream: "NORM_RECTS:multi_hand_rects"
+# A vector of NormalizedLandmarks, one set per each hand.
+output_stream: "LANDMARKS:filtered_multi_hand_landmarks"
+# A vector of NormalizedRect, one per each hand.
+output_stream: "NORM_RECTS:filtered_multi_hand_rects_for_next_frame"
+
+# Outputs each element of multi_hand_rects at a fake timestamp for the rest
+# of the graph to process. Clones the input_video packet for each
+# single_hand_rect at the fake timestamp. At the end of the loop,
+# outputs the BATCH_END timestamp for downstream calculators to inform them
+# that all elements in the vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedRectCalculator"
+  input_stream: "ITERABLE:multi_hand_rects"
+  input_stream: "CLONE:input_video"
+  output_stream: "ITEM:single_hand_rect"
+  output_stream: "CLONE:input_video_cloned"
+  output_stream: "BATCH_END:single_hand_rect_timestamp"
+}
+
+node {
+  calculator: "HandLandmarkSubgraph"
+  input_stream: "IMAGE:input_video_cloned"
+  input_stream: "NORM_RECT:single_hand_rect"
+  output_stream: "LANDMARKS:single_hand_landmarks"
+  output_stream: "NORM_RECT:single_hand_rect_from_landmarks"
+  output_stream: "PRESENCE:single_hand_presence"
+}
+
+# Collects the boolean presence value for each single hand into a vector. Upon
+# receiving the BATCH_END timestamp, outputs a vector of boolean values at the
+# BATCH_END timestamp.
+node {
+  calculator: "EndLoopBooleanCalculator"
+  input_stream: "ITEM:single_hand_presence"
+  input_stream: "BATCH_END:single_hand_rect_timestamp"
+  output_stream: "ITERABLE:multi_hand_presence"
+}
+
+# Collects a set of landmarks for each hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedLandmarksVectorCalculator"
+  input_stream: "ITEM:single_hand_landmarks"
+  input_stream: "BATCH_END:single_hand_rect_timestamp"
+  output_stream: "ITERABLE:multi_hand_landmarks"
+}
+
+# Collects a NormalizedRect for each hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:single_hand_rect_from_landmarks"
+  input_stream: "BATCH_END:single_hand_rect_timestamp"
+  output_stream: "ITERABLE:multi_hand_rects_for_next_frame"
+}
+
+# Filters the input vector of landmarks based on hand presence value for each
+# hand. If the hand presence for hand #i is false, the set of landmarks
+# corresponding to that hand is dropped from the vector.
+node {
+  calculator: "FilterLandmarksCollectionCalculator"
+  input_stream: "ITERABLE:multi_hand_landmarks"
+  input_stream: "CONDITION:multi_hand_presence"
+  output_stream: "ITERABLE:filtered_multi_hand_landmarks"
+}
+
+# Filters the input vector of NormalizedRect based on hand presence value for
+# each hand. If the hand presence for hand #i is false, the NormalizedRect
+# corresponding to that hand is dropped from the vector.
+node {
+  calculator: "FilterNormalizedRectCollectionCalculator"
+  input_stream: "ITERABLE:multi_hand_rects_for_next_frame"
+  input_stream: "CONDITION:multi_hand_presence"
+  output_stream: "ITERABLE:filtered_multi_hand_rects_for_next_frame"
+}
+```
+
+### Multi-Hand Renderer Subgraph
+
+![multi_hand_renderer_gpu_subgraph.pbtxt](images/mobile/multi_hand_renderer_gpu_subgraph.png)
+
+This graph also uses the `BeginLoop` and `EndLoop` calculators to iteratively
+convert a set of hand landmarks per hand instance into corresponding
+`RenderData` objects.
+
+[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt)
+
+```bash
+# MediaPipe multi-hand tracking rendering subgraph.
+
+type: "MultiHandRendererSubgraph"
+
+input_stream: "IMAGE:input_image"
+# A vector of NormalizedLandmarks, one for each hand.
+input_stream: "LANDMARKS:multi_hand_landmarks"
+# A vector of NormalizedRect, one for each hand.
+input_stream: "NORM_RECTS:0:multi_palm_rects"
+# A vector of NormalizedRect, one for each hand.
+input_stream: "NORM_RECTS:1:multi_hand_rects"
+# A vector of Detection, one for each hand.
+input_stream: "DETECTIONS:palm_detections"
+output_stream: "IMAGE:output_image"
+
+# Converts detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:palm_detections"
+  output_stream: "RENDER_DATA:detection_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 0 g: 255 b: 0 }
+    }
+  }
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "RENDER_DATA:multi_hand_rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 255 g: 0 b: 0 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:multi_palm_rects"
+  output_stream: "RENDER_DATA:multi_palm_rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 125 g: 0 b: 122 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Outputs each element of multi_hand_landmarks at a fake timestamp for the rest
+# of the graph to process.
+# At the end of the loop, outputs the BATCH_END
+# timestamp for downstream calculators to inform them that all elements in the
+# vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedLandmarksVectorCalculator"
+  input_stream: "ITERABLE:multi_hand_landmarks"
+  output_stream: "ITEM:single_hand_landmarks"
+  output_stream: "BATCH_END:landmark_timestamp"
+}
+
+# Converts landmarks to drawing primitives for annotation overlay.
+node {
+  calculator: "LandmarksToRenderDataCalculator"
+  input_stream: "NORM_LANDMARKS:single_hand_landmarks"
+  output_stream: "RENDER_DATA:single_hand_landmark_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
+      landmark_connections: 0
+      landmark_connections: 1
+      landmark_connections: 1
+      landmark_connections: 2
+      landmark_connections: 2
+      landmark_connections: 3
+      landmark_connections: 3
+      landmark_connections: 4
+      landmark_connections: 0
+      landmark_connections: 5
+      landmark_connections: 5
+      landmark_connections: 6
+      landmark_connections: 6
+      landmark_connections: 7
+      landmark_connections: 7
+      landmark_connections: 8
+      landmark_connections: 5
+      landmark_connections: 9
+      landmark_connections: 9
+      landmark_connections: 10
+      landmark_connections: 10
+      landmark_connections: 11
+      landmark_connections: 11
+      landmark_connections: 12
+      landmark_connections: 9
+      landmark_connections: 13
+      landmark_connections: 13
+      landmark_connections: 14
+      landmark_connections: 14
+      landmark_connections: 15
+      landmark_connections: 15
+      landmark_connections: 16
+      landmark_connections: 13
+      landmark_connections: 17
+      landmark_connections: 0
+      landmark_connections: 17
+      landmark_connections: 17
+      landmark_connections: 18
+      landmark_connections: 18
+      landmark_connections: 19
+      landmark_connections: 19
+      landmark_connections: 20
+      landmark_color { r: 255 g: 0 b: 0 }
+      connection_color { r: 0 g: 255 b: 0 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Collects a RenderData object for each hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopRenderDataCalculator"
+  input_stream: "ITEM:single_hand_landmark_render_data"
+  input_stream: "BATCH_END:landmark_timestamp"
+  output_stream: "ITERABLE:multi_hand_landmarks_render_data"
+}
+
+# Draws annotations and overlays them on top of the input images. Consumes
+# a vector of RenderData objects and draws each of them on the input frame.
+node {
+  calculator: "AnnotationOverlayCalculator"
+  input_stream: "INPUT_FRAME_GPU:input_image"
+  input_stream: "detection_render_data"
+  input_stream: "multi_hand_rects_render_data"
+  input_stream: "multi_palm_rects_render_data"
+  input_stream: "VECTOR:0:multi_hand_landmarks_render_data"
+  output_stream: "OUTPUT_FRAME_GPU:output_image"
+}
+```
diff --git a/mediapipe/docs/object_detection_desktop.md b/mediapipe/docs/object_detection_desktop.md
index 63de4f1ef..cb0b90117 100644
--- a/mediapipe/docs/object_detection_desktop.md
+++ b/mediapipe/docs/object_detection_desktop.md
@@ -35,10 +35,9 @@ $ bazel build -c opt \
# INFO: 2675 processes: 2673 linux-sandbox, 2 local.
# INFO: Build completed successfully, 2807 total actions
-$ export GLOG_logtostderr=1
# Replace <input video path> and <output video path>.
# You can find a test video in mediapipe/examples/desktop/object_detection.
-$ bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tensorflow \
+$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tensorflow \
  --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tensorflow_graph.pbtxt \
  --input_side_packets=input_video_path=<input video path>,output_video_path=<output video path>
```
@@ -55,7 +54,7 @@ below and paste it into
# MediaPipe graph that performs object detection on desktop with TensorFlow
# on CPU.
# Used in the example in
-# mediapipie/examples/desktop/object_detection:object_detection_tensorflow.
+# mediapipe/examples/desktop/object_detection:object_detection_tensorflow.
# Decodes an input video file into images and a video header.
node {
@@ -200,10 +199,9 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
# INFO: 711 processes: 710 linux-sandbox, 1 local.
# INFO: Build completed successfully, 734 total actions
-$ export GLOG_logtostderr=1
# Replace <input video path> and <output video path>.
# You can find a test video in mediapipe/examples/desktop/object_detection.
-$ bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \
+$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \
  --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt \
  --input_side_packets=input_video_path=<input video path>,output_video_path=<output video path>
```
@@ -220,14 +218,11 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
# It should print:
#Target //mediapipe/examples/desktop/object_detection:object_detection_cpu up-to-date:
# bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu
-#INFO: Elapsed time: 16.020s, Forge stats: 13001/13003 actions cached, 2.1s CPU used, 0.0s queue time, 89.0 MB ObjFS output (novel bytes: 88.0 MB), 0.0 MB local output, Critical Path: 10.01s, Remote (41.42% of the time): [queue: 0.00%, setup: 4.21%, process: 12.48%]
-#INFO: Streaming build results to: http://sponge2/1824d4cc-ba63-4350-bdc0-aacbd45b902b
#INFO: Build completed successfully, 12154 total actions
-$ export GLOG_logtostderr=1
# This will open up your webcam as long as it is connected and on
# Any errors is likely due to your webcam being not accessible
-$ bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu \
+$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu \
  --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt
```
@@ -243,7 +238,7 @@ below and paste it into
# MediaPipe graph that performs object detection on desktop with TensorFlow Lite
# on CPU.
# Used in the example in
-# mediapipie/examples/desktop/object_detection:object_detection_tflite.
+# mediapipe/examples/desktop/object_detection:object_detection_tflite.
# max_queue_size limits the number of packets enqueued on any input stream
# by throttling inputs to the graph. This makes the graph only process one
diff --git a/mediapipe/docs/web.md b/mediapipe/docs/web.md
new file mode 100644
index 000000000..0678269c5
--- /dev/null
+++ b/mediapipe/docs/web.md
@@ -0,0 +1,26 @@
+## MediaPipe on the Web
+
+MediaPipe on the Web is an effort to use [WebAssembly](https://webassembly.org/)
+to bring MediaPipe graphs, calculators, and related technologies to the web. The
+aim is to have all the pieces (ML, rendering, and processing) running directly
+in the browser client-side.
+The official API is under construction, but the core
+technology has been proven effective, and we can already show interactive
+cross-platform demos using your live webcam.
+
+![image](images/web_effect.gif) ![image](images/web_segmentation.gif)
+
+### Hand Tracking (with and without SIMD support)
+
+For [Chrome Developer Summit 2019](https://developer.chrome.com/devsummit/), we
+used this technology to showcase the potential for performance improvements
+using Chrome's experimental [WebAssembly SIMD](https://github.com/WebAssembly/simd)
+support. Below are two different versions of the
+[MediaPipe Hand Tracking Example](https://mediapipe.readthedocs.io/en/latest/hand_tracking_desktop.html)
+running on the web:
+
+1. WebAssembly MVP [demo](https://mediapipe.page.link/cds-ht) running around
+   5-8 frames per second on Desktop Chrome
+
+2. WebAssembly SIMD [demo](https://mediapipe.page.link/cds-ht-simd) running
+   around 15-18 frames per second on *Canary* Chrome for Desktop, which must
+   additionally be launched with the option
+   `--js-flags="--experimental-wasm-simd"`
+
+NOTE: This page is a work-in-progress. More to come soon!
diff --git a/mediapipe/docs/youtube_8m.md b/mediapipe/docs/youtube_8m.md
index dc6b26012..045c05845 100644
--- a/mediapipe/docs/youtube_8m.md
+++ b/mediapipe/docs/youtube_8m.md
@@ -1,9 +1,11 @@
-## Extracting Video Features for YouTube-8M Challenge
+# Feature Extraction and Model Inference for YouTube-8M Challenge
MediaPipe is a useful and general framework for media processing that can assist
with research, development, and deployment of ML models. This example focuses on
-model development by demonstrating how to prepare training data for the
-YouTube-8M Challenge.
+model development by demonstrating how to prepare training data and run model
+inference for the YouTube-8M Challenge.
+
+## Extracting Video Features for YouTube-8M Challenge
[Youtube-8M Challenge](https://www.kaggle.com/c/youtube8m-2019) is an annual
video classification challenge hosted by Google. Over the last two years, the
@@ -29,14 +31,14 @@ videos.
### Steps to run the YouTube-8M feature extraction graph
-1. Checkout the mediapipe repository
+1. Check out the mediapipe repository.
   ```bash
   git clone https://github.com/google/mediapipe.git
   cd mediapipe
   ```
-2. Download the PCA and model data
+2. Download the PCA and model data.
   ```bash
   mkdir /tmp/mediapipe
@@ -49,7 +51,7 @@ videos.
   tar -xvf /tmp/mediapipe/inception-2015-12-05.tgz
   ```
-3. Get the VGGish frozen graph
+3. Get the VGGish frozen graph.
   Note: To run step 3 and step 4, you must have Python 2.7 or 3.5+ installed
   with the TensorFlow 1.14+ package installed.
   ```bash
   python -m mediapipe.examples.desktop.youtube8m.generate_vggish_frozen_graph
   ```
-4. Generate MediaSequence metadata from the input video
+4. Generate MediaSequence metadata from the input video.
-   Note: the output file is /tmp/mediapipe/metadata.tfrecord
+   Note: the output file is /tmp/mediapipe/metadata.pb
   ```bash
+   # change clip_end_time_sec to match the length of your video.
   python -m mediapipe.examples.desktop.youtube8m.generate_input_sequence_example \
-     --path_to_input_video=/absolute/path/to/the/local/video/file
+     --path_to_input_video=/absolute/path/to/the/local/video/file \
+     --clip_end_time_sec=120
   ```
-5. Run the MediaPipe binary to extract the features
+5. Run the MediaPipe binary to extract the features.
   ```bash
   bazel build -c opt \
     --define MEDIAPIPE_DISABLE_GPU=1 --define no_aws_support=true \
     mediapipe/examples/desktop/youtube8m:extract_yt8m_features
-   ./bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features
+   GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \
     --calculator_graph_config_file=mediapipe/graphs/youtube8m/feature_extraction.pbtxt \
-     --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.tfrecord \
-     --output_side_packets=output_sequence_example=/tmp/mediapipe/output.tfrecord
+     --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.pb \
+     --output_side_packets=output_sequence_example=/tmp/mediapipe/features.pb
   ```
+
+6. [Optional] Read the features.pb in Python.
+
+   ```
+   import tensorflow as tf
+
+   sequence_example = open('/tmp/mediapipe/features.pb', 'rb').read()
+   print(tf.train.SequenceExample.FromString(sequence_example))
+   ```
+
+## Model Inference for YouTube-8M Challenge
+
+MediaPipe can help you do model inference for YouTube-8M Challenge with both
+local videos and the YouTube-8M dataset. To visualize
+[the graph for local videos](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt)
+and
+[the graph for the YouTube-8M dataset](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt),
+copy the text specification of the graph and paste it into
+[MediaPipe Visualizer](https://viz.mediapipe.dev/). We use the baseline model
+[(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view)
+in our example, but the model inference pipeline is highly customizable. You
+are welcome to add new calculators or use your own machine learning models to
+run inference for both local videos and the dataset.
+
+### Steps to run the YouTube-8M model inference graph with the web interface
+
+1. Copy the baseline model
+   [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view)
+   to local.
+
+   ```bash
+   curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz
+
+   tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe
+   ```
+
+2. Build the inference binary.
+
+   ```bash
+   bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
+     mediapipe/examples/desktop/youtube8m:model_inference
+   ```
+
+3. Run the Python web server.
+
+   Note: this requires the absl-py package (`pip install absl-py`).
+
+   ```bash
+   python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd`
+   ```
+
+   Navigate to localhost:8008 in a web browser.
+   [Here](https://drive.google.com/file/d/19GSvdAAuAlACpBhHOaqMWZ_9p8bLUYKh/view?usp=sharing)
+   is a demo video showing the steps to use this web application. Also please
+   read
+   [youtube8m/README.md](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/youtube8m/README.md)
+   if you prefer to run the underlying model_inference binary from the command
+   line.
+
+### Steps to run the YouTube-8M model inference graph with a local video
+
+1. Make sure you have the features.pb from the feature extraction pipeline.
+
+2. Copy the baseline model
+   [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view)
+   to local.
+
+   ```bash
+   curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz
+
+   tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe
+   ```
+
+3. Build and run the inference binary.
+
+   ```bash
+   bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
+     mediapipe/examples/desktop/youtube8m:model_inference
+
+   # segment_size is the length in seconds of each segment of frames.
+   # overlap is the number of seconds adjacent segments share.
+   GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \
+     --calculator_graph_config_file=mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt \
+     --input_side_packets=input_sequence_example_path=/tmp/mediapipe/features.pb,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4
+   ```
+
+4. View the annotated video.
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/AndroidManifest.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/AndroidManifest.xml
new file mode 100644
index 000000000..bece05179
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/AndroidManifest.xml
@@ -0,0 +1,33 @@
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD
new file mode 100644
index 000000000..61c2065dd
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/BUILD
@@ -0,0 +1,103 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//visibility:private"])
+
+cc_binary(
+    name = "libmediapipe_jni.so",
+    linkshared = 1,
+    linkstatic = 1,
+    deps = [
+        "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators",
+        "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
+    ],
+)
+
+cc_library(
+    name = "mediapipe_jni_lib",
+    srcs = [":libmediapipe_jni.so"],
+    alwayslink = 1,
+)
+
+# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be
+# easily incorporated into the app via, for example,
+# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb".
+genrule(
+    name = "binary_graph",
+    srcs = ["//mediapipe/graphs/hand_tracking:multi_hand_tracking_mobile_gpu_binary_graph"],
+    outs = ["multihandtrackinggpu.binarypb"],
+    cmd = "cp $< $@",
+)
+
+# To use the 3D model instead of the default 2D model, add "--define 3D=true" to the
+# bazel build command.
+config_setting( + name = "use_3d_model", + define_values = { + "3D": "true", + }, +) + +genrule( + name = "model", + srcs = select({ + "//conditions:default": ["//mediapipe/models:hand_landmark.tflite"], + ":use_3d_model": ["//mediapipe/models:hand_landmark_3d.tflite"], + }), + outs = ["hand_landmark.tflite"], + cmd = "cp $< $@", +) + +android_library( + name = "mediapipe_lib", + srcs = glob(["*.java"]), + assets = [ + ":binary_graph", + ":model", + "//mediapipe/models:palm_detection.tflite", + "//mediapipe/models:palm_detection_labelmap.txt", + ], + assets_dir = "", + manifest = "AndroidManifest.xml", + resource_files = glob(["res/**"]), + deps = [ + ":mediapipe_jni_lib", + "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper", + "//mediapipe/java/com/google/mediapipe/components:android_components", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + "//mediapipe/java/com/google/mediapipe/glutil", + "//third_party:androidx_appcompat", + "//third_party:androidx_constraint_layout", + "//third_party:androidx_legacy_support_v4", + "//third_party:androidx_material", + "//third_party:androidx_recyclerview", + "//third_party:opencv", + "@androidx_concurrent_futures//jar", + "@androidx_lifecycle//jar", + "@com_google_code_findbugs//jar", + "@com_google_guava_android//jar", + ], +) + +android_binary( + name = "multihandtrackinggpu", + manifest = "AndroidManifest.xml", + manifest_values = {"applicationId": "com.google.mediapipe.apps.multihandtrackinggpu"}, + multidex = "native", + deps = [ + ":mediapipe_lib", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java new file mode 100644 index 000000000..cef138546 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/MainActivity.java @@ -0,0 +1,167 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.multihandtrackinggpu; + +import android.graphics.SurfaceTexture; +import android.os.Bundle; +import androidx.appcompat.app.AppCompatActivity; +import android.util.Size; +import android.view.SurfaceHolder; +import android.view.SurfaceView; +import android.view.View; +import android.view.ViewGroup; +import com.google.mediapipe.components.CameraHelper; +import com.google.mediapipe.components.CameraXPreviewHelper; +import com.google.mediapipe.components.ExternalTextureConverter; +import com.google.mediapipe.components.FrameProcessor; +import com.google.mediapipe.components.PermissionHelper; +import com.google.mediapipe.framework.AndroidAssetUtil; +import com.google.mediapipe.glutil.EglManager; + +/** Main activity of MediaPipe example apps. 
*/ +public class MainActivity extends AppCompatActivity { + private static final String TAG = "MainActivity"; + + private static final String BINARY_GRAPH_NAME = "multihandtrackinggpu.binarypb"; + private static final String INPUT_VIDEO_STREAM_NAME = "input_video"; + private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video"; + private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT; + + // Flips the camera-preview frames vertically before sending them into FrameProcessor to be + // processed in a MediaPipe graph, and flips the processed frames back when they are displayed. + // This is needed because OpenGL represents images assuming the image origin is at the bottom-left + // corner, whereas MediaPipe in general assumes the image origin is at top-left. + private static final boolean FLIP_FRAMES_VERTICALLY = true; + + static { + // Load all native libraries needed by the app. + System.loadLibrary("mediapipe_jni"); + System.loadLibrary("opencv_java4"); + } + + // {@link SurfaceTexture} where the camera-preview frames can be accessed. + private SurfaceTexture previewFrameTexture; + // {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph. + private SurfaceView previewDisplayView; + + // Creates and manages an {@link EGLContext}. + private EglManager eglManager; + // Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed + // frames onto a {@link Surface}. + private FrameProcessor processor; + // Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be + // consumed by {@link FrameProcessor} and the underlying MediaPipe graph. + private ExternalTextureConverter converter; + + // Handles camera access via the {@link CameraX} Jetpack support library. + private CameraXPreviewHelper cameraHelper; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + + previewDisplayView = new SurfaceView(this); + setupPreviewDisplayView(); + + // Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g., + // binary graphs. 
+ AndroidAssetUtil.initializeNativeAssetManager(this); + + eglManager = new EglManager(null); + processor = + new FrameProcessor( + this, + eglManager.getNativeContext(), + BINARY_GRAPH_NAME, + INPUT_VIDEO_STREAM_NAME, + OUTPUT_VIDEO_STREAM_NAME); + processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY); + + PermissionHelper.checkAndRequestCameraPermissions(this); + } + + @Override + protected void onResume() { + super.onResume(); + converter = new ExternalTextureConverter(eglManager.getContext()); + converter.setFlipY(FLIP_FRAMES_VERTICALLY); + converter.setConsumer(processor); + if (PermissionHelper.cameraPermissionsGranted(this)) { + startCamera(); + } + } + + @Override + protected void onPause() { + super.onPause(); + converter.close(); + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults); + PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults); + } + + private void setupPreviewDisplayView() { + previewDisplayView.setVisibility(View.GONE); + ViewGroup viewGroup = findViewById(R.id.preview_display_layout); + viewGroup.addView(previewDisplayView); + + previewDisplayView + .getHolder() + .addCallback( + new SurfaceHolder.Callback() { + @Override + public void surfaceCreated(SurfaceHolder holder) { + processor.getVideoSurfaceOutput().setSurface(holder.getSurface()); + } + + @Override + public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) { + // (Re-)Compute the ideal size of the camera-preview display (the area that the + // camera-preview frames get rendered onto, potentially with scaling and rotation) + // based on the size of the SurfaceView that contains the display. + Size viewSize = new Size(width, height); + Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize); + + // Connect the converter to the camera-preview frames as its input (via + // previewFrameTexture), and configure the output width and height as the computed + // display size. + converter.setSurfaceTextureAndAttachToGLContext( + previewFrameTexture, displaySize.getWidth(), displaySize.getHeight()); + } + + @Override + public void surfaceDestroyed(SurfaceHolder holder) { + processor.getVideoSurfaceOutput().setSurface(null); + } + }); + } + + private void startCamera() { + cameraHelper = new CameraXPreviewHelper(); + cameraHelper.setOnCameraStartedListener( + surfaceTexture -> { + previewFrameTexture = surfaceTexture; + // Make the display view visible to start showing the preview. This triggers the + // SurfaceHolder.Callback added to (the holder of) previewDisplayView. 
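+          // (surfaceCreated in setupPreviewDisplayView then hands the Surface to the graph's
+          // video output, and surfaceChanged sizes the converter output to match.)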
+          previewDisplayView.setVisibility(View.VISIBLE);
+        });
+    cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null);
+  }
+}
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/layout/activity_main.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/layout/activity_main.xml
new file mode 100644
index 000000000..c19d7e628
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/layout/activity_main.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- [Markup stripped in extraction; reconstructed from the ids the Java code and
+     strings.xml reference.] -->
+<androidx.constraintlayout.widget.ConstraintLayout
+    xmlns:android="http://schemas.android.com/apk/res/android"
+    xmlns:app="http://schemas.android.com/apk/res-auto"
+    xmlns:tools="http://schemas.android.com/tools"
+    android:layout_width="match_parent"
+    android:layout_height="match_parent"
+    tools:context=".MainActivity">
+  <FrameLayout
+      android:id="@+id/preview_display_layout"
+      android:layout_width="match_parent"
+      android:layout_height="match_parent"
+      app:layout_constraintBottom_toBottomOf="parent">
+    <TextView
+        android:id="@+id/no_camera_access_view"
+        android:layout_width="match_parent"
+        android:layout_height="match_parent"
+        android:gravity="center"
+        android:text="@string/no_camera_access" />
+  </FrameLayout>
+</androidx.constraintlayout.widget.ConstraintLayout>
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/colors.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/colors.xml
new file mode 100644
index 000000000..69b22338c
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/colors.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+  <color name="colorPrimary">#008577</color>
+  <color name="colorPrimaryDark">#00574B</color>
+  <color name="colorAccent">#D81B60</color>
+</resources>
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/strings.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/strings.xml
new file mode 100644
index 000000000..b03b7c663
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/strings.xml
@@ -0,0 +1,4 @@
+<resources>
+  <string name="app_name" translatable="false">Multi-Hand Tracking GPU</string>
+  <string name="no_camera_access" translatable="false">Please grant camera permissions.</string>
+</resources>
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/styles.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/styles.xml
new file mode 100644
index 000000000..5885930df
--- /dev/null
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/multihandtrackinggpu/res/values/styles.xml
@@ -0,0 +1,11 @@
+<!-- [Markup stripped in extraction; this is the stock AppCompat theme template,
+     wired to the colors defined in colors.xml.] -->
+<resources>
+  <style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
+    <item name="colorPrimary">@color/colorPrimary</item>
+    <item name="colorPrimaryDark">@color/colorPrimaryDark</item>
+    <item name="colorAccent">@color/colorAccent</item>
+  </style>
+</resources>
diff --git a/mediapipe/examples/desktop/BUILD b/mediapipe/examples/desktop/BUILD
index 3a35d724b..f579c49e5 100644
--- a/mediapipe/examples/desktop/BUILD
+++ b/mediapipe/examples/desktop/BUILD
@@ -27,7 +27,9 @@ cc_library(
         "//mediapipe/framework/port:file_helpers",
         "//mediapipe/framework/port:map_util",
         "//mediapipe/framework/port:parse_text_proto",
+        "//mediapipe/framework/port:ret_check",
         "//mediapipe/framework/port:status",
+        "//mediapipe/framework/port:statusor",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py b/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py
index 83500a6f4..1d7f5b69e 100644
--- a/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py
+++ b/mediapipe/examples/desktop/media_sequence/kinetics_dataset.py
@@ -68,13 +68,15 @@ import os
 import random
 import subprocess
 import sys
+import tarfile
 import tempfile
 import urllib
-import zipfile
+
 from absl import app
 from absl import flags
 from absl import logging
 import tensorflow as tf
+
 from mediapipe.util.sequence import media_sequence as ms
 
 CITATION = r"""@article{kay2017kinetics,
@@ -84,21 +86,28 @@ CITATION = r"""@article{kay2017kinetics,
   year={2017},
   url = {https://deepmind.com/research/open-source/kinetics},
 }"""
-ANNOTATION_URL = "https://storage.googleapis.com/deepmind-media/research/Kinetics_700.zip"
+ANNOTATION_URL = "https://storage.googleapis.com/deepmind-media/Datasets/kinetics700.tar.gz"
 SECONDS_TO_MICROSECONDS = 1000000
 GRAPHS = ["tvl1_flow_and_rgb_from_file.pbtxt"]
 FILEPATTERN = "kinetics_700_%s_25fps_rgb_flow"
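+# For the "custom" split below, point its "csv" entry at annotations in the same
+# format as the Kinetics CSVs, e.g. (illustrative row):
+#   label,youtube_id,time_start,time_end,split
+#   playing guitar,dQw4w9WgXcQ,0,10,custom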
SPLITS = { "train": { "shards": 1000, - "examples": 545317}, - "val": {"shards": 100, - "examples": 35000}, - "test": {"shards": 100, - "examples": 70000}, - "custom": {"csv": None, # Add a CSV for your own data here. - "shards": 1, # Change this number to increase sharding. - "examples": -1}, # Negative 1 allows any number of examples. + "examples": 541632 + }, + "validate": { + "shards": 100, + "examples": 34727 + }, + "test": { + "shards": 100, + "examples": 69347 + }, + "custom": { + "csv": None, # Add a CSV for your own data here. + "shards": 1, # Change this number to increase sharding. + "examples": -1 + }, # Negative 1 allows any number of examples. } NUM_CLASSES = 700 @@ -312,18 +321,16 @@ class Kinetics(object): logging.info("Downloading annotations.") paths = {} if download_labels_for_map: - zip_path = os.path.join(self.path_to_data, ANNOTATION_URL.split("/")[-1]) - if not tf.io.gfile.exists(zip_path): - urlretrieve(ANNOTATION_URL, zip_path) - with zipfile.ZipFile(zip_path) as annotations_zip: - annotations_zip.extractall(self.path_to_data) - for split in ["train", "test", "val"]: - zip_path = os.path.join(self.path_to_data, - "kinetics_700_%s.zip" % split) - csv_path = zip_path.replace(".zip", ".csv") + tar_path = os.path.join(self.path_to_data, ANNOTATION_URL.split("/")[-1]) + if not tf.io.gfile.exists(tar_path): + urlretrieve(ANNOTATION_URL, tar_path) + with tarfile.open(tar_path) as annotations_tar: + annotations_tar.extractall(self.path_to_data) + for split in ["train", "test", "validate"]: + csv_path = os.path.join(self.path_to_data, "kinetics700/%s.csv" % split) if not tf.io.gfile.exists(csv_path): - with zipfile.ZipFile(zip_path) as annotations_zip: - annotations_zip.extractall(self.path_to_data) + with tarfile.open(tar_path) as annotations_tar: + annotations_tar.extractall(self.path_to_data) paths[split] = csv_path for split, contents in SPLITS.items(): if "csv" in contents and contents["csv"]: diff --git a/mediapipe/examples/desktop/multi_hand_tracking/BUILD b/mediapipe/examples/desktop/multi_hand_tracking/BUILD new file mode 100644 index 000000000..f83133545 --- /dev/null +++ b/mediapipe/examples/desktop/multi_hand_tracking/BUILD @@ -0,0 +1,42 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
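+
+# Example invocation (a sketch; the flags mirror the other desktop examples in
+# this repo rather than anything documented for these specific targets):
+#   bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \
+#     mediapipe/examples/desktop/multi_hand_tracking:multi_hand_tracking_cpu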
+ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//mediapipe/examples:__subpackages__"]) + +cc_binary( + name = "multi_hand_tracking_tflite", + deps = [ + "//mediapipe/examples/desktop:simple_run_graph_main", + "//mediapipe/graphs/hand_tracking:multi_hand_desktop_tflite_calculators", + ], +) + +cc_binary( + name = "multi_hand_tracking_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/hand_tracking:multi_hand_desktop_tflite_calculators", + ], +) + +# Linux only +cc_binary( + name = "multi_hand_tracking_gpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_gpu", + "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators", + ], +) diff --git a/mediapipe/examples/desktop/object_detection/BUILD b/mediapipe/examples/desktop/object_detection/BUILD index ee6832069..66b6d5698 100644 --- a/mediapipe/examples/desktop/object_detection/BUILD +++ b/mediapipe/examples/desktop/object_detection/BUILD @@ -16,51 +16,12 @@ licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//mediapipe/examples:__subpackages__"]) -cc_library( - name = "object_detection_tensorflow_deps", - deps = [ - "@org_tensorflow//tensorflow/c/kernels:bitcast_op", - "@org_tensorflow//tensorflow/core:direct_session", - "@org_tensorflow//tensorflow/core/kernels:argmax_op", - "@org_tensorflow//tensorflow/core/kernels:bias_op", - "@org_tensorflow//tensorflow/core/kernels:cast_op", - "@org_tensorflow//tensorflow/core/kernels:concat_op", - "@org_tensorflow//tensorflow/core/kernels:constant_op", - "@org_tensorflow//tensorflow/core/kernels:control_flow_ops", - "@org_tensorflow//tensorflow/core/kernels:conv_ops", - "@org_tensorflow//tensorflow/core/kernels:cwise_op", - "@org_tensorflow//tensorflow/core/kernels:depthwise_conv_op", - "@org_tensorflow//tensorflow/core/kernels:fused_batch_norm_op", - "@org_tensorflow//tensorflow/core/kernels:gather_op", - "@org_tensorflow//tensorflow/core/kernels:identity_op", - "@org_tensorflow//tensorflow/core/kernels:logging_ops", - "@org_tensorflow//tensorflow/core/kernels:matmul_op", - "@org_tensorflow//tensorflow/core/kernels:non_max_suppression_op", - "@org_tensorflow//tensorflow/core/kernels:pack_op", - "@org_tensorflow//tensorflow/core/kernels:reduction_ops", - "@org_tensorflow//tensorflow/core/kernels:relu_op", - "@org_tensorflow//tensorflow/core/kernels:reshape_op", - "@org_tensorflow//tensorflow/core/kernels:resize_bilinear_op", - "@org_tensorflow//tensorflow/core/kernels:sequence_ops", - "@org_tensorflow//tensorflow/core/kernels:shape_ops", - "@org_tensorflow//tensorflow/core/kernels:slice_op", - "@org_tensorflow//tensorflow/core/kernels:split_op", - "@org_tensorflow//tensorflow/core/kernels:tensor_array_ops", - "@org_tensorflow//tensorflow/core/kernels:tile_ops", - "@org_tensorflow//tensorflow/core/kernels:topk_op", - "@org_tensorflow//tensorflow/core/kernels:transpose_op", - "@org_tensorflow//tensorflow/core/kernels:unpack_op", - "@org_tensorflow//tensorflow/core/kernels/data:tensor_dataset_op", - ], - alwayslink = 1, -) - cc_binary( name = "object_detection_tensorflow", deps = [ - ":object_detection_tensorflow_deps", "//mediapipe/examples/desktop:simple_run_graph_main", "//mediapipe/graphs/object_detection:desktop_tensorflow_calculators", + "@org_tensorflow//tensorflow/core:all_kernels", "@org_tensorflow//tensorflow/core:direct_session", ], ) diff --git a/mediapipe/examples/desktop/simple_run_graph_main.cc b/mediapipe/examples/desktop/simple_run_graph_main.cc index c912837f8..ee54bf231 100644 --- 
a/mediapipe/examples/desktop/simple_run_graph_main.cc
+++ b/mediapipe/examples/desktop/simple_run_graph_main.cc
@@ -13,14 +13,23 @@
 // limitations under the License.
 //
 // A simple main function to run a MediaPipe graph.
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include "absl/strings/str_cat.h"
 #include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
 #include "mediapipe/framework/calculator_framework.h"
 #include "mediapipe/framework/port/commandlineflags.h"
 #include "mediapipe/framework/port/file_helpers.h"
 #include "mediapipe/framework/port/map_util.h"
 #include "mediapipe/framework/port/parse_text_proto.h"
+#include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/statusor.h"
 
 DEFINE_string(
     calculator_graph_config_file, "",
@@ -31,14 +40,72 @@ DEFINE_string(input_side_packets, "",
              "for the CalculatorGraph. All values will be treated as the "
              "string type even if they represent doubles, floats, etc.");
 
+// Local file output flags.
+// Output stream
+DEFINE_string(output_stream, "",
+              "The output stream to output to the local file in csv format.");
+DEFINE_string(output_stream_file, "",
+              "The name of the local file to output all packets sent to "
+              "the stream specified with --output_stream. ");
+DEFINE_bool(strip_timestamps, false,
+            "If true, only the packet contents (without timestamps) will be "
+            "written into the local file.");
+// Output side packets
+DEFINE_string(output_side_packets, "",
+              "A CSV of output side packets to output to local file.");
+DEFINE_string(output_side_packets_file, "",
+              "The name of the local file to output all side packets specified "
+              "with --output_side_packets. ");
+
+::mediapipe::Status OutputStreamToLocalFile(
+    ::mediapipe::OutputStreamPoller& poller) {
+  std::ofstream file;
+  file.open(FLAGS_output_stream_file);
+  ::mediapipe::Packet packet;
+  while (poller.Next(&packet)) {
+    std::string output_data;
+    if (!FLAGS_strip_timestamps) {
+      absl::StrAppend(&output_data, packet.Timestamp().Value(), ",");
+    }
+    absl::StrAppend(&output_data, packet.Get<std::string>(), "\n");
+    file << output_data;
+  }
+  file.close();
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status OutputSidePacketsToLocalFile(
+    ::mediapipe::CalculatorGraph& graph) {
+  if (!FLAGS_output_side_packets.empty() &&
+      !FLAGS_output_side_packets_file.empty()) {
+    std::ofstream file;
+    file.open(FLAGS_output_side_packets_file);
+    std::vector<std::string> side_packet_names =
+        absl::StrSplit(FLAGS_output_side_packets, ',');
+    for (const std::string& side_packet_name : side_packet_names) {
+      ASSIGN_OR_RETURN(auto status_or_packet,
+                       graph.GetOutputSidePacket(side_packet_name));
+      file << absl::StrCat(side_packet_name, ":",
+                           status_or_packet.Get<std::string>(), "\n");
+    }
+    file.close();
+  } else {
+    RET_CHECK(FLAGS_output_side_packets.empty() &&
+              FLAGS_output_side_packets_file.empty())
+        << "--output_side_packets and --output_side_packets_file should be "
+           "specified in pair.";
+  }
+  return ::mediapipe::OkStatus();
+}
+
 ::mediapipe::Status RunMPPGraph() {
   std::string calculator_graph_config_contents;
-  MP_RETURN_IF_ERROR(mediapipe::file::GetContents(
+  MP_RETURN_IF_ERROR(::mediapipe::file::GetContents(
       FLAGS_calculator_graph_config_file, &calculator_graph_config_contents));
   LOG(INFO) << "Get calculator graph config contents: "
             << calculator_graph_config_contents;
-  mediapipe::CalculatorGraphConfig config =
-      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
+  ::mediapipe::CalculatorGraphConfig config =
+      ::mediapipe::ParseTextProtoOrDie<::mediapipe::CalculatorGraphConfig>(
          calculator_graph_config_contents);
   std::map<std::string, ::mediapipe::Packet> input_side_packets;
   std::vector<std::string> kv_pairs =
@@ -51,10 +118,23 @@ DEFINE_string(input_side_packets, "",
         ::mediapipe::MakePacket<std::string>(name_and_value[1]);
   }
   LOG(INFO) << "Initialize the calculator graph.";
-  mediapipe::CalculatorGraph graph;
+  ::mediapipe::CalculatorGraph graph;
   MP_RETURN_IF_ERROR(graph.Initialize(config, input_side_packets));
-  LOG(INFO) << "Start running the calculator graph.";
-  return graph.Run();
+  if (!FLAGS_output_stream.empty() && !FLAGS_output_stream_file.empty()) {
+    ASSIGN_OR_RETURN(auto poller,
+                     graph.AddOutputStreamPoller(FLAGS_output_stream));
+    LOG(INFO) << "Start running the calculator graph.";
+    MP_RETURN_IF_ERROR(graph.StartRun({}));
+    MP_RETURN_IF_ERROR(OutputStreamToLocalFile(poller));
+  } else {
+    RET_CHECK(FLAGS_output_stream.empty() && FLAGS_output_stream_file.empty())
+        << "--output_stream and --output_stream_file should be specified in "
+           "pair.";
+    LOG(INFO) << "Start running the calculator graph.";
+    MP_RETURN_IF_ERROR(graph.StartRun({}));
+  }
+  MP_RETURN_IF_ERROR(graph.WaitUntilDone());
+  return OutputSidePacketsToLocalFile(graph);
 }
 
 int main(int argc, char** argv) {
diff --git a/mediapipe/examples/desktop/youtube8m/BUILD b/mediapipe/examples/desktop/youtube8m/BUILD
index c25c5f50d..16b868bdc 100644
--- a/mediapipe/examples/desktop/youtube8m/BUILD
+++ b/mediapipe/examples/desktop/youtube8m/BUILD
@@ -33,3 +33,14 @@ cc_binary(
         "@org_tensorflow//tensorflow/core:direct_session",
     ],
 )
+
+cc_binary(
+    name = "model_inference",
+    deps = [
+        "//mediapipe/examples/desktop:simple_run_graph_main",
+        "//mediapipe/graphs/youtube8m:yt8m_inference_calculators_deps",
+        # TODO: Figure out the minimum set of the kernels needed by this example.
+        "@org_tensorflow//tensorflow/core:all_kernels",
+        "@org_tensorflow//tensorflow/core:direct_session",
+    ],
+)
diff --git a/mediapipe/examples/desktop/youtube8m/README.md b/mediapipe/examples/desktop/youtube8m/README.md
index 2989a7927..6668c0612 100644
--- a/mediapipe/examples/desktop/youtube8m/README.md
+++ b/mediapipe/examples/desktop/youtube8m/README.md
@@ -1,13 +1,13 @@
 ### Steps to run the YouTube-8M feature extraction graph
 
-1. Checkout the mediapipe repository
+1. Checkout the mediapipe repository.
 
    ```bash
    git clone https://github.com/google/mediapipe.git
    cd mediapipe
   ```
 
-2. Download the PCA and model data
+2. Download the PCA and model data.
 
   ```bash
   mkdir /tmp/mediapipe
@@ -20,7 +20,7 @@
   tar -xvf /tmp/mediapipe/inception-2015-12-05.tgz
   ```
 
-3. Get the VGGish frozen graph
+3. Get the VGGish frozen graph.
 
   Note: To run step 3 and step 4, you must have Python 2.7 or 3.5+ installed
   with the TensorFlow 1.14+ package installed.
@@ -31,26 +31,123 @@
   python -m mediapipe.examples.desktop.youtube8m.generate_vggish_frozen_graph
   ```
 
-4. Generate a MediaSequence metadata from the input video
+4. Generate a MediaSequence metadata from the input video.
 
-   Note: the output file is /tmp/mediapipe/metadata.tfrecord
+   Note: the output file is /tmp/mediapipe/metadata.pb
 
   ```bash
+  # change clip_end_time_sec to match the length of your video.
   python -m mediapipe.examples.desktop.youtube8m.generate_input_sequence_example \
     --path_to_input_video=/absolute/path/to/the/local/video/file \
-    --clip_start_time_sec=0 \
-    --clip_end_time_sec=10
+    --clip_end_time_sec=120
   ```
 
-5. Run the MediaPipe binary to extract the features
+5. Run the MediaPipe binary to extract the features.
```bash bazel build -c opt \ --define MEDIAPIPE_DISABLE_GPU=1 --define no_aws_support=true \ mediapipe/examples/desktop/youtube8m:extract_yt8m_features - ./bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \ + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \ --calculator_graph_config_file=mediapipe/graphs/youtube8m/feature_extraction.pbtxt \ - --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.tfrecord \ - --output_side_packets=output_sequence_example=/tmp/mediapipe/output.tfrecord + --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.pb \ + --output_side_packets=output_sequence_example=/tmp/mediapipe/features.pb ``` + +6. [Optional] Read the features.pb in Python. + + ``` + import tensorflow as tf + + sequence_example = open('/tmp/mediapipe/features.pb', 'rb').read() + print(tf.train.SequenceExample.FromString(sequence_example)) + ``` + +### Steps to run the YouTube-8M inference graph with the YT8M dataset + +1. Download the YT8M dataset + + For example, download one shard of the training data: + + ```bash + curl http://us.data.yt8m.org/2/frame/train/trainpj.tfrecord --output /tmp/mediapipe/trainpj.tfrecord + ``` + +2. Copy the baseline model [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) to local. + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +3. Build and run the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \ + --calculator_graph_config_file=mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt \ + --input_side_packets=tfrecord_path=/tmp/mediapipe/trainpj.tfrecord,record_index=0,desired_segment_size=5 \ + --output_stream=annotation_summary \ + --output_stream_file=/tmp/summary \ + --output_side_packets=yt8m_id \ + --output_side_packets_file=/tmp/yt8m_id + ``` + +### Steps to run the YouTube-8M model inference graph with Web Interface + +1. Copy the baseline model [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) to local. + + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +2. Build the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + ``` + +3. Run the python web server. + + Note: pip install absl-py + + ```bash + python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd` + ``` + + Navigate to localhost:8008 in a web browser. + +### Steps to run the YouTube-8M model inference graph with a local video + +1. Make sure you have the features.pb from the feature extraction pipeline. + +2. Copy the baseline model [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) to local. + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +3. Build and run the inference binary. 
+
+   ```bash
+   bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \
+     mediapipe/examples/desktop/youtube8m:model_inference
+
+   # segment_size is the length, in seconds, of each window of frames.
+   # overlap is the number of seconds adjacent segments share.
+   GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \
+     --calculator_graph_config_file=mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt \
+     --input_side_packets=input_sequence_example_path=/tmp/mediapipe/features.pb,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4
+   ```
+
+4. View the annotated video.
diff --git a/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py b/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py
index 7438a5134..3a6b98181 100644
--- a/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py
+++ b/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py
@@ -53,7 +53,7 @@ def main(argv):
       flags.FLAGS.clip_start_time_sec * SECONDS_TO_MICROSECONDS, metadata)
   ms.set_clip_end_timestamp(
       flags.FLAGS.clip_end_time_sec * SECONDS_TO_MICROSECONDS, metadata)
-  with open('/tmp/mediapipe/metadata.tfrecord', 'wb') as writer:
+  with open('/tmp/mediapipe/metadata.pb', 'wb') as writer:
     writer.write(metadata.SerializeToString())
 
diff --git a/mediapipe/examples/desktop/youtube8m/viewer/server.py b/mediapipe/examples/desktop/youtube8m/viewer/server.py
new file mode 100644
index 000000000..febaad53d
--- /dev/null
+++ b/mediapipe/examples/desktop/youtube8m/viewer/server.py
@@ -0,0 +1,262 @@
+"""Server for YouTube8M Model Inference Demo.
+
+Serves up both the static files for the website and provides a service that
+fetches the video id and timestamp-based labels for a video analyzed in a
+tfrecord file.
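+
+Typical usage, per the README (run from the MediaPipe checkout root):
+
+  python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd`
+
+then browse to http://localhost:8008.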
+ +""" +from __future__ import print_function +import json +import os +import re +import socket +import subprocess +import sys + +from absl import app +from absl import flags +import http.client +import http.server +from six.moves.urllib import parse + +FLAGS = flags.FLAGS +flags.DEFINE_bool("show_label_at_center", False, + "Show labels at the center of the segment.") +flags.DEFINE_integer("port", 8008, "Port that the API is served over.") +flags.DEFINE_string("tmp_dir", "/tmp/mediapipe", + "Temporary asset storage location.") +flags.DEFINE_string("root", "", "MediaPipe root directory.") +# binary, pbtxt, label_map paths are relative to 'root' path +flags.DEFINE_string( + "binary", + "bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference", + "Inference binary location.") +flags.DEFINE_string( + "pbtxt", + "mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt", + "Default pbtxt graph file.") +flags.DEFINE_string("label_map", "mediapipe/graphs/youtube8m/label_map.txt", + "Default label map text file.") + + +class HTTPServerV6(http.server.HTTPServer): + address_family = socket.AF_INET6 + + +class Youtube8MRequestHandler(http.server.SimpleHTTPRequestHandler): + """Static file server with /healthz support.""" + + def do_GET(self): + if self.path.startswith("/healthz"): + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.send_header("Content-length", 2) + self.end_headers() + self.wfile.write("ok") + if self.path.startswith("/video"): + parsed_params = parse.urlparse(self.path) + url_params = parse.parse_qs(parsed_params.query) + + tfrecord_path = "" + segment_size = 5 + + print(url_params) + if "file" in url_params: + tfrecord_path = url_params["file"][0] + if "segments" in url_params: + segment_size = int(url_params["segments"][0]) + + self.fetch(tfrecord_path, segment_size) + + else: + if self.path == "/": + self.path = "/index.html" + # Default to serve up a local file + self.path = "/static" + self.path + http.server.SimpleHTTPRequestHandler.do_GET(self) + + def report_error(self, msg): + """Simplifies sending out a string as a 500 http response.""" + self.send_response(500) + self.send_header("Content-type", "text/plain") + self.end_headers() + if sys.version_info[0] < 3: + self.wfile.write(str(msg).encode("utf-8")) + else: + self.wfile.write(bytes(msg, "utf-8")) + + def report_missing_files(self, files): + """Sends out 500 response with missing files.""" + accumulate = "" + for file_path in files: + if not os.path.exists(file_path): + accumulate = "%s '%s'" % (accumulate, file_path) + + if accumulate: + self.report_error("Could not find:%s" % accumulate) + return True + + return False + + def fetch(self, path, segment_size): + """Returns the video id and labels for a tfrecord at a provided index.""" + + print("Received request. File=", path, "Segment Size =", segment_size) + + if (self.report_missing_files([ + "%s/%s" % (FLAGS.root, FLAGS.pbtxt), + "%s/%s" % (FLAGS.root, FLAGS.binary), + "%s/%s" % (FLAGS.root, FLAGS.label_map) + ])): + return + + # Parse the youtube video id off the end of the link or as a standalone id. 
+ filename_match = re.match( + "(?:.*youtube.*v=)?([a-zA-Z-0-9_]{2})([a-zA-Z-0-9_]+)", path) + tfrecord_url = filename_match.expand(r"data.yt8m.org/2/j/r/\1/\1\2.js") + + print("Trying to get tfrecord via", tfrecord_url) + + connection = http.client.HTTPConnection("data.yt8m.org") + connection.request("GET", tfrecord_url) + response = connection.getresponse() + + response_object = json.loads(response.read()) + filename = response_object["filename_raw"] + index = response_object["index"] + + print("TFRecord discovered: ", filename, ", index", index) + + output_file = r"%s/%s" % (FLAGS.tmp_dir, filename) + tfrecord_url = r"http://us.data.yt8m.org/2/frame/train/%s" % filename + + connection = http.client.HTTPConnection("us.data.yt8m.org") + connection.request("HEAD", + filename_match.expand(r"/2/frame/train/%s" % filename)) + response = connection.getresponse() + if response.getheader("Content-Type") != "application/octet-stream": + self.report_error("Filename '%s' is invalid." % path) + + print(output_file, "exists on yt8m.org. Did we fetch this before?") + + if not os.path.exists(output_file): + print(output_file, "doesn't exist locally, download it now.") + return_code = subprocess.call( + ["curl", "--output", output_file, tfrecord_url], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if return_code: + self.report_error("Could not retrieve contents from %s" % tfrecord_url) + return + else: + print(output_file, "exist locally, reuse it.") + + print("Run the graph...") + process = subprocess.Popen([ + "%s/%s" % (FLAGS.root, FLAGS.binary), + "--calculator_graph_config_file=%s/%s" % (FLAGS.root, FLAGS.pbtxt), + "--input_side_packets=tfrecord_path=%s" % output_file + + ",record_index=%d" % index + ",desired_segment_size=%d" % segment_size, + "--output_stream=annotation_summary", + "--output_stream_file=%s/labels" % FLAGS.tmp_dir, + "--output_side_packets=yt8m_id", + "--output_side_packets_file=%s/yt8m_id" % FLAGS.tmp_dir + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout_str, stderr_str = process.communicate() + process.wait() + + if stderr_str and "success" not in str(stderr_str).lower(): + self.report_error("Error executing server binary: \n%s" % stderr_str) + return + + f = open("%s/yt8m_id" % FLAGS.tmp_dir, "r") + contents = f.read() + print("yt8m_id is", contents[-5:-1]) + + curl_arg = "data.yt8m.org/2/j/i/%s/%s.js" % (contents[-5:-3], + contents[-5:-1]) + print("Grab labels from", curl_arg) + process = subprocess.Popen(["curl", curl_arg], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout = process.communicate() + process.wait() + + stdout_str = stdout[0].decode("utf-8") + + match = re.match(""".+"([^"]+)"[^"]+""", stdout_str) + final_results = { + "video_id": match.group(1), + "link": "https://www.youtube.com/watch?v=%s" % match.group(1), + "entries": [] + } + f = open("%s/labels" % FLAGS.tmp_dir, "r") + lines = f.readlines() + show_at_center = FLAGS.show_label_at_center + + print("%s/labels" % FLAGS.tmp_dir, "holds", len(lines), "entries") + for line in lines: + entry = {"labels": []} + final_results["entries"].append(entry) + first = True + for column in line.split(","): + if first: + subtract = segment_size / 2.0 if show_at_center else 0.0 + entry["time"] = float(int(column)) / 1000000.0 - subtract + first = False + else: + label_score = re.match("(.+):([0-9.]+).*", column) + if label_score: + score = float(label_score.group(2)) + entry["labels"].append({ + "label": label_score.group(1), + "score": score + }) + else: + print("empty score") + + 
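+    # final_results now has the shape (values illustrative):
+    #   {"video_id": "huGVGe3Afng",
+    #    "link": "https://www.youtube.com/watch?v=huGVGe3Afng",
+    #    "entries": [{"time": 5.0,
+    #                 "labels": [{"label": "Guitar", "score": 0.87}, ...]}, ...]}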
response_json = json.dumps(final_results, indent=2, separators=(",", ": "))
+    self.send_response(200)
+    self.send_header("Content-type", "application/json")
+    self.end_headers()
+    if sys.version_info[0] < 3:
+      self.wfile.write(str(response_json).encode("utf-8"))
+    else:
+      self.wfile.write(bytes(response_json, "utf-8"))
+
+
+def update_pbtxt():
+  """Update graph.pbtxt to use full path to label_map.txt."""
+  edited_line = ""
+  lines = []
+  with open("%s/%s" % (FLAGS.root, FLAGS.pbtxt), "r") as f:
+    lines = f.readlines()
+  for line in lines:
+    if "label_map_path" in line:
+      kv = line.split(":")
+      edited_line = kv[0] + (": \"%s/%s\"\n" % (FLAGS.root, FLAGS.label_map))
+  with open("%s/%s" % (FLAGS.root, FLAGS.pbtxt), "w") as f:
+    for line in lines:
+      if "label_map_path" in line:
+        f.write(edited_line)
+      else:
+        f.write(line)
+
+
+def main(unused_args):
+  dname = os.path.dirname(os.path.abspath(__file__))
+  os.chdir(dname)
+  if not FLAGS.root:
+    print("Must specify MediaPipe root directory: --root `pwd`")
+    return
+  update_pbtxt()
+  port = FLAGS.port
+  print("Listening on port %s" % port)  # pylint: disable=superfluous-parens
+  server = HTTPServerV6(("::", int(port)), Youtube8MRequestHandler)
+  server.serve_forever()
+
+
+if __name__ == "__main__":
+  app.run(main)
diff --git a/mediapipe/examples/desktop/youtube8m/viewer/static/index.html b/mediapipe/examples/desktop/youtube8m/viewer/static/index.html
new file mode 100644
index 000000000..400aa0af0
--- /dev/null
+++ b/mediapipe/examples/desktop/youtube8m/viewer/static/index.html
@@ -0,0 +1,96 @@
+<!-- [The 96 lines of markup were stripped to bare text in extraction; only the
+     following is recoverable. The page, titled "MediaPipe: YouTube8M Model
+     Inference Demo", embeds a YouTube player plus a form with a video-link/id
+     field (e.g., both "https://youtube.com/watch?v=huGVGe3Afng" and
+     "huGVGe3Afng" will work), segment-size and score-threshold sliders, and a
+     "Labels" feedback area driven by main.js below.] -->
+ + + + + + + + diff --git a/mediapipe/examples/desktop/youtube8m/viewer/static/main.js b/mediapipe/examples/desktop/youtube8m/viewer/static/main.js new file mode 100644 index 000000000..ad66e67ea --- /dev/null +++ b/mediapipe/examples/desktop/youtube8m/viewer/static/main.js @@ -0,0 +1,217 @@ +/** + * @license + * Copyright 2019 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +const STATE_PLAYER=0; +const STATE_COVER=1; +const STATE_SPINNER=2; + +/** +* Looks up the value of a url parameter. +* +* @param {string} param The name of the parameter. +* @return {?string} The parameter value or null if there is no such parameter. +*/ +var getUrlParameter = function(param) { + const url = decodeURIComponent(window.location.search.substring(1)); + const url_parts = url.split('&'); + for (var i = 0; i < url_parts.length; i++) { + const param_name = url_parts[i].split(/=(.*)/); + if (param_name[0] === param) { + return param_name[1] === undefined ? null : param_name[1]; + } + } +}; + +/** +* Sets the fields in the form to match the values of the URL parameters. +*/ +const updateFormFromURL = function() { + const form_elements = document.getElementById('form').elements; + const url = decodeURIComponent(window.location.search.substring(1)); + const url_parts = url.split('&'); + for (var i = 0; i < url_parts.length; i++) { + const p = url_parts[i].split(/=(.*)/); + if (p.length >= 2) { + if (form_elements[p[0]]) { + form_elements[p[0]].value = decodeURIComponent(p[1]); + } + } + } +}; + +let player = null; +let intervalID = undefined; +let entries = []; + +/** + * Constructs the embedded YouTube player. + */ +window.onYouTubeIframeAPIReady = () => { + player = new YT.Player('ytplayer', { + events: { + 'onReady': onPlayerReady, + 'onStateChange': onStateChange + } + }); +}; + + +/** + * Listens for YouTube video events. When video is playing, periodically checks + * the time signature and updates the feedback with labels. When video stops, + * shuts off interval timer to save cycles. + * @param {!Event} event YouTube API Event. + */ +function onStateChange(event) { + if (event.data === 1) { + // Youtube switched to playing. + intervalID = setInterval(function(){ + const currentTime = player.getCurrentTime(); + let winner = undefined; + let first = undefined; + for (entry of entries) { + if (!first) { + first = entry.labels; + } + if (entry.time < currentTime) { + winner = entry.labels; + } else { + break; + } + } + if (!winner) { + winner = first; + } + const threshold = + document.getElementById('form').elements['threshold'].value; + let message = ""; + for (var label of winner) { + if (label.score >= threshold) { + message = `${message}${label.label} (score: ${label.score})\n`; + } + } + $("textarea#feedback").val(message); + }); + } else { + if (intervalID) { + clearInterval(intervalID); + } + } +} + +/** + * Turns elements of the player on and off to reflect the state of the "app". + * @param {number} state One of STATE_COVER | STATE_SPINNER | STATE_PLAYER. 
 */
+function showState(state) {
+  switch(state) {
+    case STATE_COVER:
+      $('#cover').show();
+      $('#spinner').hide();
+      $('#ytplayer').hide();
+      break;
+    case STATE_SPINNER:
+      $('#cover').hide();
+      $('#spinner').show();
+      $('#ytplayer').hide();
+      break;
+    case STATE_PLAYER:
+    default:
+      $('#cover').hide();
+      $('#spinner').hide();
+      $('#ytplayer').show();
+      break;
+  }
+}
+
+/**
+ * Hide error field and clear its message.
+ */
+function hideError() {
+  $('#error_msg').css("visibility", "hidden").text('');
+}
+
+/**
+ * Set the error to visible and set its message.
+ * @param {string} msg Error message as a string.
+ */
+function showError(msg) {
+  $('#error_msg').css("visibility", "visible").text(msg);
+}
+
+/**
+ * Provides numeric feedback for the sliders.
+ */
+function connectSlider() {
+  $('#threshold_label').text(
+      `Score Threshold (${$('#threshold')[0].value})`);
+  $('#threshold').on('input', () => {
+    $('#threshold_label').text(
+        `Score Threshold (${$('#threshold')[0].value})`);
+  });
+  $('#segments_label').text(
+      `Segment Size (${$('#segments')[0].value})`);
+  $('#segments').on('input', () => {
+    $('#segments_label').text(
+        `Segment Size (${$('#segments')[0].value})`);
+  });
+}
+
+/**
+ * Retrieve video information from backend.
+ * @param {string} filePath name of a tfrecord file.
+ * @param {number} segments desired number of segments (1-300)
+ */
+function fetchVideo(filePath, segments) {
+  const url = "/video?file=" + filePath + "&segments=" + segments;
+  $.ajax({
+    url: url,
+    success: function(result) {
+      const videoId = result["video_id"];
+      player.loadVideoById(videoId);
+      entries = result['entries'];
+      showState(STATE_PLAYER);
+    },
+    error: (err) => {
+      showState(STATE_COVER);
+      console.log(err);
+      showError(err.responseText);
+    },
+    dataType: "json"
+  });
+}
+
+/**
+ * Called when the embedded YouTube player has finished loading. It loads the
+ * requested video into the player and calls the server's /video API to
+ * retrieve the frame-level data for that video.
+ */
+function onPlayerReady() {
+  const filePath = getUrlParameter('file') || "";
+  const segments = parseInt(getUrlParameter('segments')) || 0;
+
+  updateFormFromURL();
+  hideError();
+  connectSlider();
+
+  if (!filePath) {
+    return;
+  }
+
+  showState(STATE_SPINNER);
+  fetchVideo(filePath, segments);
+}
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.h b/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.h
new file mode 100644
index 000000000..6b0377ef2
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.h
@@ -0,0 +1,21 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+@interface AppDelegate : UIResponder <UIApplicationDelegate>
+
+@property(strong, nonatomic) UIWindow *window;
+
+@end
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.m b/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.m
new file mode 100644
index 000000000..9e1b7ff0e
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/AppDelegate.m
@@ -0,0 +1,59 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "AppDelegate.h"
+
+@interface AppDelegate ()
+
+@end
+
+@implementation AppDelegate
+
+- (BOOL)application:(UIApplication *)application
+    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
+  // Override point for customization after application launch.
+  return YES;
+}
+
+- (void)applicationWillResignActive:(UIApplication *)application {
+  // Sent when the application is about to move from active to inactive state. This can occur for
+  // certain types of temporary interruptions (such as an incoming phone call or SMS message) or
+  // when the user quits the application and it begins the transition to the background state. Use
+  // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering
+  // callbacks. Games should use this method to pause the game.
+}
+
+- (void)applicationDidEnterBackground:(UIApplication *)application {
+  // Use this method to release shared resources, save user data, invalidate timers, and store
+  // enough application state information to restore your application to its current state in case
+  // it is terminated later. If your application supports background execution, this method is
+  // called instead of applicationWillTerminate: when the user quits.
+}
+
+- (void)applicationWillEnterForeground:(UIApplication *)application {
+  // Called as part of the transition from the background to the active state; here you can undo
+  // many of the changes made on entering the background.
+}
+
+- (void)applicationDidBecomeActive:(UIApplication *)application {
+  // Restart any tasks that were paused (or not yet started) while the application was inactive. If
+  // the application was previously in the background, optionally refresh the user interface.
+}
+
+- (void)applicationWillTerminate:(UIApplication *)application {
+  // Called when the application is about to terminate. Save data if appropriate. See also
+  // applicationDidEnterBackground:.
+} + +@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 000000000..a1895a242 --- /dev/null +++ b/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,99 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "3x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "83.5x83.5", + "scale" : "2x" + }, + { + "idiom" : "ios-marketing", + "size" : "1024x1024", + "scale" : "1x" + } + ], + "info" : { + "version" : 1, + "author" : "xcode" + } +} + diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/Contents.json new file mode 100644 index 000000000..7afcdfaf8 --- /dev/null +++ b/mediapipe/examples/ios/multihandtrackinggpu/Assets.xcassets/Contents.json @@ -0,0 +1,7 @@ +{ + "info" : { + "version" : 1, + "author" : "xcode" + } +} + diff --git a/mediapipe/examples/ios/multihandtrackinggpu/BUILD b/mediapipe/examples/ios/multihandtrackinggpu/BUILD new file mode 100644 index 000000000..edfd5bb54 --- /dev/null +++ b/mediapipe/examples/ios/multihandtrackinggpu/BUILD @@ -0,0 +1,95 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) # Apache 2.0 + +MIN_IOS_VERSION = "10.0" + +load( + "@build_bazel_rules_apple//apple:ios.bzl", + "ios_application", +) + +# To use the 3D model instead of the default 2D model, add "--define 3D=true" to the +# bazel build command. 
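+# For example (hypothetical invocation; adjust the config to your setup):
+#   bazel build -c opt --config=ios_arm64 --define 3D=true \
+#     mediapipe/examples/ios/multihandtrackinggpu:MultiHandTrackingGpuApp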
+config_setting(
+    name = "use_3d_model",
+    define_values = {
+        "3D": "true",
+    },
+)
+
+genrule(
+    name = "model",
+    srcs = select({
+        "//conditions:default": ["//mediapipe/models:hand_landmark.tflite"],
+        ":use_3d_model": ["//mediapipe/models:hand_landmark_3d.tflite"],
+    }),
+    outs = ["hand_landmark.tflite"],
+    cmd = "cp $< $@",
+)
+
+ios_application(
+    name = "MultiHandTrackingGpuApp",
+    bundle_id = "com.google.mediapipe.MultiHandTrackingGpu",
+    families = [
+        "iphone",
+        "ipad",
+    ],
+    infoplists = ["Info.plist"],
+    minimum_os_version = MIN_IOS_VERSION,
+    provisioning_profile = "//mediapipe/examples/ios:provisioning_profile",
+    deps = [
+        ":MultiHandTrackingGpuAppLibrary",
+        "@ios_opencv//:OpencvFramework",
+    ],
+)
+
+objc_library(
+    name = "MultiHandTrackingGpuAppLibrary",
+    srcs = [
+        "AppDelegate.m",
+        "ViewController.mm",
+        "main.m",
+    ],
+    hdrs = [
+        "AppDelegate.h",
+        "ViewController.h",
+    ],
+    data = [
+        "Base.lproj/LaunchScreen.storyboard",
+        "Base.lproj/Main.storyboard",
+        ":model",
+        "//mediapipe/graphs/hand_tracking:multi_hand_tracking_mobile_gpu_binary_graph",
+        "//mediapipe/models:palm_detection.tflite",
+        "//mediapipe/models:palm_detection_labelmap.txt",
+    ],
+    sdk_frameworks = [
+        "AVFoundation",
+        "CoreGraphics",
+        "CoreMedia",
+        "UIKit",
+    ],
+    deps = [
+        "//mediapipe/objc:mediapipe_framework_ios",
+        "//mediapipe/objc:mediapipe_input_sources_ios",
+        "//mediapipe/objc:mediapipe_layer_renderer",
+    ] + select({
+        "//mediapipe:ios_i386": [],
+        "//mediapipe:ios_x86_64": [],
+        "//conditions:default": [
+            "//mediapipe/graphs/hand_tracking:multi_hand_mobile_calculators",
+        ],
+    }),
+)
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/LaunchScreen.storyboard
new file mode 100644
index 000000000..bfa361294
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/LaunchScreen.storyboard
@@ -0,0 +1,25 @@
+<!-- [Storyboard XML stripped in extraction; nothing beyond a stock Xcode
+     launch-screen scene is recoverable.] -->
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/Main.storyboard
new file mode 100644
index 000000000..e3bd912a4
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/Base.lproj/Main.storyboard
@@ -0,0 +1,51 @@
+<!-- [Storyboard XML stripped in extraction; per ViewController.mm it defines
+     the main ViewController scene with the _liveView display view and the
+     _noCameraLabel outlet.] -->
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/Info.plist b/mediapipe/examples/ios/multihandtrackinggpu/Info.plist
new file mode 100644
index 000000000..30db14c62
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/Info.plist
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>NSCameraUsageDescription</key>
+	<string>This app uses the camera to demonstrate live video processing.</string>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>UILaunchStoryboardName</key>
+	<string>LaunchScreen</string>
+	<key>UIMainStoryboardFile</key>
+	<string>Main</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+	</array>
+</dict>
+</plist>
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.h b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.h
new file mode 100644
index 000000000..e0a5a6367
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.h
@@ -0,0 +1,19 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <UIKit/UIKit.h>
+
+@interface ViewController : UIViewController
+
+@end
diff --git a/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm
new file mode 100644
index 000000000..2d7c5d7a5
--- /dev/null
+++ b/mediapipe/examples/ios/multihandtrackinggpu/ViewController.mm
@@ -0,0 +1,178 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "ViewController.h"
+
+#import "mediapipe/objc/MPPGraph.h"
+#import "mediapipe/objc/MPPCameraInputSource.h"
+#import "mediapipe/objc/MPPLayerRenderer.h"
+
+static NSString* const kGraphName = @"multi_hand_tracking_mobile_gpu";
+
+static const char* kInputStream = "input_video";
+static const char* kOutputStream = "output_video";
+static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue";
+
+@interface ViewController () <MPPGraphDelegate, MPPInputSourceDelegate>
+
+// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and
+// sent video frames on _videoQueue.
+@property(nonatomic) MPPGraph* mediapipeGraph;
+
+@end
+
+@implementation ViewController {
+  /// Handles camera access via AVCaptureSession library.
+  MPPCameraInputSource* _cameraSource;
+
+  /// Inform the user when camera is unavailable.
+  IBOutlet UILabel* _noCameraLabel;
+  /// Display the camera preview frames.
+  IBOutlet UIView* _liveView;
+  /// Render frames in a layer.
+  MPPLayerRenderer* _renderer;
+
+  /// Process camera frames on this queue.
+ dispatch_queue_t _videoQueue; +} + +#pragma mark - Cleanup methods + +- (void)dealloc { + self.mediapipeGraph.delegate = nil; + [self.mediapipeGraph cancel]; + // Ignore errors since we're cleaning up. + [self.mediapipeGraph closeAllInputStreamsWithError:nil]; + [self.mediapipeGraph waitUntilDoneWithError:nil]; +} + +#pragma mark - MediaPipe graph methods + ++ (MPPGraph*)loadGraphFromResource:(NSString*)resource { + // Load the graph config resource. + NSError* configLoadError = nil; + NSBundle* bundle = [NSBundle bundleForClass:[self class]]; + if (!resource || resource.length == 0) { + return nil; + } + NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; + NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; + if (!data) { + NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); + return nil; + } + + // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. + mediapipe::CalculatorGraphConfig config; + config.ParseFromArray(data.bytes, data.length); + + // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. + MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; + [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; + return newGraph; +} + +#pragma mark - UIViewController methods + +- (void)viewDidLoad { + [super viewDidLoad]; + + _renderer = [[MPPLayerRenderer alloc] init]; + _renderer.layer.frame = _liveView.layer.bounds; + [_liveView.layer addSublayer:_renderer.layer]; + _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; + // When using the front camera, mirror the input for a more natural look. + _renderer.mirrored = YES; + + dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( + DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); + _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); + + _cameraSource = [[MPPCameraInputSource alloc] init]; + [_cameraSource setDelegate:self queue:_videoQueue]; + _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; + _cameraSource.cameraPosition = AVCaptureDevicePositionFront; + // The frame's native format is rotated with respect to the portrait orientation. + _cameraSource.orientation = AVCaptureVideoOrientationPortrait; + + self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; + self.mediapipeGraph.delegate = self; + // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. + self.mediapipeGraph.maxFramesInFlight = 2; +} + +// In this application, there is only one ViewController which has no navigation to other view +// controllers, and there is only one View with live display showing the result of running the +// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph +// setup/teardown and camera start/stop logic should be updated appropriately in response to the +// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times +// depending on the application navigation flow in that case. +- (void)viewWillAppear:(BOOL)animated { + [super viewWillAppear:animated]; + + [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { + if (granted) { + [self startGraphAndCamera]; + dispatch_async(dispatch_get_main_queue(), ^{ + _noCameraLabel.hidden = YES; + }); + } + }]; +} + +- (void)startGraphAndCamera { + // Start running self.mediapipeGraph. 
+ NSError* error; + if (![self.mediapipeGraph startWithError:&error]) { + NSLog(@"Failed to start graph: %@", error); + } + + // Start fetching frames from the camera. + dispatch_async(_videoQueue, ^{ + [_cameraSource start]; + }); +} + +#pragma mark - MPPGraphDelegate methods + +// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. +- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer + fromStream:(const std::string&)streamName { + if (streamName == kOutputStream) { + // Display the captured image on the screen. + CVPixelBufferRetain(pixelBuffer); + dispatch_async(dispatch_get_main_queue(), ^{ + [_renderer renderPixelBuffer:pixelBuffer]; + CVPixelBufferRelease(pixelBuffer); + }); + } +} + +#pragma mark - MPPInputSourceDelegate methods + +// Must be invoked on _videoQueue. +- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer + timestamp:(CMTime)timestamp + fromSource:(MPPInputSource*)source { + if (source != _cameraSource) { + NSLog(@"Unknown source: %@", source); + return; + } + [self.mediapipeGraph sendPixelBuffer:imageBuffer + intoStream:kInputStream + packetType:MPPPacketTypePixelBuffer]; +} + +@end diff --git a/mediapipe/examples/ios/multihandtrackinggpu/main.m b/mediapipe/examples/ios/multihandtrackinggpu/main.m new file mode 100644 index 000000000..7ffe5ea5d --- /dev/null +++ b/mediapipe/examples/ios/multihandtrackinggpu/main.m @@ -0,0 +1,22 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import +#import "AppDelegate.h" + +int main(int argc, char * argv[]) { + @autoreleasepool { + return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); + } +} diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index 90a4f672c..d5586ae9a 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -14,13 +14,12 @@ # limitations under the License. 
 #
+load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_py_proto_library")
+
 licenses(["notice"])  # Apache 2.0
 
 package(default_visibility = ["//visibility:private"])
 
-load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
-load("//mediapipe/framework/port:build_config.bzl", "mediapipe_py_proto_library")
-
 package_group(
     name = "mediapipe_internal",
     packages = [
@@ -464,6 +463,8 @@ cc_library(
         "//mediapipe/framework:packet_generator_cc_proto",
         "//mediapipe/framework:status_handler_cc_proto",
         "//mediapipe/framework:thread_pool_executor_cc_proto",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "//mediapipe/gpu:graph_support",
         "@com_google_absl//absl/base:core_headers",
         "@com_google_absl//absl/container:fixed_array",
@@ -688,6 +689,12 @@ cc_library(
 cc_library(
     name = "demangle",
     hdrs = ["demangle.h"],
+    defines = select({
+        "//mediapipe/framework/profiler:android_release": [
+            "MEDIAPIPE_HAS_CXA_DEMANGLE=0",
+        ],
+        "//conditions:default": [],
+    }),
     visibility = ["//visibility:public"],
 )
 
@@ -1266,6 +1273,7 @@ cc_library(
         "//mediapipe/framework/tool:validate",
         "//mediapipe/framework/tool:validate_name",
        "@com_google_absl//absl/base:core_headers",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
     ],
@@ -1713,3 +1721,10 @@ cc_test(
         "//mediapipe/framework/tool/testdata:dub_quad_test_subgraph",
     ],
 )
+
+# Expose the proto source files for building mediapipe AAR.
+filegroup(
+    name = "protos_src",
+    srcs = glob(["*.proto"]),
+    visibility = ["//mediapipe:__subpackages__"],
+)
diff --git a/mediapipe/framework/calculator_graph.cc b/mediapipe/framework/calculator_graph.cc
index ecc50cd1d..32a790add 100644
--- a/mediapipe/framework/calculator_graph.cc
+++ b/mediapipe/framework/calculator_graph.cc
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include "absl/container/fixed_array.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
@@ -1017,8 +1018,8 @@ void CalculatorGraph::UpdateThrottledNodes(InputStreamManager* stream,
   // TODO Change the throttling code to use the index directly
   // rather than looking up a stream name.
   int node_index = validated_graph_->OutputStreamToNode(stream->Name());
-  std::unordered_set<int> owned_set;
-  const std::unordered_set<int>* upstream_nodes;
+  absl::flat_hash_set<int> owned_set;
+  const absl::flat_hash_set<int>* upstream_nodes;
   if (node_index >= validated_graph_->CalculatorInfos().size()) {
     // TODO just create a NodeTypeInfo object for each virtual node.
     owned_set.insert(node_index);
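The two calculator_graph.cc hunks above migrate throttling bookkeeping from std::unordered_set to absl::flat_hash_set. As a minimal standalone sketch of the lookup idiom the absl container enables (the values here are illustrative, not from this patch):

    #include "absl/container/flat_hash_set.h"

    // flat_hash_set stores elements in one flat array (open addressing),
    // avoiding the per-node allocations of std::unordered_set, and offers
    // contains() in place of the find() != end() idiom.
    absl::flat_hash_set<int> throttled_nodes = {3, 7};
    if (throttled_nodes.contains(7)) {
      // ... node 7 is currently throttled ...
    }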
@@ -1100,10 +1101,10 @@ bool CalculatorGraph::UnthrottleSources() {
   // This is sufficient because successfully growing at least one full input
   // stream during each call to UnthrottleSources will eventually resolve
   // each deadlock.
-  std::unordered_set<InputStreamManager*> full_streams;
+  absl::flat_hash_set<InputStreamManager*> full_streams;
   {
     absl::MutexLock lock(&full_input_streams_mutex_);
-    for (std::unordered_set<InputStreamManager*>& s : full_input_streams_) {
+    for (absl::flat_hash_set<InputStreamManager*>& s : full_input_streams_) {
       if (!s.empty()) {
         full_streams.insert(s.begin(), s.end());
       }
diff --git a/mediapipe/framework/calculator_graph.h b/mediapipe/framework/calculator_graph.h
index 0e8f29204..9662a81e1 100644
--- a/mediapipe/framework/calculator_graph.h
+++ b/mediapipe/framework/calculator_graph.h
@@ -23,13 +23,13 @@
 #include <map>
 #include <memory>
 #include <string>
-#include <unordered_map>
-#include <unordered_set>
 #include <utility>
 #include <vector>
 
 #include "absl/base/macros.h"
 #include "absl/container/fixed_array.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/synchronization/mutex.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/calculator_base.h"
@@ -579,18 +579,18 @@ class CalculatorGraph {
   // A node is scheduled only if this set is empty. Similarly, a packet
   // is added to a graph input stream only if this set is empty.
   // Note that this vector contains an unused entry for each non-source node.
-  std::vector<std::unordered_set<InputStreamManager*>> full_input_streams_
+  std::vector<absl::flat_hash_set<InputStreamManager*>> full_input_streams_
       GUARDED_BY(full_input_streams_mutex_);
 
   // Maps stream names to graph input stream objects.
-  std::unordered_map<std::string, std::unique_ptr<GraphInputStream>>
+  absl::flat_hash_map<std::string, std::unique_ptr<GraphInputStream>>
       graph_input_streams_;
 
   // Maps graph input streams to their virtual node ids.
-  std::unordered_map<std::string, int> graph_input_stream_node_ids_;
+  absl::flat_hash_map<std::string, int> graph_input_stream_node_ids_;
 
   // Maps graph input streams to their max queue size.
-  std::unordered_map<std::string, int> graph_input_stream_max_queue_size_;
+  absl::flat_hash_map<std::string, int> graph_input_stream_max_queue_size_;
 
   // The factory for making counters associated with this graph.
   std::unique_ptr<CounterFactory> counter_factory_;
diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc
index 1b8c3e9f2..b6144c0ae 100644
--- a/mediapipe/framework/calculator_graph_bounds_test.cc
+++ b/mediapipe/framework/calculator_graph_bounds_test.cc
@@ -756,7 +756,7 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) {
   MP_ASSERT_OK(graph.WaitUntilDone());
 }
 
-// Shows that when fixed-size-input-stream-hanlder drops packets,
+// Shows that when fixed-size-input-stream-handler drops packets,
 // no timestamp bounds are announced.
 TEST(CalculatorGraphBoundsTest, FixedSizeHandlerBounds) {
   // LambdaCalculator with FixedSizeInputStreamHandler will drop packets
@@ -876,5 +876,93 @@ TEST(CalculatorGraphBoundsTest, FixedSizeHandlerBounds) {
   MP_ASSERT_OK(graph.WaitUntilDone());
 }
 
+// A Calculator that outputs only the last packet from its input stream.
+class LastPacketCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->Inputs().Index(0).SetAny();
+    cc->Outputs().Index(0).SetAny();
+    return ::mediapipe::OkStatus();
+  }
+  ::mediapipe::Status Open(CalculatorContext* cc) final {
+    return ::mediapipe::OkStatus();
+  }
+  ::mediapipe::Status Process(CalculatorContext* cc) final {
+    cc->Outputs().Index(0).SetNextTimestampBound(cc->InputTimestamp());
+    last_packet_ = cc->Inputs().Index(0).Value();
+    return ::mediapipe::OkStatus();
+  }
+  ::mediapipe::Status Close(CalculatorContext* cc) final {
+    cc->Outputs().Index(0).AddPacket(last_packet_);
+    return ::mediapipe::OkStatus();
+  }
+
+ private:
+  Packet last_packet_;
+};
+REGISTER_CALCULATOR(LastPacketCalculator);
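LastPacketCalculator above relies on timestamp bounds to keep the graph live while it withholds packets. A hedged sketch of that contract, simplified from the Process() method above:

    // Inside Calculator::Process(): no packet is emitted here, but the
    // output's timestamp bound is advanced so that downstream calculators
    // can treat earlier timestamps as settled instead of waiting forever.
    cc->Outputs().Index(0).SetNextTimestampBound(cc->InputTimestamp());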
+
+// Shows that the last packet in an input stream can be detected.
+TEST(CalculatorGraphBoundsTest, LastPacketCheck) {
+  // LastPacketCalculator emits only the last input stream packet.
+  // It emits a timestamp bound after the arrival of a successor input stream
+  // packet or input stream close.  The output "last_output" shows the
+  // last packet, and "output" shows the timestamp bounds.
+  CalculatorGraphConfig config =
+      ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+        input_stream: 'input'
+        output_stream: 'output'
+        output_stream: 'last_output'
+        node {
+          calculator: 'PassThroughCalculator'
+          input_stream: 'input'
+          output_stream: 'input_2'
+        }
+        node {
+          calculator: 'LastPacketCalculator'
+          input_stream: 'input_2'
+          output_stream: 'last_packet'
+        }
+        node {
+          calculator: 'PassThroughCalculator'
+          input_stream: 'input'
+          input_stream: 'last_packet'
+          output_stream: 'output'
+          output_stream: 'last_output'
+        }
+      )");
+  CalculatorGraph graph;
+  std::vector<Packet> output_packets;
+  MP_ASSERT_OK(graph.Initialize(config));
+  MP_ASSERT_OK(graph.ObserveOutputStream("output", [&](const Packet& p) {
+    output_packets.push_back(p);
+    return ::mediapipe::OkStatus();
+  }));
+  std::vector<Packet> last_output_packets;
+  MP_ASSERT_OK(graph.ObserveOutputStream("last_output", [&](const Packet& p) {
+    last_output_packets.push_back(p);
+    return ::mediapipe::OkStatus();
+  }));
+  MP_ASSERT_OK(graph.StartRun({}));
+  MP_ASSERT_OK(graph.WaitUntilIdle());
+
+  // Add four packets into the graph.
+  constexpr int kNumInputs = 4;
+  for (int i = 0; i < kNumInputs; ++i) {
+    Packet p = MakePacket<int>(33).At(Timestamp(i));
+    MP_ASSERT_OK(graph.AddPacketToInputStream("input", p));
+    MP_ASSERT_OK(graph.WaitUntilIdle());
+    EXPECT_EQ(i, output_packets.size());
+    EXPECT_EQ(0, last_output_packets.size());
+  }
+
+  // Shutdown the graph.
+  MP_ASSERT_OK(graph.CloseAllPacketSources());
+  MP_ASSERT_OK(graph.WaitUntilIdle());
+  EXPECT_EQ(kNumInputs, output_packets.size());
+  EXPECT_EQ(1, last_output_packets.size());
+  MP_ASSERT_OK(graph.WaitUntilDone());
+}
+
 }  // namespace
 }  // namespace mediapipe
diff --git a/mediapipe/framework/calculator_graph_side_packet_test.cc b/mediapipe/framework/calculator_graph_side_packet_test.cc
index 166826ff1..fd78dc7d7 100644
--- a/mediapipe/framework/calculator_graph_side_packet_test.cc
+++ b/mediapipe/framework/calculator_graph_side_packet_test.cc
@@ -68,6 +68,7 @@ class CountAndOutputSummarySidePacketInCloseCalculator : public CalculatorBase {
   }
 
   ::mediapipe::Status Close(CalculatorContext* cc) final {
+    absl::SleepFor(absl::Milliseconds(300));  // For GetOutputSidePacket test.
     cc->OutputSidePackets().Index(0).Set(
         MakePacket<int>(count_).At(Timestamp::Unset()));
     return ::mediapipe::OkStatus();
@@ -743,5 +744,66 @@ TEST(CalculatorGraph, GetOutputSidePacket) {
   }
 }
 
+typedef std::string HugeModel;
+
+// Generates an output-side-packet once for each calculator-graph.
+class OutputSidePacketCachedCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->OutputSidePackets().Index(0).Set<HugeModel>();
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) final {
+    cc->OutputSidePackets().Index(0).Set(MakePacket<HugeModel>(
+        R"(An expensive side-packet created only once per graph)"));
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) final {
+    LOG(FATAL) << "Not reached.";
+    return ::mediapipe::OkStatus();
+  }
+};
+REGISTER_CALCULATOR(OutputSidePacketCachedCalculator);
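The caching test below, and the Equals() helper it uses, compare packets by holder identity rather than by payload value; the packet.h hunk later in this change adds operator== with the same meaning. A small illustrative sketch:

    Packet a = MakePacket<int>(42);
    Packet b = a;                    // b shares a's holder
    Packet c = MakePacket<int>(42);  // equal value, freshly allocated holder
    // Holder identity, not value equality: a and b compare equal,
    // a and c do not.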
+
+// Returns true if two packets hold the same data.
+bool Equals(Packet p1, Packet p2) {
+  return packet_internal::GetHolder(p1) == packet_internal::GetHolder(p2);
+}
+
+TEST(CalculatorGraph, OutputSidePacketCached) {
+  CalculatorGraphConfig config =
+      ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+        node {
+          calculator: "OutputSidePacketCachedCalculator"
+          output_side_packet: "model"
+        }
+        node {
+          calculator: "SidePacketToStreamPacketCalculator"
+          input_side_packet: "model"
+          output_stream: "output"
+        }
+      )");
+  CalculatorGraph graph;
+  MP_ASSERT_OK(graph.Initialize(config));
+  std::vector<Packet> output_packets;
+  MP_ASSERT_OK(graph.ObserveOutputStream(
+      "output", [&output_packets](const Packet& packet) {
+        output_packets.push_back(packet);
+        return ::mediapipe::OkStatus();
+      }));
+
+  // Run the graph three times.
+  for (int run = 0; run < 3; ++run) {
+    MP_ASSERT_OK(graph.StartRun({}));
+    MP_ASSERT_OK(graph.WaitUntilDone());
+  }
+  ASSERT_EQ(3, output_packets.size());
+  for (int run = 0; run < output_packets.size(); ++run) {
+    EXPECT_TRUE(Equals(output_packets[0], output_packets[run]));
+  }
+}
+
 }  // namespace
 }  // namespace mediapipe
diff --git a/mediapipe/framework/calculator_node.cc b/mediapipe/framework/calculator_node.cc
index f3cd90eea..d4a81ff9d 100644
--- a/mediapipe/framework/calculator_node.cc
+++ b/mediapipe/framework/calculator_node.cc
@@ -391,6 +391,38 @@ void CalculatorNode::SetMaxInputStreamQueueSize(int max_queue_size) {
   return ::mediapipe::OkStatus();
 }
 
+namespace {
+// Returns the Packet sent to an OutputSidePacket, or an empty packet
+// if none available.
+const Packet GetPacket(const OutputSidePacket& out) {
+  auto impl = dynamic_cast<const OutputSidePacketImpl*>(&out);
+  return (impl == nullptr) ? Packet() : impl->GetPacket();
+}
+
+// Resends the output-side-packets from the previous graph run.
+::mediapipe::Status ResendSidePackets(CalculatorContext* cc) {
+  auto& outs = cc->OutputSidePackets();
+  for (CollectionItemId id = outs.BeginId(); id < outs.EndId(); ++id) {
+    Packet packet = GetPacket(outs.Get(id));
+    if (!packet.IsEmpty()) {
+      // OutputSidePacket::Set re-announces the side-packet to its mirrors.
+      outs.Get(id).Set(packet);
+    }
+  }
+  return ::mediapipe::OkStatus();
+}
+}  // namespace
+
+bool CalculatorNode::OutputsAreConstant(CalculatorContext* cc) {
+  if (cc->Inputs().NumEntries() > 0 || cc->Outputs().NumEntries() > 0) {
+    return false;
+  }
+  if (input_side_packet_handler_.InputSidePacketsChanged()) {
+    return false;
+  }
+  return true;
+}
+
 ::mediapipe::Status CalculatorNode::OpenNode() {
   VLOG(2) << "CalculatorNode::OpenNode() for " << DebugName();
@@ -407,8 +439,9 @@ void CalculatorNode::SetMaxInputStreamQueueSize(int max_queue_size) {
       default_context, Timestamp::Unstarted());
 
   ::mediapipe::Status result;
-
-  {
+  if (OutputsAreConstant(default_context)) {
+    result = ResendSidePackets(default_context);
+  } else {
     MEDIAPIPE_PROFILING(OPEN, default_context);
     LegacyCalculatorSupport::Scoped<CalculatorContext> s(default_context);
     result = calculator_->Open(default_context);
@@ -494,7 +527,10 @@ void CalculatorNode::CloseOutputStreams(OutputStreamShardSet* outputs) {
 
   ::mediapipe::Status result;
 
-  {
+  if (OutputsAreConstant(default_context)) {
+    // Do nothing.
+    result = ::mediapipe::OkStatus();
+  } else {
     MEDIAPIPE_PROFILING(CLOSE, default_context);
     LegacyCalculatorSupport::Scoped<CalculatorContext> s(default_context);
     result = calculator_->Close(default_context);
@@ -770,7 +806,10 @@ std::string CalculatorNode::DebugName() const {
 
   VLOG(2) << "Calling Calculator::Process() for node: " << DebugName();
 
-  {
+  if (OutputsAreConstant(calculator_context)) {
+    // Do nothing.
+    result = ::mediapipe::OkStatus();
+  } else {
     MEDIAPIPE_PROFILING(PROCESS, calculator_context);
     LegacyCalculatorSupport::Scoped<CalculatorContext> s(
         calculator_context);
diff --git a/mediapipe/framework/calculator_node.h b/mediapipe/framework/calculator_node.h
index fd17d4ada..f39636e5d 100644
--- a/mediapipe/framework/calculator_node.h
+++ b/mediapipe/framework/calculator_node.h
@@ -280,6 +280,9 @@ class CalculatorNode {
   // Get a std::string describing the input streams.
   std::string DebugInputStreamNames() const;
 
+  // Returns true if all outputs will be identical to the previous graph run.
+  bool OutputsAreConstant(CalculatorContext* cc);
+
   // The calculator.
   std::unique_ptr<CalculatorBase> calculator_;
   // Keeps data which a Calculator subclass needs access to.
diff --git a/mediapipe/framework/collection.h b/mediapipe/framework/collection.h
index b3f972b0a..448968be2 100644
--- a/mediapipe/framework/collection.h
+++ b/mediapipe/framework/collection.h
@@ -240,6 +240,22 @@ class Collection {
     return tag_map_->EndId(tag);
   }
 
+  // Equal Collections contain equal mappings and equal elements.
+  bool operator==(const Collection& other) const {
+    if (tag_map_->Mapping() != other.TagMap()->Mapping()) {
+      return false;
+    }
+    for (CollectionItemId id = BeginId(); id < EndId(); ++id) {
+      if (Get(id) != other.Get(id)) {
+        return false;
+      }
+    }
+    return true;
+  }
+  bool operator!=(const Collection& other) const {
+    return !(*this == other);
+  }
+
  private:
   // An iterator which is identical to ItType** except that the
   // dereference operator (operator*) does a double dereference and
diff --git a/mediapipe/framework/demangle.h b/mediapipe/framework/demangle.h
index e9624c5ac..45ebd1691 100644
--- a/mediapipe/framework/demangle.h
+++ b/mediapipe/framework/demangle.h
@@ -15,23 +15,25 @@
 #ifndef MEDIAPIPE_FRAMEWORK_DEMANGLE_H_
 #define MEDIAPIPE_FRAMEWORK_DEMANGLE_H_
 
+#ifndef MEDIAPIPE_HAS_CXA_DEMANGLE
 // We only support some compilers that support __cxa_demangle.
 // TODO: Checks if Android NDK has fixed this issue or not.
 #if defined(__ANDROID__) && (defined(__i386__) || defined(__x86_64__))
-#define HAS_CXA_DEMANGLE 0
+#define MEDIAPIPE_HAS_CXA_DEMANGLE 0
 #elif (__GNUC__ >= 4 || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4)) && \
     !defined(__mips__)
-#define HAS_CXA_DEMANGLE 1
+#define MEDIAPIPE_HAS_CXA_DEMANGLE 1
 #elif defined(__clang__) && !defined(_MSC_VER)
-#define HAS_CXA_DEMANGLE 1
+#define MEDIAPIPE_HAS_CXA_DEMANGLE 1
 #else
-#define HAS_CXA_DEMANGLE 0
+#define MEDIAPIPE_HAS_CXA_DEMANGLE 0
+#endif
 #endif
 
 #include <stdlib.h>
 #include <string>
 
-#if HAS_CXA_DEMANGLE
+#if MEDIAPIPE_HAS_CXA_DEMANGLE
 #include <cxxabi.h>
 #endif
 
@@ -65,7 +67,7 @@ namespace mediapipe {
 inline std::string Demangle(const char* mangled) {
   int status = 0;
   char* demangled = nullptr;
-#if HAS_CXA_DEMANGLE
+#if MEDIAPIPE_HAS_CXA_DEMANGLE
   demangled = abi::__cxa_demangle(mangled, nullptr, nullptr, &status);
 #endif
   std::string out;
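With the now-configurable MEDIAPIPE_HAS_CXA_DEMANGLE macro, Demangle() degrades gracefully rather than failing to build. A minimal usage sketch (the variable name is illustrative):

    #include <string>
    #include <typeinfo>
    #include <vector>
    #include "mediapipe/framework/demangle.h"

    // Returns a human-readable type name where __cxa_demangle is available;
    // otherwise (e.g. MEDIAPIPE_HAS_CXA_DEMANGLE=0 on android_release builds,
    // per the BUILD hunk above) it returns the raw mangled name unchanged.
    std::string name = mediapipe::Demangle(typeid(std::vector<int>).name());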
diff --git a/mediapipe/framework/deps/BUILD b/mediapipe/framework/deps/BUILD
index f3ca5dc1d..cc84a99e7 100644
--- a/mediapipe/framework/deps/BUILD
+++ b/mediapipe/framework/deps/BUILD
@@ -15,10 +15,9 @@
 # Description:
 #   The dependencies of mediapipe.
 
-licenses(["notice"])  # Apache 2.0
-
 load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library")
-load("//mediapipe/framework/port:build_config.bzl", "mediapipe_py_proto_library")
+
+licenses(["notice"])  # Apache 2.0
 
 package(default_visibility = ["//visibility:private"])
diff --git a/mediapipe/framework/formats/image_format.proto b/mediapipe/framework/formats/image_format.proto
index a367f4b62..ea99dfee4 100644
--- a/mediapipe/framework/formats/image_format.proto
+++ b/mediapipe/framework/formats/image_format.proto
@@ -66,5 +66,9 @@ message ImageFormat {
     // LAB, interleaved: one byte for L, then one byte for a, then one
     // byte for b for each pixel.
     LAB8 = 10;
+
+    // sBGRA, interleaved: one byte for B, one byte for G, one byte for R,
+    // one byte for alpha or unused. This is the N32 format for Skia.
+    SBGRA = 11;
   }
 }
diff --git a/mediapipe/framework/formats/image_frame.cc b/mediapipe/framework/formats/image_frame.cc
index 996702ae2..338dfe165 100644
--- a/mediapipe/framework/formats/image_frame.cc
+++ b/mediapipe/framework/formats/image_frame.cc
@@ -279,6 +279,8 @@ int ImageFrame::NumberOfChannelsForFormat(ImageFormat::Format format) {
       return 1;
     case ImageFormat::LAB8:
       return 3;
+    case ImageFormat::SBGRA:
+      return 4;
     default:
       LOG(FATAL) << InvalidFormatString(format);
   }
@@ -304,6 +306,8 @@ int ImageFrame::ChannelSizeForFormat(ImageFormat::Format format) {
       return sizeof(float);
     case ImageFormat::LAB8:
       return sizeof(uint8);
+    case ImageFormat::SBGRA:
+      return sizeof(uint8);
     default:
       LOG(FATAL) << InvalidFormatString(format);
   }
@@ -329,6 +333,8 @@ int ImageFrame::ByteDepthForFormat(ImageFormat::Format format) {
       return 4;
     case ImageFormat::LAB8:
       return 1;
+    case ImageFormat::SBGRA:
+      return 1;
     default:
       LOG(FATAL) << InvalidFormatString(format);
   }
diff --git a/mediapipe/framework/formats/image_frame_opencv.cc b/mediapipe/framework/formats/image_frame_opencv.cc
index bf723cda3..bf8b908b3 100644
--- a/mediapipe/framework/formats/image_frame_opencv.cc
+++ b/mediapipe/framework/formats/image_frame_opencv.cc
@@ -59,6 +59,9 @@ int GetMatType(const mediapipe::ImageFormat::Format format) {
     case mediapipe::ImageFormat::LAB8:
       type = CV_8U;
       break;
+    case mediapipe::ImageFormat::SBGRA:
+      type = CV_8U;
+      break;
     default:
       // Invalid or unknown; Default to uchar.
       type = CV_8U;
diff --git a/mediapipe/framework/formats/landmark.proto b/mediapipe/framework/formats/landmark.proto
index cdc2ee151..220b3725d 100644
--- a/mediapipe/framework/formats/landmark.proto
+++ b/mediapipe/framework/formats/landmark.proto
@@ -32,3 +32,8 @@ message NormalizedLandmark {
   optional float y = 2;
   optional float z = 3;
 }
+
+// Group of NormalizedLandmark protos.
+message NormalizedLandmarkList {
+  repeated NormalizedLandmark landmark = 1;
+}
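The new NormalizedLandmarkList message lets a whole set of landmarks travel in a single packet instead of one packet per vector of landmarks. A sketch of populating it from C++ via the generated proto API (the count and coordinates are placeholders):

    #include "mediapipe/framework/formats/landmark.pb.h"

    mediapipe::NormalizedLandmarkList list;
    for (int i = 0; i < 21; ++i) {  // e.g. 21 hand landmarks
      mediapipe::NormalizedLandmark* lm = list.add_landmark();
      lm->set_x(0.5f);  // normalized [0, 1] image coordinates
      lm->set_y(0.5f);
      lm->set_z(0.0f);
    }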
diff --git a/mediapipe/framework/graph_validation_test.cc b/mediapipe/framework/graph_validation_test.cc
index 98492b8d0..73e856a00 100644
--- a/mediapipe/framework/graph_validation_test.cc
+++ b/mediapipe/framework/graph_validation_test.cc
@@ -32,7 +32,7 @@ namespace mediapipe {
 namespace {
 
 // Shows validation success for a graph and a subgraph.
-TEST(ValidatedGraphConfigTest, InitializeGraphFromProtos) {
+TEST(GraphValidationTest, InitializeGraphFromProtos) {
   auto config_1 = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
     type: "PassThroughGraph"
     input_stream: "INPUT:stream_1"
@@ -102,7 +102,7 @@
 }
 
 // Shows validation failure due to an unregistered subgraph.
-TEST(ValidatedGraphConfigTest, InitializeGraphFromLinker) {
+TEST(GraphValidationTest, InitializeGraphFromLinker) {
   EXPECT_FALSE(SubgraphRegistry::IsRegistered("DubQuadTestSubgraph"));
   ValidatedGraphConfig builder_1;
   ::mediapipe::Status status_1 =
@@ -114,7 +114,7 @@
 }
 
 // Shows validation success for a graph and a template subgraph.
-TEST(ValidatedGraphConfigTest, InitializeTemplateFromProtos) {
+TEST(GraphValidationTest, InitializeTemplateFromProtos) {
   mediapipe::tool::TemplateParser::Parser parser;
   CalculatorGraphTemplate config_1;
   CHECK(parser.ParseFromString(R"(
@@ -210,5 +210,109 @@
   )")));
 }
 
+// Shows passing validation of optional subgraph inputs and output streams.
+TEST(GraphValidationTest, OptionalSubgraphStreams) {
+  // A subgraph defining two optional input streams
+  // and two optional output streams.
+  auto config_1 = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+    type: "PassThroughGraph"
+    input_stream: "INPUT:input_0"
+    input_stream: "INPUT:1:input_1"
+    output_stream: "OUTPUT:output_0"
+    output_stream: "OUTPUT:1:output_1"
+    node {
+      calculator: "PassThroughCalculator"
+      input_stream: "input_0"    # Any Type.
+      input_stream: "input_1"    # Any Type.
+      output_stream: "output_0"  # Same as input.
+    }
+  )");
+
+  // An enclosing graph that specifies one of the two optional input streams
+  // and one of the two optional output streams.
+  auto config_2 = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+    input_stream: "INPUT:foo_in"
+    output_stream: "OUTPUT:foo_out"
+    node {
+      calculator: "PassThroughCalculator"
+      input_stream: "foo_in"    # Any Type.
+      output_stream: "foo_bar"  # Same as input.
+    }
+    node {
+      calculator: "PassThroughGraph"
+      input_stream: "INPUT:foo_bar"    # Any Type.
+      output_stream: "OUTPUT:foo_out"  # Same as input.
+    }
+  )");
+
+  GraphValidation validation_1;
+  MP_EXPECT_OK(validation_1.Validate({config_1, config_2}, {}));
+  CalculatorGraph graph_1;
+  MP_EXPECT_OK(graph_1.Initialize({config_1, config_2}, {}));
+  EXPECT_THAT(
+      graph_1.Config(),
+
+      // The result includes only the requested input and output streams.
+      EqualsProto(::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+        input_stream: "INPUT:foo_in"
+        output_stream: "OUTPUT:foo_out"
+        node {
+          calculator: "PassThroughCalculator"
+          input_stream: "foo_in"
+          output_stream: "foo_bar"
+        }
+        node {
+          calculator: "PassThroughCalculator"
+          input_stream: "foo_bar"
+          output_stream: "foo_out"
+        }
+        executor {}
+      )")));
+}
+
+// Shows failing validation of optional subgraph inputs and output streams.
+TEST(GraphValidationTest, OptionalSubgraphStreamsMismatched) {
+  // A subgraph defining two optional input streams
+  // and two optional output streams.
+  auto config_1 = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+    type: "PassThroughGraph"
+    input_stream: "INPUT:input_0"
+    input_stream: "INPUT:1:input_1"
+    output_stream: "OUTPUT:output_0"
+    output_stream: "OUTPUT:1:output_1"
+    node {
+      calculator: "PassThroughCalculator"
+      input_stream: "input_0"    # Any Type.
+      input_stream: "input_1"    # Any Type.
+      output_stream: "output_0"  # Same as input.
+    }
+  )");
+
+  // An enclosing graph that specifies one of the two optional input streams
+  // and both of the two optional output streams.
+  auto config_2 = ParseTextProtoOrDie<CalculatorGraphConfig>(R"(
+    input_stream: "INPUT:foo_in"
+    output_stream: "OUTPUT:foo_out"
+    node {
+      calculator: "PassThroughCalculator"
+      input_stream: "foo_in"    # Any Type.
+      output_stream: "foo_bar"  # Same as input.
+    }
+    node {
+      calculator: "PassThroughGraph"
+      input_stream: "INPUT:foo_bar"    # Any Type.
+      input_stream: "INPUT:1:foo_bar"  # Any Type.
+      output_stream: "OUTPUT:foo_out"  # Same as input.
+    }
+  )");
+
+  GraphValidation validation_1;
+  mediapipe::Status status = validation_1.Validate({config_1, config_2}, {});
+  ASSERT_EQ(status.code(), ::mediapipe::StatusCode::kInvalidArgument);
+  ASSERT_THAT(status.ToString(),
+              testing::HasSubstr(
+                  "PassThroughCalculator must use matching tags and indexes"));
+}
+
 }  // namespace
 }  // namespace mediapipe
diff --git a/mediapipe/framework/input_side_packet_handler.cc b/mediapipe/framework/input_side_packet_handler.cc
index fb66f0694..ce43508d2 100644
--- a/mediapipe/framework/input_side_packet_handler.cc
+++ b/mediapipe/framework/input_side_packet_handler.cc
@@ -27,6 +27,7 @@ namespace mediapipe {
     std::function<void()> input_side_packets_ready_callback,
     std::function<void(::mediapipe::Status)> error_callback) {
   int missing_input_side_packet_count;
+  prev_input_side_packets_ = std::move(input_side_packets_);
   ASSIGN_OR_RETURN(
       input_side_packets_,
       tool::FillPacketSet(*input_side_packet_types, all_side_packets,
@@ -41,6 +42,12 @@ namespace mediapipe {
   return ::mediapipe::OkStatus();
 }
 
+bool InputSidePacketHandler::InputSidePacketsChanged() {
+  return prev_input_side_packets_ == nullptr ||
+         input_side_packets_ == nullptr ||
+         *input_side_packets_ != *prev_input_side_packets_;
+}
+
 void InputSidePacketHandler::Set(CollectionItemId id, const Packet& packet) {
   ::mediapipe::Status status = SetInternal(id, packet);
   if (!status.ok()) {
diff --git a/mediapipe/framework/input_side_packet_handler.h b/mediapipe/framework/input_side_packet_handler.h
index 5112731da..ecfa2239e 100644
--- a/mediapipe/framework/input_side_packet_handler.h
+++ b/mediapipe/framework/input_side_packet_handler.h
@@ -52,6 +52,10 @@ class InputSidePacketHandler {
 
   const PacketSet& InputSidePackets() const { return *input_side_packets_; }
 
+  // Returns true if the set of input-side-packets has changed since the
+  // previous run.
+  bool InputSidePacketsChanged();
+
   // Returns the number of missing input side packets.
   int MissingInputSidePacketCount() const {
     return missing_input_side_packet_count_.load(std::memory_order_relaxed);
@@ -68,6 +72,7 @@ class InputSidePacketHandler {
   const PacketTypeSet* input_side_packet_types_;
 
   std::unique_ptr<PacketSet> input_side_packets_;
+  std::unique_ptr<PacketSet> prev_input_side_packets_;
 
   std::atomic<int> missing_input_side_packet_count_{0};
diff --git a/mediapipe/framework/output_side_packet_impl.cc b/mediapipe/framework/output_side_packet_impl.cc
index 09cc294ff..f2771da5d 100644
--- a/mediapipe/framework/output_side_packet_impl.cc
+++ b/mediapipe/framework/output_side_packet_impl.cc
@@ -30,7 +30,7 @@ namespace mediapipe {
 void OutputSidePacketImpl::PrepareForRun(
     std::function<void(::mediapipe::Status)> error_callback) {
   error_callback_ = std::move(error_callback);
-  packet_ = Packet();
+  initialized_ = false;
 }
 
 void OutputSidePacketImpl::Set(const Packet& packet) {
@@ -47,7 +47,7 @@ void OutputSidePacketImpl::AddMirror(
 }
 
 ::mediapipe::Status OutputSidePacketImpl::SetInternal(const Packet& packet) {
-  if (!packet_.IsEmpty()) {
+  if (initialized_) {
     return ::mediapipe::AlreadyExistsErrorBuilder(MEDIAPIPE_LOC)
            << "Output side packet \"" << name_ << "\" was already set.";
   }
@@ -72,6 +72,7 @@ void OutputSidePacketImpl::AddMirror(
   }
 
   packet_ = packet;
+  initialized_ = true;
   for (const auto& mirror : mirrors_) {
     mirror.input_side_packet_handler->Set(mirror.id, packet_);
   }
diff --git a/mediapipe/framework/output_side_packet_impl.h b/mediapipe/framework/output_side_packet_impl.h
index c654769c5..df9ac4082 100644
--- a/mediapipe/framework/output_side_packet_impl.h
+++ b/mediapipe/framework/output_side_packet_impl.h
@@ -80,6 +80,7 @@ class OutputSidePacketImpl : public OutputSidePacket {
   const PacketType* packet_type_;
   std::function<void(::mediapipe::Status)> error_callback_;
   Packet packet_;
+  bool initialized_ = false;
 
   std::vector<Mirror> mirrors_;
 };
diff --git a/mediapipe/framework/packet.h b/mediapipe/framework/packet.h
index 8782d924c..11cfb5cc0 100644
--- a/mediapipe/framework/packet.h
+++ b/mediapipe/framework/packet.h
@@ -85,7 +85,7 @@ class Packet {
   // given timestamp. Does not modify *this.
   Packet At(class Timestamp timestamp) const&;
 
-  // The rvalue reference overload of Packet's memeber function
+  // The rvalue reference overload of Packet's member function
   // Packet::At(class Timestamp). Moves *this to a new Packet and returns
   // the new Packet with the given timestamp.
   Packet At(class Timestamp timestamp) &&;
@@ -653,6 +653,14 @@ Packet PointToForeign(const T* ptr) {
   return packet_internal::Create(new packet_internal::ForeignHolder<T>(ptr));
 }
 
+// Equal Packets refer to the same memory contents, like equal pointers.
+inline bool operator==(const Packet& p1, const Packet& p2) {
+  return packet_internal::GetHolder(p1) == packet_internal::GetHolder(p2);
+}
+inline bool operator!=(const Packet& p1, const Packet& p2) {
+  return !(p1 == p2);
+}
+
 }  // namespace mediapipe
 
 #endif  // MEDIAPIPE_FRAMEWORK_PACKET_H_
diff --git a/mediapipe/framework/port.h b/mediapipe/framework/port.h
index c45a4546d..275f8ca98 100644
--- a/mediapipe/framework/port.h
+++ b/mediapipe/framework/port.h
@@ -28,4 +28,22 @@
 #define MEDIAPIPE_MOBILE
 #endif
 
+#if !defined(MEDIAPIPE_ANDROID) && defined(__ANDROID__)
+#define MEDIAPIPE_ANDROID
+#endif
+
+#if defined(__APPLE__)
+#include "TargetConditionals.h"  // for TARGET_OS_*
+#if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX
+#define MEDIAPIPE_IOS
+#endif
+#endif
+
+// These platforms do not support OpenGL ES Compute Shaders (v3.1 and up),
+// but can still run OpenGL ES 3.0 and below.
+#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) && \
+    (defined(__APPLE__) || defined(__EMSCRIPTEN__))
+#define MEDIAPIPE_DISABLE_GL_COMPUTE
+#endif
+
 #endif  // MEDIAPIPE_FRAMEWORK_PORT_H_
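The new port.h macros let calculator code branch on platform and compute-shader availability at compile time. A hedged sketch of the intended pattern (the branch bodies are placeholders, not from this patch):

    #include "mediapipe/framework/port.h"

    #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
      // OpenGL ES 3.1+ path: dispatch a compute shader.
    #else
      // Apple / Emscripten path: stay within OpenGL ES 3.0 features.
    #endif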
diff --git a/mediapipe/framework/port/BUILD b/mediapipe/framework/port/BUILD
index 188c22e5e..18d46e1c3 100644
--- a/mediapipe/framework/port/BUILD
+++ b/mediapipe/framework/port/BUILD
@@ -247,6 +247,26 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "opencv_features2d",
+    hdrs = ["opencv_features2d_inc.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":opencv_core",
+        "//third_party:opencv",
+    ],
+)
+
+cc_library(
+    name = "opencv_calib3d",
+    hdrs = ["opencv_calib3d_inc.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":opencv_core",
+        "//third_party:opencv",
+    ],
+)
+
 cc_library(
     name = "parse_text_proto",
     hdrs = [
diff --git a/mediapipe/framework/port/opencv_calib3d_inc.h b/mediapipe/framework/port/opencv_calib3d_inc.h
new file mode 100644
index 000000000..687e0ca62
--- /dev/null
+++ b/mediapipe/framework/port/opencv_calib3d_inc.h
@@ -0,0 +1,26 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_FRAMEWORK_PORT_OPENCV_CALIB3D_INC_H_
+#define MEDIAPIPE_FRAMEWORK_PORT_OPENCV_CALIB3D_INC_H_
+
+#include <opencv2/core/version.hpp>
+
+#ifdef CV_VERSION_EPOCH  // for OpenCV 2.x
+#include <opencv2/calib3d/calib3d.hpp>
+#else
+#include <opencv2/calib3d.hpp>
+#endif
+
+#endif  // MEDIAPIPE_FRAMEWORK_PORT_OPENCV_CALIB3D_INC_H_
diff --git a/mediapipe/framework/port/opencv_features2d_inc.h b/mediapipe/framework/port/opencv_features2d_inc.h
new file mode 100644
index 000000000..31ab2457b
--- /dev/null
+++ b/mediapipe/framework/port/opencv_features2d_inc.h
@@ -0,0 +1,26 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MEDIAPIPE_FRAMEWORK_PORT_OPENCV_FEATURES2D_INC_H_
+#define MEDIAPIPE_FRAMEWORK_PORT_OPENCV_FEATURES2D_INC_H_
+
+#include <opencv2/core/version.hpp>
+
+#ifdef CV_VERSION_EPOCH  // for OpenCV 2.x
+#include <opencv2/features2d/features2d.hpp>
+#else
+#include <opencv2/features2d.hpp>
+#endif
+
+#endif  // MEDIAPIPE_FRAMEWORK_PORT_OPENCV_FEATURES2D_INC_H_
diff --git a/mediapipe/framework/profiler/graph_profiler_test.cc b/mediapipe/framework/profiler/graph_profiler_test.cc
index cf7717556..86c6a16c3 100644
--- a/mediapipe/framework/profiler/graph_profiler_test.cc
+++ b/mediapipe/framework/profiler/graph_profiler_test.cc
@@ -247,25 +247,45 @@ TEST_F(GraphProfilerTestPeer, InitializeConfig) {
   // Checks histogram_interval_size_usec and num_histogram_intervals.
   CalculatorProfile actual =
       GetCalculatorProfilesMap()->find(kDummyTestCalculatorName)->second;
-  ASSERT_EQ(actual.name(), kDummyTestCalculatorName);
-  ASSERT_FALSE(actual.has_open_runtime());
-  ASSERT_FALSE(actual.has_close_runtime());
-
-  ASSERT_EQ(actual.process_runtime().interval_size_usec(), 1000);
-  ASSERT_EQ(actual.process_runtime().num_intervals(), 3);
-
-  ASSERT_EQ(actual.process_input_latency().interval_size_usec(), 1000);
-  ASSERT_EQ(actual.process_input_latency().num_intervals(), 3);
-
-  ASSERT_EQ(actual.process_output_latency().interval_size_usec(), 1000);
-  ASSERT_EQ(actual.process_output_latency().num_intervals(), 3);
-
-  ASSERT_EQ(actual.input_stream_profiles().size(), 1);
-  ASSERT_EQ(actual.input_stream_profiles(0).name(), "input_stream");
-  ASSERT_FALSE(actual.input_stream_profiles(0).back_edge());
-  ASSERT_EQ(actual.input_stream_profiles(0).latency().interval_size_usec(),
-            1000);
-  ASSERT_EQ(actual.input_stream_profiles(0).latency().num_intervals(), 3);
+  EXPECT_THAT(actual, EqualsProto(R"(
+    name: "DummyTestCalculator"
+    process_runtime {
+      total: 0
+      interval_size_usec: 1000
+      num_intervals: 3
+      count: 0
+      count: 0
+      count: 0
+    }
+    process_input_latency {
+      total: 0
+      interval_size_usec: 1000
+      num_intervals: 3
+      count: 0
+      count: 0
+      count: 0
+    }
+    process_output_latency {
+      total: 0
+      interval_size_usec: 1000
+      num_intervals: 3
+      count: 0
+      count: 0
+      count: 0
+    }
+    input_stream_profiles {
+      name: "input_stream"
+      back_edge: false
+      latency {
+        total: 0
+        interval_size_usec: 1000
+        num_intervals: 3
+        count: 0
+        count: 0
+        count: 0
+      }
+    }
+  )"));
 }
 
 // Tests that Initialize() uses the ProfilerConfig in the graph definition.
@@ -291,16 +311,17 @@ TEST_F(GraphProfilerTestPeer, InitializeConfigWithoutStreamLatency) {
   // Checks histogram_interval_size_usec and num_histogram_intervals.
   CalculatorProfile actual =
       GetCalculatorProfilesMap()->find(kDummyTestCalculatorName)->second;
-  ASSERT_EQ(actual.name(), kDummyTestCalculatorName);
-  ASSERT_FALSE(actual.has_open_runtime());
-  ASSERT_FALSE(actual.has_close_runtime());
-
-  ASSERT_EQ(actual.process_runtime().interval_size_usec(), 1000);
-  ASSERT_EQ(actual.process_runtime().num_intervals(), 3);
-
-  ASSERT_FALSE(actual.has_process_input_latency());
-  ASSERT_FALSE(actual.has_process_output_latency());
-  ASSERT_EQ(actual.input_stream_profiles().size(), 0);
+  EXPECT_THAT(actual, EqualsProto(R"(
+    name: "DummyTestCalculator"
+    process_runtime {
+      total: 0
+      interval_size_usec: 1000
+      num_intervals: 3
+      count: 0
+      count: 0
+      count: 0
+    }
+  )"));
 }
 
 // Tests that Initialize() reads all the configs defined in the graph
@@ -633,10 +654,11 @@ TEST_F(GraphProfilerTestPeer, SetOpenRuntime) {
   simulation_clock->ThreadFinish();
 
   ASSERT_EQ(profiles.size(), 1);
-  ASSERT_EQ(profiles[0].open_runtime(), 100);
-  ASSERT_FALSE(profiles[0].has_close_runtime());
-  ASSERT_THAT(profiles[0].process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
+  EXPECT_THAT(profiles[0], Partially(EqualsProto(R"(
+    name: "DummyTestCalculator"
+    open_runtime: 100
+    process_runtime { total: 0 }
+  )")));
 
   // Checks packets_info_ map hasn't changed.
   ASSERT_EQ(GetPacketsInfoMap()->size(), 0);
 }
 
@@ -688,14 +710,29 @@ TEST_F(GraphProfilerTestPeer, SetOpenRuntimeWithStreamLatency) {
   ASSERT_EQ(profiles.size(), 2);
   CalculatorProfile source_profile =
       GetProfileWithName(profiles, "source_calc");
-  ASSERT_EQ(source_profile.open_runtime(), 150);
-  ASSERT_FALSE(source_profile.has_close_runtime());
-  ASSERT_THAT(source_profile.process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
-  ASSERT_THAT(source_profile.process_input_latency(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
-  ASSERT_THAT(source_profile.process_output_latency(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
+
+  EXPECT_THAT(source_profile, EqualsProto(R"(
+    name: "source_calc"
+    open_runtime: 150
+    process_runtime {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+    process_input_latency {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+    process_output_latency {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+  )"));
 
   // Check packets_info_ map has been updated.
   ASSERT_EQ(GetPacketsInfoMap()->size(), 1);
@@ -736,11 +773,16 @@ TEST_F(GraphProfilerTestPeer, SetCloseRuntime) {
   std::vector<CalculatorProfile> profiles = Profiles();
   simulation_clock->ThreadFinish();
 
-  ASSERT_EQ(profiles.size(), 1);
-  ASSERT_FALSE(profiles[0].open_runtime());
-  ASSERT_EQ(profiles[0].close_runtime(), 100);
-  ASSERT_THAT(profiles[0].process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
+  EXPECT_THAT(profiles[0], EqualsProto(R"(
+    name: "DummyTestCalculator"
+    close_runtime: 100
+    process_runtime {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+  )"));
 }
 
 // Tests that SetCloseRuntime() updates |close_runtime| and doesn't affect other
@@ -789,11 +831,39 @@ TEST_F(GraphProfilerTestPeer, SetCloseRuntimeWithStreamLatency) {
   ASSERT_EQ(profiles.size(), 2);
   CalculatorProfile source_profile =
       GetProfileWithName(profiles, "source_calc");
-  ASSERT_FALSE(source_profile.open_runtime());
-  ASSERT_EQ(source_profile.close_runtime(), 100);
-  ASSERT_THAT(source_profile.process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
-  ASSERT_EQ(GetPacketsInfoMap()->size(), 1);
+
+  EXPECT_THAT(source_profile, EqualsProto(R"(
+    name: "source_calc"
+    close_runtime: 100
+    process_runtime {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+    process_input_latency {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+    process_output_latency {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 0
+    }
+    input_stream_profiles {
+      name: "input_stream"
+      back_edge: false
+      latency {
+        total: 0
+        interval_size_usec: 1000000
+        num_intervals: 1
+        count: 0
+      }
+    }
+  )"));
 
   PacketInfo expected_packet_info = {0,
                                      /*production_time_usec=*/1000 + 100,
                                      /*source_process_start_usec=*/1000 + 0};
@@ -933,10 +1003,15 @@ TEST_F(GraphProfilerTestPeer, AddProcessSample) {
   simulation_clock->ThreadFinish();
 
   ASSERT_EQ(profiles.size(), 1);
-  ASSERT_THAT(profiles[0].process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/150, {1}))));
-  ASSERT_FALSE(profiles[0].has_open_runtime());
-  ASSERT_FALSE(profiles[0].has_close_runtime());
+  EXPECT_THAT(profiles[0], EqualsProto(R"(
+    name: "DummyTestCalculator"
+    process_runtime {
+      total: 150
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 1
+    }
+  )"));
 
   // Checks packets_info_ map hasn't changed.
   ASSERT_EQ(GetPacketsInfoMap()->size(), 0);
 }
 
@@ -985,12 +1060,27 @@ TEST_F(GraphProfilerTestPeer, AddProcessSampleWithStreamLatency) {
   ASSERT_EQ(profiles.size(), 2);
   CalculatorProfile source_profile =
       GetProfileWithName(profiles, "source_calc");
-  ASSERT_THAT(source_profile.process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/150, {1}))));
-  ASSERT_THAT(source_profile.process_input_latency(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {1}))));
-  ASSERT_THAT(source_profile.process_output_latency(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/150, {1}))));
+
+  EXPECT_THAT(profiles[0], Partially(EqualsProto(R"(
+    process_runtime {
+      total: 150
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 1
+    }
+    process_input_latency {
+      total: 0
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 1
+    }
+    process_output_latency {
+      total: 150
+      interval_size_usec: 1000000
+      num_intervals: 1
+      count: 1
+    }
+  )")));
 
   // Check packets_info_ map has been updated.
   ASSERT_EQ(GetPacketsInfoMap()->size(), 1);
@@ -1019,22 +1109,24 @@ TEST_F(GraphProfilerTestPeer, AddProcessSampleWithStreamLatency) {
 
   CalculatorProfile consumer_profile =
       GetProfileWithName(profiles, "consumer_calc");
-  ASSERT_THAT(consumer_profile.process_runtime(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/250, {1}))));
-  ASSERT_THAT(consumer_profile.process_input_latency(),
-              Partially(EqualsProto(CreateTimeHistogram(
-                  /*total=*/2000 - when_source_started, {1}))));
-  ASSERT_THAT(consumer_profile.process_output_latency(),
-              Partially(EqualsProto(CreateTimeHistogram(
-                  /*total=*/2000 + 250 - when_source_started, {1}))));
-  ASSERT_EQ(consumer_profile.input_stream_profiles().size(), 2);
-  // For "stream_0" should have not changed since it was empty.
-  ASSERT_THAT(consumer_profile.input_stream_profiles(0).latency(),
-              Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0}))));
-  // For "stream_1"
-  ASSERT_THAT(consumer_profile.input_stream_profiles(1).latency(),
-              Partially(EqualsProto(CreateTimeHistogram(
-                  /*total=*/2000 - when_source_finished, {1}))));
+
+  // Process input latency total = 2000 (end) - 1000 (when source started)
+  // = 1000.
+  // Process output latency total = 2000 (end) + 250 - 1000 (when source
+  // started) = 1250.
+  // "stream_0" should not have changed, since it was empty.
+  // "stream_1" latency total = 2000 (end) - 1250 (when source finished) = 850.
+  EXPECT_THAT(consumer_profile, Partially(EqualsProto(R"(
+    name: "consumer_calc"
+    process_input_latency { total: 1000 }
+    process_output_latency { total: 1250 }
+    input_stream_profiles {
+      name: "stream_0"
+      latency { total: 0 }
+    }
+    input_stream_profiles {
+      name: "stream_1"
+      latency { total: 850 }
+    }
+  )")));
 
   // Check packets_info_ map for PacketId({"stream_1", 100}) should not yet be
   // garbage collected.
diff --git a/mediapipe/framework/profiler/trace_buffer.h b/mediapipe/framework/profiler/trace_buffer.h
index 167bc2a89..c435d0d52 100644
--- a/mediapipe/framework/profiler/trace_buffer.h
+++ b/mediapipe/framework/profiler/trace_buffer.h
@@ -39,9 +39,20 @@ inline const void* GetPacketDataId(const HolderBase* holder) {
 struct TraceEvent {
   using EventType = GraphTrace::EventType;
   // GraphTrace::EventType constants, repeated here to match GraphProfilerStub.
-  static const EventType UNKNOWN, OPEN, PROCESS, CLOSE, NOT_READY,
-      READY_FOR_PROCESS, READY_FOR_CLOSE, THROTTLED, UNTHROTTLED, CPU_TASK_USER,
-      CPU_TASK_SYSTEM, GPU_TASK, DSP_TASK, TPU_TASK;
+  static constexpr EventType UNKNOWN = GraphTrace::UNKNOWN;
+  static constexpr EventType OPEN = GraphTrace::OPEN;
+  static constexpr EventType PROCESS = GraphTrace::PROCESS;
+  static constexpr EventType CLOSE = GraphTrace::CLOSE;
+  static constexpr EventType NOT_READY = GraphTrace::NOT_READY;
+  static constexpr EventType READY_FOR_PROCESS = GraphTrace::READY_FOR_PROCESS;
+  static constexpr EventType READY_FOR_CLOSE = GraphTrace::READY_FOR_CLOSE;
+  static constexpr EventType THROTTLED = GraphTrace::THROTTLED;
+  static constexpr EventType UNTHROTTLED = GraphTrace::UNTHROTTLED;
+  static constexpr EventType CPU_TASK_USER = GraphTrace::CPU_TASK_USER;
+  static constexpr EventType CPU_TASK_SYSTEM = GraphTrace::CPU_TASK_SYSTEM;
+  static constexpr EventType GPU_TASK = GraphTrace::GPU_TASK;
+  static constexpr EventType DSP_TASK = GraphTrace::DSP_TASK;
+  static constexpr EventType TPU_TASK = GraphTrace::TPU_TASK;
   absl::Time event_time;
   EventType event_type = UNKNOWN;
   bool is_finish = false;
diff --git a/mediapipe/framework/profiler/trace_builder.cc b/mediapipe/framework/profiler/trace_builder.cc
index e609e6dcb..197472b32 100644
--- a/mediapipe/framework/profiler/trace_builder.cc
+++ b/mediapipe/framework/profiler/trace_builder.cc
@@ -385,21 +385,21 @@ void TraceBuilder::CreateLog(const TraceBuffer& buffer, absl::Time begin_time,
 }
 
 void TraceBuilder::Clear() { impl_->Clear(); }
 
-// Defined here since inline constants fail to link in android builds.
-const TraceEvent::EventType  //
-    TraceEvent::UNKNOWN = GraphTrace::UNKNOWN,
-    TraceEvent::OPEN = GraphTrace::OPEN,
-    TraceEvent::PROCESS = GraphTrace::PROCESS,
-    TraceEvent::CLOSE = GraphTrace::CLOSE,
-    TraceEvent::NOT_READY = GraphTrace::NOT_READY,
-    TraceEvent::READY_FOR_PROCESS = GraphTrace::READY_FOR_PROCESS,
-    TraceEvent::READY_FOR_CLOSE = GraphTrace::READY_FOR_CLOSE,
-    TraceEvent::THROTTLED = GraphTrace::THROTTLED,
-    TraceEvent::UNTHROTTLED = GraphTrace::UNTHROTTLED,
-    TraceEvent::CPU_TASK_USER = GraphTrace::CPU_TASK_USER,
-    TraceEvent::CPU_TASK_SYSTEM = GraphTrace::CPU_TASK_SYSTEM,
-    TraceEvent::GPU_TASK = GraphTrace::GPU_TASK,
-    TraceEvent::DSP_TASK = GraphTrace::DSP_TASK,
-    TraceEvent::TPU_TASK = GraphTrace::TPU_TASK;
+// Defined here since constexpr requires out-of-class definition until C++17.
+const TraceEvent::EventType         //
+    TraceEvent::UNKNOWN,            //
+    TraceEvent::OPEN,               //
+    TraceEvent::PROCESS,            //
+    TraceEvent::CLOSE,              //
+    TraceEvent::NOT_READY,          //
+    TraceEvent::READY_FOR_PROCESS,  //
+    TraceEvent::READY_FOR_CLOSE,    //
+    TraceEvent::THROTTLED,          //
+    TraceEvent::UNTHROTTLED,        //
+    TraceEvent::CPU_TASK_USER,      //
+    TraceEvent::CPU_TASK_SYSTEM,    //
+    TraceEvent::GPU_TASK,           //
+    TraceEvent::DSP_TASK,           //
+    TraceEvent::TPU_TASK;
 
 }  // namespace mediapipe
diff --git a/mediapipe/framework/stream_handler/fixed_size_input_stream_handler.cc b/mediapipe/framework/stream_handler/fixed_size_input_stream_handler.cc
index dad849c1d..9e874c05d 100644
--- a/mediapipe/framework/stream_handler/fixed_size_input_stream_handler.cc
+++ b/mediapipe/framework/stream_handler/fixed_size_input_stream_handler.cc
@@ -155,7 +155,7 @@ class FixedSizeInputStreamHandler : public DefaultInputStreamHandler {
     return (fixed_min_size_) ? EraseAllSurplus() : EraseAnySurplus(keep_one);
   }
 
-  NodeReadiness GetNodeReadiness(Timestamp* min_stream_timestamp) {
+  NodeReadiness GetNodeReadiness(Timestamp* min_stream_timestamp) override {
     DCHECK(min_stream_timestamp);
     absl::MutexLock lock(&erase_mutex_);
     // kReadyForProcess is returned only once until FillInputSet completes.
diff --git a/mediapipe/framework/testdata/BUILD b/mediapipe/framework/testdata/BUILD
index 599576899..0b96502cf 100644
--- a/mediapipe/framework/testdata/BUILD
+++ b/mediapipe/framework/testdata/BUILD
@@ -31,7 +31,7 @@ mediapipe_cc_proto_library(
     name = "sky_light_calculator_cc_proto",
     srcs = ["sky_light_calculator.proto"],
     cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
-    visibility = ["//mediapipe:__subpackages__"],
+    visibility = ["//visibility:public"],
     deps = [":sky_light_calculator_proto"],
 )
 
@@ -45,7 +45,7 @@ mediapipe_cc_proto_library(
     name = "night_light_calculator_cc_proto",
     srcs = ["night_light_calculator.proto"],
     cc_deps = ["//mediapipe/framework:calculator_cc_proto"],
-    visibility = ["//mediapipe:__subpackages__"],
+    visibility = ["//visibility:public"],
     deps = [":night_light_calculator_proto"],
 )
diff --git a/mediapipe/framework/tool/BUILD b/mediapipe/framework/tool/BUILD
index 06d178cc0..61ec92e1e 100644
--- a/mediapipe/framework/tool/BUILD
+++ b/mediapipe/framework/tool/BUILD
@@ -177,7 +177,7 @@ cc_library(
     deps = [
         "//mediapipe/framework:packet",
         "//mediapipe/framework/port:statusor",
-        "@org_tensorflow//tensorflow/core:protos_all_cc",
+        "@org_tensorflow//tensorflow/core:protos_all",
     ],
 )
diff --git a/mediapipe/framework/tool/mediapipe_graph.bzl b/mediapipe/framework/tool/mediapipe_graph.bzl
index d6e7c56a5..564fb3011 100644
--- a/mediapipe/framework/tool/mediapipe_graph.bzl
+++ b/mediapipe/framework/tool/mediapipe_graph.bzl
@@ -110,7 +110,7 @@ def mediapipe_simple_subgraph(
       testonly: pass 1 if the graph is to be used only for tests.
       **kwargs: Remaining keyword args, forwarded to cc_library.
     """
-    graph_base_name = graph.replace(":", "/").split("/")[-1].rsplit(".", 1)[0]
+    graph_base_name = name
     mediapipe_binary_graph(
         name = name + "_graph",
         graph = graph,
diff --git a/mediapipe/framework/tool/subgraph_expansion.cc b/mediapipe/framework/tool/subgraph_expansion.cc
index 665fd4cec..ddd8a06e4 100644
--- a/mediapipe/framework/tool/subgraph_expansion.cc
+++ b/mediapipe/framework/tool/subgraph_expansion.cc
@@ -52,6 +52,31 @@ namespace tool {
   return ::mediapipe::OkStatus();
 }
 
+// Returns subgraph streams not requested by a subgraph-node.
+::mediapipe::Status FindIgnoredStreams(
+    const proto_ns::RepeatedPtrField<ProtoString>& src_streams,
+    const proto_ns::RepeatedPtrField<ProtoString>& dst_streams,
+    std::set<std::string>* result) {
+  ASSIGN_OR_RETURN(auto src_map, tool::TagMap::Create(src_streams));
+  ASSIGN_OR_RETURN(auto dst_map, tool::TagMap::Create(dst_streams));
+  std::set_difference(src_map->Names().begin(), src_map->Names().end(),
+                      dst_map->Names().begin(), dst_map->Names().end(),
+                      std::inserter(*result, result->begin()));
+  return ::mediapipe::OkStatus();
+}
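FindIgnoredStreams above is a set difference over sorted tag-map name ranges. The same idiom in isolation, with plain standard-library types standing in for the patch's proto fields (values illustrative):

    #include <algorithm>
    #include <iterator>
    #include <set>
    #include <string>
    #include <vector>

    std::vector<std::string> defined = {"input_0", "input_1"};  // sorted
    std::vector<std::string> wired = {"input_0"};               // sorted
    std::set<std::string> ignored;
    // Collects the streams the subgraph defines but the node left unwired.
    std::set_difference(defined.begin(), defined.end(),
                        wired.begin(), wired.end(),
                        std::inserter(ignored, ignored.begin()));
    // ignored == {"input_1"}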
+
+// Removes subgraph streams not requested by a subgraph-node.
+::mediapipe::Status RemoveIgnoredStreams(
+    proto_ns::RepeatedPtrField<ProtoString>* streams,
+    const std::set<std::string>& missing_streams) {
+  for (int i = streams->size() - 1; i >= 0; --i) {
+    if (missing_streams.count(streams->Get(i)) > 0) {
+      streams->DeleteSubrange(i, 1);
+    }
+  }
+  return ::mediapipe::OkStatus();
+}
+
 ::mediapipe::Status TransformNames(
     CalculatorGraphConfig* config,
     const std::function<std::string(absl::string_view)>& transform) {
@@ -190,6 +215,14 @@ static ::mediapipe::Status PrefixNames(int subgraph_index,
           .SetPrepend()
       << "while processing the output side packets of subgraph node "
       << subgraph_node.calculator() << ": ";
+  std::set<std::string> ignored_input_streams;
+  MP_RETURN_IF_ERROR(FindIgnoredStreams(subgraph_config->input_stream(),
+                                        subgraph_node.input_stream(),
+                                        &ignored_input_streams));
+  std::set<std::string> ignored_input_side_packets;
+  MP_RETURN_IF_ERROR(FindIgnoredStreams(subgraph_config->input_side_packet(),
+                                        subgraph_node.input_side_packet(),
+                                        &ignored_input_side_packets));
   std::map<std::string, std::string>* name_map;
   auto replace_names = [&name_map](absl::string_view s) {
     std::string original(s);
@@ -207,6 +240,12 @@ static ::mediapipe::Status PrefixNames(int subgraph_index,
         TransformStreamNames(node.mutable_input_side_packet(), replace_names));
     MP_RETURN_IF_ERROR(
         TransformStreamNames(node.mutable_output_side_packet(), replace_names));
+
+    // Remove input streams and side packets ignored by the subgraph-node.
+    MP_RETURN_IF_ERROR(RemoveIgnoredStreams(node.mutable_input_stream(),
+                                            ignored_input_streams));
+    MP_RETURN_IF_ERROR(RemoveIgnoredStreams(node.mutable_input_side_packet(),
+                                            ignored_input_side_packets));
   }
   name_map = &side_packet_map;
   for (auto& generator : *subgraph_config->mutable_packet_generator()) {
diff --git a/mediapipe/framework/tool/tag_map.h b/mediapipe/framework/tool/tag_map.h
index bdc250924..e2ec97599 100644
--- a/mediapipe/framework/tool/tag_map.h
+++ b/mediapipe/framework/tool/tag_map.h
@@ -127,6 +127,11 @@ class TagMap {
   std::vector<std::string> names_;
 };
 
+// Equal TagData structs define equal id ranges.
+inline bool operator==(const TagMap::TagData& d1, const TagMap::TagData& d2) {
+  return d1.id == d2.id && d1.count == d2.count;
+}
+
 }  // namespace tool
 }  // namespace mediapipe
diff --git a/mediapipe/framework/tool/template_expander.cc b/mediapipe/framework/tool/template_expander.cc
index 2597dd597..e2de6e3e7 100644
--- a/mediapipe/framework/tool/template_expander.cc
+++ b/mediapipe/framework/tool/template_expander.cc
@@ -567,6 +567,10 @@ class TemplateExpanderImpl {
       result = AsDict(args);
     } else if (expr.op() == "list") {
       result = AsList(args);
+    } else if (expr.op() == "size") {
+      return AsArgument(static_cast<double>(
+          args[0].has_dict() ? args[0].mutable_dict()->arg_size()
+                             : args[0].mutable_element()->size()));
     }
     return result;
   }
diff --git a/mediapipe/framework/tool/template_parser.cc b/mediapipe/framework/tool/template_parser.cc
index 62380bf19..2954566e8 100644
--- a/mediapipe/framework/tool/template_parser.cc
+++ b/mediapipe/framework/tool/template_parser.cc
@@ -1318,8 +1318,8 @@ bool IsInfixOperator(const std::string& token) {
 
 // A function-style operator, including a for or if expression.
 bool IsFunctionOperator(const std::string& token) {
   static auto kTokens = new std::set<std::string>{
-      "min", "max", "for", "if", "!",
-      "concat", "lowercase", "uppercase", "dict", "list",
+      "min", "max", "for", "if", "!", "concat",
+      "lowercase", "uppercase", "size", "dict", "list",
   };
   return kTokens->count(token) > 0;
 }
diff --git a/mediapipe/framework/validated_graph_config.cc b/mediapipe/framework/validated_graph_config.cc
index c9b2914ee..a710cdb2e 100644
--- a/mediapipe/framework/validated_graph_config.cc
+++ b/mediapipe/framework/validated_graph_config.cc
@@ -14,8 +14,7 @@
 
 #include "mediapipe/framework/validated_graph_config.h"
 
-#include <unordered_set>
-
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_join.h"
@@ -934,7 +933,7 @@ NodeTypeInfo::NodeRef ValidatedGraphConfig::NodeForSorterIndex(
 }
 
 ::mediapipe::Status ValidatedGraphConfig::ValidateExecutors() {
-  std::unordered_set<std::string> declared_names;
+  absl::flat_hash_set<std::string> declared_names;
   for (const ExecutorConfig& executor_config : config_.executor()) {
     if (IsReservedExecutorName(executor_config.name())) {
       return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
@@ -964,7 +963,7 @@ NodeTypeInfo::NodeRef ValidatedGraphConfig::NodeForSorterIndex(
              << "\"" << executor_name << "\" is a reserved executor name.";
     }
     // The executor must be declared in an ExecutorConfig.
-    if (declared_names.find(executor_name) == declared_names.end()) {
+    if (!declared_names.contains(executor_name)) {
       return ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
              << "The executor \"" << executor_name
             << "\" is not declared in an ExecutorConfig.";
diff --git a/mediapipe/framework/validated_graph_config.h b/mediapipe/framework/validated_graph_config.h
index d7a847950..9554b71d8 100644
--- a/mediapipe/framework/validated_graph_config.h
+++ b/mediapipe/framework/validated_graph_config.h
@@ -16,9 +16,9 @@
 #define MEDIAPIPE_FRAMEWORK_VALIDATED_GRAPH_CONFIG_H_
 
 #include <string>
-#include <unordered_set>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "mediapipe/framework/calculator.pb.h"
 #include "mediapipe/framework/calculator_contract.h"
 #include "mediapipe/framework/packet_generator.pb.h"
@@ -169,7 +169,7 @@ class NodeTypeInfo {
   //   be a virtual node corresponding to a graph input stream (which are
   //   listed by index contiguously after all calculators).
   // This function is only valid for a NodeTypeInfo of NodeType CALCULATOR.
-  const std::unordered_set<int>& AncestorSources() const {
+  const absl::flat_hash_set<int>& AncestorSources() const {
     return ancestor_sources_;
   }
   // Returns True if the source was not already there.
@@ -213,7 +213,7 @@ class NodeTypeInfo {
   NodeRef node_;
 
   // The set of sources which affect this node.
-  std::unordered_set<int> ancestor_sources_;
+  absl::flat_hash_set<int> ancestor_sources_;
 };
 
 // Information for either the input or output side of an edge. An edge
diff --git a/mediapipe/gpu/gl_simple_shaders.h b/mediapipe/gpu/gl_simple_shaders.h
index 3fed608ad..8bc612ddd 100644
--- a/mediapipe/gpu/gl_simple_shaders.h
+++ b/mediapipe/gpu/gl_simple_shaders.h
@@ -101,6 +101,10 @@ static const GLfloat kBasicTextureVertices[] = {
     1.0f, 1.0f,  // top right
 };
 
+// Places a texture on kBasicSquareVertices, flipped horizontally.
+static const GLfloat kBasicTextureVerticesFlipX[] = {
+    V4(kBasicTextureVertices, 1, 0, 3, 2)};
+
 // Places a texture on kBasicSquareVertices, flipped vertically.
 static const GLfloat kBasicTextureVerticesFlipY[] = {
     V4(kBasicTextureVertices, 2, 3, 0, 1)};
 
diff --git a/mediapipe/graphs/face_detection/face_detection_desktop_live.pbtxt b/mediapipe/graphs/face_detection/face_detection_desktop_live.pbtxt
index 95fdb3623..2d22b7a14 100644
--- a/mediapipe/graphs/face_detection/face_detection_desktop_live.pbtxt
+++ b/mediapipe/graphs/face_detection/face_detection_desktop_live.pbtxt
@@ -1,6 +1,6 @@
 # MediaPipe graph that performs face detection with TensorFlow Lite on CPU.
 # Used in the examples in
-# mediapipie/examples/desktop/face_detection:face_detection_cpu.
+# mediapipe/examples/desktop/face_detection:face_detection_cpu.
 
 # Images on GPU coming into and out of the graph.
 input_stream: "input_video"
diff --git a/mediapipe/graphs/hand_tracking/BUILD b/mediapipe/graphs/hand_tracking/BUILD
index 09a8e4d0f..da6776b8d 100644
--- a/mediapipe/graphs/hand_tracking/BUILD
+++ b/mediapipe/graphs/hand_tracking/BUILD
@@ -12,26 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-licenses(["notice"])  # Apache 2.0
-
-package(default_visibility = ["//visibility:public"])
-
 load(
     "//mediapipe/framework/tool:mediapipe_graph.bzl",
     "mediapipe_binary_graph",
 )
 
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//visibility:public"])
+
 cc_library(
-    name = "desktop_tflite_calculators",
+    name = "desktop_offline_calculators",
     deps = [
         "//mediapipe/calculators/core:flow_limiter_calculator",
         "//mediapipe/calculators/core:gate_calculator",
         "//mediapipe/calculators/core:immediate_mux_calculator",
-        "//mediapipe/calculators/core:merge_calculator",
         "//mediapipe/calculators/core:packet_inner_join_calculator",
         "//mediapipe/calculators/core:previous_loopback_calculator",
         "//mediapipe/calculators/video:opencv_video_decoder_calculator",
         "//mediapipe/calculators/video:opencv_video_encoder_calculator",
+    ],
+)
+
+cc_library(
+    name = "desktop_tflite_calculators",
+    deps = [
+        ":desktop_offline_calculators",
+        "//mediapipe/calculators/core:merge_calculator",
         "//mediapipe/graphs/hand_tracking/subgraphs:hand_detection_cpu",
         "//mediapipe/graphs/hand_tracking/subgraphs:hand_landmark_cpu",
         "//mediapipe/graphs/hand_tracking/subgraphs:renderer_cpu",
@@ -58,6 +65,39 @@ mediapipe_binary_graph(
     deps = [":mobile_calculators"],
 )
 
+cc_library(
+    name = "multi_hand_desktop_tflite_calculators",
+    deps = [
+        ":desktop_offline_calculators",
+        "//mediapipe/calculators/util:association_norm_rect_calculator",
+        "//mediapipe/calculators/util:collection_has_min_size_calculator",
+        "//mediapipe/graphs/hand_tracking/subgraphs:multi_hand_detection_cpu",
+        "//mediapipe/graphs/hand_tracking/subgraphs:multi_hand_landmark_cpu",
+        "//mediapipe/graphs/hand_tracking/subgraphs:multi_hand_renderer_cpu",
+    ],
+)
+
+cc_library(
+    name = "multi_hand_mobile_calculators",
+    deps = [
+        "//mediapipe/calculators/core:flow_limiter_calculator",
+        "//mediapipe/calculators/core:gate_calculator",
+        "//mediapipe/calculators/core:previous_loopback_calculator",
+        "//mediapipe/calculators/util:association_norm_rect_calculator",
+        "//mediapipe/calculators/util:collection_has_min_size_calculator",
+        "//mediapipe/graphs/hand_tracking/subgraphs:multi_hand_detection_gpu",
+        "//mediapipe/graphs/hand_tracking/subgraphs:multi_hand_landmark_gpu",
+        "//mediapipe/graphs/hand_tracking/subgraphs:multi_hand_renderer_gpu",
+    ],
+)
+
+mediapipe_binary_graph(
+    name = "multi_hand_tracking_mobile_gpu_binary_graph",
+    graph = "multi_hand_tracking_mobile.pbtxt",
"multi_hand_tracking_mobile_gpu.binarypb", + deps = [":multi_hand_mobile_calculators"], +) + cc_library( name = "detection_mobile_calculators", deps = [ diff --git a/mediapipe/graphs/hand_tracking/hand_detection_desktop_live.pbtxt b/mediapipe/graphs/hand_tracking/hand_detection_desktop_live.pbtxt index 9e6fdad06..363bb5182 100644 --- a/mediapipe/graphs/hand_tracking/hand_detection_desktop_live.pbtxt +++ b/mediapipe/graphs/hand_tracking/hand_detection_desktop_live.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs hand detection on desktop with TensorFlow Lite # on CPU. # Used in the example in -# mediapipie/examples/desktop/hand_tracking:hand_detection_cpu. +# mediapipe/examples/desktop/hand_tracking:hand_detection_cpu. # Images coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt b/mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt index f5431c89d..7495c62fc 100644 --- a/mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt +++ b/mediapipe/graphs/hand_tracking/hand_detection_mobile.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs hand detection with TensorFlow Lite on GPU. # Used in the examples in -# mediapipie/examples/android/src/java/com/mediapipe/apps/handdetectiongpu and -# mediapipie/examples/ios/handdetectiongpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/handdetectiongpu and +# mediapipe/examples/ios/handdetectiongpu. # Images coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/hand_tracking/hand_tracking_desktop.pbtxt b/mediapipe/graphs/hand_tracking/hand_tracking_desktop.pbtxt index 29ad822a8..bd089e774 100644 --- a/mediapipe/graphs/hand_tracking/hand_tracking_desktop.pbtxt +++ b/mediapipe/graphs/hand_tracking/hand_tracking_desktop.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs hand tracking on desktop with TensorFlow Lite # on CPU. # Used in the example in -# mediapipie/examples/desktop/hand_tracking:hand_tracking_tflite. +# mediapipe/examples/desktop/hand_tracking:hand_tracking_tflite. # max_queue_size limits the number of packets enqueued on any input stream # by throttling inputs to the graph. This makes the graph only process one diff --git a/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt b/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt index dc8cb3d11..04cb28db2 100644 --- a/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt +++ b/mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs hand tracking with TensorFlow Lite on GPU. # Used in the examples in -# mediapipie/examples/android/src/java/com/mediapipe/apps/handtrackinggpu and -# mediapipie/examples/ios/handtrackinggpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/handtrackinggpu and +# mediapipe/examples/ios/handtrackinggpu. # Images coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop.pbtxt b/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop.pbtxt new file mode 100644 index 000000000..1a554629c --- /dev/null +++ b/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop.pbtxt @@ -0,0 +1,127 @@ +# MediaPipe graph that performs multi-hand tracking on desktop with TensorFlow +# Lite on CPU. +# Used in the example in +# mediapipie/examples/desktop/hand_tracking:multi_hand_tracking_tflite. 
+
+# max_queue_size limits the number of packets enqueued on any input stream
+# by throttling inputs to the graph. This makes the graph only process one
+# frame at a time.
+max_queue_size: 1
+
+# Decodes an input video file into images and a video header.
+node {
+  calculator: "OpenCvVideoDecoderCalculator"
+  input_side_packet: "INPUT_FILE_PATH:input_video_path"
+  output_stream: "VIDEO:input_video"
+  output_stream: "VIDEO_PRESTREAM:input_video_header"
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided min_size.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:prev_multi_hand_rects_from_landmarks"
+  output_stream: "prev_has_enough_hands"
+  node_options: {
+    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify max_vec_size in
+      # ClipVectorSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_cpu.pbtxt
+      min_size: 2
+    }
+  }
+}
+
+# Drops the incoming image if the previous frame had at least N hands.
+# Otherwise, passes the incoming image through to trigger a new round of hand
+# detection in MultiHandDetectionSubgraph.
+node {
+  calculator: "GateCalculator"
+  input_stream: "input_video"
+  input_stream: "DISALLOW:prev_has_enough_hands"
+  output_stream: "multi_hand_detection_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Subgraph that detects hands (see multi_hand_detection_cpu.pbtxt).
+node {
+  calculator: "MultiHandDetectionSubgraph"
+  input_stream: "multi_hand_detection_input_video"
+  output_stream: "DETECTIONS:multi_palm_detections"
+  output_stream: "NORM_RECTS:multi_palm_rects"
+}
+
+# Subgraph that localizes hand landmarks for multiple hands (see
+# multi_hand_landmark.pbtxt).
+node {
+  calculator: "MultiHandLandmarkSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "LANDMARKS:multi_hand_landmarks"
+  output_stream: "NORM_RECTS:multi_hand_rects_from_landmarks"
+}
+
+# Caches a hand rectangle fed back from MultiHandLandmarkSubgraph, and upon the
+# arrival of the next input image sends out the cached rectangle with the
+# timestamp replaced by that of the input image, essentially generating a packet
+# that carries the previous hand rectangle. Note that upon the arrival of the
+# very first input image, an empty packet is sent out to jump start the
+# feedback loop.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:input_video"
+  input_stream: "LOOP:multi_hand_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_multi_hand_rects_from_landmarks"
+}
+
+# Performs association between NormalizedRect vector elements from previous
+# frame and those from the current frame if MultiHandDetectionSubgraph runs.
+# This calculator ensures that the output multi_hand_rects vector doesn't
+# contain overlapping regions based on the specified min_similarity_threshold.
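One way to realize the behavior described in the comment above can be sketched in C++; this is illustrative only (the actual AssociationNormRectCalculator operates on NormalizedRect protos and its precedence rules may differ). Here, rects carried over from the previous frame are kept, and a fresh detection is added only when its overlap with every kept rect stays at or below min_similarity_threshold:

#include <algorithm>
#include <cstdio>
#include <vector>

// Axis-aligned stand-in for NormalizedRect (the real rects can be rotated).
struct Rect { float x, y, w, h; };

float IoU(const Rect& a, const Rect& b) {
  const float x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
  const float x2 = std::min(a.x + a.w, b.x + b.w);
  const float y2 = std::min(a.y + a.h, b.y + b.h);
  const float inter = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1);
  const float uni = a.w * a.h + b.w * b.h - inter;
  return uni > 0.f ? inter / uni : 0.f;
}

std::vector<Rect> Associate(const std::vector<Rect>& prev,
                            const std::vector<Rect>& current,
                            float min_similarity_threshold) {
  std::vector<Rect> out = prev;  // one policy: tracked rects take precedence
  for (const Rect& c : current) {
    bool overlaps = false;
    for (const Rect& kept : out) {
      if (IoU(c, kept) > min_similarity_threshold) { overlaps = true; break; }
    }
    if (!overlaps) out.push_back(c);  // a genuinely new hand
  }
  return out;
}

int main() {
  std::vector<Rect> prev = {{0.1f, 0.1f, 0.3f, 0.3f}};
  std::vector<Rect> cur = {{0.12f, 0.1f, 0.3f, 0.3f},   // overlaps the tracked hand
                           {0.6f, 0.5f, 0.3f, 0.3f}};   // a new hand
  std::printf("kept %zu rects\n", Associate(prev, cur, 0.1f).size());  // 2
  return 0;
}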
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "prev_multi_hand_rects_from_landmarks"
+  input_stream: "multi_palm_rects"
+  output_stream: "multi_hand_rects"
+  node_options: {
+    [type.googleapis.com/mediapipe.AssociationCalculatorOptions] {
+      min_similarity_threshold: 0.1
+    }
+  }
+}
+
+# Subgraph that renders annotations and overlays them on top of the input
+# images (see multi_hand_renderer_cpu.pbtxt).
+node {
+  calculator: "MultiHandRendererSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "DETECTIONS:multi_palm_detections"
+  input_stream: "LANDMARKS:multi_hand_landmarks"
+  input_stream: "NORM_RECTS:0:multi_palm_rects"
+  input_stream: "NORM_RECTS:1:multi_hand_rects"
+  output_stream: "IMAGE:output_video"
+}
+
+# Encodes the annotated images into a video file, adopting properties specified
+# in the input video header, e.g., video framerate.
+node {
+  calculator: "OpenCvVideoEncoderCalculator"
+  input_stream: "VIDEO:output_video"
+  input_stream: "VIDEO_PRESTREAM:input_video_header"
+  input_side_packet: "OUTPUT_FILE_PATH:output_video_path"
+  node_options: {
+    [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: {
+      codec: "avc1"
+      video_format: "mp4"
+    }
+  }
+}
diff --git a/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt b/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt
new file mode 100644
index 000000000..6e4a0331f
--- /dev/null
+++ b/mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt
@@ -0,0 +1,103 @@
+# MediaPipe graph that performs multi-hand tracking on desktop with TensorFlow
+# Lite on CPU.
+# Used in the example in
+# mediapipe/examples/desktop/hand_tracking:multi_hand_tracking_cpu.
+
+# Images coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided min_size.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:prev_multi_hand_rects_from_landmarks"
+  output_stream: "prev_has_enough_hands"
+  node_options: {
+    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify max_vec_size in
+      # ClipVectorSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
+      min_size: 2
+    }
+  }
+}
+
+# Drops the incoming image if the previous frame had at least N hands.
+# Otherwise, passes the incoming image through to trigger a new round of hand
+# detection in MultiHandDetectionSubgraph.
+node {
+  calculator: "GateCalculator"
+  input_stream: "input_video"
+  input_stream: "DISALLOW:prev_has_enough_hands"
+  output_stream: "multi_hand_detection_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Subgraph that detects hands (see multi_hand_detection_cpu.pbtxt).
+node {
+  calculator: "MultiHandDetectionSubgraph"
+  input_stream: "multi_hand_detection_input_video"
+  output_stream: "DETECTIONS:multi_palm_detections"
+  output_stream: "NORM_RECTS:multi_palm_rects"
+}
+
+# Subgraph that localizes hand landmarks for multiple hands (see
+# multi_hand_landmark.pbtxt).
+node {
+  calculator: "MultiHandLandmarkSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "LANDMARKS:multi_hand_landmarks"
+  output_stream: "NORM_RECTS:multi_hand_rects_from_landmarks"
+}
+
+# Caches a hand rectangle fed back from MultiHandLandmarkSubgraph, and upon the
+# arrival of the next input image sends out the cached rectangle with the
+# timestamp replaced by that of the input image, essentially generating a packet
+# that carries the previous hand rectangle. Note that upon the arrival of the
+# very first input image, an empty packet is sent out to jump start the
+# feedback loop.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:input_video"
+  input_stream: "LOOP:multi_hand_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_multi_hand_rects_from_landmarks"
+}
+
+# Performs association between NormalizedRect vector elements from previous
+# frame and those from the current frame if MultiHandDetectionSubgraph runs.
+# This calculator ensures that the output multi_hand_rects vector doesn't
+# contain overlapping regions based on the specified min_similarity_threshold.
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "prev_multi_hand_rects_from_landmarks"
+  input_stream: "multi_palm_rects"
+  output_stream: "multi_hand_rects"
+  node_options: {
+    [type.googleapis.com/mediapipe.AssociationCalculatorOptions] {
+      min_similarity_threshold: 0.1
+    }
+  }
+}
+
+# Subgraph that renders annotations and overlays them on top of the input
+# images (see multi_hand_renderer_cpu.pbtxt).
+node {
+  calculator: "MultiHandRendererSubgraph"
+  input_stream: "IMAGE:input_video"
+  input_stream: "DETECTIONS:multi_palm_detections"
+  input_stream: "LANDMARKS:multi_hand_landmarks"
+  input_stream: "NORM_RECTS:0:multi_palm_rects"
+  input_stream: "NORM_RECTS:1:multi_hand_rects"
+  output_stream: "IMAGE:output_video"
+}
diff --git a/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt b/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt
new file mode 100644
index 000000000..c47bc3d8a
--- /dev/null
+++ b/mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt
@@ -0,0 +1,123 @@
+# MediaPipe graph that performs multi-hand tracking with TensorFlow Lite on GPU.
+# Used in the examples in
+# mediapipe/examples/android/src/java/com/mediapipe/apps/multihandtrackinggpu.
+
+# Images coming into and out of the graph.
+input_stream: "input_video"
+output_stream: "output_video"
+
+# Throttles the images flowing downstream for flow control. It passes through
+# the very first incoming image unaltered, and waits for downstream nodes
+# (calculators and subgraphs) in the graph to finish their tasks before it
+# passes through another image. All images that come in while waiting are
+# dropped, limiting the number of in-flight images in most parts of the graph
+# to 1. This prevents the downstream nodes from queuing up incoming images and
+# data excessively, which leads to increased latency and memory usage, unwanted
+# in real-time mobile applications. It also eliminates unnecessary computation,
+# e.g., the output produced by a node may get dropped downstream if the
+# subsequent nodes are still busy processing previous inputs.
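For intuition, the throttling described above can be reduced to a few lines of C++ (a minimal sketch, not FlowLimiterCalculator's implementation, which additionally handles timestamps, queues, and multiple streams):

#include <cstdio>

// At most one frame is in flight; frames arriving while the graph is busy
// are dropped rather than queued.
class FrameThrottle {
 public:
  // Returns true if the incoming frame may enter the graph.
  bool TryAdmit() {
    if (in_flight_) return false;  // graph busy: drop this frame
    in_flight_ = true;
    return true;
  }
  // Invoked when the "FINISHED" back edge reports a completed frame.
  void MarkFinished() { in_flight_ = false; }

 private:
  bool in_flight_ = false;
};

int main() {
  FrameThrottle throttle;
  // Simulated completion signals arriving on the back edge.
  bool downstream_done[] = {false, true, false, false, true};
  for (int frame = 0; frame < 5; ++frame) {
    std::printf("frame %d %s\n", frame,
                throttle.TryAdmit() ? "admitted" : "dropped");
    if (downstream_done[frame]) throttle.MarkFinished();
  }
  return 0;
}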
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:multi_hand_rects"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Determines if an input vector of NormalizedRect has a size greater than or
+# equal to the provided min_size.
+node {
+  calculator: "NormalizedRectVectorHasMinSizeCalculator"
+  input_stream: "ITERABLE:prev_multi_hand_rects_from_landmarks"
+  output_stream: "prev_has_enough_hands"
+  node_options: {
+    [type.googleapis.com/mediapipe.CollectionHasMinSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify max_vec_size in
+      # ClipVectorSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
+      min_size: 2
+    }
+  }
+}
+
+# Drops the incoming image if the previous frame had at least N hands.
+# Otherwise, passes the incoming image through to trigger a new round of hand
+# detection in MultiHandDetectionSubgraph.
+node {
+  calculator: "GateCalculator"
+  input_stream: "throttled_input_video"
+  input_stream: "DISALLOW:prev_has_enough_hands"
+  output_stream: "multi_hand_detection_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.GateCalculatorOptions] {
+      empty_packets_as_allow: true
+    }
+  }
+}
+
+# Subgraph that detects hands (see multi_hand_detection_gpu.pbtxt).
+node {
+  calculator: "MultiHandDetectionSubgraph"
+  input_stream: "multi_hand_detection_input_video"
+  output_stream: "DETECTIONS:multi_palm_detections"
+  output_stream: "NORM_RECTS:multi_palm_rects"
+}
+
+# Subgraph that localizes hand landmarks for multiple hands (see
+# multi_hand_landmark.pbtxt).
+node {
+  calculator: "MultiHandLandmarkSubgraph"
+  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "LANDMARKS:multi_hand_landmarks"
+  output_stream: "NORM_RECTS:multi_hand_rects_from_landmarks"
+}
+
+# Caches a hand rectangle fed back from MultiHandLandmarkSubgraph, and upon the
+# arrival of the next input image sends out the cached rectangle with the
+# timestamp replaced by that of the input image, essentially generating a packet
+# that carries the previous hand rectangle. Note that upon the arrival of the
+# very first input image, an empty packet is sent out to jump start the
+# feedback loop.
+node {
+  calculator: "PreviousLoopbackCalculator"
+  input_stream: "MAIN:throttled_input_video"
+  input_stream: "LOOP:multi_hand_rects_from_landmarks"
+  input_stream_info: {
+    tag_index: "LOOP"
+    back_edge: true
+  }
+  output_stream: "PREV_LOOP:prev_multi_hand_rects_from_landmarks"
+}
+
+# Performs association between NormalizedRect vector elements from previous
+# frame and those from the current frame if MultiHandDetectionSubgraph runs.
+# This calculator ensures that the output multi_hand_rects vector doesn't
+# contain overlapping regions based on the specified min_similarity_threshold.
+node {
+  calculator: "AssociationNormRectCalculator"
+  input_stream: "prev_multi_hand_rects_from_landmarks"
+  input_stream: "multi_palm_rects"
+  output_stream: "multi_hand_rects"
+  node_options: {
+    [type.googleapis.com/mediapipe.AssociationCalculatorOptions] {
+      min_similarity_threshold: 0.1
+    }
+  }
+}
+
+# Subgraph that renders annotations and overlays them on top of the input
+# images (see multi_hand_renderer_gpu.pbtxt).
+node { + calculator: "MultiHandRendererSubgraph" + input_stream: "IMAGE:throttled_input_video" + input_stream: "DETECTIONS:multi_palm_detections" + input_stream: "LANDMARKS:multi_hand_landmarks" + input_stream: "NORM_RECTS:0:multi_palm_rects" + input_stream: "NORM_RECTS:1:multi_hand_rects" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/hand_tracking/subgraphs/BUILD b/mediapipe/graphs/hand_tracking/subgraphs/BUILD index 93a0d1048..7692ec771 100644 --- a/mediapipe/graphs/hand_tracking/subgraphs/BUILD +++ b/mediapipe/graphs/hand_tracking/subgraphs/BUILD @@ -12,15 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -licenses(["notice"]) # Apache 2.0 - -package(default_visibility = ["//visibility:public"]) - load( "//mediapipe/framework/tool:mediapipe_graph.bzl", "mediapipe_simple_subgraph", ) +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + mediapipe_simple_subgraph( name = "hand_detection_cpu", graph = "hand_detection_cpu.pbtxt", @@ -42,6 +42,29 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "multi_hand_detection_cpu", + graph = "multi_hand_detection_cpu.pbtxt", + register_as = "MultiHandDetectionSubgraph", + deps = [ + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_label_id_to_text_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + mediapipe_simple_subgraph( name = "hand_landmark_cpu", graph = "hand_landmark_cpu.pbtxt", @@ -65,6 +88,18 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "multi_hand_landmark_cpu", + graph = "multi_hand_landmark.pbtxt", + register_as = "MultiHandLandmarkSubgraph", + deps = [ + ":hand_landmark_cpu", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/util:filter_collection_calculator", + ], +) + mediapipe_simple_subgraph( name = "renderer_cpu", graph = "renderer_cpu.pbtxt", @@ -77,6 +112,20 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "multi_hand_renderer_cpu", + graph = "multi_hand_renderer_cpu.pbtxt", + register_as = "MultiHandRendererSubgraph", + deps = [ + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_data_calculator", + ], +) + mediapipe_simple_subgraph( name = "hand_detection_gpu", graph = 
"hand_detection_gpu.pbtxt", @@ -97,6 +146,29 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "multi_hand_detection_gpu", + graph = "multi_hand_detection_gpu.pbtxt", + register_as = "MultiHandDetectionSubgraph", + deps = [ + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_custom_op_resolver_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_label_id_to_text_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + mediapipe_simple_subgraph( name = "hand_landmark_gpu", graph = "hand_landmark_gpu.pbtxt", @@ -119,6 +191,18 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "multi_hand_landmark_gpu", + graph = "multi_hand_landmark.pbtxt", + register_as = "MultiHandLandmarkSubgraph", + deps = [ + ":hand_landmark_gpu", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/util:filter_collection_calculator", + ], +) + mediapipe_simple_subgraph( name = "renderer_gpu", graph = "renderer_gpu.pbtxt", @@ -130,3 +214,17 @@ mediapipe_simple_subgraph( "//mediapipe/calculators/util:rect_to_render_data_calculator", ], ) + +mediapipe_simple_subgraph( + name = "multi_hand_renderer_gpu", + graph = "multi_hand_renderer_gpu.pbtxt", + register_as = "MultiHandRendererSubgraph", + deps = [ + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_data_calculator", + ], +) diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_cpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_cpu.pbtxt new file mode 100644 index 000000000..aa0557318 --- /dev/null +++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_cpu.pbtxt @@ -0,0 +1,213 @@ +# MediaPipe multi-hand detection subgraph. + +type: "MultiHandDetectionSubgraph" + +input_stream: "input_video" +output_stream: "DETECTIONS:palm_detections" +output_stream: "NORM_RECTS:clipped_hand_rects_from_palm_detections" + +# Transforms the input image on CPU to a 256x256 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. 
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:input_video" + output_stream: "IMAGE:transformed_input_video" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 256 + output_height: 256 + scale_mode: FIT + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "opresolver" +} + +# Converts the transformed input image on CPU into an image tensor as a +# TfLiteTensor. The zero_center option is set to true to normalize the +# pixel values to [-1.f, 1.f] as opposed to [0.f, 1.f]. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_input_video" + output_stream: "TENSORS:image_tensor" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:image_tensor" + output_stream: "TENSORS:detection_tensors" + input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/palm_detection.tflite" + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + node_options: { + [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] { + num_layers: 5 + min_scale: 0.1171875 + max_scale: 0.75 + input_size_height: 256 + input_size_width: 256 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 32 + strides: 32 + strides: 32 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { + num_classes: 1 + num_boxes: 2944 + num_coords: 18 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 7 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + + x_scale: 256.0 + y_scale: 256.0 + h_scale: 256.0 + w_scale: 256.0 + min_score_thresh: 0.5 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "detections" + output_stream: "filtered_detections" + node_options: { + [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { + min_suppression_threshold: 0.3 + min_score_threshold: 0.5 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + return_empty_detections: true + } + } +} + +# Maps detection label IDs to the corresponding label text ("Palm"). The label +# map is provided in the label_map_path option. 
+node {
+  calculator: "DetectionLabelIdToTextCalculator"
+  input_stream: "filtered_detections"
+  output_stream: "labeled_detections"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
+      label_map_path: "mediapipe/models/palm_detection_labelmap.txt"
+    }
+  }
+}
+
+# Adjusts detection locations (already normalized to [0.f, 1.f]) on the
+# letterboxed image (after image transformation with the FIT scale mode) to the
+# corresponding locations on the same image with the letterbox removed (the
+# input image to the graph before image transformation).
+node {
+  calculator: "DetectionLetterboxRemovalCalculator"
+  input_stream: "DETECTIONS:labeled_detections"
+  input_stream: "LETTERBOX_PADDING:letterbox_padding"
+  output_stream: "DETECTIONS:palm_detections"
+}
+
+# Extracts image size from the input images.
+node {
+  calculator: "ImagePropertiesCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "SIZE:image_size"
+}
+
+# Converts each palm detection into a rectangle (normalized by image size)
+# that encloses the palm and is rotated such that the line connecting center of
+# the wrist and MCP of the middle finger is aligned with the Y-axis of the
+# rectangle.
+node {
+  calculator: "DetectionsToRectsCalculator"
+  input_stream: "DETECTIONS:palm_detections"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "NORM_RECTS:palm_rects"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] {
+      rotation_vector_start_keypoint_index: 0  # Center of wrist.
+      rotation_vector_end_keypoint_index: 2  # MCP of middle finger.
+      rotation_vector_target_angle_degrees: 90
+      output_zero_rect_for_empty_detections: true
+    }
+  }
+}
+
+# Expands and shifts the rectangle that contains the palm so that it's likely
+# to cover the entire hand.
+node {
+  calculator: "RectTransformationCalculator"
+  input_stream: "NORM_RECTS:palm_rects"
+  input_stream: "IMAGE_SIZE:image_size"
+  output_stream: "hand_rects_from_palm_detections"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] {
+      scale_x: 2.6
+      scale_y: 2.6
+      shift_y: -0.5
+      square_long: true
+    }
+  }
+}
+
+# Clips the size of the input vector to the provided max_vec_size. This
+# determines the maximum number of hand instances this graph outputs.
+# Note that the performance gain of clipping detections earlier in this graph is
+# minimal because NMS will minimize overlapping detections and the number of
+# detections isn't expected to exceed 5-10.
+node {
+  calculator: "ClipNormalizedRectVectorSizeCalculator"
+  input_stream: "hand_rects_from_palm_detections"
+  output_stream: "clipped_hand_rects_from_palm_detections"
+  node_options: {
+    [type.googleapis.com/mediapipe.ClipVectorSizeCalculatorOptions] {
+      # This value can be changed to support tracking arbitrary number of hands.
+      # Please also remember to modify min_size in
+      # CollectionHasMinSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop.pbtxt.
+      max_vec_size: 2
+    }
+  }
+}
diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
new file mode 100644
index 000000000..afd1fd152
--- /dev/null
+++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_detection_gpu.pbtxt
@@ -0,0 +1,218 @@
+# MediaPipe multi-hand detection subgraph.
+ +type: "MultiHandDetectionSubgraph" + +input_stream: "input_video" +output_stream: "DETECTIONS:palm_detections" +output_stream: "NORM_RECTS:clipped_hand_rects_from_palm_detections" + +# Transforms the input image on GPU to a 256x256 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + node_options: { + [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] { + output_width: 256 + output_height: 256 + scale_mode: FIT + } + } +} + +# Generates a single side packet containing a TensorFlow Lite op resolver that +# supports custom ops needed by the model used in this graph. +node { + calculator: "TfLiteCustomOpResolverCalculator" + output_side_packet: "opresolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteCustomOpResolverCalculatorOptions] { + use_gpu: true + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_input_video" + output_stream: "TENSORS_GPU:image_tensor" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:image_tensor" + output_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "CUSTOM_OP_RESOLVER:opresolver" + node_options: { + [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] { + model_path: "mediapipe/models/palm_detection.tflite" + use_gpu: true + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + node_options: { + [type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] { + num_layers: 5 + min_scale: 0.1171875 + max_scale: 0.75 + input_size_height: 256 + input_size_width: 256 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 32 + strides: 32 + strides: 32 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:detections" + node_options: { + [type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] { + num_classes: 1 + num_boxes: 2944 + num_coords: 18 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 7 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + + x_scale: 256.0 + y_scale: 256.0 + h_scale: 256.0 + w_scale: 256.0 + min_score_thresh: 0.7 + } + } +} + +# Performs non-max suppression to remove excessive detections. 
+node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "detections" + output_stream: "filtered_detections" + node_options: { + [type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + return_empty_detections: true + } + } +} + +# Maps detection label IDs to the corresponding label text ("Palm"). The label +# map is provided in the label_map_path option. +node { + calculator: "DetectionLabelIdToTextCalculator" + input_stream: "filtered_detections" + output_stream: "labeled_detections" + node_options: { + [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] { + label_map_path: "mediapipe/models/palm_detection_labelmap.txt" + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:labeled_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:palm_detections" +} + +# Extracts image size from the input images. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:input_video" + output_stream: "SIZE:image_size" +} + +# Converts each palm detection into a rectangle (normalized by image size) +# that encloses the palm and is rotated such that the line connecting center of +# the wrist and MCP of the middle finger is aligned with the Y-axis of the +# rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTIONS:palm_detections" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECTS:palm_rects" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] { + rotation_vector_start_keypoint_index: 0 # Center of wrist. + rotation_vector_end_keypoint_index: 2 # MCP of middle finger. + rotation_vector_target_angle_degrees: 90 + output_zero_rect_for_empty_detections: true + } + } +} + +# Expands and shifts the rectangle that contains the palm so that it's likely +# to cover the entire hand. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECTS:palm_rects" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "hand_rects_from_palm_detections" + node_options: { + [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] { + scale_x: 2.6 + scale_y: 2.6 + shift_y: -0.5 + square_long: true + } + } +} + +# Clips the size of the input vector to the provided max_vec_size. This +# determines the maximum number of hand instances this graph outputs. +# Note that the performance gain of clipping detections earlier in this graph is +# minimal because NMS will minimize overlapping detections and the number of +# detections isn't expected to exceed 5-10. +node { + calculator: "ClipNormalizedRectVectorSizeCalculator" + input_stream: "hand_rects_from_palm_detections" + output_stream: "clipped_hand_rects_from_palm_detections" + node_options: { + [type.googleapis.com/mediapipe.ClipVectorSizeCalculatorOptions] { + # This value can be changed to support tracking arbitrary number of hands. 
+      # Please also remember to modify min_size in
+      # CollectionHasMinSizeCalculatorOptions in
+      # mediapipe/graphs/hand_tracking/multi_hand_tracking_mobile.pbtxt and
+      # mediapipe/graphs/hand_tracking/multi_hand_tracking_desktop_live.pbtxt.
+      max_vec_size: 2
+    }
+  }
+}
diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt
new file mode 100644
index 000000000..a380966ca
--- /dev/null
+++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_landmark.pbtxt
@@ -0,0 +1,84 @@
+# MediaPipe hand landmark localization subgraph.
+
+type: "MultiHandLandmarkSubgraph"
+
+input_stream: "IMAGE:input_video"
+# A vector of NormalizedRect, one per each hand detected.
+input_stream: "NORM_RECTS:multi_hand_rects"
+# A vector of NormalizedLandmarks, one set per each hand.
+output_stream: "LANDMARKS:filtered_multi_hand_landmarks"
+# A vector of NormalizedRect, one per each hand.
+output_stream: "NORM_RECTS:filtered_multi_hand_rects_for_next_frame"
+
+# Outputs each element of multi_hand_rects at a fake timestamp for the rest
+# of the graph to process. Clones the input_video packet for each
+# single_hand_rect at the fake timestamp. At the end of the loop,
+# outputs the BATCH_END timestamp for downstream calculators to inform them
+# that all elements in the vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedRectCalculator"
+  input_stream: "ITERABLE:multi_hand_rects"
+  input_stream: "CLONE:input_video"
+  output_stream: "ITEM:single_hand_rect"
+  output_stream: "CLONE:input_video_cloned"
+  output_stream: "BATCH_END:single_hand_rect_timestamp"
+}
+
+node {
+  calculator: "HandLandmarkSubgraph"
+  input_stream: "IMAGE:input_video_cloned"
+  input_stream: "NORM_RECT:single_hand_rect"
+  output_stream: "LANDMARKS:single_hand_landmarks"
+  output_stream: "NORM_RECT:single_hand_rect_from_landmarks"
+  output_stream: "PRESENCE:single_hand_presence"
+}
+
+# Collects the boolean presence value for each single hand into a vector. Upon
+# receiving the BATCH_END timestamp, outputs a vector of boolean values at the
+# BATCH_END timestamp.
+node {
+  calculator: "EndLoopBooleanCalculator"
+  input_stream: "ITEM:single_hand_presence"
+  input_stream: "BATCH_END:single_hand_rect_timestamp"
+  output_stream: "ITERABLE:multi_hand_presence"
+}
+
+# Collects a set of landmarks for each hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedLandmarksVectorCalculator"
+  input_stream: "ITEM:single_hand_landmarks"
+  input_stream: "BATCH_END:single_hand_rect_timestamp"
+  output_stream: "ITERABLE:multi_hand_landmarks"
+}
+
+# Collects a NormalizedRect for each hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END
+# timestamp.
+node {
+  calculator: "EndLoopNormalizedRectCalculator"
+  input_stream: "ITEM:single_hand_rect_from_landmarks"
+  input_stream: "BATCH_END:single_hand_rect_timestamp"
+  output_stream: "ITERABLE:multi_hand_rects_for_next_frame"
+}
+
+# Filters the input vector of landmarks based on the hand presence value for
+# each hand. If the hand presence for hand #i is false, the set of landmarks
+# corresponding to that hand is dropped from the vector.
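A sketch of this CONDITION-based filtering in plain C++ (illustrative only; the real FilterCollectionCalculator is a MediaPipe calculator, not this helper): element i of the iterable is kept only when condition[i] is true.

#include <cstddef>
#include <cstdio>
#include <vector>

// Keeps items[i] only where condition[i] is true.
template <typename T>
std::vector<T> FilterByCondition(const std::vector<T>& items,
                                 const std::vector<bool>& condition) {
  std::vector<T> kept;
  for (std::size_t i = 0; i < items.size() && i < condition.size(); ++i) {
    if (condition[i]) kept.push_back(items[i]);
  }
  return kept;
}

int main() {
  // Two hand rects were tracked, but the landmark model reported presence
  // only for the first one; the second is dropped from the output vector.
  std::vector<int> hand_rect_ids = {0, 1};
  std::vector<bool> multi_hand_presence = {true, false};
  const auto kept = FilterByCondition(hand_rect_ids, multi_hand_presence);
  std::printf("hands kept: %zu\n", kept.size());  // 1
  return 0;
}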
+node {
+  calculator: "FilterLandmarksCollectionCalculator"
+  input_stream: "ITERABLE:multi_hand_landmarks"
+  input_stream: "CONDITION:multi_hand_presence"
+  output_stream: "ITERABLE:filtered_multi_hand_landmarks"
+}
+
+# Filters the input vector of NormalizedRect based on the hand presence value
+# for each hand. If the hand presence for hand #i is false, the NormalizedRect
+# corresponding to that hand is dropped from the vector.
+node {
+  calculator: "FilterNormalizedRectCollectionCalculator"
+  input_stream: "ITERABLE:multi_hand_rects_for_next_frame"
+  input_stream: "CONDITION:multi_hand_presence"
+  output_stream: "ITERABLE:filtered_multi_hand_rects_for_next_frame"
+}
diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt
new file mode 100644
index 000000000..2dcd6b478
--- /dev/null
+++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_cpu.pbtxt
@@ -0,0 +1,144 @@
+# MediaPipe multi-hand tracking rendering subgraph.
+
+type: "MultiHandRendererSubgraph"
+
+input_stream: "IMAGE:input_image"
+# A vector of NormalizedLandmarks, one for each hand.
+input_stream: "LANDMARKS:multi_hand_landmarks"
+# A vector of NormalizedRect, one for each hand.
+input_stream: "NORM_RECTS:0:multi_palm_rects"
+# A vector of NormalizedRect, one for each hand.
+input_stream: "NORM_RECTS:1:multi_hand_rects"
+# A vector of Detection, one for each hand.
+input_stream: "DETECTIONS:palm_detections"
+output_stream: "IMAGE:output_image"
+
+# Converts detections to drawing primitives for annotation overlay.
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:palm_detections"
+  output_stream: "RENDER_DATA:detection_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 0 g: 255 b: 0 }
+    }
+  }
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "RENDER_DATA:multi_hand_rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 255 g: 0 b: 0 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:multi_palm_rects"
+  output_stream: "RENDER_DATA:multi_palm_rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 125 g: 0 b: 122 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Outputs each element of multi_hand_landmarks at a fake timestamp for the rest
+# of the graph to process. At the end of the loop, outputs the BATCH_END
+# timestamp for downstream calculators to inform them that all elements in the
+# vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedLandmarksVectorCalculator"
+  input_stream: "ITERABLE:multi_hand_landmarks"
+  output_stream: "ITEM:single_hand_landmarks"
+  output_stream: "BATCH_END:landmark_timestamp"
+}
+
+# Converts landmarks to drawing primitives for annotation overlay.
+node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:single_hand_landmarks" + output_stream: "RENDER_DATA:single_hand_landmark_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + landmark_connections: 0 + landmark_connections: 1 + landmark_connections: 1 + landmark_connections: 2 + landmark_connections: 2 + landmark_connections: 3 + landmark_connections: 3 + landmark_connections: 4 + landmark_connections: 0 + landmark_connections: 5 + landmark_connections: 5 + landmark_connections: 6 + landmark_connections: 6 + landmark_connections: 7 + landmark_connections: 7 + landmark_connections: 8 + landmark_connections: 5 + landmark_connections: 9 + landmark_connections: 9 + landmark_connections: 10 + landmark_connections: 10 + landmark_connections: 11 + landmark_connections: 11 + landmark_connections: 12 + landmark_connections: 9 + landmark_connections: 13 + landmark_connections: 13 + landmark_connections: 14 + landmark_connections: 14 + landmark_connections: 15 + landmark_connections: 15 + landmark_connections: 16 + landmark_connections: 13 + landmark_connections: 17 + landmark_connections: 0 + landmark_connections: 17 + landmark_connections: 17 + landmark_connections: 18 + landmark_connections: 18 + landmark_connections: 19 + landmark_connections: 19 + landmark_connections: 20 + landmark_color { r: 255 g: 0 b: 0 } + connection_color { r: 0 g: 255 b: 0 } + thickness: 4.0 + } + } +} + +# Collects a RenderData object for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END +# timestamp. +node { + calculator: "EndLoopRenderDataCalculator" + input_stream: "ITEM:single_hand_landmark_render_data" + input_stream: "BATCH_END:landmark_timestamp" + output_stream: "ITERABLE:multi_hand_landmarks_render_data" +} + +# Draws annotations and overlays them on top of the input images. Consumes +# a vector of RenderData objects and draws each of them on the input frame. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME:input_image" + input_stream: "detection_render_data" + input_stream: "multi_hand_rects_render_data" + input_stream: "multi_palm_rects_render_data" + input_stream: "VECTOR:0:multi_hand_landmarks_render_data" + output_stream: "OUTPUT_FRAME:output_image" +} diff --git a/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt new file mode 100644 index 000000000..3ea9275dc --- /dev/null +++ b/mediapipe/graphs/hand_tracking/subgraphs/multi_hand_renderer_gpu.pbtxt @@ -0,0 +1,144 @@ +# MediaPipe multi-hand tracking rendering subgraph. + +type: "MultiHandRendererSubgraph" + +input_stream: "IMAGE:input_image" +# A vector of NormalizedLandmarks, one for each hand. +input_stream: "LANDMARKS:multi_hand_landmarks" +# A vector of NormalizedRect, one for each hand. +input_stream: "NORM_RECTS:0:multi_palm_rects" +# A vector of NormalizedRect, one for each hand. +input_stream: "NORM_RECTS:1:multi_hand_rects" +# A vector of Detection, one for each hand. +input_stream: "DETECTIONS:palm_detections" +output_stream: "IMAGE:output_image" + +# Converts detections to drawing primitives for annotation overlay. 
+node {
+  calculator: "DetectionsToRenderDataCalculator"
+  input_stream: "DETECTIONS:palm_detections"
+  output_stream: "RENDER_DATA:detection_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
+      thickness: 4.0
+      color { r: 0 g: 255 b: 0 }
+    }
+  }
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:multi_hand_rects"
+  output_stream: "RENDER_DATA:multi_hand_rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 255 g: 0 b: 0 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Converts normalized rects to drawing primitives for annotation overlay.
+node {
+  calculator: "RectToRenderDataCalculator"
+  input_stream: "NORM_RECTS:multi_palm_rects"
+  output_stream: "RENDER_DATA:multi_palm_rects_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] {
+      filled: false
+      color { r: 125 g: 0 b: 122 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Outputs each element of multi_hand_landmarks at a fake timestamp for the rest
+# of the graph to process. At the end of the loop, outputs the BATCH_END
+# timestamp for downstream calculators to inform them that all elements in the
+# vector have been processed.
+node {
+  calculator: "BeginLoopNormalizedLandmarksVectorCalculator"
+  input_stream: "ITERABLE:multi_hand_landmarks"
+  output_stream: "ITEM:single_hand_landmarks"
+  output_stream: "BATCH_END:landmark_timestamp"
+}
+
+# Converts landmarks to drawing primitives for annotation overlay.
+node {
+  calculator: "LandmarksToRenderDataCalculator"
+  input_stream: "NORM_LANDMARKS:single_hand_landmarks"
+  output_stream: "RENDER_DATA:single_hand_landmark_render_data"
+  node_options: {
+    [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] {
+      landmark_connections: 0
+      landmark_connections: 1
+      landmark_connections: 1
+      landmark_connections: 2
+      landmark_connections: 2
+      landmark_connections: 3
+      landmark_connections: 3
+      landmark_connections: 4
+      landmark_connections: 0
+      landmark_connections: 5
+      landmark_connections: 5
+      landmark_connections: 6
+      landmark_connections: 6
+      landmark_connections: 7
+      landmark_connections: 7
+      landmark_connections: 8
+      landmark_connections: 5
+      landmark_connections: 9
+      landmark_connections: 9
+      landmark_connections: 10
+      landmark_connections: 10
+      landmark_connections: 11
+      landmark_connections: 11
+      landmark_connections: 12
+      landmark_connections: 9
+      landmark_connections: 13
+      landmark_connections: 13
+      landmark_connections: 14
+      landmark_connections: 14
+      landmark_connections: 15
+      landmark_connections: 15
+      landmark_connections: 16
+      landmark_connections: 13
+      landmark_connections: 17
+      landmark_connections: 0
+      landmark_connections: 17
+      landmark_connections: 17
+      landmark_connections: 18
+      landmark_connections: 18
+      landmark_connections: 19
+      landmark_connections: 19
+      landmark_connections: 20
+      landmark_color { r: 255 g: 0 b: 0 }
+      connection_color { r: 0 g: 255 b: 0 }
+      thickness: 4.0
+    }
+  }
+}
+
+# Collects a RenderData object for each hand into a vector. Upon receiving the
+# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END
+# timestamp.
+node { + calculator: "EndLoopRenderDataCalculator" + input_stream: "ITEM:single_hand_landmark_render_data" + input_stream: "BATCH_END:landmark_timestamp" + output_stream: "ITERABLE:multi_hand_landmarks_render_data" +} + +# Draws annotations and overlays them on top of the input images. Consumes +# a vector of RenderData objects and draws each of them on the input frame. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME_GPU:input_image" + input_stream: "detection_render_data" + input_stream: "multi_hand_rects_render_data" + input_stream: "multi_palm_rects_render_data" + input_stream: "VECTOR:0:multi_hand_landmarks_render_data" + output_stream: "OUTPUT_FRAME_GPU:output_image" +} diff --git a/mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt b/mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt index 899785a1c..7cf19b2ed 100644 --- a/mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt +++ b/mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt @@ -1,6 +1,6 @@ # MediaPipe graph that performs object detection with TensorFlow Lite on CPU. # Used in the examples in -# mediapipie/examples/desktop/object_detection:object_detection_cpu. +# mediapipe/examples/desktop/object_detection:object_detection_cpu. # Images on CPU coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt b/mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt index 575d933a8..fd63fd97a 100644 --- a/mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt +++ b/mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs object detection on desktop with TensorFlow Lite # on CPU. # Used in the example in -# mediapipie/examples/desktop/object_detection:object_detection_tflite. +# mediapipe/examples/desktop/object_detection:object_detection_tflite. # max_queue_size limits the number of packets enqueued on any input stream # by throttling inputs to the graph. This makes the graph only process one diff --git a/mediapipe/graphs/object_detection/object_detection_mobile_cpu.pbtxt b/mediapipe/graphs/object_detection/object_detection_mobile_cpu.pbtxt index 3e0e4e6d3..4de82c07f 100644 --- a/mediapipe/graphs/object_detection/object_detection_mobile_cpu.pbtxt +++ b/mediapipe/graphs/object_detection/object_detection_mobile_cpu.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs object detection with TensorFlow Lite on CPU. # Used in the examples in -# mediapipie/examples/android/src/java/com/mediapipe/apps/objectdetectioncpu and -# mediapipie/examples/ios/objectdetectioncpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectioncpu and +# mediapipe/examples/ios/objectdetectioncpu. # Images on GPU coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt b/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt index dfed16696..f3dc1d9e9 100644 --- a/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt +++ b/mediapipe/graphs/object_detection/object_detection_mobile_gpu.pbtxt @@ -1,7 +1,7 @@ # MediaPipe graph that performs object detection with TensorFlow Lite on GPU. 
# Used in the examples in -# mediapipie/examples/android/src/java/com/mediapipe/apps/objectdetectiongpu and -# mediapipie/examples/ios/objectdetectiongpu. +# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectiongpu and +# mediapipe/examples/ios/objectdetectiongpu. # Images on GPU coming into and out of the graph. input_stream: "input_video" diff --git a/mediapipe/graphs/youtube8m/BUILD b/mediapipe/graphs/youtube8m/BUILD index be0fff44c..c697d16c0 100644 --- a/mediapipe/graphs/youtube8m/BUILD +++ b/mediapipe/graphs/youtube8m/BUILD @@ -44,3 +44,30 @@ cc_library( "//mediapipe/calculators/video:opencv_video_decoder_calculator", ], ) + +cc_library( + name = "yt8m_inference_calculators_deps", + deps = [ + "//mediapipe/calculators/core:concatenate_vector_calculator", + "//mediapipe/calculators/core:dequantize_byte_array_calculator", + "//mediapipe/calculators/core:packet_cloner_calculator", + "//mediapipe/calculators/core:side_packet_to_stream_calculator", + "//mediapipe/calculators/core:string_to_int_calculator", + "//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator", + "//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator", + "//mediapipe/calculators/tensorflow:tensor_to_vector_float_calculator", + "//mediapipe/calculators/tensorflow:tensorflow_inference_calculator", + "//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_calculator", + "//mediapipe/calculators/tensorflow:tfrecord_reader_calculator", + "//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator", + "//mediapipe/calculators/tensorflow:unpack_yt8m_sequence_example_calculator", + "//mediapipe/calculators/tensorflow:vector_float_to_tensor_calculator", + "//mediapipe/calculators/tensorflow:vector_int_to_tensor_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:labels_to_render_data_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/calculators/util:top_k_scores_calculator", + "//mediapipe/calculators/video:opencv_video_decoder_calculator", + "//mediapipe/calculators/video:opencv_video_encoder_calculator", + ], +) diff --git a/mediapipe/graphs/youtube8m/label_map.txt b/mediapipe/graphs/youtube8m/label_map.txt new file mode 100644 index 000000000..8321ec772 --- /dev/null +++ b/mediapipe/graphs/youtube8m/label_map.txt @@ -0,0 +1,3862 @@ +Game +Video game +Vehicle +Concert +Musician +Cartoon +Performance art +Car +Dance +Guitar +String instrument +Food +Association football +Musical ensemble +Music video +Animal +Animation +Motorsport +Pet +Racing +Recipe +Mobile phone +Cooking +Smartphone +Gadget +Trailer (promotion) +Toy +Minecraft +Drum kit +Cuisine +Motorcycle +Piano +Dish (food) +Drum +Acoustic guitar +Action-adventure game +Call of Duty +Electric guitar +Drummer +Cosmetics +Keyboard instrument +Choir +Strategy video game +Fishing +Aircraft +Train +Airplane +Pianist +Sports car +Art +Hair +Rail transport +Basketball +Cycling +Orchestra +Motorcycling +Transport +Musical keyboard +Bicycle +Fish +Outdoor recreation +Disc jockey +Machine +Sports game +Radio-controlled model +Hairstyle +Fashion +Dog +Skateboarding +Fighting game +Basketball moves +Wedding +Skateboard +IPhone +Personal computer +Truck +Boat +Railroad car +Snare drum +American football +Drawing +Pokémon +Winter sport +Tractor +Naruto +Grand Theft Auto V +Cymbal +Horse +House +Festival +Engine +Highlight film +Boxing +World of Warcraft +Call of Duty: Black Ops II +Four-wheel drive +Bird +Violin 
+Skateboarding trick +Christmas +Weight training +Recreational fishing +Warcraft +Ice skating +Driving +Video game console +Microsoft Windows +Airline +Pokémon (video game series) +Landing +Combat +League of Legends +Vegetable +Model aircraft +Airliner +Samsung Galaxy +Sport utility vehicle +Electronic keyboard +Hockey +Radio-controlled aircraft +??? +Eye shadow +Cooking show +Dessert +Battlefield (series) +Slam dunk +Plant +Painting +Drifting (motorsport) +Rallying +Lego +Tablet computer +Call of Duty: Modern Warfare 2 +Comedy (drama) +Grand Theft Auto: San Andreas +Off-road vehicle +The Walt Disney Company +Locomotive +Takeoff +RuneScape +Puppy +Amusement park +Call of Duty: Modern Warfare 3 +Motocross +Dragon Ball +Airport +Photography +Call of Duty: Black Ops +Shoe +Radio-controlled car +Sonic the Hedgehog +Skatepark +Bride +First-person shooter +Accordion +Jet aircraft +Mascara +Halo (series) +Camera +Final Fantasy +Skiing +Gym +Aviation +Mountain bike +Marching band +??? +Extreme sport +FIFA 15 +Brass instrument +Sasuke Uchiha +Cat +Sedan (automobile) +Pickup truck +Meat +BMW +Parade +Cake +Supercar +Aquarium +Weather +Weapon +Nail (anatomy) +Surfing +PlayStation 3 +Room +Call of Duty 4: Modern Warfare +Helicopter +Laptop +Saxophone +Star Wars +Goku +Hotel +Xbox 360 +Arcade game +Doll +News presenter +Exhaust system +Volkswagen +Hatchback +Action figure +Computer +Carnival +Lipstick +Wii +Sonic the Hedgehog (character) +School +Ballet +Eye liner +Heavy equipment +IPad +Running +Baking +Rapid transit +Coupé +Road bicycle +Card game +Nail polish +Playing card +Bus +Counter-Strike (video game) +Gardening +Outline of meals +Nail art +Tank +??? +Bollywood +Tennis +Ship +BMX bike +Drink +Grand Theft Auto IV +Snowboarding +Mountain biking +Rouge (cosmetics) +Super Smash Bros. +??? +Street Fighter +Stadium +Underwater +Hunting +Kickflip +Metin2 +The Sims +Viola +Pony +PlayStation 4 +Television +??? +Beach +Manicure +Chocolate +Wood +Snow +Sneakers +??? +Roller coaster +Afro-textured hair +Timbales +Need for Speed +Robot +Paper +Gymnastics +Farm +Diatonic button accordion +Fighter aircraft +Sketch (drawing) +Mercedes-Benz +Chevrolet +Batman +Loudspeaker +Tool +Nike, Inc. +Race track +Ski +Underwater diving +Computer hardware +Garden +Paint +Cello +Digital camera +Scooter (motorcycle) +Motorboat +Harry Potter +??? +GoPro +Assassin's Creed +Fishing rod +Battlefield 3 +IPod +Nature +Dota 2 +Tree +My Little Pony +Dress +Xbox One +Train station +Firefighter +Jeep +Rail transport modelling +Resort +Flute +Touhou Project +Fruit +Chicken as food +Knife +Dashcam +Clash of Clans +Kitchen +Slide show +The Legend of Zelda +Fireworks +Swimming pool +Rugby football +Building +Kitten +Television advertisement +??? 
+Battlefield 4 +Horse racing +MapleStory +Subwoofer +Flour +IPod Touch +World of Tanks +Music festival +Comedian +Figurine +Kingdom Hearts +Manga +Wrestling +Trumpet +Xbox +Model (person) +Jumping +Dough +FIFA 13 +Pro Evolution Soccer +Resident Evil +Eye +Guitar Hero +Enduro +Home appliance +News program +Watch +Audi +Off-road racing +Ice dancing +Construction +Organ (music) +PlayStation Portable +Figure skating +Fiddle +WWE 2K +Climbing +Spider-Man +Braid +Muscle +The Elder Scrolls V: Skyrim +Nintendo 3DS +Fire +Human swimming +BMW Motorrad +One Piece +Wildlife +Apartment +Dressage +Scuba diving +Call of Duty: Ghosts +Eating +Kickboxing +Egg as food +Origami +The Elder Scrolls +Ford Mustang +Fishing lure +Light +Running back +Air force +M.U.G.E.N +Transformers +Living room +Soldier +Bag +Ballroom dance +Gohan +Kayak +Sheet music +Destiny (video game) +Wall +Church (building) +Sewing +Chipmunk +Surfboard +Concealer +Drag racing +Mega Man +Walt Disney World +Chicken +Parachuting +Classic car +Furniture +Jewellery +Recreational vehicle +Call of Duty: Advanced Warfare +Street Fighter IV +Sakura Haruno +Restaurant +Halo 3 +Wheelie +Mario Kart +Headphones +Factory +Yu-Gi-Oh! Trading Card Game +Speedometer +Circus +Muscle car +Bedroom +Tekken +Graffiti +River +Lighting +Guitar amplifier +Knitting +Call of Duty: Zombies +PlayStation +Radio-controlled helicopter +Cookware and bakeware +Trail +Camping +University +Indian cuisine +Multiplayer online battle arena +Ball +Nightclub +Book +Lego minifigure +PlayStation 2 +Dodge +Garry's Mod +Camera lens +Hockey puck +Barbie +Thomas the Tank Engine +Go-kart +Vegetarian cuisine +Monster High +Yacht +Collectible card game +Auto Race (Japanese sport) +Role-playing game +Madden NFL +Unidentified flying object +Longboard (skateboard) +Toddler +Digital single-lens reflex camera +Xbox (console) +Rail freight transport +Honda Civic +Convertible +The Sims 2 +Lamborghini +Printer (computing) +Cream +Parrot +Tire +Quadcopter +Littlest Pet Shop +Wii U +Planet +??? +The Sims 3 +Sony Xperia +Salad +Sailboat +Cruise ship +Unmanned aerial vehicle +Naruto: Ultimate Ninja +Barbecue +Mortal Kombat +Slot machine +Longboarding +Halo: Reach +Paragliding +Bread +Monster Hunter +Stitch (textile arts) +Dofus +StarCraft II: Wings of Liberty +Game controller +Gears of War +Mud bogging +Snowboard +Synthesia +Wig +Road bicycle racing +Wheel +Macintosh +Home improvement +Printing +Insect +Road +Parachute +Cattle +Hair coloring +IPhone 4S +Advertising +Potato +Runway +Van +Zoo +Handheld game console +Water +Rock Band +Volkswagen Golf +Bathroom +Stunt performer +Bleach (manga) +Metal Gear +Santa Claus +Hiking +Samsung Electronics +Runway (fashion) +Elevator +Cricket +Gran Turismo (series) +Fire engine +Kinder Surprise +Play-Doh +Grilling +Eyelash +Table tennis +Fiat Automobiles +Dragon +Lion +Nintendo Entertainment System +PlayStation (console) +Stallion +Ice skate +Baseball park +Flamenco +Steam engine +Plough +Farming Simulator +Soup +Snowmobile +Mare +Counter-Strike: Source +Sail +Squat (exercise) +Bass (fish) +Banjo +Harmonica +Quartet +Drum stick +IPhone 5 +Reptile +Prayer +T-shirt +Talent show +Rice +Roasting +Diablo III +CrossFire (video game) +Renault +Pizza +Trombone +Chevrolet Camaro +Barbell +Ryu (Street Fighter) +Clay +Beyblade +Lake +Sauce +??? 
+Cube +Forza (series) +Cookie +Taiko no Tatsujin +Mixtape +Medicine +Door +Monster +Call of Duty: World at War +Mud +Computer keyboard +Clarinet +Defense of the Ancients +Sora (Kingdom Hearts) +Computer monitor +Super Street Fighter IV +PlayStation Vita +Guild Wars +Album +Model car +Tenor saxophone +The Twilight Saga (film series) +Rubik's Cube +Sailor Moon +Teacher +Mixing console +Card manipulation +Combine harvester +Boeing 737 +Bull +Fish as food +Cheese +Concrete +Board game +Moped +Puzzle +Lego Star Wars +Poker +Portrait +Luigi +Dining room +Pokémon X and Y +Floor +Asus +Inuyasha +Livestock +Lawn mower +Tibia (video game) +Tabletop game +Iron Man +Tomato +Juice +Final Fantasy VII +Lip gloss +Super Smash Bros. Melee +Central processing unit +Sitcom +Cockpit +Emergency vehicle +FIFA 12 +Bodyboarding +Earth +The Lego Group +Ice cream +Microphone +Rallycross +Website +Table (furniture) +Ice +Magic: The Gathering +Ninja +Darth Vader +Saw +Mickey Mouse +Handbag +The King of Fighters +Ballet dancer +Samsung Galaxy Note series +Washing machine +Zee TV +Point Blank (2008 video game) +Gibson Les Paul +Dune buggy +DayZ (video game) +Television set +Dirt track racing +Edward Cullen +Beauty salon +Hetalia: Axis Powers +Vampire +Gliding +Batman: Arkham +Mountain +Rain +Shark +Waterfall +DarkOrbit +Bagpipes +Comics +Rock climbing +Skin +Arena +IPhone 4 +ARMA (series) +Super Smash Bros. for Nintendo 3DS and Wii U +Curry +Pasta +Halo 4 +Superman +Icing (food) +Google Nexus +Marathon +Deer +Guitar Hero III: Legends of Rock +Balloon +Goalkeeper (association football) +Red Bull +Nissan GT-R +Noodle +Fishing bait +Pencil +Plants vs. Zombies +Athlete +Computer case +Stretching +Terrier +Outer space +Textile +Mercedes-AMG +Hard disk drive +Biceps +Handball +Land Rover +Kamen Rider Series +Parakeet +Bear +Rim (wheel) +Chevrolet Corvette +Battery (electricity) +Milk +Roblox +BMW M3 +Christmas decoration +Moon +Microsoft Lumia +Combat Arms (video game) +Maize +Cargo +Headset (audio) +Bee +Helmet +Street art +Clown +Tattoo +Cupcake +Traxxas +Money +Hatsune Miku: Project DIVA +Bead +Angry Birds +Movieclips +Optimus Prime +MacBook +Mass Effect +Bowser (character) +Sega Genesis +Pachinko +Jedi +Jeep Wrangler +Dragon Ball Z: Budokai Tenkaichi +Tales (series) +Loader (equipment) +Water park +Beef +Sewing machine +Beer +Glass +Silage +Seafood +Gran Turismo 5 +Harp +Joker (comics) +Volkswagen Beetle +??? +BlackBerry +AdventureQuest Worlds +Bowling +Guild Wars 2 +Dragon Quest +Washing +Mermaid +Cue stick +Boot +Stir frying +Grand Theft Auto: Vice City +Penguin +Acrylic paint +Cocktail +Kingdom Hearts II +Coral +Borderlands 2 +Telephone +Gears of War (video game) +Far Cry +Tractor pulling +Rock Band (video game) +Crane (machine) +Updo +Stuffed toy +Lawn +Tekken (video game) +Airbus A320 family +IPhone 5S +Watercolor painting +Ten-pin bowling +Duck +Pokémon Trading Card Game +Oven +Subaru Impreza +Porsche 911 +Backpack +Carl Johnson (Grand Theft Auto) +German Shepherd +Turtle +Metal +Left 4 Dead +Ultralight aviation +Comic book +Batting (cricket) +Tram +Mower +Reef aquarium +??? 
+Swing (dance) +Lego City +Game Boy Advance +Diesel engine +Pitcher +Dance studio +Hamburger +Cake decorating +Left 4 Dead 2 +Bible +Candy +Vacuum cleaner +Pokémon Omega Ruby and Alpha Sapphire +Sowing +Roof +Donkey Kong +Trout +Coin +Tent +Digimon +Costume +Warface +Sandwich +BMW 3 Series +Star Wars: The Old Republic +Trampoline +Pipe organ +Latin dance +Aerobics +Aion: Upheaval +Supermoto +Netbook +Gift +Strum +Mitsubishi Lancer Evolution +Drum and bugle corps (modern) +Gramophone record +Gundam (mobile suit) +Euro Truck Simulator 2 +Tai chi +Teenage Mutant Ninja Turtles +Aerobatics +Wedding dress +Hair conditioner +Achievement (video gaming) +Boeing 777 +Shadow the Hedgehog +Boeing 747 +Simba +Silkroad Online +Kindergarten +Smartwatch +Computer mouse +Bell +Museum +Rabbit +Total War (series) +DVD +Devil May Cry +Face +Lathe +Five Nights at Freddy's +Logging +String quartet +Bridge +Super Mario Bros. +Fishing reel +Badminton +Clock +Stove +Wine +Subaru +Leather +IPad 2 +Terraria +Attack on Titan +Bottle +Kick +Police officer +Raw foodism +Video card +Alpine skiing +String (music) +StarCraft (video game) +Roadster (automobile) +Steak +Hearthstone (video game) +Solo dance +Foreign exchange market +God of War (series) +Hulk (comics) +Easter egg +Ceiling +Yo-kai Watch +Wakeboarding +Monster truck +McDonald's +Assassin's Creed III +Chopper (motorcycle) +Largemouth bass +Roller skating +Glider (aircraft) +Jacket +Marimba +Christmas tree +Sand +Afro +MacBook Pro +Booster pack +Dark Souls II +Bartender +Quarterback +Illustration +ARMA 2 +Star Trek +Itachi Uchiha +Hot rod +Saints Row +Freeza +Need for Speed: Most Wanted (2012 video game) +Hair twists +Super Mario World +Crash Bandicoot +Pork +Shampoo +Mask +Hair iron +Marvel vs. Capcom +Castlevania +Halo 2 +Battery charger +Tower defense +BBC +Kawasaki motorcycles +Link (The Legend of Zelda) +Muffler +Nintendo 64 +Marriage proposal +Fingerboard (skateboard) +Beehive +Pokémon HeartGold and SoulSilver +Bowling ball +Tower of Saviors +Artificial nails +Final Fantasy XIII +Chair +Hijab +Juggling +Nissan Skyline +Anpanman +Car wash +Kite +Diablo (video game) +Resident Evil 4 +Candy Crush Saga +Rocket +Video game arcade cabinet +Whale +Glider (sailplane) +Flooring +Kingdom Hearts (video game) +??? +Fast food +Mandolin +Metal detector +Cinema 4D +Ash Ketchum +Router (computing) +Yamaha YZF-R1 +Uncharted +DC Comics +Egg +Lexus +Ollie (skateboarding) +Hamster +Chainsaw +Galaxy +Embroidery +Suite (hotel) +Brush +Electronic drum +Gran Turismo 6 +NBA 2K15 +Dolphin +Salmon +Window +Drill +Pen +Backpacking (wilderness) +Torte +Web page +Dreadlocks +Hot Wheels +Brake +Tuba +Volcano +Ibiza +Dragon Age +Mini +Perfect World (video game) +Knot +Tails (character) +Thunderstorm +Video camera +Smoothie +Crossover (automobile) +Condominium +Desert +Pump +Strawberry +Coffeemaker +The Legend of Zelda: Ocarina of Time +Tarot +Architecture +Portal (video game) +Dynasty Warriors +Lightning McQueen +Pirates of the Caribbean (film series) +Tile +Battlefield: Bad Company 2 +Sketch comedy +Aikido +V8 engine +Sailor Moon (character) +Lamborghini Aventador +Carp fishing +Kirby (series) +Banana +Police car +Laser lighting display +Necklace +??? 
+WWE '13 +Mini (marque) +Tanki Online +Oil +Radio-controlled boat +Dinosaur +Pie +President of the United States +NBA 2K14 +Labrador Retriever +Blender +Plarail +Captain America +Electric locomotive +Street racing +Need for Speed: Most Wanted (2005 video game) +Canoe +Golf club +Sheep +Bar +CDJ +Lace +Gold +Glove +Halo: Combat Evolved +Alphabet +Fender Telecaster +IPhone 3GS +Beadwork +Personal water craft +Dietary supplement +James Bond +Ragnarok Online +French braid +Road racing +Star +Dean Winchester +Snake +Seed +Christmas lights +Plaster +Trunks (Dragon Ball) +Forage harvester +Cartoon Network +Honda CBR series +Battlefield Hardline +Tekken 6 +Glitter +Ford Focus +Roland V-Drums +Ski-Doo +Tyrannosaurus +New Super Mario Bros. +Cue sports +Rainbow Loom +Samsung Galaxy S III +Glasses +Italian cuisine +RollerCoaster Tycoon 3 +Pig +Lock (security device) +The Lord of the Rings (film series) +Military parade +Elephant +Pull-up (exercise) +Eyelash extensions +Ring (jewellery) +Minivan +Coca-Cola +Mural +Love song +Portal 2 +Mortal Kombat (2011 video game) +Yarn +Pokémon Ruby and Sapphire +Dragon Nest +Japanese cuisine +Resident Evil 5 +Jeans +Map +Pikachu +Sun +Pond +Bulldog +Greenhouse +Škoda Auto +Baby transport +Apple +The Doctor (Doctor Who) +Turbine +Naruto: Ultimate Ninja Storm +Watch Dogs +VHS +Ariel (Disney) +Sculpture +Bulldozer +Transformice +Sushi +Home run +Fountain +Slopestyle +Fullmetal Alchemist +Ultimate Marvel vs. Capcom 3 +Automotive lighting +Lightsaber +Chevrolet Silverado +Honey +Wangan Midnight +Sword +Toilet +Super Mario Galaxy +Akuma (Street Fighter) +Shiva +Bed +Toy train +Manufacturing +Ram Trucks +Stuffing +Biscuit +Kia Motors +Spa +Samsung Galaxy S II +Demolition +Airbus A330 +Breakfast +Airbus A380 +Pancake +Kawasaki Ninja +Mitsubishi Lancer +Mushroom +Grand Theft Auto: The Lost and Damned +Microsoft Flight Simulator +Spacecraft +Logo +Stock car racing +Goat +Pool (cue sports) +Assassin's Creed (video game) +Majin Boo +Vespa +??? +Samsung Galaxy S4 +Assassin's Creed IV: Black Flag +Batman: Arkham City +Monkey +Death Note +WWE 2K15 +Pumpkin +Shopping mall +Rose +Cola +Minnie Mouse +Caporales +Jet Ski +World of Warcraft: Wrath of the Lich King +Winter +Prom +Karaoke box +Minibike +RFactor +Art exhibition +Plush +Chocolate cake +Ford F-Series +Soap +Knuckles the Echidna +Dump truck +Giant panda +Dance Dance Revolution +Princess +Street food +Flashlight +Animal Crossing +Pilates +Pipe band +Toyota Land Cruiser +Lara Croft +Jumbotron +Ferrari F430 +Cell (Dragon Ball) +BMW 3 Series (E36) +Injustice: Gods Among Us +Dumbbell +Samsung Galaxy Tab series +Bodyweight exercise +Penalty kick (association football) +Lizard +City +Bionicle +Kirby (character) +WWE 2K14 +Pokémon Battle Revolution +Sonic the Hedgehog (1991 video game) +Alliance of Valiant Arms +Racket (sports equipment) +K-1 +Acer Inc. +Recorder (musical instrument) +Earring +National park +The Elder Scrolls IV: Oblivion +Audi R8 +Clothes dryer +Military band +Silver +Warcraft III: Reign of Chaos +Classroom +Samsung Galaxy S5 +Black cat +Scarf +Kratos (God of War) +Skylanders +Super Robot Wars +Electric car +Video lesson +Smoking (cooking) +Antenna (radio) +Sonic Generations +Butter +Chess +Hello Kitty +Goldfish +Carrot +Blu-ray +Squirrel +Balloon (aeronautics) +Microwave oven +Range Rover +Wool +TalesRunner +IPad Mini +Pokémon Emerald +Inflatable boat +Bull riding +Football boot +Gears of War 2 +Bugatti Veyron +Airbrush +Brick +Avengers (comics) +Plants vs. 
Zombies 2: It's About Time +United States Navy +Ball (association football) +Volkswagen Gol +Yo-yo +Forza Motorsport 4 +Logitech +Shirt +Golden Retriever +Alarm device +Water slide +Paramotor +Fondant icing +Acrobatic gymnastics +Coach (sport) +The Witcher 3: Wild Hunt +Tabla +Kinect +Zee Bangla +??? +Cabinetry +Quilt +Claw crane +Spyro (series) +Yoshi +Tekken Tag Tournament 2 +Diamond +Samsung Galaxy S series +BMW 3 Series (E46) +Tiger +Number +Traffic +Metalworking +Haruhi Suzumiya +Gown +Luxury yacht +Yuna (Final Fantasy) +Station wagon +Softball +The Legend of Zelda: Twilight Princess HD +Dungeon Fighter Online +Plasticine +LG Optimus series +Source (game engine) +Battlefield 2 +BMW 3 Series (E30) +Ink +Half-Life 2 +Hitman (series) +Inline skates +Remote control +Mercedes-Benz C-Class +The Sims 4 +Harlem Shake (meme) +Magic Kingdom +Dune +Prince of Persia +Final Fantasy XIV +Marvel Universe +Draco Malfoy +Ram Pickup +DC Universe Online +Assassin's Creed II +Mars +Xylophone +Dragon Age: Inquisition +Game Boy +Carpet +Roxas (Kingdom Hearts) +Balance beam +Mass Effect 2 +Dragon Ball Xenoverse +Call of Duty: Black Ops – Zombies +Cadillac +Guinea pig +The Hobbit (film series) +Need for Speed: World +Pastry +Chapel +Rayman +Armour +Mouse +Assassin's Creed: Brotherhood +Lord Voldemort +Magnet +The Sims (video game) +Rubber band +Grocery store +Reborn doll +Ford GT +WWE '12 +PlanetSide 2 +Jaguar Cars +Volvo Cars +Jeep Cherokee (SJ) +Homer Simpson +USB flash drive +Torero +Persona (series) +Model railroad layout +Buttercream +Serve (tennis) +Ferrari 458 +Honda Accord +Chevrolet Impala +Command & Conquer +Warframe +Chrysler (brand) +Standup paddleboarding +Pretty Cure +Campsite +Final Fantasy VIII +Audi A4 +Sailing ship +Rafting +Custom car +Belle (Disney) +Rowing (sport) +Jeep Grand Cherokee +Wire +BMW M5 +Hula hoop +Pinball +Spaghetti +Monster Hunter Freedom Unite +Far Cry 4 +Pro Evolution Soccer 2015 +Test Drive (series) +Motorcycle helmet +Router (woodworking) +Cave +Cheesecake +Birthday cake +Suzuki Jimny +New Super Mario Bros. Wii +Ezio Auditore da Firenze +Fisherman +Mime artist +Roller skates +Pump It Up (video game series) +Dissidia Final Fantasy +Supercharger +Gemstone +Titanfall +Downhill +Medal +Garbage truck +Forehand +Heroes of Newerth +Plastic +??? +Astronaut +Guitar Hero World Tour +ArcheAge +Lowrider +Police dog +Toyota Corolla +Ford Fiesta +Helmet camera +Cabal Online +Assassin's Creed Unity +Ceramic +Kidō Senshi Gundam: Senjō no Kizuna +Hot air balloon +Shower +Donald Duck +Multi Theft Auto +Rock Band 3 +Porsche 911 GT3 +Stick figure +Sled +Lemon +Frog +Mexican Creole hairless pig +Forklift +Dog agility +Kettlebell +Shelby Mustang +Candle +Bowling (cricket) +Kick (football) +Electric vehicle +Oboe +Desktop computer +Wing Chun +Statue +DayZ (mod) +Eagle +Fire station +Nike Air Max +Rage (video game) +Woodturning +Fireplace +Volkswagen Jetta +Madison Square Garden +Fly tying +Spore (2008 video game) +Hammond organ +Sam Winchester +The Pink Panther +Saints Row: The Third +Cherry blossom +Doraemon +WWE action figures +Marvel vs. 
Capcom 3: Fate of Two Worlds +Bugatti Automobiles +Fire Emblem +Border Collie +Aircraft carrier +Snow blower +Culinary art +Ken Masters +Seafight +Sport bike +Dentist +Easter egg (media) +Joystick +Tuna +Crysis 2 +Audi Quattro +Academy Awards +Ponytail +Ramen +Hummer +Fishing tackle +Final Fantasy X-2 +Coupon +Porsche Carrera +Wood carving +Rocksmith +Wallet +Refrigerator +Koi +Battlefield Heroes +Phonograph +Onion +Biceps curl +Trainz +Hat +Jubeat +Nissan Skyline GT-R +Mattel +GameCube +LittleBigPlanet 2 +Epiphone +Inazuma Eleven +Soft tennis +Killer whale +Hair straightening +Merienda +The Witcher (video game) +Skate (video game) +Live for Speed +Rooster +Chihuahua (dog) +Triangle +Land Rover Defender +Marvel Legends +Trousers +SD Gundam Capsule Fighter +Ratchet & Clank +Doughnut +Hatsune Miku: Project DIVA F +Bouzouki +Domestic canary +Half-Life (video game) +Raven (comics) +Black Butler +Mario Kart 8 +Chili pepper +BMW 5 Series +Hail +Ouran High School Host Club +Brain +Chinese cuisine +Playmobil +Model building +Ribbon +Pit bike +Sonic Unleashed +Solar panel +Orange (fruit) +Otis Elevator Company +Mu Online +Hang gliding +Path of Exile +Animal Crossing: New Leaf +Steel guitar +Sword Art Online +Lego Ninjago +Paddle +Second Life +Aikatsu! +IPhone 5C +Gothic (series) +Batman: Arkham Asylum +Carburetor +Crab +Espresso machine +The Phantom of the Opera (1986 musical) +Hellsing +Spider +Super Mario Galaxy 2 +Duel Masters Trading Card Game +Drywall +Laundry +United States Air Force +Assassin's Creed: Revelations +Corel +Omelette +Composer +Ford Escort (Europe) +Grape +Honda CB600F +Tea +Elmo +Temple +Need for Speed: Carbon +Catamaran +Perfect World (company) +Skate 3 +Missile +Infomercial +Chevrolet Chevelle +Airport terminal +Crysis (video game) +StepMania +Red Dead Redemption +Atari +Couch +The Idolmaster +Beatmania IIDX +Big wave surfing +Tokyo Mew Mew +Wheat +Warhammer Fantasy Battle +Rock (geology) +Snowplow +Submarine +Doctor Eggman +Wood flooring +Bangs (hair) +Yamaha YZF-R6 +Pontiac Firebird +Red Dead +Field hockey +Vineyard +Waterfowl hunting +Domestic pigeon +Toyota Hilux +CNET +Preacher +Sonic Adventure +Lamborghini Murciélago +Marinera +Screen printing +Crazyracing Kartrider +The Legend of Zelda: Majora's Mask +Sunglasses +Log cabin +Fungus +Wedding photography +Flag +Devil May Cry 4 +Cappuccino +Flamenco guitar +Projector +Rock dove +The Elder Scrolls Online +LittleBigPlanet (2008 video game) +Digital video recorder +Djembe +Vending machine +Mehndi +Telescope +Flyff +Pattern (sewing) +Stairs +Nissan 350Z +Cell (biology) +Need for Speed: Underground 2 +Incandescent light bulb +Gallon +Greeting card +Balloon modelling +Sensor +Realm of the Mad God +Nest +Writing +Logic Pro +Opel Astra +Campervan +Cooked rice +Muffin +Wind power +Hedgehog +Soft drink +Calculator +Harness racing +Buick +Beast (Disney) +Destroyer +Point guard +Forza Horizon +Mercedes-Benz SLS AMG +Supermarket +Catfish +Final Fantasy XI +The Last of Us +Battleship +Dodge Challenger +Peter Pan +Metal Gear Solid 4: Guns of the Patriots +Toyota 86 +Bakery +Compact disc +Backhoe +Saddle +Total Drama Island +Erhu +Bumblebee (Transformers) +Cajón +Beatmania +Ice rink +Child safety seat +Honda S2000 +Samsung Galaxy Note II +Higurashi When They Cry +Union Pacific Railroad +BMW 3 Series (E90) +V6 engine +BlazBlue +Rottweiler +Necktie +Image scanner +White-tailed deer +TV4 (Sweden) +Bishop +Need for Speed: Hot Pursuit (2010 video game) +Princess Peach +Rust (video game) +Doom (1993 video game) +Fender Custom Shop +Smite (video 
game) +Nissan Silvia +??? +Pudding +Sephiroth (Final Fantasy) +Irish dance +MacBook Air +Commodore 64 +IMac +Space Shuttle +Automobile repair shop +Collie +Dragon Age: Origins +Sangokushi Taisen +Calligraphy +Black belt (martial arts) +??? +Valve +Crisis Core: Final Fantasy VII +Two-stroke engine +Killzone (series) +Full moon +Hunter × Hunter +New York City Subway +Latte +Mercedes-Benz S-Class +Tetris +Samurai +Predator (alien) +Arabian horse +Mercedes-Benz E-Class +Spinach +Dōjinshi +Polar bear +Body piercing +Amazon Kindle +Biology +Key (lock) +Mobile Suit Gundam: Extreme Vs. +Rappelz +Bobber (motorcycle) +Toy balloon +Mexican cuisine +Rope +Taco +Taxicab +Infestation: Survivor Stories +Clutch +PlayStation Network +Garage (residential) +Milkshake +Cloud Strife +Honda Integra +Eintopf +Primary school +Kingdom Hearts Birth by Sleep +Resident Evil (1996 video game) +Foal +GameSpot +Castle +Human hair color +Scorpion (Mortal Kombat) +Poultry +Poodle +Vans +Forza Horizon 2 +Zero (Mega Man) +Toyota Camry +Chemical reaction +Test Drive Unlimited 2 +Bacon +Mario Party +18 Wheels of Steel +Goose +Sausage +Compost +Cucumber +French horn +Analog synthesizer +Siamese fighting fish +??? +Las Vegas Strip +Crysis 3 +School bus +Oculus Rift +Carnival Cruise Line +Honda CBR600RR +Pokémon Red and Blue +Autobot +Christ (title) +Cockatiel +Ace Combat +Mazda MX-5 +Countertop +Safari +Final Fantasy XIV: A Realm Reborn +Track (rail transport) +Ganon +Two-wheel tractor +??? +Watermelon +Paper plane +Rainbow trout +??? +Tony Hawk's (series) +Korean cuisine +Lip balm +Angry Birds (video game) +Lead guitar +Pug +Monster Hunter Tri +Playground +God of War III +Herd +Niko Bellic +Bungee jumping +Soil +Subway Surfers +Hindu temple +Audi A6 +Hogwarts +Eggplant +Mabinogi (video game) +Sugar +Makeup brush +Rocksmith 2014 +Ocean +Asphalt (series) +Dental braces +Bob cut +Nissan 240SX +Cement +Sharpening +Leopard +United States Army +Tom and Jerry +Xbox 360 controller +Dragon Ball: Raging Blast 2 +Winnie the Pooh (franchise) +Trophy +Inazuma Eleven (manga) +Owl +Street Fighter II: The World Warrior +Golf ball +Floyd Mayweather Jr. vs. Manny Pacquiao +Belt (clothing) +Slender: The Eight Pages +Test Drive Unlimited +Super Mario Bros. 3 +Power supply +Retail +Venom (comics) +IPad (3rd generation) +Teddy bear +Denim +Baseball bat +Halo 3: ODST +Train Simulator (Dovetail Games) +Bowhunting +Lotus Cars +Pineapple +Boeing 737 Next Generation +Audi A3 +Dreamcast +City-building game +Diablo II +Suzuki Hayabusa +Gamepad +Electrical wiring +Kitchen stove +Yamaha Aerox +Monster Hunter Portable 3rd +BMX racing +Katara (Avatar: The Last Airbender) +HP Pavilion (computer) +Emirates (airline) +Amiga +Touchscreen +Winter storm +Driver (video game series) +Pac-Man +Fantage +Land Rover Discovery +Flash (photography) +Human back +Intermodal container +Infiniti +Guilty Gear +Animal shelter +Butterfly +Piccolo (Dragon Ball) +Bicycle frame +Boeing 787 Dreamliner +Toontown Online +Renault Mégane +Age of Empires +Canyon +Ski jumping +Lumber +Carousel +Phantasy Star Online 2 +Dodge Viper +Madden NFL 13 +A-18 Hornet +String trimmer +Mattress +Mixer (cooking) +Sub-Zero (Mortal Kombat) +Ford Ranger (North America) +ESPN +ABS-CBN News and Current Affairs +Synchronised swimming +G-Shock +??? +Angel +Champion +Horse show +??? 
+Rurouni Kenshin +Halo 5: Guardians +Coconut +Deep frying +Dollhouse +Campus +Volkswagen Golf Mk6 +Curtain +Mountain pass +Dojo +Boiler +PRS Guitars +Diesel locomotive +Monster Hunter 4 +French Bulldog +Prince (Prince of Persia) +Fixed-gear bicycle +Ninja Gaiden +Samsung Galaxy Note 3 +Opel Corsa +Jack Sparrow +Boeing 767 +Lexus IS +Tales of Symphonia +Autumn +Inline skating +Filter (aquarium) +Naruto Shippuden: Ultimate Ninja Storm Generations +Garmon +Flower bouquet +SimCity +Gravy +Bully (video game) +French fries +Kawasaki Ninja 250R +Rock fishing +Batman: Arkham Origins +Ceiling fan +Audi TT +Space Marines (Warhammer 40,000) +Acer Aspire +D.Gray-man +Duct tape +Electromagnetic coil +Heroes of the Storm +Tom Clancy's Ghost Recon +Sponge cake +Steelpan +Modem +The King of Fighters 2002 +Dying Light +Need for Speed: Shift +Riot Games +Rainbow +Bean +Chevrolet Opala +Reborn! +Floral design +Megatron +Kawasaki Ninja ZX-6R +Agriculture +Cottage +Television presenter +Metal Gear Solid V: The Phantom Pain +Juicing +BioShock +Plymouth (automobile) +Crêpe +Fist of the North Star +The Legend of Zelda: The Wind Waker +X-Men +Piston +Deck (building) +Nativity scene +Sega Saturn +Stardoll +Just Dance (video game) +Chun-Li +BMW R1200GS +LG G3 +Fisheye lens +Dragon Ball: Raging Blast +Big Boss (Metal Gear) +Dam +Gel +JBL +Dachshund +Bane (comics) +E-reader +The Lord of the Rings Online +Ferb Fletcher +Yeast +Monastery +Vampire Knight +Vodka +IPhone 3G +Tricycle +Metal Slug (series) +Steel +LED lamp +Geometry Dash +Dominoes +Gibson Les Paul Custom +Street Fighter III: 3rd Strike +Hay +Honda CR-X +Spray painting +Flip Video +Bald eagle +God of War II +Clay animation +Tomato sauce +Clone trooper +Beagle +Popcorn +Rubber stamp +Clannad (visual novel) +Fried rice +Moto G (1st generation) +Toyota Prius +Mega Man Battle Network +Doom II: Hell on Earth +Grand Theft Auto: Vice City Stories +Deadpool +Phantasy Star +Lock picking +Sugar paste +Chevrolet Caprice +??? +Herb +The Legend of Zelda: Skyward Sword +Domesticated turkey +Final Fantasy VI +BMW S1000RR +Mitsubishi Pajero +Mazda3 +IKEA +Chevrolet S-10 +Paper Mario +India TV +Tow truck +Orochimaru (Naruto) +Ape +Line (geometry) +Kawasaki Ninja ZX-10R +Aerosol spray +Power supply unit (computer) +Zucchini +Doberman Pinscher +Wolfenstein (series) +Contortion +Fertilizer +Cooler Master +Highway +Chocolate brownie +Street Fighter III +Tsubasa: Reservoir Chronicle +Parking +Olaf (Disney) +Frets on Fire +Multi-function printer +Suzuki GSX-R1000 +Lush (company) +Hang (instrument) +Nexus 7 (2012) +Skyscraper +Gorilla +Ōendan +Puff pastry +Crossbow +Forza Motorsport 5 +Uncharted 2: Among Thieves +Pokémon Mystery Dungeon +Closet +??? 
+Daytona International Speedway +VTEC +Cheerleading +Slot car +Garden railway +Albert Wesker +Naruto Shippuden: Ultimate Ninja Storm 2 +Sewing needle +Trials (series) +Sheriff Woody +K +Straw +Mitsubishi Eclipse +Frisbee +TrackMania +Manure +Chocolate chip +Cart +Borderlands: The Pre-Sequel +Diving +Wood-burning stove +Medal game +Chrono Trigger +Sherlock Holmes +Library +Volkswagen Golf Mk2 +Guzheng +Malinois dog +Goofy +Pedal steel guitar +Virtua Fighter 5 +Lego Marvel Super Heroes +Kantai Collection +Electric violin +Firewood +Devil May Cry 3: Dante's Awakening +Digital painting +Flair bartending +Boxer (dog) +Melon +Low-carbohydrate diet +Škoda Octavia +The Crew (video game) +Unicycle +GAZ +Gummy bear +Marker pen +Need for Speed: The Run +Dead Space (2008 video game) +Duke Nukem +Dirt 3 +Movie theater +Final Fantasy XIII-2 +Comet +WWE SmackDown vs. Raw 2010 +Gran Turismo 4 +Star Wars: Battlefront II +Lamb and mutton +Ant +Loki (comics) +Percy the Small Engine +Villain +Plumbing +Avocado +BioShock Infinite +Dormitory +Mango +Lucky Star (manga) +Shadow the Hedgehog (video game) +Cabbage +Peanut butter +Didgeridoo +Hard Rock Cafe +Donkey Kong Country +Amazon.com +Star Wars Battlefront (2015 video game) +Harpsichord +Aston Martin Vantage (2005) +Suzuki Swift +Crocodile +Jet engine +Sonic the Hedgehog 2 +Delta Air Lines +Harry Potter and the Deathly Hallows +Trunk (car) +Zangief +Brave Frontier +Chuck E. Cheese's +Iori Yagami +Robotics +Kebab +Cheeseburger +Hatsune Miku: Project DIVA F 2nd +Humbucker +Camcorder +Mega Man X (video game) +Landscape +Shih Tzu +Volkswagen Golf Mk4 +Pollution +Guppy +Coffeehouse +Killer Instinct +Crusher +Allods Online +??? +Boeing 757 +Eclipse +Meatball +Saints Row 2 +Roulette +Grand Theft Auto: Liberty City Stories +Walleye +Walmart +Bearing (mechanical) +Forest +Forever 21 +Canvas +Rat rod +Soulcalibur V +Sonic the Hedgehog (2006 video game) +Multirotor +??? +LG G2 +Moisturizer +Halo: The Master Chief Collection +SEAT León +Skylanders: Swap Force +Pan flute +Chevrolet Tahoe +Metal Gear Online +Fiat 126 +Mount & Blade: Warband +Kennel +Vibraphone +Satellite +Yamaha Raptor 700R +Sonic & Knuckles +Honda Fit +Caridea +Armored Core +Bull Terrier +Firefighting +Catwoman +Octopus +Fencing +Sitar +Limousine +Nintendo DSi +HTC One (M8) +McDonnell Douglas F-15 Eagle +Rat +GoldenEye 007 (1997 video game) +Gasoline +Ken (doll) +Quadracycle +Dead or Alive (series) +Microsoft Surface +Scooby-Doo +Landscape painting +Toyota Land Cruiser Prado +Hair removal +Sink +Mount & Blade +BMW 5 Series (E39) +Mewtwo +Mambo (music) +The Witcher 2: Assassins of Kings +North American P-51 Mustang +Alien (creature in Alien franchise) +Cloud +Forge +Christian Church +Tom Clancy's Rainbow Six +Mirror +Chevrolet Big-Block engine +Chevrolet Corvette (C6) +Abarth +Mazda RX-8 +Pendant +Metal Gear Solid 3: Snake Eater +Buffet +Haunted house +Cockatoo +Royal Air Force +The Embodiment of Scarlet Devil +LG G series +Fishing vessel +DualShock +Sonic Heroes +Drawer (furniture) +BMW 1 Series +Werewolf +DatPiff +Koi pond +Toyota Celica +Twelve-string guitar +Potato chip +Stargate +Killer Instinct (2013 video game) +Caramel +Sprite (computer graphics) +NHL 14 +Ham +Sky +Sweater +Chocolate chip cookie +stay night +Text (literary theory) +Skate 2 +Engraving +Final Fantasy XV +Cornrows +Light Yagami +Floristry +Sly Cooper +Volkswagen Golf Mk5 +Snowman +??? 
+Vox (musical equipment) +Happy Farm +Orc +Suit (clothing) +PC game +Ace Online +Saints Row IV +Slingshot +Dead Island +Ratchet (Ratchet & Clank) +Gears of War: Judgment +Dragon Quest X +Furby +Crayon Shin-chan +Soprano saxophone +Tifa Lockhart +European perch +Patio +Fried chicken +Sawmill +Mirror's Edge +Canon PowerShot +Guitar Hero: Warriors of Rock +Rome: Total War +Hummer H2 +Radar +Final Fantasy IV +Table saw +Barista +BMW 7 Series +Camel +Windows Media Video +Felt +Audi S4 +Cowboy +Molding (process) +Contact lens +Fiat Punto +The Hobbit +Indoor cycling +Sunset +??? +Persian cat +Hitman: Absolution +Battlefield: Bad Company +Eren Yeager +Sinterklaas +Crash Bandicoot (video game) +Midnight Club: Los Angeles +Metal Gear Rising: Revengeance +Hand-to-hand combat +Avon Products +Log splitter +Stormtrooper (Star Wars) +Epic Rap Battles of History +Shed +Walking +Belt (mechanical) +Hot dog +Sock +Chicken coop +Humpback whale +Character (arts) +Peugeot 106 +Toast +Princess Jasmine +Exercise ball +Fox +Green Lantern +Looney Tunes +Wedding ring +Tap (valve) +Charizard +Mii +Rolls-Royce Limited +Copic +Mega Man Zero (video game) +Jak and Daxter +Priston Tale +Glacier +IPod Nano +Banknote +Mario & Sonic at the Olympic Games +Hero Factory +Bamboo +Fillet (cut) +Stencil +Winch +Dogfight +Treadmill +Bassoon +Staffordshire Bull Terrier +Cardboard +Epiphone Les Paul +Compact Cassette +Gelatin +White House +Suitcase +MX vs. ATV +Clank (Ratchet & Clank) +Beach volleyball +Loadout +Batter (cooking) +Zack Fair +Cliff +Baggage +Cream cheese +Lantern +Naruto: Clash of Ninja +Treasure +Raccoon +Mini 4WD +Robotic vacuum cleaner +Gate +Ribs (food) +Oatmeal +Water filter +Super Mario Sunshine +Animal Crossing: City Folk +Driver's license +Asus ZenFone +American black bear +Little Red Riding Hood +??? +Stable +Gashapon +Need for Speed: Underground +Dishwasher +Frying pan +Schutzhund +Mario Kart 7 +Disney Infinity +Saab Automobile +F-Zero +Halloween costume +Thor (Marvel Comics) +Foam +Tokyo Ghoul +Chevrolet Monte Carlo +Flush toilet +Axe +Worms (series) +Marble +Driver's education +Madden NFL 12 +Pressure washing +Christmas ornament +Buffalo wing +Duct (flow) +Indiana Jones +Chart +Yoshi's Island +Subaru Forester +Scar (The Lion King) +Mousse +Lalaloopsy +Micropterus +Gibson SG +Express train +Citroën C4 +Submission wrestling +Broccoli +Donkey Kong Country 2: Diddy's Kong Quest +Barrel organ +Mega Man 2 +Dragon boat +New Super Mario Bros. U +Gecko +Pillow +Kemenche +Porsche Cayenne +??? +Shift 2: Unleashed +Bomberman +Dungeons & Dragons +BeamNG.drive +AdventureQuest +Mario Kart 64 +Disc brake +Bloons Tower Defense +Forza Motorsport 3 +Guitar Center +Super Smash Bros. 
(video game) +Fiat Uno +Printed circuit board +Porcelain +E-book +Macaroni +Lego Friends +Max Payne 3 +StarCraft II: Heart of the Swarm +Medal of Honor: Warfighter +Kamaz +Air France +Porsche Carrera GT +Black Rock Shooter +Rosary +Halo Wars +Car dealership +Toys "R" Us +Total War: Rome II +Need for Speed: ProStreet +Mansion +Cheetah +Marshmallow +Shorts +Unturned +Charango +Lithium polymer battery +Sea turtle +Vatican City +Starbucks +Emergency vehicle lighting +Volkswagen Golf Mk1 +Lupin the Third +Pearl +Wii Sports +Hero +Chrysler 300 +GMC (automobile) +Charm bracelet +Kamen Rider Battle: Ganbaride +Ys (series) +Asus Eee Pad Transformer +BMW 5 Series (E60) +Ford Mustang SVT Cobra +Autocross +Royal icing +Laboratory +Peugeot 206 +Maltese (dog) +Soulcalibur IV +Wardrobe +Garlic +Tugboat +Luke Skywalker +Electronic circuit +Coat (clothing) +Passenger +??? +Cactus +Ford Crown Victoria +Elfen Lied +Circular saw +Radha +Welsh Corgi +Eiffel Tower +Softail +Bajo sexto +Lobster +Colt (horse) +Solar eclipse +Greyhound +Pepsi +Black Widow (Natasha Romanova) +Virtua Fighter +Filly +Canning +Fat +Goth subculture +Slow cooker +Lightning (Final Fantasy) +Water polo +Apple pie +Inkjet printing +Mercedes-Benz SLK-Class +Bandsaw +Cammy +Fight Night (EA video game series) +Tortoise +Multicooker +Ferret +Dipping sauce +Circle +Rocket launch +Pembroke Welsh Corgi +Cold porcelain +Battlefield Play4Free +ThinkPad +BMW X6 +??? +Sony Xperia Z +Selfie +Mahjong +Cherry +IPod Touch (5th generation) +Colin McRae: Dirt 2 +Tekken 5 +Shawl +Ultron +Guitar pick +Elk +Sunrise +Amusement arcade +Hammock +Decoupage +Mug +Sander +Autogyro +Woodchipper +Texas Instruments +Baby Alive +Tarantula +Shrub +Donkey Kong (video game) +Coating +Steirische Harmonika +Racing wheel +Raphael (Teenage Mutant Ninja Turtles) +Bank +Opel Vectra +Skull +Sand art and play +Birth +Lasagne +Infinity Ward +Philippine cuisine +Custard +Lettuce +Megami Tensei +Flappy Bird +Sleeping Dogs (video game) +Fender Jazz Bass +Devil Kings +Blouse +Notebook +Aloe vera +Funko +Lelouch Lamperouge +Macramé +Casserole +Capacitor +I Wanna Be the Guy +Hose +Subaru Legacy +Star Citizen +Sabian +Ventriloquism +Call of Duty (video game) +Kindle Fire +Starfire (Koriand'r) +Zeus +Microscope +Basket +Coyote +Bart Simpson +Volvo FH +Spinnerbait +Honda CR-V +Sony Xperia Z1 +Satan +Mercedes-Benz Sprinter +Team roping +Jeep Cherokee (XJ) +Friendship bracelet +Leonardo (Teenage Mutant Ninja Turtles) +Single track (mountain biking) +Chickpea +Vegetable carving +??? +Spark plug +Akita (dog) +Canoeing +Recumbent bicycle +Boom Beach +Puppetry +Sport stacking +Kendama +Punching bag +Staples Center +Marvel vs. 
Capcom 2: New Age of Heroes +Apple TV +Davul +Scratchcard +Disgaea +Larva +Used car +DmC: Devil May Cry +Kyo Kusanagi +Mega Man (video game) +K'Nex +Burger King +Dungeon crawl +Pro Evolution Soccer 2009 +Blueberry +Village +Convenience store +Golf cart +BMW M6 +Fiber +Resistance (series) +Picture frame +Trouble in Terrorist Town +Volkswagen Type 2 +Domestic pig +Grand Tourer Injection +Alucard (Hellsing) +Aerith Gainsborough +Batmobile +Gummi candy +Cauliflower +Marlin +Gold medal +Shin Megami Tensei: Persona 3 +Table football +Shikamaru Nara +Truggy +Ford Explorer +Chevrolet Cruze +American Airlines +Jupiter +Galaxy Nexus +KFC +Spec Ops: The Line +Rigs of Rods +EA Sports UFC +Plastic bottle +Hubble Space Telescope +Barn +Hand +Star Wars: Battlefront (2004 video game) +Digimon Masters +Gibson ES-335 +Waffle +Paper model +Ressha Sentai ToQger +Gas tungsten arc welding +Pavement (architecture) +Sonic & Sega All-Stars Racing +??? +Palace +Stealth game +God of War (2005 video game) +Mazda6 +Dragon Age II +Warhammer Online: Age of Reckoning +Switch +Grizzly bear +??? +H.A.V.E. Online +Lowlands (festival) +Wok +Window blind +Nokia N8 +Android Wear +V10 engine +Toyota Tundra +Marble (toy) +Alligator +Screencast +Range Rover Sport +Moose +Polo +Laminate flooring +BVE Trainsim +Baby sling +Garage door +Compact car +Dishonored +Parrot AR.Drone +Giraffe +Need for Speed Rivals +McLaren 12C +Pork ribs +Track cycling +Don't Starve +Marvel: Avengers Alliance +Popeye +Ford Mondeo +HTC One (M7) +Pyramid +Asphalt +Beetle +Canon EOS 600D +Oldsmobile Cutlass +Suzuki GSX-R750 +Audi A8 +World of Warcraft: The Burning Crusade +Homing pigeon +NHL 15 +Touring motorcycle +Goblin +Nissan 370Z +Metro: Last Light +Skylanders: Giants +Ran Online +Gear +Mercedes-Benz G-Class +Travian +Burnout Paradise +Tag team +Electric motorcycles and scooters +Kazuya Mishima +Serious Sam +Nexus 7 (2013) +Super Paper Mario +Doodle +Gelatin dessert +Andalusian horse +Warrior +Ferrari 360 +DVD player +WildStar (video game) +Hyundai Genesis +Chutney +Pizzica +Dead Rising 2 +Potter's wheel +Yoda +Cylinder (engine) +M. Bison +Metal Gear Solid: Peace Walker +Masonry +Edward Elric +Split (gymnastics) +Mario Kart DS +Ghost Rider +Grand Theft Auto: Episodes from Liberty City +F1 2012 (video game) +Cookie Monster +Red hair +Nami (One Piece) +Canon EF lens mount +Finger +Asteroid +Nissan Navara +Riddler +Traffic light +Nikon Coolpix series +Dragonica +Broth +Metal Gear Solid 2: Sons of Liberty +Samsung Galaxy Y +Wedding cake +Half-pipe +Gothic II +Vehicle horn +Motor oil +Credit card +Resident Evil 2 +British Airways +Great Dane +Stain +Super Mario 3D World +Yamaha YZ125 +Atari 2600 +Rover (space exploration) +Cayman +Ragdoll +Basement +Betta +Mobile home +Heroes of Might and Magic +Photograph +Wreath +Universe of The Legend of Zelda +Lamborghini Diablo +Albus Dumbledore +BlackBerry Bold +Prototype 2 +Soybean +Hurdling +Spock +Sony Xperia Z2 +Monopoly (game) +Fruit preserves +SimCity (2013 video game) +Cutlet +Volkswagen Touareg +Aerosol paint +Risotto +Toyota 4Runner +Driveclub +Moshing +Total War: Shogun 2 +Elf +Hot tub +President +NHL 13 +Rudolph the Red-Nosed Reindeer +Bugs Bunny +Mario & Luigi: Superstar Saga +Tulip +Paper Mario: The Thousand-Year Door +Hammer +EarthBound +Meta Knight +La Tale +Shadow of the Colossus +GLaDOS +Hunting dog +BioShock 2 +Supercars Championship +Orbit +God of War: Ascension +Bloons +Ney +Toyota MR2 +Cam +??? +Zoom lens +H&M +Hovercraft +Sanshin +Instant noodle +Luigi's Mansion +Tales of Vesperia +Dekotora +??? 
+Talking Tom and Friends +Baseball glove +Ale +Meringue +Canon EOS 7D +Shaolin Kung Fu +Hawk +Donkey Kong Country Returns +The Salvation Army +Brown trout +Sugarcane +Cake pop +Suzuki Bandit series +Green tea +Warehouse +Appalachian dulcimer +Kermit the Frog +Unicorn +Fountain pen +Acer Iconia +Master System +Robocraft +Merlin +Sweet potato +Alice's Adventures in Wonderland +Solar flare +DigiTech +Saturn +Flash (comics) +Reindeer +Justice League +Line Rider +Runes of Magic +Chevrolet Suburban +Michael Myers (Halloween) +Need for Speed: Undercover +Wand +Chevrolet Malibu +Coal +Antena 3 (Spain) +Driver: San Francisco +Font +Stingray +Thermostat +Toph Beifong +Vert ramp +Ridge Racer +Goat Simulator +Lineage (video game) +CNBC +Juri (Street Fighter) +TARDIS +Pigeon racing +Lap steel guitar +Shovel +Mosaic +Monster Retsuden Oreca Battle +Pair skating +Wallpaper +The Simpsons: Tapped Out +The Elder Scrolls III: Morrowind +Padel (sport) +Fender (vehicle) +Furnace +Nissan Altima +Cornet +Škoda Fabia +Lockheed Martin F-35 Lightning II +Electribe +Alesis +Motorola Razr +Halo: Combat Evolved Anniversary +Darksiders +Neo Geo (system) +Snail +Milking +Pluto (Disney) +Peanut +Verona Arena +Chubby Bunny +Jerry Mouse +Corvette Stingray (concept car) +Cigarette +Cube World +??? +Cybertron +Dacia Duster +Pastel +Transformer +Split screen (computer graphics) +Sukhoi Su-27 +Gabrielle (Xena: Warrior Princess) +Opel Kadett +Nokia Lumia 920 +Twin-turbo +Jiraiya (Naruto) +The Legend of Zelda: A Link to the Past +Crappie +Rechargeable battery +??? +Super Mario 3D Land +??? +DragonFable +Aragorn +Crash Bandicoot 2: Cortex Strikes Back +Southwest Airlines +Multi-tool +Passport +Porsche Panamera +Airship +Tuxedo Mask +Tom Clancy's Ghost Recon: Future Soldier +Melty Blood +Beam (structure) +Gas metal arc welding +Audi Q7 +Bell pepper +Chewing gum +Drinking water +Heat pump +Kenshiro +Patrick Drake and Robin Scorpio +Miniature wargaming +Kawasaki Ninja 650R +Captain Falcon +J-Stars Victory VS +Imperishable Night +Citrus +Drift trike +Optical illusion +Command & Conquer: Red Alert 3 +Suzuka Circuit +Mayonnaise +Quake III Arena +Keychain +God Mode +Ford Bronco +Crocodilia +Black and white +Llanero +Monorail +Nova +G.I. Joe +S.T.A.L.K.E.R.: Call of Pripyat +Perfect Cherry Blossom +Wine tasting +Olive +Ultra Series +Beat 'em up +Jellyfish +Lego Legends of Chima +Sauna +Tom Clancy's Splinter Cell: Blacklist +Starscream +Aang +Misty (Pokémon) +IPad Air +Ice pop +Lute +Jigsaw puzzle +Baritone saxophone +BMW Z4 +Mana (series) +Motorized bicycle +Dalmatian (dog) +Bose Corporation +Burton Snowboards +Kingdom Hearts: Chain of Memories +Mass Rapid Transit (Singapore) +Boombox +Napkin +Chimpanzee +Guitar Hero: Metallica +Radar detector +Honda NSX +Empire: Total War +Darts +Light fixture +Super Mario Bros. 
2 +Temple Run +Kristoff (Disney) +Adrenalyn XL +Tatra (company) +Mini-Z +Tin can +Market garden +Mercedes-Benz Actros +Hug +Whipped cream +Wasp +Oni +Princess Daisy +Constellation +HTC One X +Fender Precision Bass +Prawn +Christmas card +Handbell +Coconut milk +Toshiba Satellite +Riven +Referee +Dragon's Dogma +Dalek +Folding bicycle +2 Days +Kimono +Seiko +Hippopotamus +Resident Evil: Revelations +Billboard (magazine) +Padlock +Butterfly stroke +Mashed potato +Yuan Zai (giant panda) +Aurora +Mop +Tubing (recreation) +Clothes iron +Order & Chaos Online +Zebra +Crème caramel +Warhammer 40,000: Dawn of War +Tom Clancy's Splinter Cell: Conviction +Wakfu +Stitch (Lilo & Stitch) +Calf +Cars 2 (video game) +Crayfish +Engagement ring +Infamous Second Son +Jukebox +Biryani +DJ Hero +Super GT +Chameleon +Oyster +Warcraft III: The Frozen Throne +Dynasty Warriors 7 +Postage stamp +Derek Shepherd +Plotter +Amnesia: The Dark Descent +Jinn +Rayman Legends +Tinker Bell +Patchwork +Doom 3 +Wat +Paiste +Mercedes-Benz CLS-Class +Liquid +GameTrailers +Pep squad +Clam +SaGa (series) +Nollie +Company of Heroes +Green Arrow +Naruto Uzumaki +DeWalt +Putter +Family +Transistor +SOCOM (series) +Pea +Social media +Aliens vs. Predator (2010 video game) +HTC HD2 +Ducati Monster +Aggressive inline skating +Maserati GranTurismo +PortAventura World +Lego Batman: The Videogame +Energy drink +Turban +Pokémon Yellow +Alaskan Malamute +Monica's Gang +Suzuki Vitara +Black Desert Online +Zara (retailer) +Just Dance 2015 +Maid Sama! +Disguise +Kidney +Water well +Farmer +Toyota RAV4 +Night +DJMax +Richter-tuned harmonica +Real Racing 3 +Solid Snake +United States dollar +F1 2010 (video game) +Samsung Galaxy Ace +Trials Evolution +Cadillac CTS +Daihatsu +Balcony +Xperia Play +Rookie +Timing belt (camshaft) +Monster Energy +Ork (Warhammer 40,000) +Toyota JZ engine +Drive-through +Spektrum RC +Hyundai Sonata +Chinchilla +Wii Sports Resort +Interchange (road) +Whitewater slalom +Ticket (admission) +Bayonetta +Salsa (sauce) +PlayStation All-Stars Battle Royale +Lego Minecraft +??? +Mule +Starbound +Scissors +Asparagus +Sony NEX-5 +Electrical connector +Rayquaza +Eight-ball +Steel-string acoustic guitar +Strap +Times Square +Bus driver +SEAT Ibiza +Converse (shoe company) +Atlantic bluefin tuna +Mercedes-Benz W124 +??? +Goggles +Kawasaki Z1000 +Shrimp and prawn as food +Garnier +Semi-trailer +Cod +Carpet cleaning +Lost Planet +Sonic the Hedgehog CD +Final Fantasy V +F1 2013 (video game) +Modelling clay +Audi Sportback concept +WWE All Stars +Mitsubishi Outlander +Punch-Out!! +Disney Infinity: Marvel Super Heroes +Mulch +Willy Wonka +Dead Space 3 +Eurofighter Typhoon +H1Z1: Just Survive +Fakie +Super Mario RPG +Dance Central 3 +Puppet +Cursor (user interface) +Prince of Persia: Warrior Within +Ultimate Mortal Kombat 3 +Macross +Upholstery +The Binding of Isaac (video game) +Deathstroke +The King of Fighters '98 +Dragon Ball Z: Battle of Z +Theatre organ +Valve Corporation +Age of Conan +GameStop +Unreal Tournament +Metroid Prime +Annie (musical) +Cinderella (Disney character) +Eric Cartman +The Prince of Tennis +Kia Sportage +Vase +Nightwing +Wing +Gouken +Loft +Ferris wheel +Newspaper +Cash +A Certain Magical Index +Pretty Rhythm +Marionette +Swing (seat) +He-Man +Cook (profession) +Bentley Continental GT +Shaman King +Hakuōki +Essential oil +Balalaika +Baja 1000 +Hummingbird +PSA HDi engine +Nissan Sentra +??? 
+Infamous (video game) +Game Boy Color +343 Industries +Six Flags Magic Mountain +Woozworld +It's a Small World +Star Fox 64 +Xenoblade Chronicles +TurboGrafx-16 +Tesla coil +HTC Evo 4G +Super Metroid +Label +Gothic (video game) +Samsung Galaxy Gear +??? +Viola caipira +Space Engineers +Yamaha MT-09 +Mortal Kombat: Armageddon +Angry Birds Star Wars +Aerography (arts) +Python (genus) +Hyundai Elantra +MG Cars +Tesla Model S +Castlevania: Symphony of the Night +Body armor +Bone +Tekken 5: Dark Resurrection +Kimchi +Wedding invitation +Porsche 930 +Whey protein +Winery +Honda Integra DC5 +Hatter (Alice's Adventures in Wonderland) +Double Dutch (jump rope) +Cort Guitars +One-man band +Dentures +Tupperware +The Lion King (musical) +BlackBerry Z10 +Kingdom Hearts III +Zipper +Leaf +Samsung Galaxy Note 10.1 +Bansuri +BMW 5 Series (F10) +Australian Shepherd +Crash Bandicoot: Warped +Pou (video game) +Tilapia +Peugeot 205 +AC Cobra +Tin whistle +Tooth brushing +Battlefield 1942 +Virginia Tech +Quarry +Amphibious ATV +Dome +Portable stove +Sound system (Jamaican) +Suikoden +Lunar eclipse +Tiramisu +Inazuma Eleven GO (video game) +Nissan 300ZX +Neverwinter (video game) +Axle +Altaïr Ibn-La'Ahad +Radiator +Resident Evil (2002 video game) +Prince of Persia: The Sands of Time +Crop circle +Rhinoceros +??? +Bookcase +Common quail +The Hunger Games +Mercedes-Benz A-Class +Sarah Walker (Chuck) +Cinnamon +Hiru TV +Bread roll +Magician (fantasy) +Lotion +Killzone 3 +Cadillac Escalade +Silhouette +Swan +Lemonade +Trabant +Mojito +Fossil +Macy's +Silk +Puma SE +Nissan Maxima +Battlefield 2142 +Twisted Metal +Olive oil +Wii Remote +Universal Studios Hollywood +Berserk (manga) +Wellington boot +Tomb Raider: Anniversary +Almond +Audi RS 6 +Ladder +Fire Emblem Awakening +Stained glass +Tape recorder +Emerald +Ford Fusion (Americas) +Iguana +Might and Magic +Pluto +Mazda Raceway Laguna Seca +Air Force 1 (shoe) +Pub +Oshun +Honda K engine +Nerd +Renault 5 +F1 2011 (video game) +Windscreen wiper +Lex Luthor +Track racing +Escalator +Charlie Brown +Chauffeur +Soba +Window film +Bowl +Alarm clock +Pokémon Mystery Dungeon: Explorers of Time and Explorers of Darkness +Roomba +Honda Shadow +Lightning Returns: Final Fantasy XIII +LATAM Brasil +Top +American Bulldog +Legoland +Caterpillar +Windows Phone 8 +Automated teller machine +Samsung Galaxy S III Mini +Portrait photography +Office +Para Para +Hockey stick +Singapore Airlines +Volvo S60 +Udon +Chevrolet K5 bazelr +Bath & Body Works +Segway PT +Castlevania: Lords of Shadow +Mario Kart: Double Dash +Mew (Pokémon) +Walkman +Mentos +Jilbāb +Canter and gallop +Cinderella +Skylanders: Trap Team +Lego Duplo +Morgan le Fay +Decal +Handycam +Women's Tennis Association +Yeti +Multi-valve +Pokémon Stadium +Matryoshka doll +Lexus LFA +Keirin +??? +Honda Prelude +Burrito +Midna +Shuriken +New Super Mario Bros. 
2 +Nebula +BlackBerry PlayBook +Typography +Hare +Mohawk hairstyle +Onsen +Jet pack +Wagon +Just Dance 3 +Nissan S30 +Noah's Ark +Ronald McDonald +Bombardier Dash 8 +Raspberry +Hair dryer +The Simpsons: Hit & Run +Still life +Ice climbing +Lada Riva +Port +Compound bow +Resident Evil 3: Nemesis +R2-D2 +Sand animation +ABS-CBN (television network) +Leica Camera +Final Fantasy (video game) +Arkham Asylum +Dynasty Warriors 8 +Text messaging +Nursery (room) +Donkey Kong 64 +Star Wars Jedi Knight: Jedi Academy +Typing +Mapex Drums +Granado Espada +Calendar +UFC Undisputed 3 +Airbag +DMC World DJ Championships +Gingerbread +Rayman Origins +Lamborghini Reventón +Trials Fusion +Mafia (video game) +Paso Fino +??? +Sport kite +Taco Bell +Envelope +Mazdaspeed3 +Transformers: Generation 1 +Empanada +Mega Man 3 +Transformers: Fall of Cybertron +Rosalina (character) +Mosquito +Volkswagen Tiguan +Metal Gear Solid V: Ground Zeroes +Marmalade +Pandeiro +Miss Saigon +Yosemite National Park +Dutch Warmblood +Pre-flight safety demonstration +Citroën Saxo +Mack Trucks +Medley swimming +??? +Spindle (tool) +Greek cuisine +Hyundai Santa Fe +Chili con carne +Poster +Kawasaki Ninja 300 +Baby food +Grand Theft Auto (Game Boy Advance) +Sim racing +Chromebook +Peter Griffin +Stainless steel +Beverage can +Pixie cut +Chevrolet SS (concept car) +Chokehold +Bullion +Super Mario Kart +The Sims FreePlay +Giant Bicycles +Sgt. Frog +Age of Empires II +Abadá +Kingdom Hearts HD 1.5 Remix +Blackjack +Canon EOS 60D +Filling station +Plywood +Pheasant +Wilson Sporting Goods +Comb +Lighthouse +Rock and Roll Hall of Fame +Tōshirō Hitsugaya +Tales of the Abyss +Maze +Resident Evil: Operation Raccoon City +Cimbalom +??? +Monkey Island (series) +Civilization V +Venus +Peugeot 207 +The Amazing Spider-Man (2012 video game) +Chrono Cross +New Balance +Dassault Rafale +Daredevil (Marvel Comics character) +Silent Hill 2 +Beanie (seamed cap) +Nut (fruit) +Jill Valentine +Scion tC +Percy Jackson +Lord of the Dance (musical) +Far Cry (video game) +Star Wars: The Force Unleashed II +Memory card +Motorola Droid +Skylanders: Spyro's Adventure +Yamaha DT125 +Audi Q5 +Jaguar +Jaguar XJ +Animal Crossing: Wild World +Cockroach +Wetsuit +Funny Car +FarmVille +The Sims 3: Pets +Peel (fruit) +Melting +Aurora (Disney character) +Dry ice +Star Ocean +Duke Nukem Forever +Toribash +Yamaha YZ250 +Tekken 3 +Orihime Inoue +Spyro: Year of the Dragon +Eight-string guitar +Sonic Riders +Penny (The Big Bang Theory) +Honda XR series +Neodymium magnet toys +Leatherman +Maximum Destruction +Super Mario 64 DS +Unreal Tournament 3 +Health club +Chrysler Hemi engine +The North Face +CBS News +Pentium +Cannon +London Fashion Week +Military tactics +Smallmouth bass +Leopard gecko +Top (clothing) +Fable III +Panasonic Lumix DMC-GH4 +Sikorsky UH-60 Black Hawk +Blue Dragon +Loudspeaker enclosure +Ōkami +Tribal Wars +Hot chocolate +Beetroot +??? +Nokia N97 +Blue Exorcist +??? +Sonic and the Black Knight +Headscarf +Plasma display +Woody Woodpecker +??? +Beyblade: Shogun Steel +29er (bicycle) +QR code +Dyson (company) +Yanmar +Gladiator +Nissan Pathfinder +Nissan X-Trail +Autofocus +King Dedede +Zoo Tycoon 2 +Wheat tortilla +Team Rocket +Classical ballet +New York City Police Department +Heihachi Mishima +Crochet hook +Pencil case +Gods Eater Burst +??? 
+DS 3
+Periodic table
+General Electric
+Nissan Juke
+Lollipop
+Jaguar F-Type
+MechWarrior Online
+Dodge Neon SRT-4
+Fried egg
+Revell
+Indoor soccer
+Gratin
+Punisher
+Washburn Guitars
+Caster board
+Eldar (Warhammer 40,000)
+Final Fantasy Type-0
+NBA 2K10
+The Lord of the Rings: The Battle for Middle-earth II
+Texas Longhorns
+3D television
+Scorpion
+Warhammer 40,000: Dawn of War II
+Burpee (exercise)
+The Order: 1886
+Poptropica
+Tomb Raider: Legend
+Pelmeni
+Bánh
+PriPara
+Legacy of Kain
+Bowser Jr.
+Yonex
+Humanoid robot
+Sony Ericsson Xperia X10
+Rain gutter
+FIFA Street (2012 video game)
+Castle Crashers
+Meteoroid
+Macaroni and cheese
+Sega CD
+Mac Mini
+Tales of Xillia
+Sonic Lost World
+Orphanage
+Siku Toys
+Lego Batman 3: Beyond Gotham
+Daenerys Targaryen
+Orangutan
+Town
+Command & Conquer: Generals
+Samurai Shodown
+ZX Spectrum
+Quake Live
+Weighing scale
+Dead Frontier
+Wolfenstein: The New Order
+Colin McRae: Dirt
+Square dance
+Assassin's Creed Rogue
+Airboat
+Uncharted: Drake's Fortune
+Diddy Kong
+Yamaha Motif
+Theremin
+Rilakkuma
+Tie-dye
+Flip-flops
+Cylinder
+Gothic 3
+Unreal (1998 video game)
+Beyond: Two Souls
+Umbrella
+Dream Club
+Gradius
+Nexus One
+Nokia N900
+Tamagotchi
+Husband
+Sleeping bag
+Look-alike
+Papaya
+Mother 3
+The Beatles: Rock Band
+Prince of Persia: The Two Thrones
+???
+Darth Maul
+Knife sharpening
+Meteor shower
+Flugelhorn
+One Piece: Pirate Warriors
+Asterix
+Talk box
+With Your Destiny
+Alan Wake
+Barcode
+Recurve bow
+Diaper bag
+Ferrari F12berlinetta
+Taskbar
+Mortar (masonry)
+Toner (skin care)
+Freddy Krueger
+Marriott International
+Mass Effect (video game)
+Hawkeye (comics)
+Killing Floor (video game)
+Chibiusa
+Screenshot
+Pear
+Injury
+Kia Sorento
+Shredder (Teenage Mutant Ninja Turtles)
+Lifeguard
+Kei car
+Fight Night Champion
+Terra (comics)
+Gamblerz
diff --git a/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt b/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt
new file mode 100644
index 000000000..3b598a534
--- /dev/null
+++ b/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt
@@ -0,0 +1,178 @@
+input_side_packet: "input_sequence_example_path"
+input_side_packet: "input_video_path"
+input_side_packet: "output_video_path"
+input_side_packet: "segment_size"
+input_side_packet: "overlap"
+
+node {
+  calculator: "LocalFileContentsCalculator"
+  input_side_packet: "FILE_PATH:input_sequence_example_path"
+  output_side_packet: "CONTENTS:input_sequence_example"
+}
+
+node {
+  calculator: "StringToSequenceExampleCalculator"
+  input_side_packet: "STRING:input_sequence_example"
+  output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
+}
+
+node {
+  calculator: "UnpackMediaSequenceCalculator"
+  input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example"
+  output_stream: "FLOAT_FEATURE_RGB:rgb_feature_vector"
+  output_stream: "FLOAT_FEATURE_AUDIO:audio_feature_vector"
+}
+
+node {
+  calculator: "ConcatenateFloatVectorCalculator"
+  input_stream: "rgb_feature_vector"
+  input_stream: "audio_feature_vector"
+  output_stream: "feature_vector"
+}
+
+node {
+  calculator: "VectorFloatToTensorCalculator"
+  input_stream: "feature_vector"
+  output_stream: "feature_tensor"
+}
+
+node {
+  calculator: "StringToInt32Calculator"
+  input_side_packet: "segment_size"
+  output_side_packet: "segment_size_int"
+}
+
+node {
+  calculator: "StringToInt32Calculator"
+  input_side_packet: "overlap"
+  output_side_packet: "overlap_int"
+}
+
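+# Buffers the incoming feature tensors into overlapping segments: each output
+# covers segment_size input frames and consecutive outputs share overlap
+# frames, so the model scores the video segment by segment rather than as a
+# whole (see the driver sketch after this file's diff).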
output_stream: "lapped_feature_tensor" + input_side_packet: "BUFFER_SIZE:segment_size_int" + input_side_packet: "OVERLAP:overlap_int" + node_options: { + [type.googleapis.com/mediapipe.LappedTensorBufferCalculatorOptions] { + add_batch_dim_to_tensors: true + } + } +} + +node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "segment_size_int" + output_stream: "AT_ZERO:segment_size_int_stream" +} + +node { + calculator: "VectorIntToTensorCalculator" + input_stream: "SINGLE_INT:segment_size_int_stream" + output_stream: "TENSOR_OUT:segment_size_tensor" +} + +node { + calculator: "PacketClonerCalculator" + input_stream: "segment_size_tensor" + input_stream: "lapped_feature_tensor" + output_stream: "synced_segment_size_tensor" +} + +node { + calculator: "TensorFlowSessionFromSavedModelCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: { + saved_model_path: "/tmp/mediapipe/saved_model" + } + } +} + +node: { + calculator: "TensorFlowInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "NUM_FRAMES:synced_segment_size_tensor" + input_stream: "RGB_AND_AUDIO:lapped_feature_tensor" + output_stream: "PREDICTIONS:prediction_tensor" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: { + batch_size: 32 + } + } +} + +node { + calculator: "TensorToVectorFloatCalculator" + input_stream: "prediction_tensor" + output_stream: "prediction_vector" +} + +node { + calculator: "TopKScoresCalculator" + input_stream: "SCORES:prediction_vector" + output_stream: "TOP_K_INDEXES:top_k_indexes" + output_stream: "TOP_K_SCORES:top_k_scores" + output_stream: "TOP_K_LABELS:top_k_labels" + node_options: { + [type.googleapis.com/mediapipe.TopKScoresCalculatorOptions]: { + top_k: 3 + label_map_path: "mediapipe/graphs/youtube8m/label_map.txt" + } + } +} + +node { + calculator: "OpenCvVideoDecoderCalculator" + input_side_packet: "INPUT_FILE_PATH:input_video_path" + output_stream: "VIDEO:input_video" + output_stream: "VIDEO_PRESTREAM:input_video_header" +} + +node { + calculator: "LabelsToRenderDataCalculator" + input_stream: "LABELS:top_k_labels" + input_stream: "SCORES:top_k_scores" + input_stream: "VIDEO_PRESTREAM:input_video_header" + output_stream: "RENDER_DATA:render_data" + node_options: { + [type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: { + color { r: 255 g: 0 b: 0 } + color { r: 0 g: 255 b: 0 } + color { r: 0 g: 0 b: 255 } + thickness: 2.0 + font_height_px: 20 + max_num_labels: 3 + location: TOP_LEFT + } + } +} + +node { + calculator: "PacketClonerCalculator" + input_stream: "render_data" + input_stream: "input_video" + output_stream: "synchronized_render_data" +} + +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME:input_video" + input_stream: "synchronized_render_data" + output_stream: "OUTPUT_FRAME:output_video" +} + +node { + calculator: "OpenCvVideoEncoderCalculator" + input_stream: "VIDEO:output_video" + input_stream: "VIDEO_PRESTREAM:input_video_header" + input_side_packet: "OUTPUT_FILE_PATH:output_video_path" + node_options: { + [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: { + codec: "avc1" + video_format: "mp4" + } + } +} + diff --git a/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt b/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt new file mode 100644 index 000000000..38a02570b --- /dev/null +++ 
b/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt @@ -0,0 +1,139 @@ +input_side_packet: "desired_segment_size" +input_side_packet: "record_index" +input_side_packet: "tfrecord_path" +output_side_packet: "yt8m_id" +output_stream: "annotation_summary" + +node { + calculator: "StringToInt32Calculator" + input_side_packet: "record_index" + output_side_packet: "record_index_int" +} + +node { + calculator: "StringToInt32Calculator" + input_side_packet: "desired_segment_size" + output_side_packet: "desired_segment_size_int" +} + +node { + calculator: "TFRecordReaderCalculator" + input_side_packet: "TFRECORD_PATH:tfrecord_path" + input_side_packet: "RECORD_INDEX:record_index_int" + output_side_packet: "SEQUENCE_EXAMPLE:yt8m_sequence_example" +} + +node { + calculator: "UnpackYt8mSequenceExampleCalculator" + input_side_packet: "YT8M_SEQUENCE_EXAMPLE:yt8m_sequence_example" + input_side_packet: "DESIRED_SEGMENT_SIZE:desired_segment_size_int" + output_side_packet: "YT8M_ID:yt8m_id" + output_side_packet: "SEGMENT_SIZE:segment_size" + output_side_packet: "LAPPED_TENSOR_BUFFER_CALCULATOR_OPTIONS:lapped_tensor_buffer_calculator_options" + output_stream: "QUANTIZED_RGB_FEATURE:quantized_rgb_feature" + output_stream: "QUANTIZED_AUDIO_FEATURE:quantized_audio_feature" +} + +node { + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:quantized_rgb_feature" + output_stream: "FLOAT_VECTOR:rgb_feature_vector" + node_options: { + [type.googleapis.com/mediapipe.DequantizeByteArrayCalculatorOptions]: { + max_quantized_value: 2 + min_quantized_value: -2 + } + } +} + +node { + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:quantized_audio_feature" + output_stream: "FLOAT_VECTOR:audio_feature_vector" + node_options: { + [type.googleapis.com/mediapipe.DequantizeByteArrayCalculatorOptions]: { + max_quantized_value: 2 + min_quantized_value: -2 + } + } +} + +node { + calculator: "ConcatenateFloatVectorCalculator" + input_stream: "rgb_feature_vector" + input_stream: "audio_feature_vector" + output_stream: "feature_vector" +} + +node { + calculator: "VectorFloatToTensorCalculator" + input_stream: "feature_vector" + output_stream: "feature_tensor" +} + +node { + calculator: "LappedTensorBufferCalculator" + input_stream: "feature_tensor" + input_side_packet: "CALCULATOR_OPTIONS:lapped_tensor_buffer_calculator_options" + output_stream: "lapped_feature_tensor" +} + +node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "segment_size" + output_stream: "AT_ZERO:segment_size_int_stream" +} + +node { + calculator: "VectorIntToTensorCalculator" + input_stream: "SINGLE_INT:segment_size_int_stream" + output_stream: "TENSOR_OUT:segment_size_tensor" +} + +node { + calculator: "PacketClonerCalculator" + input_stream: "segment_size_tensor" + input_stream: "lapped_feature_tensor" + output_stream: "synced_segment_size_tensor" +} + +node { + calculator: "TensorFlowSessionFromSavedModelCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: { + saved_model_path: "/tmp/mediapipe/saved_model" + } + } +} + +node: { + calculator: "TensorFlowInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "NUM_FRAMES:synced_segment_size_tensor" + input_stream: "RGB_AND_AUDIO:lapped_feature_tensor" + output_stream: "PREDICTIONS:prediction_tensor" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: { + batch_size: 32 + } + } +} + 
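
For reference, the two `DequantizeByteArrayCalculator` nodes earlier in this graph undo the 8-bit quantization of the YT8M dataset features before inference. A minimal Python sketch of that mapping, mirroring the `Dequantize` helper from the YouTube-8M starter code (an illustration of the arithmetic implied by the `max_quantized_value`/`min_quantized_value` options above, not the calculator's C++ source):

```python
import numpy as np

def dequantize(raw_bytes, max_quantized_value=2.0, min_quantized_value=-2.0):
    # Same [-2, 2] range as the calculator options in this graph.
    quantized_range = max_quantized_value - min_quantized_value
    scalar = quantized_range / 255.0
    # Half-bucket bias recovers bucket centers rather than bucket edges.
    bias = (quantized_range / 512.0) + min_quantized_value
    return np.frombuffer(raw_bytes, dtype=np.uint8) * scalar + bias

print(dequantize(b"\x00\x80\xff"))  # approx. [-1.992, 0.016, 2.008]
```
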
+node { + calculator: "TensorToVectorFloatCalculator" + input_stream: "prediction_tensor" + output_stream: "prediction_vector" +} + +node { + calculator: "TopKScoresCalculator" + input_stream: "SCORES:prediction_vector" + output_stream: "SUMMARY:annotation_summary" + node_options: { + [type.googleapis.com/mediapipe.TopKScoresCalculatorOptions]: { + top_k: 9 + label_map_path: "mediapipe/graphs/youtube8m/label_map.txt" + } + } +} diff --git a/mediapipe/calculators/video/testdata/BUILD b/mediapipe/java/com/google/mediapipe/BUILD similarity index 67% rename from mediapipe/calculators/video/testdata/BUILD rename to mediapipe/java/com/google/mediapipe/BUILD index cd7c3d57c..82e2f52c2 100644 --- a/mediapipe/calculators/video/testdata/BUILD +++ b/mediapipe/java/com/google/mediapipe/BUILD @@ -4,23 +4,12 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http:#www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# licenses(["notice"]) # Apache 2.0 - -filegroup( - name = "test_videos", - srcs = [ - "format_FLV_H264_AAC.video", - "format_MKV_VP8_VORBIS.video", - "format_MP4_AVC720P_AAC.video", - ], - visibility = ["//visibility:public"], -) diff --git a/mediapipe/java/com/google/mediapipe/components/BUILD b/mediapipe/java/com/google/mediapipe/components/BUILD index 80b65e3d4..7fd808387 100644 --- a/mediapipe/java/com/google/mediapipe/components/BUILD +++ b/mediapipe/java/com/google/mediapipe/components/BUILD @@ -68,3 +68,10 @@ android_library( "@com_google_guava_android//jar", ], ) + +# Expose the java source files for building mediapipe AAR. +filegroup( + name = "java_src", + srcs = glob(["*.java"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java b/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java index 122f598ea..0d34e23e3 100644 --- a/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java +++ b/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java @@ -150,6 +150,7 @@ public class ExternalTextureConverter implements TextureFrameProducer { private ExternalTextureRenderer renderer = null; private long timestampOffset = 0; private long previousTimestamp = 0; + private boolean previousTimestampValid = false; protected int destinationWidth = 0; protected int destinationHeight = 0; @@ -335,11 +336,12 @@ public class ExternalTextureConverter implements TextureFrameProducer { // ensures that surface texture has the up-to-date timestamp. (Also adjust |timestampOffset| // to ensure that timestamps increase monotonically.) 
long textureTimestamp = surfaceTexture.getTimestamp() / NANOS_PER_MICRO; - if (textureTimestamp + timestampOffset <= previousTimestamp) { + if (previousTimestampValid && textureTimestamp + timestampOffset <= previousTimestamp) { timestampOffset = previousTimestamp + 1 - textureTimestamp; } outputFrame.setTimestamp(textureTimestamp + timestampOffset); previousTimestamp = outputFrame.getTimestamp(); + previousTimestampValid = true; } private void waitUntilReleased(AppTextureFrame frame) { diff --git a/mediapipe/java/com/google/mediapipe/framework/BUILD b/mediapipe/java/com/google/mediapipe/framework/BUILD index e6ad76ed9..5e582ebff 100644 --- a/mediapipe/java/com/google/mediapipe/framework/BUILD +++ b/mediapipe/java/com/google/mediapipe/framework/BUILD @@ -82,3 +82,10 @@ android_library( "@com_google_guava_android//jar", ], ) + +# Expose the java source files for building mediapipe AAR. +filegroup( + name = "java_src", + srcs = glob(["*.java"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/java/com/google/mediapipe/glutil/BUILD b/mediapipe/java/com/google/mediapipe/glutil/BUILD index fc378b4eb..4ad0d16d9 100644 --- a/mediapipe/java/com/google/mediapipe/glutil/BUILD +++ b/mediapipe/java/com/google/mediapipe/glutil/BUILD @@ -30,3 +30,10 @@ android_library( "@com_google_guava_android//jar", ], ) + +# Expose the java source files for building mediapipe AAR. +filegroup( + name = "java_src", + srcs = glob(["**/*.java"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl new file mode 100644 index 000000000..eaf4612cf --- /dev/null +++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl @@ -0,0 +1,157 @@ +"""Generate MediaPipe AAR including different variants of .so in jni folder. + +Usage: + +Create a new mediapipe_aar() target in a BUILD file. For example, +putting the following code into mediapipe/examples/android/aar_demo/BUILD. + +``` +load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar") + +mediapipe_aar( + name = "my_aar", + calculators = ["//mediapipe/calculators/core:pass_through_calculator"], +) +``` + +Then, run the following Bazel command to generate the AAR. + +``` +$ bazel build -c opt --fat_apk_cpu=arm64-v8a,armeabi-v7a mediapipe/examples/android/aar_demo:my_aar +``` + +Finally, import the AAR into Android Studio. + +""" + +load("@build_bazel_rules_android//android:rules.bzl", "android_binary", "android_library") + +def mediapipe_aar(name, calculators = []): + """Generate MediaPipe AAR. + + Args: + name: the name of the AAR. + calculators: the calculator libraries to be compiled into the .so. 
+    """
+    native.cc_binary(
+        name = "libmediapipe_jni.so",
+        linkshared = 1,
+        linkstatic = 1,
+        deps = [
+            "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni",
+        ] + calculators,
+    )
+
+    native.cc_library(
+        name = name + "_mediapipe_jni_lib",
+        srcs = [":libmediapipe_jni.so"],
+        alwayslink = 1,
+    )
+
+    native.genrule(
+        name = name + "_aar_manifest_generator",
+        outs = ["AndroidManifest.xml"],
+        cmd = """
+cat > $(OUTS) <<EOF
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="com.google.mediapipe">
+  <uses-sdk
+      android:minSdkVersion="19"
+      android:targetSdkVersion="19" />
+</manifest>
+EOF
+""",
+    )
+
+    native.genrule(
+        name = name + "_calculator_proto_java_src_generator",
+        srcs = [
+            "//mediapipe/framework:protos_src",
+            "@com_google_protobuf_javalite//:well_known_protos",
+        ],
+        outs = ["CalculatorProto.java"],
+        cmd = "$(location @com_google_protobuf_javalite//:protoc) " +
+              "--plugin=protoc-gen-javalite=$(location @com_google_protobuf_javalite//:protoc_gen_javalite) " +
+              "--proto_path=. --proto_path=$(GENDIR) " +
+              "--proto_path=$$(pwd)/external/com_google_protobuf_javalite/src " +
+              "--javalite_out=$$(dirname $(location CalculatorProto.java)) mediapipe/framework/calculator.proto && " +
+              "mv $$(dirname $(location CalculatorProto.java))/com/google/mediapipe/proto/CalculatorProto.java $$(dirname $(location CalculatorProto.java))",
+        tools = [
+            "@com_google_protobuf_javalite//:protoc",
+            "@com_google_protobuf_javalite//:protoc_gen_javalite",
+        ],
+    )
+
+    android_library(
+        name = name + "_android_lib",
+        srcs = [
+            "//mediapipe/java/com/google/mediapipe/components:java_src",
+            "//mediapipe/java/com/google/mediapipe/framework:java_src",
+            "//mediapipe/java/com/google/mediapipe/glutil:java_src",
+            "CalculatorProto.java",
+        ],
+        manifest = "AndroidManifest.xml",
+        proguard_specs = ["//mediapipe/java/com/google/mediapipe/framework:proguard.pgcfg"],
+        deps = [
+            ":" + name + "_mediapipe_jni_lib",
+            "//mediapipe/framework:calculator_java_proto_lite",
+            "//mediapipe/framework:calculator_profile_java_proto_lite",
+            "//mediapipe/framework/tool:calculator_graph_template_java_proto_lite",
+            "//third_party:androidx_annotation",
+            "//third_party:androidx_appcompat",
+            "//third_party:androidx_core",
+            "//third_party:androidx_legacy_support_v4",
+            "//third_party:camerax_core",
+            "//third_party:camera2",
+            "@com_google_code_findbugs//jar",
+            "@com_google_common_flogger//jar",
+            "@com_google_common_flogger_system_backend//jar",
+            "@com_google_guava_android//jar",
+            "@androidx_lifecycle//jar",
+        ],
+    )
+
+    _aar_with_jni(name, name + "_android_lib")
+
+def _aar_with_jni(name, android_library):
+    # Generate dummy AndroidManifest.xml for dummy apk usage
+    # (dummy apk is generated by <name>_dummy_app target below)
+    native.genrule(
+        name = name + "_binary_manifest_generator",
+        outs = [name + "_generated_AndroidManifest.xml"],
+        cmd = """
+cat > $(OUTS) <<EOF
+<manifest
+  xmlns:android="http://schemas.android.com/apk/res/android"
+  package="dummy.package.for.so">
+  <uses-sdk android:minSdkVersion="21"/>
+</manifest>
+EOF
+""",
+    )
+
+    # Generate dummy apk including .so files.
+    # We extract out .so files and throw away the apk.
+ android_binary( + name = name + "_dummy_app", + manifest = name + "_generated_AndroidManifest.xml", + custom_package = "dummy.package.for.so", + deps = [android_library], + ) + + native.genrule( + name = name, + srcs = [android_library + ".aar", name + "_dummy_app_unsigned.apk"], + outs = [name + ".aar"], + tags = ["manual"], + cmd = """ +cp $(location {}.aar) $(location :{}.aar) +chmod +w $(location :{}.aar) +origdir=$$PWD +cd $$(mktemp -d) +unzip $$origdir/$(location :{}_dummy_app_unsigned.apk) "lib/*" +cp -r lib jni +zip -r $$origdir/$(location :{}.aar) jni/*/*.so +""".format(android_library, name, name, name, name), + ) diff --git a/mediapipe/util/sequence/BUILD b/mediapipe/util/sequence/BUILD index 82f9e8c98..1e07d0ca4 100644 --- a/mediapipe/util/sequence/BUILD +++ b/mediapipe/util/sequence/BUILD @@ -28,7 +28,7 @@ cc_library( "//mediapipe/framework/port:core_proto", "//mediapipe/framework/port:integral_types", "//mediapipe/framework/port:logging", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -50,7 +50,7 @@ cc_library( "//mediapipe/framework/port:status", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -61,7 +61,7 @@ cc_test( ":media_sequence_util", "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:parse_text_proto", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) @@ -74,6 +74,6 @@ cc_test( "//mediapipe/framework/port:gtest_main", "//mediapipe/framework/port:opencv_imgcodecs", "//mediapipe/framework/port:status", - "@org_tensorflow//tensorflow/core:protos_all_cc", + "@org_tensorflow//tensorflow/core:protos_all", ], ) diff --git a/mediapipe/util/sequence/README.md b/mediapipe/util/sequence/README.md index 244ba82ed..18b795618 100644 --- a/mediapipe/util/sequence/README.md +++ b/mediapipe/util/sequence/README.md @@ -466,6 +466,7 @@ tasks and tracking (or class) fields for tracking information. |-----|------|------------------------|-------------| |`CLASS_SEGMENTATION/image/encoded`|feature list bytes|`add_class_segmentation_encoded` / `AddClassSegmentationEncoded`|The encoded image of class labels at each timestep.| |`CLASS_SEGMENTATION/image/timestamp`|feature list int|`add_class_segmentation_timestamp` / `AddClassSegmentationTimestamp`|The timestamp in microseconds for the class labels.| +|`CLASS_SEGMENTATION/image/multi_encoded`|feature list bytes list|`add_class_segmentation_multi_encoded` / `AddClassSegmentationMultiEncoded`|Storing multiple segmentation masks in case they overlap.| |`CLASS_SEGMENTATION/image/format`|context bytes|`set_class_segmentation_format` / `SetClassSegmentationFormat`|The encoding format of the class label images.| |`CLASS_SEGMENTATION/image/height`|context int|`set_class_segmentation_height` / `SetClassSegmentationHeight`|The height of the image in pixels.| |`CLASS_SEGMENTATION/image/width`|context int|`set_class_segmentation_width` / `SetClassSegmentationWidth`|The width of the image in pixels.| @@ -477,6 +478,7 @@ tasks and tracking (or class) fields for tracking information. 
|-----|------|------------------------|-------------|
|`INSTANCE_SEGMENTATION/image/ encoded`|feature list bytes|`add_instance_segmentation_encoded` / `AddInstanceSegmentationEncoded`|The encoded image of object instance labels at each timestep.|
|`INSTANCE_SEGMENTATION/image/ timestamp`|feature list int|`add_instance_segmentation_timestamp` / `AddInstanceSegmentationTimestamp`|The timestamp in microseconds for the object instance labels.|
+|`INSTANCE_SEGMENTATION/image/multi_encoded`|feature list bytes list|`add_instance_segmentation_multi_encoded` / `AddInstanceSegmentationMultiEncoded`|Storing multiple segmentation masks in case they overlap.|
|`INSTANCE_SEGMENTATION/image/ format`|context bytes|`set_instance_segmentation_format` / `SetInstanceSegmentationFormat`|The encoding format of the object instance labels.|
|`INSTANCE_SEGMENTATION/image/ height`|context int|`set_instance_segmentation_height` / `SetInstanceSegmentationHeight`|The height of the image in pixels.|
|`INSTANCE_SEGMENTATION/image/ width`|context int|`set_instance_segmentation_width` / `SetInstanceSegmentationWidth`|The width of the image in pixels.|
diff --git a/mediapipe/util/sequence/media_sequence.py b/mediapipe/util/sequence/media_sequence.py
index 3191cffef..fc1f15d32 100644
--- a/mediapipe/util/sequence/media_sequence.py
+++ b/mediapipe/util/sequence/media_sequence.py
@@ -489,7 +489,9 @@ def _create_image_with_prefix(name, prefix):
                               prefix=prefix, module_dict=globals())
   msu.create_int_feature_list(name + "_timestamp", IMAGE_TIMESTAMP_KEY,
                               prefix=prefix, module_dict=globals())
-
+  msu.create_bytes_list_feature_list(name + "_multi_encoded",
+                                     IMAGE_MULTI_ENCODED_KEY, prefix=prefix,
+                                     module_dict=globals())
 FORWARD_FLOW_PREFIX = "FORWARD_FLOW"
 CLASS_SEGMENTATION_PREFIX = "CLASS_SEGMENTATION"
 INSTANCE_SEGMENTATION_PREFIX = "INSTANCE_SEGMENTATION"
diff --git a/mediapipe/util/sequence/media_sequence_test.py b/mediapipe/util/sequence/media_sequence_test.py
index 6c4846c4b..3a634c486 100644
--- a/mediapipe/util/sequence/media_sequence_test.py
+++ b/mediapipe/util/sequence/media_sequence_test.py
@@ -78,8 +78,10 @@ class MediaSequenceTest(tf.test.TestCase):
     ms.set_bbox_parts((b"HEAD", b"TOE"), example)
     # feature lists
     ms.add_image_encoded(b"test", example)
+    ms.add_image_multi_encoded([b"test", b"test"], example)
     ms.add_image_timestamp(47, example)
     ms.add_forward_flow_encoded(b"test", example)
+    ms.add_forward_flow_multi_encoded([b"test", b"test"], example)
     ms.add_forward_flow_timestamp(47, example)
     ms.add_bbox_ymin((0.47, 0.49), example)
     ms.add_bbox_xmin((0.47, 0.49), example)
@@ -109,7 +111,9 @@ class MediaSequenceTest(tf.test.TestCase):
     ms.add_predicted_bbox_class_string((b"test", b"strings"), example)
     ms.add_predicted_bbox_timestamp(47, example)
     ms.add_class_segmentation_encoded(b"test", example)
+    ms.add_class_segmentation_multi_encoded([b"test", b"test"], example)
     ms.add_instance_segmentation_encoded(b"test", example)
+    ms.add_instance_segmentation_multi_encoded([b"test", b"test"], example)
     ms.add_class_segmentation_timestamp(47, example)
     ms.set_bbox_embedding_dimensions_per_region((47, 49), example)
     ms.set_bbox_embedding_format(b"test", example)
diff --git a/third_party/opencv_linux.BUILD b/third_party/opencv_linux.BUILD
index 3e57c5c85..ef2aeb49c 100644
--- a/third_party/opencv_linux.BUILD
+++ b/third_party/opencv_linux.BUILD
@@ -14,6 +14,8 @@ cc_library(
     srcs = glob(
         [
             "lib/x86_64-linux-gnu/libopencv_core.so",
+            "lib/x86_64-linux-gnu/libopencv_calib3d.so",
+            "lib/x86_64-linux-gnu/libopencv_features2d.so",
"lib/x86_64-linux-gnu/libopencv_highgui.so", "lib/x86_64-linux-gnu/libopencv_imgcodecs.so", "lib/x86_64-linux-gnu/libopencv_imgproc.so", diff --git a/third_party/opencv_macos.BUILD b/third_party/opencv_macos.BUILD index 275f31ec0..be1733e04 100644 --- a/third_party/opencv_macos.BUILD +++ b/third_party/opencv_macos.BUILD @@ -13,6 +13,8 @@ cc_library( srcs = glob( [ "local/opt/opencv@3/lib/libopencv_core.dylib", + "local/opt/opencv@3/lib/libopencv_calib3d.dylib", + "local/opt/opencv@3/lib/libopencv_features2d.dylib", "local/opt/opencv@3/lib/libopencv_highgui.dylib", "local/opt/opencv@3/lib/libopencv_imgcodecs.dylib", "local/opt/opencv@3/lib/libopencv_imgproc.dylib", diff --git a/third_party/rules_apple_c0863d0596ae6b769a29fa3fb72ff036444fd249.diff b/third_party/rules_apple_c0863d0596ae6b769a29fa3fb72ff036444fd249.diff deleted file mode 100644 index 736292cfa..000000000 --- a/third_party/rules_apple_c0863d0596ae6b769a29fa3fb72ff036444fd249.diff +++ /dev/null @@ -1,25 +0,0 @@ -commit c0863d0596ae6b769a29fa3fb72ff036444fd249 (HEAD -> py3) -Author: Camillo Lugaresi -Date: Fri Aug 16 00:13:16 2019 -0700 - - Fix codesigningtool.py py3 compatibility. - - In recent versions of plistlib, binary data entries are returned as instances of the built-in bytes class, and plistlib.Data is deprecated. - Since this script was expecting a plistlib.Data, it would fail with the error "AttributeError: 'bytes' object has no attribute 'data'". - This change makes it compatible with both new and old versions of plistlib. - -diff --git a/tools/codesigningtool/codesigningtool.py b/tools/codesigningtool/codesigningtool.py -index 59f3841..40cdcf3 100644 ---- a/tools/codesigningtool/codesigningtool.py -+++ b/tools/codesigningtool/codesigningtool.py -@@ -102,7 +102,9 @@ def _certificate_fingerprint(identity): - def _get_identities_from_provisioning_profile(mpf): - """Iterates through all the identities in a provisioning profile, lazily.""" - for identity in mpf["DeveloperCertificates"]: -- yield _certificate_fingerprint(identity.data) -+ if not _PY3: -+ identity = identity.data -+ yield _certificate_fingerprint(identity) - - - def _find_codesign_identities(identity=None): diff --git a/third_party/tensorflow_065c20bf79253257c87bd4614bb9a7fdef015cbb.diff b/third_party/tensorflow_065c20bf79253257c87bd4614bb9a7fdef015cbb.diff deleted file mode 100644 index 16f9c3265..000000000 --- a/third_party/tensorflow_065c20bf79253257c87bd4614bb9a7fdef015cbb.diff +++ /dev/null @@ -1,22 +0,0 @@ -commit 065c20bf79253257c87bd4614bb9a7fdef015cbb -Author: Camillo Lugaresi -Date: Thu Aug 15 18:34:41 2019 -0700 - - Use python3 if available to run gen_git_source.py. - - gen_git_source.py fails with an "ImportError: No module named builtins" on a default installation of Python 2 (at least, the one that comes with macOS). This can be worked around by installing the "future" package from pip. However, instead of requiring users to go through this extra step, we can simply run the script using Python 3 if it's installed. The script works on a default installation of Python 3, without requiring extra packages. 
- -diff --git a/third_party/git/git_configure.bzl b/third_party/git/git_configure.bzl -index fc18fdb988..3ce64242af 100644 ---- a/third_party/git/git_configure.bzl -+++ b/third_party/git/git_configure.bzl -@@ -18,6 +18,9 @@ def _get_python_bin(repository_ctx): - python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) - if python_bin != None: - return python_bin -+ python_bin_path = repository_ctx.which("python3") -+ if python_bin_path != None: -+ return str(python_bin_path) - python_bin_path = repository_ctx.which("python") - if python_bin_path != None: - return str(python_bin_path) diff --git a/third_party/tensorflow_f67fcbefce906cd419e4657f0d41e21019b71abd.diff b/third_party/tensorflow_f67fcbefce906cd419e4657f0d41e21019b71abd.diff deleted file mode 100644 index 080e4dc12..000000000 --- a/third_party/tensorflow_f67fcbefce906cd419e4657f0d41e21019b71abd.diff +++ /dev/null @@ -1,24 +0,0 @@ -commit f67fcbefce906cd419e4657f0d41e21019b71abd (HEAD -> formediapipe) -Author: Camillo Lugaresi -Date: Fri Aug 16 12:24:58 2019 -0700 - - elementwise requires C++14 - - This file fails to compile when using C++11, which is the default. This can be worked around by passing --cxxopt='-std=c++14' as a global build option to Bazel, but it is more convenient for users if we just configure this cc_library to be built with C++14 by default. - - The authors may also want to change it to be compatible with C++11, but that's out of scope for this change. - -diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD -index 17e59e70eb..4302a1f644 100644 ---- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD -+++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD -@@ -197,6 +197,9 @@ cc_library( - name = "elementwise", - srcs = ["elementwise.cc"], - hdrs = ["elementwise.h"], -+ copts = [ -+ "-std=c++14", -+ ], - deps = [ - "//tensorflow/lite/delegates/gpu/common:model", - "//tensorflow/lite/delegates/gpu/common:operations",