diff --git a/README.md b/README.md index 97d727466..56cd7bac0 100644 --- a/README.md +++ b/README.md @@ -37,10 +37,15 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de * [Discuss](https://groups.google.com/forum/#!forum/mediapipe) - General community discussion around MediaPipe ## Publications +* [On-Device, Real-Time Hand Tracking with MediaPipe](https://ai.googleblog.com/2019/08/on-device-real-time-hand-tracking-with.html) * [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172) ## Events -[Open sourced at CVPR 2019](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) on June 17~20 in Long Beach, CA +* [ML Conference, Berlin 9-11 Dec 2019](https://mlconference.ai/machine-learning-advanced-development/mediapipe-building-real-time-cross-platform-mobile-web-edge-desktop-video-audio-ml-pipelines/) +* [The 3rd Workshop on YouTube-8M Large Scale Video Understanding Workshop](https://research.google.com/youtube8m/workshop2019/index.html) Seoul, Korea ICCV 2019 +* [AI DevWorld 2019](https://aidevworld.com) on Oct 10 in San Jose, California +* [Google Industry Workshop at ICIP 2019](http://2019.ieeeicip.org/?action=page4&id=14#Google) [Presentation](https://docs.google.com/presentation/d/e/2PACX-1vRIBBbO_LO9v2YmvbHHEt1cwyqH6EjDxiILjuT0foXy1E7g6uyh4CesB2DkkEwlRDO9_lWfuKMZx98T/pub?start=false&loop=false&delayms=3000&slide=id.g556cc1a659_0_5) on Sept 24 in Taipei, Taiwan +* [Open sourced at CVPR 2019](https://sites.google.com/corp/view/perception-cv4arvr/mediapipe) on June 17~20 in Long Beach, CA ## Alpha Disclaimer MediaPipe is currently in alpha for v0.6. We are still making breaking API changes and expect to get to stable API by v1.0. diff --git a/WORKSPACE b/WORKSPACE index 0aee35c67..f568e8e99 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -10,7 +10,8 @@ http_archive( sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e", ) load("@bazel_skylib//lib:versions.bzl", "versions") -versions.check(minimum_bazel_version = "0.24.1") +versions.check(minimum_bazel_version = "0.24.1", + maximum_bazel_version = "0.29.1") # ABSL cpp library. 
http_archive( diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 80205f90e..4abb4c4b2 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -26,6 +26,13 @@ proto_library( deps = ["//mediapipe/framework:calculator_proto"], ) +proto_library( + name = "dequantize_byte_array_calculator_proto", + srcs = ["dequantize_byte_array_calculator.proto"], + visibility = ["//visibility:public"], + deps = ["//mediapipe/framework:calculator_proto"], +) + proto_library( name = "packet_cloner_calculator_proto", srcs = ["packet_cloner_calculator.proto"], @@ -104,6 +111,14 @@ mediapipe_cc_proto_library( deps = [":concatenate_vector_calculator_proto"], ) +mediapipe_cc_proto_library( + name = "dequantize_byte_array_calculator_cc_proto", + srcs = ["dequantize_byte_array_calculator.proto"], + cc_deps = ["//mediapipe/framework:calculator_cc_proto"], + visibility = ["//visibility:public"], + deps = [":dequantize_byte_array_calculator_proto"], +) + mediapipe_cc_proto_library( name = "quantize_float_vector_calculator_cc_proto", srcs = ["quantize_float_vector_calculator.proto"], @@ -387,6 +402,32 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "string_to_int_calculator", + srcs = ["string_to_int_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "side_packet_to_stream_calculator", + srcs = ["side_packet_to_stream_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + cc_test( name = "immediate_mux_calculator_test", srcs = ["immediate_mux_calculator_test.cc"], @@ -558,6 +599,32 @@ cc_test( ], ) +cc_library( + name = "dequantize_byte_array_calculator", + srcs = ["dequantize_byte_array_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":dequantize_byte_array_calculator_cc_proto", + "//mediapipe/framework:calculator_context", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) + +cc_test( + name = "dequantize_byte_array_calculator_test", + srcs = ["dequantize_byte_array_calculator_test.cc"], + deps = [ + ":dequantize_byte_array_calculator", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + ], +) + cc_library( name = "quantize_float_vector_calculator", srcs = ["quantize_float_vector_calculator.cc"], diff --git a/mediapipe/calculators/core/dequantize_byte_array_calculator.cc b/mediapipe/calculators/core/dequantize_byte_array_calculator.cc new file mode 100644 index 000000000..4f1a3ed86 --- /dev/null +++ b/mediapipe/calculators/core/dequantize_byte_array_calculator.cc @@ -0,0 +1,90 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <float.h>
+
+#include "mediapipe/calculators/core/dequantize_byte_array_calculator.pb.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/canonical_errors.h"
+#include "mediapipe/framework/port/status.h"
+
+// Dequantizes a byte array to a vector of floats.
+//
+// Example config:
+// node {
+//   calculator: "DequantizeByteArrayCalculator"
+//   input_stream: "ENCODED:encoded"
+//   output_stream: "FLOAT_VECTOR:float_vector"
+//   options {
+//     [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: {
+//       max_quantized_value: 2
+//       min_quantized_value: -2
+//     }
+//   }
+// }
+namespace mediapipe {
+
+class DequantizeByteArrayCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->Inputs().Tag("ENCODED").Set<std::string>();
+    cc->Outputs().Tag("FLOAT_VECTOR").Set<std::vector<float>>();
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) final {
+    const auto options =
+        cc->Options<::mediapipe::DequantizeByteArrayCalculatorOptions>();
+    if (!options.has_max_quantized_value() ||
+        !options.has_min_quantized_value()) {
+      return ::mediapipe::InvalidArgumentError(
+          "Both max_quantized_value and min_quantized_value must be provided "
+          "in DequantizeByteArrayCalculatorOptions.");
+    }
+    float max_quantized_value = options.max_quantized_value();
+    float min_quantized_value = options.min_quantized_value();
+    if (max_quantized_value < min_quantized_value + FLT_EPSILON) {
+      return ::mediapipe::InvalidArgumentError(
+          "max_quantized_value must be greater than min_quantized_value.");
+    }
+    float range = max_quantized_value - min_quantized_value;
+    scalar_ = range / 255.0;
+    bias_ = (range / 512.0) + min_quantized_value;
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) final {
+    const std::string& encoded =
+        cc->Inputs().Tag("ENCODED").Value().Get<std::string>();
+    std::vector<float> float_vector;
+    float_vector.reserve(encoded.length());
+    for (int i = 0; i < encoded.length(); ++i) {
+      float_vector.push_back(
+          static_cast<unsigned char>(encoded.at(i)) * scalar_ + bias_);
+    }
+    cc->Outputs()
+        .Tag("FLOAT_VECTOR")
+        .AddPacket(MakePacket<std::vector<float>>(float_vector)
+                       .At(cc->InputTimestamp()));
+    return ::mediapipe::OkStatus();
+  }
+
+ private:
+  float scalar_;
+  float bias_;
+};
+
+REGISTER_CALCULATOR(DequantizeByteArrayCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/core/dequantize_byte_array_calculator.proto b/mediapipe/calculators/core/dequantize_byte_array_calculator.proto
new file mode 100644
index 000000000..3032dbf48
--- /dev/null
+++ b/mediapipe/calculators/core/dequantize_byte_array_calculator.proto
@@ -0,0 +1,28 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message DequantizeByteArrayCalculatorOptions { + extend CalculatorOptions { + optional DequantizeByteArrayCalculatorOptions ext = 272316343; + } + + optional float max_quantized_value = 1; + optional float min_quantized_value = 2; +} diff --git a/mediapipe/calculators/core/dequantize_byte_array_calculator_test.cc b/mediapipe/calculators/core/dequantize_byte_array_calculator_test.cc new file mode 100644 index 000000000..a17fb6281 --- /dev/null +++ b/mediapipe/calculators/core/dequantize_byte_array_calculator_test.cc @@ -0,0 +1,137 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT + +namespace mediapipe { + +TEST(QuantizeFloatVectorCalculatorTest, WrongConfig) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:encoded" + output_stream: "FLOAT_VECTOR:float_vector" + options { + [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: { + max_quantized_value: 2 + } + } + )"); + CalculatorRunner runner(node_config); + std::string empty_string; + runner.MutableInputs()->Tag("ENCODED").packets.push_back( + MakePacket(empty_string).At(Timestamp(0))); + auto status = runner.Run(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT( + status.message(), + testing::HasSubstr( + "Both max_quantized_value and min_quantized_value must be provided")); +} + +TEST(QuantizeFloatVectorCalculatorTest, WrongConfig2) { + CalculatorGraphConfig::Node node_config = + ParseTextProtoOrDie(R"( + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:encoded" + output_stream: "FLOAT_VECTOR:float_vector" + options { + [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: { + max_quantized_value: -2 + min_quantized_value: 2 + } + } + )"); + CalculatorRunner runner(node_config); + std::string empty_string; + runner.MutableInputs()->Tag("ENCODED").packets.push_back( + MakePacket(empty_string).At(Timestamp(0))); + auto status = runner.Run(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT( + status.message(), + testing::HasSubstr( + "max_quantized_value must be greater than 
min_quantized_value"));
+}
+
+TEST(QuantizeFloatVectorCalculatorTest, WrongConfig3) {
+  CalculatorGraphConfig::Node node_config =
+      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
+        calculator: "DequantizeByteArrayCalculator"
+        input_stream: "ENCODED:encoded"
+        output_stream: "FLOAT_VECTOR:float_vector"
+        options {
+          [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: {
+            max_quantized_value: 1
+            min_quantized_value: 1
+          }
+        }
+      )");
+  CalculatorRunner runner(node_config);
+  std::string empty_string;
+  runner.MutableInputs()->Tag("ENCODED").packets.push_back(
+      MakePacket<std::string>(empty_string).At(Timestamp(0)));
+  auto status = runner.Run();
+  EXPECT_FALSE(status.ok());
+  EXPECT_THAT(
+      status.message(),
+      testing::HasSubstr(
+          "max_quantized_value must be greater than min_quantized_value"));
+}
+
+TEST(DequantizeByteArrayCalculatorTest, TestDequantization) {
+  CalculatorGraphConfig::Node node_config =
+      ParseTextProtoOrDie<CalculatorGraphConfig::Node>(R"(
+        calculator: "DequantizeByteArrayCalculator"
+        input_stream: "ENCODED:encoded"
+        output_stream: "FLOAT_VECTOR:float_vector"
+        options {
+          [mediapipe.DequantizeByteArrayCalculatorOptions.ext]: {
+            max_quantized_value: 2
+            min_quantized_value: -2
+          }
+        }
+      )");
+  CalculatorRunner runner(node_config);
+  unsigned char input[4] = {0x7F, 0xFF, 0x00, 0x01};
+  runner.MutableInputs()->Tag("ENCODED").packets.push_back(
+      MakePacket<std::string>(
+          std::string(reinterpret_cast<char*>(input), 4))
+          .At(Timestamp(0)));
+  MP_ASSERT_OK(runner.Run());
+  const std::vector<Packet>& outputs =
+      runner.Outputs().Tag("FLOAT_VECTOR").packets;
+  EXPECT_EQ(1, outputs.size());
+  const std::vector<float>& result = outputs[0].Get<std::vector<float>>();
+  ASSERT_FALSE(result.empty());
+  EXPECT_EQ(4, result.size());
+  EXPECT_NEAR(0, result[0], 0.01);
+  EXPECT_NEAR(2, result[1], 0.01);
+  EXPECT_NEAR(-2, result[2], 0.01);
+  EXPECT_NEAR(-1.976, result[3], 0.01);
+
+  EXPECT_EQ(Timestamp(0), outputs[0].Timestamp());
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/core/previous_loopback_calculator.cc b/mediapipe/calculators/core/previous_loopback_calculator.cc
index 6b23a0e70..8c470ef7d 100644
--- a/mediapipe/calculators/core/previous_loopback_calculator.cc
+++ b/mediapipe/calculators/core/previous_loopback_calculator.cc
@@ -102,6 +102,12 @@ class PreviousLoopbackCalculator : public CalculatorBase {
         cc->Outputs().Get(loop_out_id_).AddPacket(std::move(previous_loopback));
       }
     }
+    if (!main_ts_.empty()) {
+      cc->Outputs().Get(loop_out_id_).SetNextTimestampBound(main_ts_.front());
+    }
+    if (cc->Inputs().Get(main_id_).IsDone() && main_ts_.empty()) {
+      cc->Outputs().Get(loop_out_id_).Close();
+    }
     return ::mediapipe::OkStatus();
   }
diff --git a/mediapipe/calculators/core/previous_loopback_calculator_test.cc b/mediapipe/calculators/core/previous_loopback_calculator_test.cc
index 6ad569865..4ac38e9f0 100644
--- a/mediapipe/calculators/core/previous_loopback_calculator_test.cc
+++ b/mediapipe/calculators/core/previous_loopback_calculator_test.cc
@@ -107,5 +107,96 @@ TEST(PreviousLoopbackCalculator, CorrectTimestamps) {
   MP_EXPECT_OK(graph_.WaitUntilDone());
 }
 
+// A Calculator that outputs a summary packet in CalculatorBase::Close().
+class PacketOnCloseCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).Set(); + cc->Outputs().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) final { + cc->SetOffset(TimestampDiff(0)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + sum_ += cc->Inputs().Index(0).Value().Get(); + cc->Outputs().Index(0).AddPacket(cc->Inputs().Index(0).Value()); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Close(CalculatorContext* cc) final { + cc->Outputs().Index(0).AddPacket( + MakePacket(sum_).At(Timestamp::Max())); + return ::mediapipe::OkStatus(); + } + + private: + int sum_ = 0; +}; +REGISTER_CALCULATOR(PacketOnCloseCalculator); + +// Demonstrates that all ouput and input streams in PreviousLoopbackCalculator +// will close as expected when all graph input streams are closed. +TEST(PreviousLoopbackCalculator, ClosesCorrectly) { + std::vector outputs; + CalculatorGraphConfig graph_config_ = + ParseTextProtoOrDie(R"( + input_stream: 'in' + node { + calculator: 'PreviousLoopbackCalculator' + input_stream: 'MAIN:in' + input_stream: 'LOOP:out' + input_stream_info: { tag_index: 'LOOP' back_edge: true } + output_stream: 'PREV_LOOP:previous' + } + # This calculator synchronizes its inputs as normal, so it is used + # to check that both "in" and "previous" are ready. + node { + calculator: 'PassThroughCalculator' + input_stream: 'in' + input_stream: 'previous' + output_stream: 'out' + output_stream: 'previous2' + } + node { + calculator: 'PacketOnCloseCalculator' + input_stream: 'out' + output_stream: 'close_out' + } + )"); + tool::AddVectorSink("close_out", &graph_config_, &outputs); + + CalculatorGraph graph_; + MP_ASSERT_OK(graph_.Initialize(graph_config_, {})); + MP_ASSERT_OK(graph_.StartRun({})); + + auto send_packet = [&graph_](const std::string& input_name, int n) { + MP_EXPECT_OK(graph_.AddPacketToInputStream( + input_name, MakePacket(n).At(Timestamp(n)))); + }; + + send_packet("in", 1); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1})); + + send_packet("in", 5); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 5})); + + send_packet("in", 15); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), (std::vector{1, 5, 15})); + + MP_EXPECT_OK(graph_.CloseAllInputStreams()); + MP_EXPECT_OK(graph_.WaitUntilIdle()); + EXPECT_EQ(TimestampValues(outputs), + (std::vector{1, 5, 15, Timestamp::Max().Value()})); + + MP_EXPECT_OK(graph_.WaitUntilDone()); +} + } // anonymous namespace } // namespace mediapipe diff --git a/mediapipe/calculators/core/side_packet_to_stream_calculator.cc b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc new file mode 100644 index 000000000..043c91f32 --- /dev/null +++ b/mediapipe/calculators/core/side_packet_to_stream_calculator.cc @@ -0,0 +1,83 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +using mediapipe::PacketTypeSet; +using mediapipe::Timestamp; + +namespace { +static std::map* kTimestampMap = []() { + auto* res = new std::map(); + res->emplace("AT_PRESTREAM", Timestamp::PreStream()); + res->emplace("AT_POSTSTREAM", Timestamp::PostStream()); + res->emplace("AT_ZERO", Timestamp(0)); + return res; +}(); + +} // namespace + +// Outputs the single input_side_packet at the timestamp specified in the +// output_stream tag. Valid tags are AT_PRESTREAM, AT_POSTSTREAM and AT_ZERO. +class SidePacketToStreamCalculator : public CalculatorBase { + public: + SidePacketToStreamCalculator() = default; + ~SidePacketToStreamCalculator() override = default; + + static ::mediapipe::Status GetContract(CalculatorContract* cc); + ::mediapipe::Status Process(CalculatorContext* cc) override; + ::mediapipe::Status Close(CalculatorContext* cc) override; +}; +REGISTER_CALCULATOR(SidePacketToStreamCalculator); + +::mediapipe::Status SidePacketToStreamCalculator::GetContract( + CalculatorContract* cc) { + cc->InputSidePackets().Index(0).SetAny(); + + std::set tags = cc->Outputs().GetTags(); + RET_CHECK_EQ(tags.size(), 1); + + RET_CHECK_EQ(kTimestampMap->count(*tags.begin()), 1); + cc->Outputs().Tag(*tags.begin()).SetAny(); + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status SidePacketToStreamCalculator::Process( + CalculatorContext* cc) { + return mediapipe::tool::StatusStop(); +} + +::mediapipe::Status SidePacketToStreamCalculator::Close(CalculatorContext* cc) { + std::set tags = cc->Outputs().GetTags(); + RET_CHECK_EQ(tags.size(), 1); + const std::string& tag = *tags.begin(); + RET_CHECK_EQ(kTimestampMap->count(tag), 1); + cc->Outputs().Tag(tag).AddPacket( + cc->InputSidePackets().Index(0).At(kTimestampMap->at(tag))); + + return ::mediapipe::OkStatus(); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/core/split_vector_calculator.h b/mediapipe/calculators/core/split_vector_calculator.h index def156474..0eae022c7 100644 --- a/mediapipe/calculators/core/split_vector_calculator.h +++ b/mediapipe/calculators/core/split_vector_calculator.h @@ -34,7 +34,9 @@ namespace mediapipe { // SplitVectorCalculatorOptions. If the option "element_only" is set to true, // all ranges should be of size 1 and all outputs will be elements of type T. If // "element_only" is false, ranges can be non-zero in size and all outputs will -// be of type std::vector. +// be of type std::vector. If the option "combine_outputs" is set to true, +// only one output stream can be specified and all ranges of elements will be +// combined into one vector. // To use this class for a particular type T, register a calculator using // SplitVectorCalculator. template @@ -49,28 +51,47 @@ class SplitVectorCalculator : public CalculatorBase { const auto& options = cc->Options<::mediapipe::SplitVectorCalculatorOptions>(); - if (cc->Outputs().NumEntries() != options.ranges_size()) { - return ::mediapipe::InvalidArgumentError( - "The number of output streams should match the number of ranges " - "specified in the CalculatorOptions."); - } - - // Set the output types for each output stream. 
- for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { - if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 || - options.ranges(i).begin() >= options.ranges(i).end()) { - return ::mediapipe::InvalidArgumentError( - "Indices should be non-negative and begin index should be less " - "than the end index."); - } - if (options.element_only()) { - if (options.ranges(i).end() - options.ranges(i).begin() != 1) { - return ::mediapipe::InvalidArgumentError( - "Since element_only is true, all ranges should be of size 1."); + if (options.combine_outputs()) { + RET_CHECK_EQ(cc->Outputs().NumEntries(), 1); + cc->Outputs().Index(0).Set>(); + for (int i = 0; i < options.ranges_size() - 1; ++i) { + for (int j = i + 1; j < options.ranges_size(); ++j) { + const auto& range_0 = options.ranges(i); + const auto& range_1 = options.ranges(j); + if ((range_0.begin() >= range_1.begin() && + range_0.begin() < range_1.end()) || + (range_1.begin() >= range_0.begin() && + range_1.begin() < range_0.end())) { + return ::mediapipe::InvalidArgumentError( + "Ranges must be non-overlapping when using combine_outputs " + "option."); + } + } + } + } else { + if (cc->Outputs().NumEntries() != options.ranges_size()) { + return ::mediapipe::InvalidArgumentError( + "The number of output streams should match the number of ranges " + "specified in the CalculatorOptions."); + } + + // Set the output types for each output stream. + for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 || + options.ranges(i).begin() >= options.ranges(i).end()) { + return ::mediapipe::InvalidArgumentError( + "Indices should be non-negative and begin index should be less " + "than the end index."); + } + if (options.element_only()) { + if (options.ranges(i).end() - options.ranges(i).begin() != 1) { + return ::mediapipe::InvalidArgumentError( + "Since element_only is true, all ranges should be of size 1."); + } + cc->Outputs().Index(i).Set(); + } else { + cc->Outputs().Index(i).Set>(); } - cc->Outputs().Index(i).Set(); - } else { - cc->Outputs().Index(i).Set>(); } } @@ -83,13 +104,15 @@ class SplitVectorCalculator : public CalculatorBase { const auto& options = cc->Options<::mediapipe::SplitVectorCalculatorOptions>(); + element_only_ = options.element_only(); + combine_outputs_ = options.combine_outputs(); + for (const auto& range : options.ranges()) { ranges_.push_back({range.begin(), range.end()}); max_range_end_ = std::max(max_range_end_, range.end()); + total_elements_ += range.end() - range.begin(); } - element_only_ = options.element_only(); - return ::mediapipe::OkStatus(); } @@ -97,17 +120,29 @@ class SplitVectorCalculator : public CalculatorBase { const auto& input = cc->Inputs().Index(0).Get>(); RET_CHECK_GE(input.size(), max_range_end_); - if (element_only_) { + if (combine_outputs_) { + auto output = absl::make_unique>(); + output->reserve(total_elements_); for (int i = 0; i < ranges_.size(); ++i) { - cc->Outputs().Index(i).AddPacket( - MakePacket(input[ranges_[i].first]).At(cc->InputTimestamp())); - } - } else { - for (int i = 0; i < ranges_.size(); ++i) { - auto output = absl::make_unique>( + auto elements = absl::make_unique>( input.begin() + ranges_[i].first, input.begin() + ranges_[i].second); - cc->Outputs().Index(i).Add(output.release(), cc->InputTimestamp()); + output->insert(output->end(), elements->begin(), elements->end()); + } + cc->Outputs().Index(0).Add(output.release(), cc->InputTimestamp()); + } else { + if (element_only_) { + for (int i = 0; i 
< ranges_.size(); ++i) { + cc->Outputs().Index(i).AddPacket( + MakePacket(input[ranges_[i].first]).At(cc->InputTimestamp())); + } + } else { + for (int i = 0; i < ranges_.size(); ++i) { + auto output = absl::make_unique>( + input.begin() + ranges_[i].first, + input.begin() + ranges_[i].second); + cc->Outputs().Index(i).Add(output.release(), cc->InputTimestamp()); + } } } @@ -117,7 +152,9 @@ class SplitVectorCalculator : public CalculatorBase { private: std::vector> ranges_; int32 max_range_end_ = -1; + int32 total_elements_ = 0; bool element_only_ = false; + bool combine_outputs_ = false; }; } // namespace mediapipe diff --git a/mediapipe/calculators/core/split_vector_calculator.proto b/mediapipe/calculators/core/split_vector_calculator.proto index 3ef31475b..53acbb7bf 100644 --- a/mediapipe/calculators/core/split_vector_calculator.proto +++ b/mediapipe/calculators/core/split_vector_calculator.proto @@ -37,4 +37,7 @@ message SplitVectorCalculatorOptions { // just element of type T. By default, if a range specifies only one element, // it is outputted as an std::vector. optional bool element_only = 2 [default = false]; + + // Combines output elements to one vector. + optional bool combine_outputs = 3 [default = false]; } diff --git a/mediapipe/calculators/core/split_vector_calculator_test.cc b/mediapipe/calculators/core/split_vector_calculator_test.cc index 4187e8aba..79243c149 100644 --- a/mediapipe/calculators/core/split_vector_calculator_test.cc +++ b/mediapipe/calculators/core/split_vector_calculator_test.cc @@ -105,6 +105,34 @@ class SplitTfLiteTensorVectorCalculatorTest : public ::testing::Test { } } + void ValidateCombinedVectorOutput(std::vector& output_packets, + int expected_elements, + std::vector& input_begin_indices, + std::vector& input_end_indices) { + ASSERT_EQ(1, output_packets.size()); + ASSERT_EQ(input_begin_indices.size(), input_end_indices.size()); + const std::vector& output_vec = + output_packets[0].Get>(); + ASSERT_EQ(expected_elements, output_vec.size()); + const int num_ranges = input_begin_indices.size(); + + int element_id = 0; + for (int range_id = 0; range_id < num_ranges; ++range_id) { + for (int i = input_begin_indices[range_id]; + i < input_end_indices[range_id]; ++i) { + const int expected_value = i; + const TfLiteTensor* result = &output_vec[element_id]; + float* result_buffer = result->data.f; + ASSERT_NE(result_buffer, nullptr); + ASSERT_EQ(result_buffer, input_buffers_[i]); + for (int j = 0; j < width * height * channels; ++j) { + ASSERT_EQ(expected_value, result_buffer[j]); + } + element_id++; + } + } + } + void ValidateElementOutput(std::vector& output_packets, int input_begin_index) { ASSERT_EQ(1, output_packets.size()); @@ -234,6 +262,65 @@ TEST_F(SplitTfLiteTensorVectorCalculatorTest, InvalidOutputStreamCountTest) { ASSERT_FALSE(graph.Initialize(graph_config).ok()); } +TEST_F(SplitTfLiteTensorVectorCalculatorTest, + InvalidCombineOutputsMultipleOutputsTest) { + ASSERT_NE(interpreter_, nullptr); + + // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + R"( + input_stream: "tensor_in" + node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "tensor_in" + output_stream: "range_0" + output_stream: "range_1" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + combine_outputs: true + } + } + } + )"); + + // Run the graph. 
+ CalculatorGraph graph; + // The graph should fail running because the number of output streams does not + // match the number of range elements in the options. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +TEST_F(SplitTfLiteTensorVectorCalculatorTest, InvalidOverlappingRangesTest) { + ASSERT_NE(interpreter_, nullptr); + + // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + R"( + input_stream: "tensor_in" + node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "tensor_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 3 } + ranges: { begin: 1 end: 4 } + combine_outputs: true + } + } + } + )"); + + // Run the graph. + CalculatorGraph graph; + // The graph should fail running because there are overlapping ranges. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + TEST_F(SplitTfLiteTensorVectorCalculatorTest, SmokeTestElementOnly) { ASSERT_NE(interpreter_, nullptr); @@ -289,6 +376,53 @@ TEST_F(SplitTfLiteTensorVectorCalculatorTest, SmokeTestElementOnly) { MP_ASSERT_OK(graph.WaitUntilDone()); } +TEST_F(SplitTfLiteTensorVectorCalculatorTest, SmokeTestCombiningOutputs) { + ASSERT_NE(interpreter_, nullptr); + + PrepareTfLiteTensorVector(/*vector_size=*/5); + ASSERT_NE(input_vec_, nullptr); + + // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie( + R"( + input_stream: "tensor_in" + node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "tensor_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 4 end: 5 } + combine_outputs: true + } + } + } + )"); + std::vector range_0_packets; + tool::AddVectorSink("range_0", &graph_config, &range_0_packets); + + // Run the graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "tensor_in", Adopt(input_vec_.release()).At(Timestamp(0)))); + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + + std::vector input_begin_indices = {0, 2, 4}; + std::vector input_end_indices = {1, 3, 5}; + ValidateCombinedVectorOutput(range_0_packets, /*expected_elements=*/3, + input_begin_indices, input_end_indices); + + // Fully close the graph at the end. + MP_ASSERT_OK(graph.CloseInputStream("tensor_in")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + TEST_F(SplitTfLiteTensorVectorCalculatorTest, ElementOnlyDisablesVectorOutputs) { // Prepare a graph to use the SplitTfLiteTensorVectorCalculator. diff --git a/mediapipe/calculators/core/string_to_int_calculator.cc b/mediapipe/calculators/core/string_to_int_calculator.cc new file mode 100644 index 000000000..64600cde3 --- /dev/null +++ b/mediapipe/calculators/core/string_to_int_calculator.cc @@ -0,0 +1,79 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include +#include + +#include "absl/strings/numbers.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// Calculator that converts a std::string into an integer type, or fails if the +// conversion is not possible. +// +// Example config: +// node { +// calculator: "StringToIntCalculator" +// input_side_packet: "string" +// output_side_packet: "index" +// } +template +class StringToIntCalculatorTemplate : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets().Index(0).Set(); + cc->OutputSidePackets().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + IntType number; + if (!absl::SimpleAtoi(cc->InputSidePackets().Index(0).Get(), + &number)) { + return ::mediapipe::InvalidArgumentError( + "The std::string could not be parsed as an integer."); + } + cc->OutputSidePackets().Index(0).Set(MakePacket(number)); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + return ::mediapipe::OkStatus(); + } +}; + +using StringToIntCalculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToIntCalculator); + +using StringToUintCalculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToUintCalculator); + +using StringToInt32Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToInt32Calculator); + +using StringToUint32Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToUint32Calculator); + +using StringToInt64Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToInt64Calculator); + +using StringToUint64Calculator = StringToIntCalculatorTemplate; +REGISTER_CALCULATOR(StringToUint64Calculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/BUILD b/mediapipe/calculators/tensorflow/BUILD index 4231b899e..c9c505495 100644 --- a/mediapipe/calculators/tensorflow/BUILD +++ b/mediapipe/calculators/tensorflow/BUILD @@ -104,6 +104,17 @@ proto_library( deps = ["//mediapipe/framework:calculator_proto"], ) +proto_library( + name = "unpack_media_sequence_calculator_proto", + srcs = ["unpack_media_sequence_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/core:packet_resampler_calculator_proto", + "//mediapipe/framework:calculator_proto", + "//mediapipe/util:audio_decoder_proto", + ], +) + proto_library( name = "vector_float_to_tensor_calculator_options_proto", srcs = ["vector_float_to_tensor_calculator_options.proto"], @@ -261,6 +272,17 @@ mediapipe_cc_proto_library( deps = [":unpack_media_sequence_calculator_proto"], ) +mediapipe_cc_proto_library( + name = "vector_int_to_tensor_calculator_options_cc_proto", + srcs = ["vector_int_to_tensor_calculator_options.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "@org_tensorflow//tensorflow/core:protos_all_cc", + ], + visibility = 
["//visibility:public"], + deps = [":vector_int_to_tensor_calculator_options_proto"], +) + mediapipe_cc_proto_library( name = "vector_float_to_tensor_calculator_options_cc_proto", srcs = ["vector_float_to_tensor_calculator_options.proto"], @@ -621,6 +643,22 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tfrecord_reader_calculator", + srcs = ["tfrecord_reader_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:logging", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/core:lib", + "@org_tensorflow//tensorflow/core:protos_all_cc", + ], + alwayslink = 1, +) + cc_library( name = "tensor_to_vector_float_calculator", srcs = ["tensor_to_vector_float_calculator.cc"], @@ -662,6 +700,20 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "vector_int_to_tensor_calculator", + srcs = ["vector_int_to_tensor_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":vector_int_to_tensor_calculator_options_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/core:framework", + ], + alwayslink = 1, +) + cc_library( name = "vector_float_to_tensor_calculator", srcs = ["vector_float_to_tensor_calculator.cc"], @@ -676,6 +728,20 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "unpack_yt8m_sequence_example_calculator", + srcs = ["unpack_yt8m_sequence_example_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:packet", + "//mediapipe/framework/port:status", + "@org_tensorflow//tensorflow/core:protos_all_cc", + ], + alwayslink = 1, +) + cc_test( name = "graph_tensors_packet_generator_test", srcs = ["graph_tensors_packet_generator_test.cc"], @@ -980,6 +1046,20 @@ cc_test( ], ) +cc_test( + name = "vector_int_to_tensor_calculator_test", + srcs = ["vector_int_to_tensor_calculator_test.cc"], + deps = [ + ":vector_int_to_tensor_calculator", + ":vector_int_to_tensor_calculator_options_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/port:gtest_main", + "@org_tensorflow//tensorflow/core:framework", + "@org_tensorflow//tensorflow/core:protos_all_cc", + ], +) + cc_test( name = "vector_float_to_tensor_calculator_test", srcs = ["vector_float_to_tensor_calculator_test.cc"], diff --git a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc index 78ee50871..5ad8e853c 100644 --- a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc +++ b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc @@ -29,6 +29,11 @@ namespace mediapipe { +const char kBufferSize[] = "BUFFER_SIZE"; +const char kOverlap[] = "OVERLAP"; +const char kTimestampOffset[] = "TIMESTAMP_OFFSET"; +const char kCalculatorOptions[] = "CALCULATOR_OPTIONS"; + namespace tf = tensorflow; // Given an input stream of tensors, concatenates the tensors over timesteps. 
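With these side packets, the buffering parameters can be chosen when the graph is started instead of being fixed in the node options. A minimal sketch of how a caller might supply them, assuming a LappedTensorBufferCalculator node that declares input_side_packet: "BUFFER_SIZE:segment_size" and input_side_packet: "OVERLAP:segment_overlap" (the side-packet names here are illustrative, not part of this change):

#include <map>
#include <string>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/status.h"

// Starts a graph whose LappedTensorBufferCalculator node reads its buffer size
// and overlap from the BUFFER_SIZE / OVERLAP input side packets.
::mediapipe::Status StartWithSegmentSize(::mediapipe::CalculatorGraph* graph,
                                         int segment_size) {
  std::map<std::string, ::mediapipe::Packet> side_packets;
  // Buffer one full segment and slide by one frame per output.
  side_packets["segment_size"] = ::mediapipe::MakePacket<int>(segment_size);
  side_packets["segment_overlap"] =
      ::mediapipe::MakePacket<int>(segment_size - 1);
  return graph->StartRun(side_packets);
}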
@@ -72,6 +77,9 @@ class LappedTensorBufferCalculator : public CalculatorBase { ::mediapipe::Status AddBatchDimension(tf::Tensor* input_tensor); int steps_until_output_; + int buffer_size_; + int overlap_; + int timestamp_offset_; std::unique_ptr> timestamp_buffer_; std::unique_ptr> buffer_; LappedTensorBufferCalculatorOptions options_; @@ -87,6 +95,21 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); ); RET_CHECK_EQ(cc->Inputs().NumEntries(), 1) << "Only one output stream is supported."; + + if (cc->InputSidePackets().HasTag(kBufferSize)) { + cc->InputSidePackets().Tag(kBufferSize).Set(); + } + if (cc->InputSidePackets().HasTag(kOverlap)) { + cc->InputSidePackets().Tag(kOverlap).Set(); + } + if (cc->InputSidePackets().HasTag(kTimestampOffset)) { + cc->InputSidePackets().Tag(kTimestampOffset).Set(); + } + if (cc->InputSidePackets().HasTag(kCalculatorOptions)) { + cc->InputSidePackets() + .Tag(kCalculatorOptions) + .Set(); + } cc->Outputs().Index(0).Set( // Output tensorflow::Tensor stream with possibly overlapping steps. ); @@ -95,16 +118,33 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); ::mediapipe::Status LappedTensorBufferCalculator::Open(CalculatorContext* cc) { options_ = cc->Options(); - RET_CHECK_LT(options_.overlap(), options_.buffer_size()); - RET_CHECK_GE(options_.timestamp_offset(), 0) + if (cc->InputSidePackets().HasTag(kCalculatorOptions)) { + options_ = cc->InputSidePackets() + .Tag(kCalculatorOptions) + .Get(); + } + buffer_size_ = options_.buffer_size(); + if (cc->InputSidePackets().HasTag(kBufferSize)) { + buffer_size_ = cc->InputSidePackets().Tag(kBufferSize).Get(); + } + overlap_ = options_.overlap(); + if (cc->InputSidePackets().HasTag(kOverlap)) { + overlap_ = cc->InputSidePackets().Tag(kOverlap).Get(); + } + timestamp_offset_ = options_.timestamp_offset(); + if (cc->InputSidePackets().HasTag(kTimestampOffset)) { + timestamp_offset_ = cc->InputSidePackets().Tag(kTimestampOffset).Get(); + } + + RET_CHECK_LT(overlap_, buffer_size_); + RET_CHECK_GE(timestamp_offset_, 0) << "Negative timestamp_offset is not allowed."; - RET_CHECK_LT(options_.timestamp_offset(), options_.buffer_size()) + RET_CHECK_LT(timestamp_offset_, buffer_size_) << "output_frame_num_offset has to be less than buffer_size."; timestamp_buffer_ = - absl::make_unique>(options_.buffer_size()); - buffer_ = - absl::make_unique>(options_.buffer_size()); - steps_until_output_ = options_.buffer_size(); + absl::make_unique>(buffer_size_); + buffer_ = absl::make_unique>(buffer_size_); + steps_until_output_ = buffer_size_; return ::mediapipe::OkStatus(); } @@ -128,11 +168,10 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator); concatenated.get()); RET_CHECK(concat_status.ok()) << concat_status.ToString(); - cc->Outputs().Index(0).Add( - concatenated.release(), - timestamp_buffer_->Get(options_.timestamp_offset())); + cc->Outputs().Index(0).Add(concatenated.release(), + timestamp_buffer_->Get(timestamp_offset_)); - steps_until_output_ = options_.buffer_size() - options_.overlap(); + steps_until_output_ = buffer_size_ - overlap_; } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/tensorflow/tfrecord_reader_calculator.cc b/mediapipe/calculators/tensorflow/tfrecord_reader_calculator.cc new file mode 100644 index 000000000..5de7b0c0d --- /dev/null +++ b/mediapipe/calculators/tensorflow/tfrecord_reader_calculator.cc @@ -0,0 +1,126 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/logging.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/file_system.h" + +namespace mediapipe { + +const char kTFRecordPath[] = "TFRECORD_PATH"; +const char kRecordIndex[] = "RECORD_INDEX"; +const char kExampleTag[] = "EXAMPLE"; +const char kSequenceExampleTag[] = "SEQUENCE_EXAMPLE"; + +// Reads a tensorflow example/sequence example from a tfrecord file. +// If the "RECORD_INDEX" input side packet is provided, the calculator is going +// to fetch the example/sequence example of the tfrecord file at the target +// record index. Otherwise, the reader always reads the first example/sequence +// example of the tfrecord file. +// +// Example config: +// node { +// calculator: "TFRecordReaderCalculator" +// input_side_packet: "TFRECORD_PATH:tfrecord_path" +// input_side_packet: "RECORD_INDEX:record_index" +// output_side_packet: "SEQUENCE_EXAMPLE:sequence_example" +// } +class TFRecordReaderCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; +}; + +::mediapipe::Status TFRecordReaderCalculator::GetContract( + CalculatorContract* cc) { + cc->InputSidePackets().Tag(kTFRecordPath).Set(); + if (cc->InputSidePackets().HasTag(kRecordIndex)) { + cc->InputSidePackets().Tag(kRecordIndex).Set(); + } + + RET_CHECK(cc->OutputSidePackets().HasTag(kExampleTag) || + cc->OutputSidePackets().HasTag(kSequenceExampleTag)) + << "TFRecordReaderCalculator must output either Tensorflow example or " + "sequence example."; + if (cc->OutputSidePackets().HasTag(kExampleTag)) { + cc->OutputSidePackets().Tag(kExampleTag).Set(); + } else { + cc->OutputSidePackets() + .Tag(kSequenceExampleTag) + .Set(); + } + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status TFRecordReaderCalculator::Open(CalculatorContext* cc) { + std::unique_ptr file; + auto tf_status = tensorflow::Env::Default()->NewRandomAccessFile( + cc->InputSidePackets().Tag(kTFRecordPath).Get(), &file); + RET_CHECK(tf_status.ok()) + << "Failed to open tfrecord file: " << tf_status.error_message(); + tensorflow::io::RecordReader reader(file.get(), + tensorflow::io::RecordReaderOptions()); + tensorflow::uint64 offset = 0; + std::string example_str; + const int target_idx = + cc->InputSidePackets().HasTag(kRecordIndex) + ? 
cc->InputSidePackets().Tag(kRecordIndex).Get() + : 0; + int current_idx = 0; + while (current_idx <= target_idx) { + tf_status = reader.ReadRecord(&offset, &example_str); + RET_CHECK(tf_status.ok()) + << "Failed to read tfrecord: " << tf_status.error_message(); + if (current_idx == target_idx) { + if (cc->OutputSidePackets().HasTag(kExampleTag)) { + tensorflow::Example tf_example; + tf_example.ParseFromString(example_str); + cc->OutputSidePackets() + .Tag(kExampleTag) + .Set(MakePacket(std::move(tf_example))); + } else { + tensorflow::SequenceExample tf_sequence_example; + tf_sequence_example.ParseFromString(example_str); + cc->OutputSidePackets() + .Tag(kSequenceExampleTag) + .Set(MakePacket( + std::move(tf_sequence_example))); + } + } + ++current_idx; + } + + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status TFRecordReaderCalculator::Process(CalculatorContext* cc) { + return ::mediapipe::OkStatus(); +} + +REGISTER_CALCULATOR(TFRecordReaderCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/unpack_yt8m_sequence_example_calculator.cc b/mediapipe/calculators/tensorflow/unpack_yt8m_sequence_example_calculator.cc new file mode 100644 index 000000000..daf7f1117 --- /dev/null +++ b/mediapipe/calculators/tensorflow/unpack_yt8m_sequence_example_calculator.cc @@ -0,0 +1,192 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/packet.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/example/feature.pb.h" + +namespace mediapipe { +namespace { + +const char kId[] = "id"; +const char kRgb[] = "rgb"; +const char kAudio[] = "audio"; +const char kDesiredSegmentSize[] = "DESIRED_SEGMENT_SIZE"; +const char kYt8mId[] = "YT8M_ID"; +const char kYt8mSequenceExample[] = "YT8M_SEQUENCE_EXAMPLE"; +const char kQuantizedRgbFeature[] = "QUANTIZED_RGB_FEATURE"; +const char kQuantizedAudioFeature[] = "QUANTIZED_AUDIO_FEATURE"; +const char kSegmentSize[] = "SEGMENT_SIZE"; +const char kLappedTensorBufferCalculatorOptions[] = + "LAPPED_TENSOR_BUFFER_CALCULATOR_OPTIONS"; + +std::string GetQuantizedFeature( + const tensorflow::SequenceExample& sequence_example, const std::string& key, + int index) { + const auto& bytes_list = sequence_example.feature_lists() + .feature_list() + .at(key) + .feature() + .Get(index) + .bytes_list() + .value(); + CHECK_EQ(1, bytes_list.size()); + return bytes_list.Get(0); +} +} // namespace + +// Unpacks YT8M Sequence Example. Note that the audio feature and rgb feature +// output are quantized. DequantizeByteArrayCalculator can do the dequantization +// for you. 
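+// Each call to Process() emits one quantized frame per feature, at timestamps
+// that advance by one second per frame (the YT8M feature rate), so the
+// outputs can be fed directly into DequantizeByteArrayCalculator and then
+// batched with LappedTensorBufferCalculator.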
+// +// Example config: +// node { +// calculator: "UnpackYt8mSequenceExampleCalculator" +// input_side_packet: "YT8M_SEQUENCE_EXAMPLE:yt8m_sequence_example" +// output_stream: "QUANTIZED_RGB_FEATURE:quantized_rgb_feature" +// output_stream: "QUANTIZED_AUDIO_FEATURE:quantized_audio_feature" +// } +class UnpackYt8mSequenceExampleCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->InputSidePackets() + .Tag(kYt8mSequenceExample) + .Set(); + if (cc->InputSidePackets().HasTag(kDesiredSegmentSize)) { + cc->InputSidePackets().Tag(kDesiredSegmentSize).Set(); + } + cc->Outputs().Tag(kQuantizedRgbFeature).Set(); + cc->Outputs().Tag(kQuantizedAudioFeature).Set(); + if (cc->OutputSidePackets().HasTag(kYt8mId)) { + cc->OutputSidePackets().Tag(kYt8mId).Set(); + } + if (cc->OutputSidePackets().HasTag(kLappedTensorBufferCalculatorOptions)) { + cc->OutputSidePackets() + .Tag(kLappedTensorBufferCalculatorOptions) + .Set<::mediapipe::LappedTensorBufferCalculatorOptions>(); + } + if (cc->OutputSidePackets().HasTag(kSegmentSize)) { + cc->OutputSidePackets().Tag(kSegmentSize).Set(); + } + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + const tensorflow::SequenceExample& sequence_example = + cc->InputSidePackets() + .Tag(kYt8mSequenceExample) + .Get(); + const std::string& yt8m_id = + sequence_example.context().feature().at(kId).bytes_list().value().Get( + 0); + if (cc->OutputSidePackets().HasTag(kYt8mId)) { + cc->OutputSidePackets().Tag(kYt8mId).Set( + MakePacket(yt8m_id)); + } + + int rgb_feature_list_length = + sequence_example.feature_lists().feature_list().at(kRgb).feature_size(); + int audio_feature_list_length = sequence_example.feature_lists() + .feature_list() + .at(kAudio) + .feature_size(); + + if (rgb_feature_list_length != audio_feature_list_length) { + return ::mediapipe::FailedPreconditionError(absl::StrCat( + "Data corruption: the length of audio features and rgb features are " + "not equal. Please check the sequence example that contains yt8m " + "id: ", + yt8m_id)); + } + feature_list_length_ = rgb_feature_list_length; + if (cc->OutputSidePackets().HasTag(kLappedTensorBufferCalculatorOptions) || + cc->OutputSidePackets().HasTag(kSegmentSize)) { + // If the desired segment size is specified, take the min of the length of + // the feature list and the desired size to be the output segment size. 
+ int segment_size = feature_list_length_; + if (cc->InputSidePackets().HasTag(kDesiredSegmentSize)) { + int desired_segment_size = + cc->InputSidePackets().Tag(kDesiredSegmentSize).Get(); + RET_CHECK(desired_segment_size > 0) + << "The desired segment size must be greater than zero."; + segment_size = std::min( + feature_list_length_, + cc->InputSidePackets().Tag(kDesiredSegmentSize).Get()); + } + if (cc->OutputSidePackets().HasTag( + kLappedTensorBufferCalculatorOptions)) { + auto lapped_tensor_buffer_calculator_options = absl::make_unique< + ::mediapipe::LappedTensorBufferCalculatorOptions>(); + lapped_tensor_buffer_calculator_options->set_add_batch_dim_to_tensors( + true); + lapped_tensor_buffer_calculator_options->set_buffer_size(segment_size); + lapped_tensor_buffer_calculator_options->set_overlap(segment_size - 1); + lapped_tensor_buffer_calculator_options->set_timestamp_offset( + segment_size - 1); + cc->OutputSidePackets() + .Tag(kLappedTensorBufferCalculatorOptions) + .Set(Adopt(lapped_tensor_buffer_calculator_options.release())); + } + if (cc->OutputSidePackets().HasTag(kSegmentSize)) { + cc->OutputSidePackets() + .Tag(kSegmentSize) + .Set(MakePacket(segment_size)); + } + } + LOG(INFO) << "Reading the sequence example that contains yt8m id: " + << yt8m_id << ". Feature list length: " << feature_list_length_; + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + if (current_index_ >= feature_list_length_) { + return ::mediapipe::tool::StatusStop(); + } + const tensorflow::SequenceExample& sequence_example = + cc->InputSidePackets() + .Tag(kYt8mSequenceExample) + .Get(); + + // Uses microsecond as the unit of time. In the YT8M dataset, each feature + // represents a second. + const Timestamp timestamp = Timestamp(current_index_ * 1000000); + cc->Outputs() + .Tag(kQuantizedRgbFeature) + .AddPacket( + MakePacket( + GetQuantizedFeature(sequence_example, kRgb, current_index_)) + .At(timestamp)); + cc->Outputs() + .Tag(kQuantizedAudioFeature) + .AddPacket( + MakePacket( + GetQuantizedFeature(sequence_example, kAudio, current_index_)) + .At(timestamp)); + ++current_index_; + return ::mediapipe::OkStatus(); + } + + private: + int current_index_ = 0; + int feature_list_length_ = 0; +}; + +REGISTER_CALCULATOR(UnpackYt8mSequenceExampleCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc b/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc index a96e39918..068be5714 100644 --- a/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc +++ b/mediapipe/calculators/tensorflow/vector_float_to_tensor_calculator.cc @@ -23,10 +23,12 @@ namespace mediapipe { -namespace tf = ::tensorflow; - +namespace { auto& INPUT_1D = VectorFloatToTensorCalculatorOptions::INPUT_1D; auto& INPUT_2D = VectorFloatToTensorCalculatorOptions::INPUT_2D; +} // namespace + +namespace tf = ::tensorflow; // The calculator expects one input (a packet containing a vector or // vector>) and generates one output (a packet containing a diff --git a/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator.cc b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator.cc new file mode 100644 index 000000000..1269e2761 --- /dev/null +++ b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator.cc @@ -0,0 +1,203 @@ +// Copyright 2019 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Converts a single int or vector or vector> to 1D (or 2D) +// tf::Tensor. + +#include "mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" + +namespace mediapipe { + +const char kVectorInt[] = "VECTOR_INT"; +const char kSingleInt[] = "SINGLE_INT"; +const char kTensorOut[] = "TENSOR_OUT"; + +namespace { +auto& INPUT_1D = VectorIntToTensorCalculatorOptions::INPUT_1D; +auto& INPUT_2D = VectorIntToTensorCalculatorOptions::INPUT_2D; +} // namespace + +namespace tf = ::tensorflow; + +template +void AssignMatrixValue(int r, int c, int value, tf::Tensor* output_tensor) { + output_tensor->tensor()(r, c) = value; +} + +// The calculator expects one input (a packet containing a single int or +// vector or vector>) and generates one output (a packet +// containing a tf::Tensor containing the same data). The output tensor will be +// either 1D or 2D with dimensions corresponding to the input vector int. It +// will hold DT_INT32 or DT_UINT8 or DT_INT64 values. +// +// Example config: +// node { +// calculator: "VectorIntToTensorCalculator" +// input_stream: "SINGLE_INT:segment_size_int_stream" +// output_stream: "TENSOR_OUT:segment_size_tensor" +// } +// +// or +// +// node { +// calculator: "VectorIntToTensorCalculator" +// input_stream: "VECTOR_INT:vector_int_features" +// output_stream: "TENSOR_OUT:tensor_features" +// } +class VectorIntToTensorCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + private: + VectorIntToTensorCalculatorOptions options_; +}; +REGISTER_CALCULATOR(VectorIntToTensorCalculator); + +::mediapipe::Status VectorIntToTensorCalculator::GetContract( + CalculatorContract* cc) { + const auto& options = cc->Options(); + // Start with only one input packet. 
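+  // Exactly one input stream must be bound, tagged either SINGLE_INT (one int
+  // per packet) or VECTOR_INT (a vector of ints per packet).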
+  RET_CHECK_EQ(cc->Inputs().NumEntries(), 1)
+      << "Only one input stream is supported.";
+  if (options.input_size() == INPUT_2D) {
+    cc->Inputs().Tag(kVectorInt).Set<std::vector<std::vector<int>>>();
+  } else if (options.input_size() == INPUT_1D) {
+    if (cc->Inputs().HasTag(kSingleInt)) {
+      cc->Inputs().Tag(kSingleInt).Set<int>();
+    } else {
+      cc->Inputs().Tag(kVectorInt).Set<std::vector<int>>();
+    }
+  } else {
+    LOG(FATAL) << "input size not supported";
+  }
+  RET_CHECK_EQ(cc->Outputs().NumEntries(), 1)
+      << "Only one output stream is supported.";
+  cc->Outputs().Tag(kTensorOut).Set<tf::Tensor>();
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status VectorIntToTensorCalculator::Open(CalculatorContext* cc) {
+  options_ = cc->Options<VectorIntToTensorCalculatorOptions>();
+  RET_CHECK(options_.tensor_data_type() == tf::DT_UINT8 ||
+            options_.tensor_data_type() == tf::DT_INT32 ||
+            options_.tensor_data_type() == tf::DT_INT64)
+      << "Output tensor data type is not supported.";
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status VectorIntToTensorCalculator::Process(
+    CalculatorContext* cc) {
+  tf::TensorShape tensor_shape;
+  if (options_.input_size() == INPUT_2D) {
+    const std::vector<std::vector<int>>& input =
+        cc->Inputs()
+            .Tag(kVectorInt)
+            .Value()
+            .Get<std::vector<std::vector<int>>>();
+
+    const int32 rows = input.size();
+    CHECK_GE(rows, 1);
+    const int32 cols = input[0].size();
+    CHECK_GE(cols, 1);
+    for (int i = 1; i < rows; ++i) {
+      CHECK_EQ(input[i].size(), cols);
+    }
+    if (options_.transpose()) {
+      tensor_shape = tf::TensorShape({cols, rows});
+    } else {
+      tensor_shape = tf::TensorShape({rows, cols});
+    }
+    auto output = ::absl::make_unique<tf::Tensor>(options_.tensor_data_type(),
+                                                  tensor_shape);
+    if (options_.transpose()) {
+      for (int r = 0; r < rows; ++r) {
+        for (int c = 0; c < cols; ++c) {
+          switch (options_.tensor_data_type()) {
+            case tf::DT_INT64:
+              AssignMatrixValue<tf::int64>(c, r, input[r][c], output.get());
+              break;
+            case tf::DT_UINT8:
+              AssignMatrixValue<uint8>(c, r, input[r][c], output.get());
+              break;
+            case tf::DT_INT32:
+              AssignMatrixValue<int>(c, r, input[r][c], output.get());
+              break;
+            default:
+              LOG(FATAL) << "tensor data type is not supported.";
+          }
+        }
+      }
+    } else {
+      for (int r = 0; r < rows; ++r) {
+        for (int c = 0; c < cols; ++c) {
+          switch (options_.tensor_data_type()) {
+            case tf::DT_INT64:
+              AssignMatrixValue<tf::int64>(r, c, input[r][c], output.get());
+              break;
+            case tf::DT_UINT8:
+              AssignMatrixValue<uint8>(r, c, input[r][c], output.get());
+              break;
+            case tf::DT_INT32:
+              AssignMatrixValue<int>(r, c, input[r][c], output.get());
+              break;
+            default:
+              LOG(FATAL) << "tensor data type is not supported.";
+          }
+        }
+      }
+    }
+    cc->Outputs().Tag(kTensorOut).Add(output.release(), cc->InputTimestamp());
+  } else if (options_.input_size() == INPUT_1D) {
+    std::vector<int> input;
+    if (cc->Inputs().HasTag(kSingleInt)) {
+      input.push_back(cc->Inputs().Tag(kSingleInt).Get<int>());
+    } else {
+      input = cc->Inputs().Tag(kVectorInt).Value().Get<std::vector<int>>();
+    }
+    CHECK_GE(input.size(), 1);
+    const int32 length = input.size();
+    tensor_shape = tf::TensorShape({length});
+    auto output = ::absl::make_unique<tf::Tensor>(options_.tensor_data_type(),
+                                                  tensor_shape);
+    for (int i = 0; i < length; ++i) {
+      switch (options_.tensor_data_type()) {
+        case tf::DT_INT64:
+          output->tensor<tf::int64, 1>()(i) = input.at(i);
+          break;
+        case tf::DT_UINT8:
+          output->tensor<uint8, 1>()(i) = input.at(i);
+          break;
+        case tf::DT_INT32:
+          output->tensor<int, 1>()(i) = input.at(i);
+          break;
+        default:
+          LOG(FATAL) << "tensor data type is not supported.";
+      }
+    }
+    cc->Outputs().Tag(kTensorOut).Add(output.release(), cc->InputTimestamp());
+  } else {
+    LOG(FATAL) << "input size not supported";
+  }
+  return ::mediapipe::OkStatus();
+}
+
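For orientation, here is a minimal sketch (not part of this change) of how the new calculator could be driven through CalculatorRunner; the stream names, option values, and the 2x2 input are made up for the example and mirror the pattern used in the unit tests below:

// Illustrative only: convert {{1, 2}, {3, 4}} into a transposed 2-D int32 tensor.
CalculatorRunner runner(R"(
  calculator: "VectorIntToTensorCalculator"
  input_stream: "VECTOR_INT:ints"
  output_stream: "TENSOR_OUT:tensor"
  options {
    [mediapipe.VectorIntToTensorCalculatorOptions.ext] {
      input_size: INPUT_2D
      transpose: true  # output shape becomes {cols, rows}
      tensor_data_type: DT_INT32
    }
  }
)");
auto input = absl::make_unique<std::vector<std::vector<int>>>(
    std::vector<std::vector<int>>{{1, 2}, {3, 4}});
runner.MutableInputs()->Tag("VECTOR_INT").packets.push_back(
    Adopt(input.release()).At(Timestamp(0)));
EXPECT_TRUE(runner.Run().ok());
const tensorflow::Tensor& out =
    runner.Outputs().Tag("TENSOR_OUT").packets[0].Get<tensorflow::Tensor>();
// With transpose, out(c, r) == input[r][c], e.g. out.matrix<int>()(0, 1) == 3.

Judging from the example config above, the SINGLE_INT variant plays the same role for a scalar such as the YT8M segment size, producing a one-element 1-D tensor.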
+} // namespace mediapipe diff --git a/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.proto b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.proto new file mode 100644 index 000000000..65554bb14 --- /dev/null +++ b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.proto @@ -0,0 +1,43 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "tensorflow/core/framework/types.proto"; + +message VectorIntToTensorCalculatorOptions { + extend mediapipe.CalculatorOptions { + optional VectorIntToTensorCalculatorOptions ext = 275364184; + } + enum InputSize { + UNKNOWN = 0; + INPUT_1D = 1; + INPUT_2D = 2; + } + + // If input_size is INPUT_2D, unpack a vector> to a + // 2d tensor (matrix). If INPUT_1D, convert a single int or vector + // into a 1d tensor (vector). + optional InputSize input_size = 1 [default = INPUT_1D]; + + // If true, the output tensor is transposed. + // Otherwise, the output tensor is not transposed. + // It will be ignored if tensor_is_2d is INPUT_1D. + optional bool transpose = 2 [default = false]; + + optional tensorflow.DataType tensor_data_type = 3 [default = DT_INT32]; +} diff --git a/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_test.cc b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_test.cc new file mode 100644 index 000000000..052a78516 --- /dev/null +++ b/mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_test.cc @@ -0,0 +1,202 @@ +// Copyright 2018 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
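+// Unit tests for VectorIntToTensorCalculator: a single int input, 1-D and 2-D
+// vector<int> inputs (with and without transpose), and int32 / int64 / uint8
+// output tensor types.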
+ +#include "mediapipe/calculators/tensorflow/vector_int_to_tensor_calculator_options.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" + +namespace mediapipe { + +namespace { + +namespace tf = ::tensorflow; + +class VectorIntToTensorCalculatorTest : public ::testing::Test { + protected: + void SetUpRunner( + const VectorIntToTensorCalculatorOptions::InputSize input_size, + const tensorflow::DataType tensor_data_type, const bool transpose, + const bool single_value) { + CalculatorGraphConfig::Node config; + config.set_calculator("VectorIntToTensorCalculator"); + if (single_value) { + config.add_input_stream("SINGLE_INT:input_int"); + } else { + config.add_input_stream("VECTOR_INT:input_int"); + } + config.add_output_stream("TENSOR_OUT:output_tensor"); + auto options = config.mutable_options()->MutableExtension( + VectorIntToTensorCalculatorOptions::ext); + options->set_input_size(input_size); + options->set_transpose(transpose); + options->set_tensor_data_type(tensor_data_type); + runner_ = ::absl::make_unique(config); + } + + void TestConvertFromVectoVectorInt(const bool transpose) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_2D, + tensorflow::DT_INT32, transpose, false); + auto input = ::absl::make_unique>>( + 2, std::vector(2)); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + input->at(i).at(j) = i * 2 + j; + } + } + + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("VECTOR_INT") + .packets.push_back(Adopt(input.release()).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(2, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT32, output_tensor.dtype()); + const auto matrix = output_tensor.matrix(); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + if (!transpose) { + EXPECT_EQ(i * 2 + j, matrix(i, j)); + } else { + EXPECT_EQ(j * 2 + i, matrix(i, j)); + } + } + } + } + + std::unique_ptr runner_; +}; + +TEST_F(VectorIntToTensorCalculatorTest, TestSingleValue) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_INT32, false, true); + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("SINGLE_INT") + .packets.push_back(MakePacket(1).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT32, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + EXPECT_EQ(1, vec(0)); +} + +TEST_F(VectorIntToTensorCalculatorTest, TesOneDim) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_INT32, false, false); + auto input = ::absl::make_unique>(5); + for (int i = 0; i < 5; ++i) { + input->at(i) = i; + } + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("VECTOR_INT") + .packets.push_back(Adopt(input.release()).At(Timestamp(time))); + + 
EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT32, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i, vec(i)); + } +} + +TEST_F(VectorIntToTensorCalculatorTest, TestTwoDims) { + for (bool transpose : {false, true}) { + TestConvertFromVectoVectorInt(transpose); + } +} + +TEST_F(VectorIntToTensorCalculatorTest, TestInt64) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_INT64, false, true); + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("SINGLE_INT") + .packets.push_back(MakePacket(2 ^ 31).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_INT64, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + EXPECT_EQ(2 ^ 31, vec(0)); +} + +TEST_F(VectorIntToTensorCalculatorTest, TestUint8) { + SetUpRunner(VectorIntToTensorCalculatorOptions::INPUT_1D, + tensorflow::DT_UINT8, false, false); + auto input = ::absl::make_unique>(5); + for (int i = 0; i < 5; ++i) { + input->at(i) = i; + } + const int64 time = 1234; + runner_->MutableInputs() + ->Tag("VECTOR_INT") + .packets.push_back(Adopt(input.release()).At(Timestamp(time))); + + EXPECT_TRUE(runner_->Run().ok()); + + const std::vector& output_packets = + runner_->Outputs().Tag("TENSOR_OUT").packets; + EXPECT_EQ(1, output_packets.size()); + EXPECT_EQ(time, output_packets[0].Timestamp().Value()); + const tf::Tensor& output_tensor = output_packets[0].Get(); + + EXPECT_EQ(1, output_tensor.dims()); + EXPECT_EQ(tf::DT_UINT8, output_tensor.dtype()); + const auto vec = output_tensor.vec(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(i, vec(i)); + } +} + +} // namespace +} // namespace mediapipe diff --git a/mediapipe/calculators/tflite/tflite_converter_calculator.cc b/mediapipe/calculators/tflite/tflite_converter_calculator.cc index 598ae4965..a9dccaed8 100644 --- a/mediapipe/calculators/tflite/tflite_converter_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_converter_calculator.cc @@ -25,7 +25,8 @@ #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" @@ -45,7 +46,8 @@ #include "tensorflow/lite/delegates/gpu/metal_delegate.h" #endif // iOS -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS typedef id GpuTensor; @@ -67,7 +69,8 @@ typedef Eigen::Matrix namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) 
using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlProgram; using ::tflite::gpu::gl::GlShader; @@ -146,7 +149,8 @@ class TfLiteConverterCalculator : public CalculatorBase { std::unique_ptr interpreter_ = nullptr; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_out_; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -181,7 +185,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Inputs().HasTag("IMAGE")) cc->Inputs().Tag("IMAGE").Set(); if (cc->Inputs().HasTag("MATRIX")) cc->Inputs().Tag("MATRIX").Set(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag("IMAGE_GPU")) { cc->Inputs().Tag("IMAGE_GPU").Set(); use_gpu |= true; @@ -190,7 +194,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Outputs().HasTag("TENSORS")) cc->Outputs().Tag("TENSORS").Set>(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Outputs().HasTag("TENSORS_GPU")) { cc->Outputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -198,7 +202,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); #endif // !MEDIAPIPE_DISABLE_GPU if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); @@ -218,7 +223,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); if (cc->Inputs().HasTag("IMAGE_GPU") || cc->Outputs().HasTag("IMAGE_OUT_GPU")) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) use_gpu_ = true; #else RET_CHECK_FAIL() << "GPU processing not enabled."; @@ -231,7 +236,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); cc->Outputs().HasTag("TENSORS_GPU")); // Cannot use quantization. use_quantized_tensors_ = false; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; @@ -264,7 +270,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); } ::mediapipe::Status TfLiteConverterCalculator::Close(CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) gpu_helper_.RunInGlContext([this] { gpu_data_out_.reset(); }); #endif #if defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -383,7 +390,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); ::mediapipe::Status TfLiteConverterCalculator::ProcessGPU( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // GpuBuffer to tflite::gpu::GlBuffer conversion. 
const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get(); MP_RETURN_IF_ERROR( @@ -468,7 +476,7 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); } ::mediapipe::Status TfLiteConverterCalculator::InitGpu(CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) // Get input image sizes. const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get(); mediapipe::ImageFormat::Format format = @@ -485,7 +493,8 @@ REGISTER_CALCULATOR(TfLiteConverterCalculator); RET_CHECK_FAIL() << "Num input channels is less than desired output."; #endif // !MEDIAPIPE_DISABLE_GPU -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &include_alpha, &input, &single_channel]() -> ::mediapipe::Status { // Device memory. diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 9bc02b48c..ebd632df9 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -27,7 +27,8 @@ #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/model.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gpu_buffer.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -52,7 +53,8 @@ namespace { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS typedef id GpuTensor; @@ -68,13 +70,14 @@ size_t RoundUp(size_t n, size_t m) { return ((n + m - 1) / m) * m; } // NOLINT // * Aux namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CopyBuffer; using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlBuffer; #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) struct GPUData { int elements = 1; GpuTensor buffer; @@ -147,7 +150,8 @@ class TfLiteInferenceCalculator : public CalculatorBase { std::unique_ptr model_; TfLiteDelegate* delegate_ = nullptr; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_in_; std::vector> gpu_data_out_; @@ -179,7 +183,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); if (cc->Inputs().HasTag("TENSORS")) cc->Inputs().Tag("TENSORS").Set>(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -188,7 +192,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); if (cc->Outputs().HasTag("TENSORS")) cc->Outputs().Tag("TENSORS").Set>(); -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Outputs().HasTag("TENSORS_GPU")) { cc->Outputs().Tag("TENSORS_GPU").Set>(); 
use_gpu |= true; @@ -206,7 +210,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); use_gpu |= options.use_gpu(); if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); @@ -225,7 +230,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); MP_RETURN_IF_ERROR(LoadOptions(cc)); if (cc->Inputs().HasTag("TENSORS_GPU")) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) gpu_input_ = true; gpu_inference_ = true; // Inference must be on GPU also. #else @@ -235,7 +240,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } if (cc->Outputs().HasTag("TENSORS_GPU")) { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) gpu_output_ = true; RET_CHECK(cc->Inputs().HasTag("TENSORS_GPU")) << "GPU output must also have GPU Input."; @@ -248,13 +253,15 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); MP_RETURN_IF_ERROR(LoadModel(cc)); if (gpu_inference_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; RET_CHECK(gpu_helper_); #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( [this, &cc]() -> ::mediapipe::Status { return LoadDelegate(cc); })); #else @@ -262,6 +269,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); #endif } +#if defined(__EMSCRIPTEN__) + MP_RETURN_IF_ERROR(LoadDelegate(cc)); +#endif // __EMSCRIPTEN__ + return ::mediapipe::OkStatus(); } @@ -269,7 +280,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 1. Receive pre-processed tensor inputs. if (gpu_input_) { // Read GPU input into SSBO. -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_EQ(input_tensors.size(), 1); @@ -315,7 +327,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 2. Run inference. if (gpu_inference_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { RET_CHECK_EQ(interpreter_->Invoke(), kTfLiteOk); @@ -330,7 +343,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); // 3. Output processed tensors. if (gpu_output_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // Output result tensors (GPU). 
auto output_tensors = absl::make_unique>(); MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext( @@ -392,7 +406,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); ::mediapipe::Status TfLiteInferenceCalculator::Close(CalculatorContext* cc) { if (delegate_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { TfLiteGpuDelegateDelete(delegate_); gpu_data_in_.reset(); @@ -456,6 +471,10 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); RET_CHECK(interpreter_); +#if defined(__EMSCRIPTEN__) + interpreter_->SetNumThreads(1); +#endif // __EMSCRIPTEN__ + if (gpu_output_) { use_quantized_tensors_ = false; } else { @@ -471,7 +490,8 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); ::mediapipe::Status TfLiteInferenceCalculator::LoadDelegate( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // Configure and create the delegate. TfLiteGpuDelegateOptions options = TfLiteGpuDelegateOptionsDefault(); options.compile_options.precision_loss_allowed = 1; diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc index 5e9e9988e..906b4242f 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator.cc @@ -24,7 +24,8 @@ #include "mediapipe/framework/port/ret_check.h" #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/interpreter.h" -#if defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(__EMSCRIPTEN__) || defined(__ANDROID__) || \ + (defined(__APPLE__) && !TARGET_OS_OSX) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else @@ -66,8 +67,8 @@ class TfLiteTensorsToClassificationCalculator : public CalculatorBase { ::mediapipe::Status Close(CalculatorContext* cc) override; private: + ::mediapipe::TfLiteTensorsToClassificationCalculatorOptions options_; int top_k_ = 0; - double min_score_threshold_ = 0; std::unordered_map label_map_; bool label_map_loaded_ = false; }; @@ -93,15 +94,14 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); CalculatorContext* cc) { cc->SetOffset(TimestampDiff(0)); - auto options = cc->Options< + options_ = cc->Options< ::mediapipe::TfLiteTensorsToClassificationCalculatorOptions>(); - top_k_ = options.top_k(); - min_score_threshold_ = options.min_score_threshold(); - if (options.has_label_map_path()) { + top_k_ = options_.top_k(); + if (options_.has_label_map_path()) { std::string string_path; ASSIGN_OR_RETURN(string_path, - PathToResourceAsFile(options.label_map_path())); + PathToResourceAsFile(options_.label_map_path())); std::string label_map_string; MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string)); @@ -125,9 +125,11 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); RET_CHECK_EQ(input_tensors.size(), 1); const TfLiteTensor* raw_score_tensor = &input_tensors[0]; - RET_CHECK_EQ(raw_score_tensor->dims->size, 2); - RET_CHECK_EQ(raw_score_tensor->dims->data[0], 1); - int num_classes = raw_score_tensor->dims->data[1]; + int num_classes = 1; + for (int i = 0; i < raw_score_tensor->dims->size; ++i) { + num_classes *= 
raw_score_tensor->dims->data[i]; + } + if (label_map_loaded_) { RET_CHECK_EQ(num_classes, label_map_.size()); } @@ -135,7 +137,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); auto classification_list = absl::make_unique(); for (int i = 0; i < num_classes; ++i) { - if (raw_scores[i] < min_score_threshold_) { + if (options_.has_min_score_threshold() && + raw_scores[i] < options_.min_score_threshold()) { continue; } Classification* classification = classification_list->add_classification(); @@ -148,6 +151,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToClassificationCalculator); // Note that partial_sort will raise error when top_k_ > // classification_list->classification_size(). + CHECK_GE(classification_list->classification_size(), top_k_); auto raw_classification_list = classification_list->mutable_classification(); if (top_k_ > 0 && classification_list->classification_size() >= top_k_) { std::partial_sort(raw_classification_list->begin(), diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc index 8e790b00a..22d8b4d0e 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc @@ -27,7 +27,8 @@ #include "mediapipe/framework/port/ret_check.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "tensorflow/lite/delegates/gpu/gl/gl_buffer.h" #include "tensorflow/lite/delegates/gpu/gl/gl_program.h" @@ -55,12 +56,14 @@ constexpr int kNumCoordsPerBox = 4; namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; using ::tflite::gpu::gl::GlShader; #endif -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) typedef ::tflite::gpu::gl::GlBuffer GpuTensor; typedef ::tflite::gpu::gl::GlProgram GpuProgram; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -70,7 +73,7 @@ typedef id GpuProgram; namespace { -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) struct GPUData { GpuProgram decode_program; GpuProgram score_program; @@ -169,18 +172,21 @@ class TfLiteTensorsToDetectionsCalculator : public CalculatorBase { const int* detection_classes, std::vector* output_detections); Detection ConvertToDetection(float box_ymin, float box_xmin, float box_ymax, float box_xmax, float score, int class_id, - bool flip_vertically); + int detection_id, bool flip_vertically); int num_classes_ = 0; int num_boxes_ = 0; int num_coords_ = 0; + // Unique detection ID per new detection. 
+ static int next_detection_id_; std::set ignore_classes_; ::mediapipe::TfLiteTensorsToDetectionsCalculatorOptions options_; std::vector anchors_; bool side_packet_anchors_{}; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr gpu_data_; #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS @@ -193,6 +199,10 @@ class TfLiteTensorsToDetectionsCalculator : public CalculatorBase { }; REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); +// Initialization of non-const static member should happen outside class +// definition. +int TfLiteTensorsToDetectionsCalculator::next_detection_id_ = 0; + ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::GetContract( CalculatorContract* cc) { RET_CHECK(!cc->Inputs().GetTags().empty()); @@ -204,7 +214,7 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); cc->Inputs().Tag("TENSORS").Set>(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -222,7 +232,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); } if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS MP_RETURN_IF_ERROR([MPPMetalHelper updateContract:cc]); @@ -238,7 +249,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); if (cc->Inputs().HasTag("TENSORS_GPU")) { gpu_input_ = true; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_helper_ = [[MPPMetalHelper alloc] initWithCalculatorContext:cc]; @@ -400,7 +412,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); } ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::ProcessGPU( CalculatorContext* cc, std::vector* output_detections) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); RET_CHECK_GE(input_tensors.size(), 2); @@ -562,7 +575,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::Close( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) gpu_helper_.RunInGlContext([this] { gpu_data_.reset(); }); #elif defined(__APPLE__) && !TARGET_OS_OSX // iOS gpu_data_.reset(); @@ -672,7 +686,10 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); Detection detection = ConvertToDetection( detection_boxes[box_offset + 0], detection_boxes[box_offset + 1], detection_boxes[box_offset + 2], detection_boxes[box_offset + 3], - detection_scores[i], detection_classes[i], options_.flip_vertically()); + detection_scores[i], detection_classes[i], next_detection_id_, + options_.flip_vertically()); + // Increment to get next unique detection ID. + ++next_detection_id_; // Add keypoints. 
if (options_.num_keypoints() > 0) { auto* location_data = detection.mutable_location_data(); @@ -695,10 +712,11 @@ REGISTER_CALCULATOR(TfLiteTensorsToDetectionsCalculator); Detection TfLiteTensorsToDetectionsCalculator::ConvertToDetection( float box_ymin, float box_xmin, float box_ymax, float box_xmax, float score, - int class_id, bool flip_vertically) { + int class_id, int detection_id, bool flip_vertically) { Detection detection; detection.add_score(score); detection.add_label_id(class_id); + detection.set_detection_id(detection_id); LocationData* location_data = detection.mutable_location_data(); location_data->set_format(LocationData::RELATIVE_BOUNDING_BOX); @@ -715,7 +733,8 @@ Detection TfLiteTensorsToDetectionsCalculator::ConvertToDetection( ::mediapipe::Status TfLiteTensorsToDetectionsCalculator::GpuInit( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { gpu_data_ = absl::make_unique(); diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc index 1d646e4a3..996b1fa35 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator.cc @@ -21,7 +21,8 @@ namespace mediapipe { // A calculator for converting TFLite tensors from regression models into -// landmarks. +// landmarks. Note that if the landmarks in the tensor has more than 3 +// dimensions, only the first 3 dimensions will be converted to x,y,z. // // Input: // TENSORS - Vector of TfLiteTensor of type kTfLiteFloat32. Only the first @@ -122,9 +123,6 @@ REGISTER_CALCULATOR(TfLiteTensorsToLandmarksCalculator); num_values *= raw_tensor->dims->data[i]; } const int num_dimensions = num_values / num_landmarks_; - // Landmarks must have less than 3 dimensions. Otherwise please consider - // using matrix. 
- CHECK_LE(num_dimensions, 3); CHECK_GT(num_dimensions, 0); const float* raw_landmarks = raw_tensor->data.f; diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc index 16805a066..55279308a 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator.cc @@ -28,7 +28,8 @@ #include "mediapipe/util/resource_util.h" #include "tensorflow/lite/interpreter.h" -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) #include "mediapipe/gpu/gl_calculator_helper.h" #include "mediapipe/gpu/gl_simple_shaders.h" #include "mediapipe/gpu/shader_util.h" @@ -53,7 +54,8 @@ float Clamp(float val, float min, float max) { namespace mediapipe { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) using ::tflite::gpu::gl::CopyBuffer; using ::tflite::gpu::gl::CreateReadWriteRgbaImageTexture; using ::tflite::gpu::gl::CreateReadWriteShaderStorageBuffer; @@ -129,7 +131,8 @@ class TfLiteTensorsToSegmentationCalculator : public CalculatorBase { int tensor_channels_ = 0; bool use_gpu_ = false; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) mediapipe::GlCalculatorHelper gpu_helper_; std::unique_ptr mask_program_with_prev_; std::unique_ptr mask_program_no_prev_; @@ -159,7 +162,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); } // Inputs GPU. -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) if (cc->Inputs().HasTag("TENSORS_GPU")) { cc->Inputs().Tag("TENSORS_GPU").Set>(); use_gpu |= true; @@ -178,7 +182,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); if (cc->Outputs().HasTag("MASK")) { cc->Outputs().Tag("MASK").Set(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) if (cc->Outputs().HasTag("MASK_GPU")) { cc->Outputs().Tag("MASK_GPU").Set(); use_gpu |= true; @@ -186,7 +191,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); #endif // !MEDIAPIPE_DISABLE_GPU if (use_gpu) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(mediapipe::GlCalculatorHelper::UpdateContract(cc)); #endif // !MEDIAPIPE_DISABLE_GPU } @@ -199,7 +205,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); if (cc->Inputs().HasTag("TENSORS_GPU")) { use_gpu_ = true; -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.Open(cc)); #endif // !MEDIAPIPE_DISABLE_GPU } @@ -207,7 +214,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); MP_RETURN_IF_ERROR(LoadOptions(cc)); if (use_gpu_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status { 
MP_RETURN_IF_ERROR(InitGpu(cc)); @@ -224,7 +232,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Process( CalculatorContext* cc) { if (use_gpu_) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR( gpu_helper_.RunInGlContext([this, cc]() -> ::mediapipe::Status { MP_RETURN_IF_ERROR(ProcessGpu(cc)); @@ -240,7 +249,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::Close( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) gpu_helper_.RunInGlContext([this] { if (upsample_program_) glDeleteProgram(upsample_program_); upsample_program_ = 0; @@ -367,7 +377,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); if (cc->Inputs().Tag("TENSORS_GPU").IsEmpty()) { return ::mediapipe::OkStatus(); } -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) // Get input streams. const auto& input_tensors = cc->Inputs().Tag("TENSORS_GPU").Get>(); @@ -453,7 +464,8 @@ REGISTER_CALCULATOR(TfLiteTensorsToSegmentationCalculator); } void TfLiteTensorsToSegmentationCalculator::GlRender() { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) static const GLfloat square_vertices[] = { -1.0f, -1.0f, // bottom left 1.0f, -1.0f, // bottom right @@ -525,7 +537,8 @@ void TfLiteTensorsToSegmentationCalculator::GlRender() { ::mediapipe::Status TfLiteTensorsToSegmentationCalculator::InitGpu( CalculatorContext* cc) { -#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__APPLE__) +#if !defined(MEDIAPIPE_DISABLE_GPU) && !defined(__EMSCRIPTEN__) && \ + !defined(__APPLE__) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> ::mediapipe::Status { // A shader to process a segmentation tensor into an output mask, diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index 7bd06fe97..9f3f687b2 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -14,7 +14,7 @@ licenses(["notice"]) # Apache 2.0 -package(default_visibility = ["//visibility:private"]) +package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE"]) @@ -234,6 +234,7 @@ cc_library( "//mediapipe/framework/port:status", "//mediapipe/framework/port:vector", "//mediapipe/util:annotation_renderer", + "//mediapipe/util:render_data_cc_proto", ] + select({ "//mediapipe/gpu:disable_gpu": [], "//conditions:default": [ @@ -360,6 +361,16 @@ mediapipe_cc_proto_library( deps = [":landmark_projection_calculator_proto"], ) +mediapipe_cc_proto_library( + name = "landmarks_to_floats_calculator_cc_proto", + srcs = ["landmarks_to_floats_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":landmarks_to_floats_calculator_proto"], +) + mediapipe_cc_proto_library( name = "rect_transformation_calculator_cc_proto", srcs = ["rect_transformation_calculator.proto"], @@ -372,7 +383,12 @@ mediapipe_cc_proto_library( cc_library( name = "detections_to_rects_calculator", - srcs = ["detections_to_rects_calculator.cc"], + srcs = [ + 
"detections_to_rects_calculator.cc", + ], + hdrs = [ + "detections_to_rects_calculator.h", + ], visibility = ["//visibility:public"], deps = [ ":detections_to_rects_calculator_cc_proto", @@ -454,6 +470,17 @@ proto_library( ], ) +proto_library( + name = "labels_to_render_data_calculator_proto", + srcs = ["labels_to_render_data_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + "//mediapipe/util:color_proto", + "//mediapipe/util:render_data_proto", + ], +) + proto_library( name = "thresholding_calculator_proto", srcs = ["thresholding_calculator.proto"], @@ -483,6 +510,15 @@ proto_library( ], ) +proto_library( + name = "landmarks_to_floats_calculator_proto", + srcs = ["landmarks_to_floats_calculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_proto", + ], +) + proto_library( name = "rect_transformation_calculator_proto", srcs = ["rect_transformation_calculator.proto"], @@ -577,6 +613,26 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "labels_to_render_data_calculator", + srcs = ["labels_to_render_data_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":labels_to_render_data_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/framework/formats:classification_cc_proto", + "//mediapipe/framework/formats:video_stream_header", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", + "//mediapipe/util:color_cc_proto", + "//mediapipe/util:render_data_cc_proto", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + cc_library( name = "rect_to_render_data_calculator", srcs = ["rect_to_render_data_calculator.cc"], @@ -658,6 +714,22 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "landmarks_to_floats_calculator", + srcs = ["landmarks_to_floats_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":landmarks_to_floats_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:matrix", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "@eigen_archive//:eigen", + ], + alwayslink = 1, +) + cc_test( name = "detection_letterbox_removal_calculator_test", srcs = ["detection_letterbox_removal_calculator_test.cc"], @@ -714,6 +786,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":top_k_scores_calculator_cc_proto", + "//mediapipe/framework/formats:classification_cc_proto", "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", "//mediapipe/framework/port:statusor", @@ -750,3 +823,27 @@ cc_test( "//mediapipe/framework/port:status", ], ) + +mediapipe_cc_proto_library( + name = "labels_to_render_data_calculator_cc_proto", + srcs = ["labels_to_render_data_calculator.proto"], + cc_deps = [ + "//mediapipe/framework:calculator_cc_proto", + "//mediapipe/util:color_cc_proto", + ], + visibility = ["//visibility:public"], + deps = [":labels_to_render_data_calculator_proto"], +) + +cc_library( + name = "local_file_contents_calculator", + srcs = ["local_file_contents_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + ], + alwayslink = 1, +) diff 
--git a/mediapipe/calculators/util/annotation_overlay_calculator.cc b/mediapipe/calculators/util/annotation_overlay_calculator.cc index 5f5c53582..4b8776c3c 100644 --- a/mediapipe/calculators/util/annotation_overlay_calculator.cc +++ b/mediapipe/calculators/util/annotation_overlay_calculator.cc @@ -26,6 +26,7 @@ #include "mediapipe/framework/port/vector.h" #include "mediapipe/util/annotation_renderer.h" #include "mediapipe/util/color.pb.h" +#include "mediapipe/util/render_data.pb.h" #if !defined(MEDIAPIPE_DISABLE_GPU) #include "mediapipe/gpu/gl_calculator_helper.h" @@ -41,6 +42,8 @@ namespace { constexpr char kInputFrameTag[] = "INPUT_FRAME"; constexpr char kOutputFrameTag[] = "OUTPUT_FRAME"; +constexpr char kInputVectorTag[] = "VECTOR"; + constexpr char kInputFrameTagGpu[] = "INPUT_FRAME_GPU"; constexpr char kOutputFrameTagGpu[] = "OUTPUT_FRAME_GPU"; @@ -65,6 +68,9 @@ constexpr int kAnnotationBackgroundColor[] = {100, 101, 102}; // 2. RenderData proto on variable number of input streams. All the RenderData // at a particular timestamp is drawn on the image in the order of their // input streams. No tags required. +// 3. std::vector on variable number of input streams. RenderData +// objects at a particular timestamp are drawn on the image in order of the +// input vector items. These input streams are tagged with "VECTOR". // // Output: // 1. OUTPUT_FRAME or OUTPUT_FRAME_GPU: A rendered ImageFrame (or GpuBuffer). @@ -85,6 +91,8 @@ constexpr int kAnnotationBackgroundColor[] = {100, 101, 102}; // input_stream: "render_data_1" // input_stream: "render_data_2" // input_stream: "render_data_3" +// input_stream: "VECTOR:0:render_data_vec_0" +// input_stream: "VECTOR:1:render_data_vec_1" // output_stream: "OUTPUT_FRAME:decorated_frames" // options { // [mediapipe.AnnotationOverlayCalculatorOptions.ext] { @@ -99,6 +107,8 @@ constexpr int kAnnotationBackgroundColor[] = {100, 101, 102}; // input_stream: "render_data_1" // input_stream: "render_data_2" // input_stream: "render_data_3" +// input_stream: "VECTOR:0:render_data_vec_0" +// input_stream: "VECTOR:1:render_data_vec_1" // output_stream: "OUTPUT_FRAME_GPU:decorated_frames" // options { // [mediapipe.AnnotationOverlayCalculatorOptions.ext] { @@ -188,8 +198,16 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); } // Data streams to render. - for (int i = 0; i < num_render_streams; ++i) { - cc->Inputs().Index(i).Set(); + for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); + ++id) { + auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); + std::string tag = tag_and_index.first; + if (tag == kInputVectorTag) { + cc->Inputs().Get(id).Set>(); + } else if (tag.empty()) { + // Empty tag defaults to accepting a single object of RenderData type. + cc->Inputs().Get(id).Set(); + } } // Rendered image. @@ -285,12 +303,28 @@ REGISTER_CALCULATOR(AnnotationOverlayCalculator); renderer_->AdoptImage(image_mat.get()); // Render streams onto render target. 
- for (int i = 0; i < num_render_streams_; ++i) { - if (cc->Inputs().Index(i).IsEmpty()) { + for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); + ++id) { + auto tag_and_index = cc->Inputs().TagAndIndexFromId(id); + std::string tag = tag_and_index.first; + if (!tag.empty() && tag != kInputVectorTag) { continue; } - const RenderData& render_data = cc->Inputs().Index(i).Get(); - renderer_->RenderDataOnImage(render_data); + if (cc->Inputs().Get(id).IsEmpty()) { + continue; + } + if (tag.empty()) { + // Empty tag defaults to accepting a single object of RenderData type. + const RenderData& render_data = cc->Inputs().Get(id).Get(); + renderer_->RenderDataOnImage(render_data); + } else { + RET_CHECK_EQ(kInputVectorTag, tag); + const std::vector& render_data_vec = + cc->Inputs().Get(id).Get>(); + for (const RenderData& render_data : render_data_vec) { + renderer_->RenderDataOnImage(render_data); + } + } } if (use_gpu_) { diff --git a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc index 0fb2d30b8..7e8beadf1 100644 --- a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc +++ b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc @@ -19,8 +19,8 @@ #include "mediapipe/framework/port/status.h" #include "mediapipe/util/resource_util.h" -#if defined(MEDIAPIPE_LITE) || defined(__ANDROID__) || \ - (defined(__APPLE__) && !TARGET_OS_OSX) +#if defined(MEDIAPIPE_LITE) || defined(__EMSCRIPTEN__) || \ + defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX) #include "mediapipe/util/android/file/base/file.h" #include "mediapipe/util/android/file/base/helpers.h" #else diff --git a/mediapipe/calculators/util/detections_to_rects_calculator.cc b/mediapipe/calculators/util/detections_to_rects_calculator.cc index bb5ba6d4d..91a400ca1 100644 --- a/mediapipe/calculators/util/detections_to_rects_calculator.cc +++ b/mediapipe/calculators/util/detections_to_rects_calculator.cc @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+#include "mediapipe/calculators/util/detections_to_rects_calculator.h" + #include #include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h" @@ -24,8 +26,6 @@ namespace mediapipe { -using mediapipe::DetectionsToRectsCalculatorOptions; - namespace { constexpr char kDetectionTag[] = "DETECTION"; @@ -36,7 +36,10 @@ constexpr char kNormRectTag[] = "NORM_RECT"; constexpr char kRectsTag[] = "RECTS"; constexpr char kNormRectsTag[] = "NORM_RECTS"; -::mediapipe::Status DetectionToRect(const Detection& detection, Rect* rect) { +} // namespace + +::mediapipe::Status DetectionsToRectsCalculator::DetectionToRect( + const Detection& detection, Rect* rect) { const LocationData location_data = detection.location_data(); RET_CHECK(location_data.format() == LocationData::BOUNDING_BOX) << "Only Detection with formats of BOUNDING_BOX can be converted to Rect"; @@ -48,8 +51,8 @@ constexpr char kNormRectsTag[] = "NORM_RECTS"; return ::mediapipe::OkStatus(); } -::mediapipe::Status DetectionToNormalizedRect(const Detection& detection, - NormalizedRect* rect) { +::mediapipe::Status DetectionsToRectsCalculator::DetectionToNormalizedRect( + const Detection& detection, NormalizedRect* rect) { const LocationData location_data = detection.location_data(); RET_CHECK(location_data.format() == LocationData::RELATIVE_BOUNDING_BOX) << "Only Detection with formats of RELATIVE_BOUNDING_BOX can be " @@ -63,79 +66,6 @@ constexpr char kNormRectsTag[] = "NORM_RECTS"; return ::mediapipe::OkStatus(); } -// Wraps around an angle in radians to within -M_PI and M_PI. -inline float NormalizeRadians(float angle) { - return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI)); -} - -} // namespace - -// A calculator that converts Detection proto to Rect proto. -// -// Detection is the format for encoding one or more detections in an image. -// The input can be a single Detection or std::vector. The output can -// be either a single Rect or NormalizedRect, or std::vector or -// std::vector. If Rect is used, the LocationData format is -// expected to be BOUNDING_BOX, and if NormalizedRect is used it is expected to -// be RELATIVE_BOUNDING_BOX. -// -// When the input is std::vector and the output is a Rect or -// NormalizedRect, only the first detection is converted. When the input is a -// single Detection and the output is a std::vector or -// std::vector, the output is a vector of size 1. -// -// Inputs: -// -// One of the following: -// DETECTION: A Detection proto. -// DETECTIONS: An std::vector. -// -// IMAGE_SIZE (optional): A std::pair represention image width and -// height. This is required only when rotation needs to be computed (see -// calculator options). -// -// Output: -// One of the following: -// RECT: A Rect proto. -// NORM_RECT: A NormalizedRect proto. -// RECTS: An std::vector. -// NORM_RECTS: An std::vector. 
-// -// Example config: -// node { -// calculator: "DetectionsToRectsCalculator" -// input_stream: "DETECTIONS:detections" -// input_stream: "IMAGE_SIZE:image_size" -// output_stream: "NORM_RECT:rect" -// options: { -// [mediapipe.DetectionsToRectCalculatorOptions.ext] { -// rotation_vector_start_keypoint_index: 0 -// rotation_vector_end_keypoint_index: 2 -// rotation_vector_target_angle_degrees: 90 -// output_zero_rect_for_empty_detections: true -// } -// } -// } -class DetectionsToRectsCalculator : public CalculatorBase { - public: - static ::mediapipe::Status GetContract(CalculatorContract* cc); - - ::mediapipe::Status Open(CalculatorContext* cc) override; - ::mediapipe::Status Process(CalculatorContext* cc) override; - - private: - float ComputeRotation(const Detection& detection, - const std::pair image_size); - - DetectionsToRectsCalculatorOptions options_; - int start_keypoint_index_; - int end_keypoint_index_; - float target_angle_; // In radians. - bool rotate_; - bool output_zero_rect_for_empty_detections_; -}; -REGISTER_CALCULATOR(DetectionsToRectsCalculator); - ::mediapipe::Status DetectionsToRectsCalculator::GetContract( CalculatorContract* cc) { RET_CHECK(cc->Inputs().HasTag(kDetectionTag) ^ @@ -232,6 +162,13 @@ REGISTER_CALCULATOR(DetectionsToRectsCalculator); .Tag(kNormRectTag) .AddPacket(MakePacket().At(cc->InputTimestamp())); } + if (cc->Outputs().HasTag(kNormRectsTag)) { + auto rect_vector = absl::make_unique>(); + rect_vector->emplace_back(NormalizedRect()); + cc->Outputs() + .Tag(kNormRectsTag) + .Add(rect_vector.release(), cc->InputTimestamp()); + } } return ::mediapipe::OkStatus(); } @@ -312,4 +249,6 @@ float DetectionsToRectsCalculator::ComputeRotation( return NormalizeRadians(rotation); } +REGISTER_CALCULATOR(DetectionsToRectsCalculator); + } // namespace mediapipe diff --git a/mediapipe/calculators/util/detections_to_rects_calculator.h b/mediapipe/calculators/util/detections_to_rects_calculator.h new file mode 100644 index 000000000..82b9f7bcc --- /dev/null +++ b/mediapipe/calculators/util/detections_to_rects_calculator.h @@ -0,0 +1,105 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef MEDIAPIPE_CALCULATORS_UTIL_DETECTIONS_TO_RECTS_CALCULATOR_H_ +#define MEDIAPIPE_CALCULATORS_UTIL_DETECTIONS_TO_RECTS_CALCULATOR_H_ + +#include + +#include "mediapipe/calculators/util/detections_to_rects_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_options.pb.h" +#include "mediapipe/framework/formats/detection.pb.h" +#include "mediapipe/framework/formats/location_data.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/framework/port/status.h" + +namespace mediapipe { + +// A calculator that converts Detection proto to Rect proto. +// +// Detection is the format for encoding one or more detections in an image. +// The input can be a single Detection or std::vector. 
The output can +// be either a single Rect or NormalizedRect, or std::vector or +// std::vector. If Rect is used, the LocationData format is +// expected to be BOUNDING_BOX, and if NormalizedRect is used it is expected to +// be RELATIVE_BOUNDING_BOX. +// +// When the input is std::vector and the output is a Rect or +// NormalizedRect, only the first detection is converted. When the input is a +// single Detection and the output is a std::vector or +// std::vector, the output is a vector of size 1. +// +// Inputs: +// +// One of the following: +// DETECTION: A Detection proto. +// DETECTIONS: An std::vector. +// +// IMAGE_SIZE (optional): A std::pair represention image width and +// height. This is required only when rotation needs to be computed (see +// calculator options). +// +// Output: +// One of the following: +// RECT: A Rect proto. +// NORM_RECT: A NormalizedRect proto. +// RECTS: An std::vector. +// NORM_RECTS: An std::vector. +// +// Example config: +// node { +// calculator: "DetectionsToRectsCalculator" +// input_stream: "DETECTIONS:detections" +// input_stream: "IMAGE_SIZE:image_size" +// output_stream: "NORM_RECT:rect" +// options: { +// [mediapipe.DetectionsToRectCalculatorOptions.ext] { +// rotation_vector_start_keypoint_index: 0 +// rotation_vector_end_keypoint_index: 2 +// rotation_vector_target_angle_degrees: 90 +// output_zero_rect_for_empty_detections: true +// } +// } +// } +class DetectionsToRectsCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc); + + ::mediapipe::Status Open(CalculatorContext* cc) override; + ::mediapipe::Status Process(CalculatorContext* cc) override; + + protected: + virtual float ComputeRotation(const ::mediapipe::Detection& detection, + const std::pair image_size); + virtual ::mediapipe::Status DetectionToRect( + const ::mediapipe::Detection& detection, ::mediapipe::Rect* rect); + virtual ::mediapipe::Status DetectionToNormalizedRect( + const ::mediapipe::Detection& detection, + ::mediapipe::NormalizedRect* rect); + + static inline float NormalizeRadians(float angle) { + return angle - 2 * M_PI * std::floor((angle - (-M_PI)) / (2 * M_PI)); + } + + ::mediapipe::DetectionsToRectsCalculatorOptions options_; + int start_keypoint_index_; + int end_keypoint_index_; + float target_angle_ = 0.0f; // In radians. + bool rotate_; + bool output_zero_rect_for_empty_detections_; +}; + +} // namespace mediapipe +#endif // MEDIAPIPE_CALCULATORS_UTIL_DETECTIONS_TO_RECTS_CALCULATOR_H_ diff --git a/mediapipe/calculators/util/labels_to_render_data_calculator.cc b/mediapipe/calculators/util/labels_to_render_data_calculator.cc new file mode 100644 index 000000000..a7f517291 --- /dev/null +++ b/mediapipe/calculators/util/labels_to_render_data_calculator.cc @@ -0,0 +1,181 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
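Stepping back to the detections_to_rects_calculator.h header added above: ComputeRotation() is now a protected virtual method and NormalizeRadians() a protected static helper, so subclasses can derive the rect rotation from two detection keypoints. The following is only a rough sketch of what such an override could look like; the keypoint handling and the atan2 sign convention are assumptions for illustration, not code taken from this change:

```cpp
// Hypothetical subclass override; the member names come from the header
// above, but the body is illustrative only.
float MyDetectionsToRectsCalculator::ComputeRotation(
    const ::mediapipe::Detection& detection,
    const std::pair<int, int> image_size) {
  const auto& location = detection.location_data();
  // Convert the two normalized keypoints into pixel coordinates so the angle
  // respects the image's aspect ratio.
  const float x0 = location.relative_keypoints(start_keypoint_index_).x() *
                   image_size.first;
  const float y0 = location.relative_keypoints(start_keypoint_index_).y() *
                   image_size.second;
  const float x1 = location.relative_keypoints(end_keypoint_index_).x() *
                   image_size.first;
  const float y1 = location.relative_keypoints(end_keypoint_index_).y() *
                   image_size.second;
  // Rotate so the start->end keypoint vector lines up with target_angle_.
  // The y term is negated because image y grows downward.
  return NormalizeRadians(target_angle_ - std::atan2(-(y1 - y0), x1 - x0));
}
```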
+
+#include <cmath>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "mediapipe/calculators/util/labels_to_render_data_calculator.pb.h"
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/formats/classification.pb.h"
+#include "mediapipe/framework/formats/video_stream_header.h"
+#include "mediapipe/framework/port/ret_check.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/statusor.h"
+#include "mediapipe/util/color.pb.h"
+#include "mediapipe/util/render_data.pb.h"
+
+namespace mediapipe {
+
+constexpr float kFontHeightScale = 1.25f;
+
+// A calculator that takes in pairs of labels and scores, or classifications,
+// and generates render data. Either both "LABELS" and "SCORES" or
+// "CLASSIFICATIONS" must be present.
+//
+// Usage example:
+// node {
+//   calculator: "LabelsToRenderDataCalculator"
+//   input_stream: "LABELS:labels"
+//   input_stream: "SCORES:scores"
+//   input_stream: "VIDEO_PRESTREAM:video_header"
+//   output_stream: "RENDER_DATA:render_data"
+//   options {
+//     [LabelsToRenderDataCalculatorOptions.ext] {
+//       color { r: 255 g: 0 b: 0 }
+//       color { r: 0 g: 255 b: 0 }
+//       color { r: 0 g: 0 b: 255 }
+//       thickness: 2.0
+//       font_height_px: 20
+//       max_num_labels: 3
+//       font_face: 1
+//       location: TOP_LEFT
+//     }
+//   }
+// }
+class LabelsToRenderDataCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc);
+  ::mediapipe::Status Open(CalculatorContext* cc) override;
+  ::mediapipe::Status Process(CalculatorContext* cc) override;
+
+ private:
+  LabelsToRenderDataCalculatorOptions options_;
+  int num_colors_ = 0;
+  int video_width_ = 0;
+  int video_height_ = 0;
+  int label_height_px_ = 0;
+  int label_left_px_ = 0;
+};
+REGISTER_CALCULATOR(LabelsToRenderDataCalculator);
+
+::mediapipe::Status LabelsToRenderDataCalculator::GetContract(
+    CalculatorContract* cc) {
+  if (cc->Inputs().HasTag("CLASSIFICATIONS")) {
+    cc->Inputs().Tag("CLASSIFICATIONS").Set<ClassificationList>();
+  } else {
+    RET_CHECK(cc->Inputs().HasTag("LABELS"))
+        << "Must provide input stream \"LABELS\"";
+    cc->Inputs().Tag("LABELS").Set<std::vector<std::string>>();
+    if (cc->Inputs().HasTag("SCORES")) {
+      cc->Inputs().Tag("SCORES").Set<std::vector<float>>();
+    }
+  }
+  if (cc->Inputs().HasTag("VIDEO_PRESTREAM")) {
+    cc->Inputs().Tag("VIDEO_PRESTREAM").Set<VideoHeader>();
+  }
+  cc->Outputs().Tag("RENDER_DATA").Set<RenderData>();
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status LabelsToRenderDataCalculator::Open(CalculatorContext* cc) {
+  options_ = cc->Options<LabelsToRenderDataCalculatorOptions>();
+  num_colors_ = options_.color_size();
+  label_height_px_ = std::ceil(options_.font_height_px() * kFontHeightScale);
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status LabelsToRenderDataCalculator::Process(
+    CalculatorContext* cc) {
+  if (cc->Inputs().HasTag("VIDEO_PRESTREAM") &&
+      cc->InputTimestamp() == Timestamp::PreStream()) {
+    const VideoHeader& video_header =
+        cc->Inputs().Tag("VIDEO_PRESTREAM").Get<VideoHeader>();
+    video_width_ = video_header.width;
+    video_height_ = video_header.height;
+    return ::mediapipe::OkStatus();
+  } else {
+    CHECK_EQ(options_.location(), LabelsToRenderDataCalculatorOptions::TOP_LEFT)
+        << "Only TOP_LEFT is supported without VIDEO_PRESTREAM.";
+  }
+
+  std::vector<std::string> labels;
+  std::vector<float> scores;
+  if (cc->Inputs().HasTag("CLASSIFICATIONS")) {
+    const ClassificationList& classifications =
+        cc->Inputs().Tag("CLASSIFICATIONS").Get<ClassificationList>();
+    labels.resize(classifications.classification_size());
+    scores.resize(classifications.classification_size());
+    for (int i = 0; i < classifications.classification_size(); ++i) {
labels[i] = classifications.classification(i).label(); + scores[i] = classifications.classification(i).score(); + } + } else { + const std::vector& label_vector = + cc->Inputs().Tag("LABELS").Get>(); + std::vector score_vector; + if (cc->Inputs().HasTag("SCORES")) { + score_vector = cc->Inputs().Tag("SCORES").Get>(); + } + CHECK_EQ(label_vector.size(), score_vector.size()); + labels.resize(label_vector.size()); + scores.resize(label_vector.size()); + for (int i = 0; i < label_vector.size(); ++i) { + labels[i] = label_vector[i]; + scores[i] = score_vector[i]; + } + } + + RenderData render_data; + int num_label = std::min((int)labels.size(), options_.max_num_labels()); + int label_baseline_px = options_.vertical_offset_px(); + if (options_.location() == LabelsToRenderDataCalculatorOptions::TOP_LEFT) { + label_baseline_px += label_height_px_; + } else if (options_.location() == + LabelsToRenderDataCalculatorOptions::BOTTOM_LEFT) { + label_baseline_px += video_height_ - label_height_px_ * (num_label - 1); + } + label_left_px_ = options_.horizontal_offset_px(); + for (int i = 0; i < num_label; ++i) { + auto* label_annotation = render_data.add_render_annotations(); + label_annotation->set_thickness(options_.thickness()); + if (num_colors_ > 0) { + *(label_annotation->mutable_color()) = options_.color(i % num_colors_); + } else { + label_annotation->mutable_color()->set_r(255); + label_annotation->mutable_color()->set_g(0); + label_annotation->mutable_color()->set_b(0); + } + + auto* text = label_annotation->mutable_text(); + std::string display_text = labels[i]; + if (cc->Inputs().HasTag("SCORES")) { + absl::StrAppend(&display_text, ":", scores[i]); + } + text->set_display_text(display_text); + text->set_font_height(options_.font_height_px()); + text->set_left(label_left_px_); + text->set_baseline(label_baseline_px + i * label_height_px_); + text->set_font_face(options_.font_face()); + } + cc->Outputs() + .Tag("RENDER_DATA") + .AddPacket(MakePacket(render_data).At(cc->InputTimestamp())); + + return ::mediapipe::OkStatus(); +} +} // namespace mediapipe diff --git a/mediapipe/calculators/util/labels_to_render_data_calculator.proto b/mediapipe/calculators/util/labels_to_render_data_calculator.proto new file mode 100644 index 000000000..cd98934a5 --- /dev/null +++ b/mediapipe/calculators/util/labels_to_render_data_calculator.proto @@ -0,0 +1,62 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; + +package mediapipe; + +import "mediapipe/framework/calculator.proto"; +import "mediapipe/util/color.proto"; + +message LabelsToRenderDataCalculatorOptions { + extend CalculatorOptions { + optional LabelsToRenderDataCalculatorOptions ext = 271660364; + } + + // Colors for drawing the label(s). + repeated Color color = 1; + + // Thickness for drawing the label(s). + optional double thickness = 2 [default = 2]; + + // The font height in absolute pixels. 
+ optional int32 font_height_px = 3 [default = 50]; + + // The offset of the starting text in horizontal direction in absolute pixels. + optional int32 horizontal_offset_px = 7 [default = 0]; + // The offset of the starting text in vertical direction in absolute pixels. + optional int32 vertical_offset_px = 8 [default = 0]; + + // The maximum number of labels to display. + optional int32 max_num_labels = 4 [default = 1]; + + // Specifies the font for the text. Font must be one of the following from + // OpenCV: + // cv::FONT_HERSHEY_SIMPLEX (0) + // cv::FONT_HERSHEY_PLAIN (1) + // cv::FONT_HERSHEY_DUPLEX (2) + // cv::FONT_HERSHEY_COMPLEX (3) + // cv::FONT_HERSHEY_TRIPLEX (4) + // cv::FONT_HERSHEY_COMPLEX_SMALL (5) + // cv::FONT_HERSHEY_SCRIPT_SIMPLEX (6) + // cv::FONT_HERSHEY_SCRIPT_COMPLEX (7) + optional int32 font_face = 5 [default = 0]; + + // Label location. + enum Location { + TOP_LEFT = 0; + BOTTOM_LEFT = 1; + } + optional Location location = 6 [default = TOP_LEFT]; +} diff --git a/mediapipe/calculators/util/landmarks_to_floats_calculator.cc b/mediapipe/calculators/util/landmarks_to_floats_calculator.cc new file mode 100644 index 000000000..09ab4b575 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_to_floats_calculator.cc @@ -0,0 +1,138 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "Eigen/Core" +#include "mediapipe/calculators/util/landmarks_to_floats_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/matrix.h" +#include "mediapipe/framework/port/ret_check.h" + +namespace mediapipe { + +namespace { + +constexpr char kLandmarksTag[] = "NORM_LANDMARKS"; +constexpr char kFloatsTag[] = "FLOATS"; +constexpr char kMatrixTag[] = "MATRIX"; + +} // namespace + +// Converts a vector of landmarks to a vector of floats or a matrix. +// Input: +// NORM_LANDMARKS: An std::vector. +// +// Output: +// FLOATS(optional): A vector of floats from flattened landmarks. +// MATRIX(optional): A matrix of floats of the landmarks. 
+// +// Usage example: +// node { +// calculator: "LandmarksToFloatsCalculator" +// input_stream: "NORM_LANDMARKS:landmarks" +// output_stream: "MATRIX:landmark_matrix" +// } +class LandmarksToFloatsCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Tag(kLandmarksTag).Set>(); + RET_CHECK(cc->Outputs().HasTag(kFloatsTag) || + cc->Outputs().HasTag(kMatrixTag)); + if (cc->Outputs().HasTag(kFloatsTag)) { + cc->Outputs().Tag(kFloatsTag).Set>(); + } + if (cc->Outputs().HasTag(kMatrixTag)) { + cc->Outputs().Tag(kMatrixTag).Set(); + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + const auto& options = + cc->Options<::mediapipe::LandmarksToFloatsCalculatorOptions>(); + num_dimensions_ = options.num_dimensions(); + // Currently number of dimensions must be within [1, 3]. + RET_CHECK_GE(num_dimensions_, 1); + RET_CHECK_LE(num_dimensions_, 3); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + // Only process if there's input landmarks. + if (cc->Inputs().Tag(kLandmarksTag).IsEmpty()) { + return ::mediapipe::OkStatus(); + } + + const auto& input_landmarks = + cc->Inputs().Tag(kLandmarksTag).Get>(); + + if (cc->Outputs().HasTag(kFloatsTag)) { + auto output_floats = absl::make_unique>(); + for (const auto& landmark : input_landmarks) { + output_floats->emplace_back(landmark.x()); + if (num_dimensions_ > 1) { + output_floats->emplace_back(landmark.y()); + } + if (num_dimensions_ > 2) { + output_floats->emplace_back(landmark.z()); + } + } + + cc->Outputs() + .Tag(kFloatsTag) + .Add(output_floats.release(), cc->InputTimestamp()); + } else { + auto output_matrix = absl::make_unique(); + output_matrix->setZero(num_dimensions_, input_landmarks.size()); + for (int i = 0; i < input_landmarks.size(); ++i) { + (*output_matrix)(0, i) = input_landmarks[i].x(); + if (num_dimensions_ > 1) { + (*output_matrix)(1, i) = input_landmarks[i].y(); + } + if (num_dimensions_ > 2) { + (*output_matrix)(2, i) = input_landmarks[i].z(); + } + } + cc->Outputs() + .Tag(kMatrixTag) + .Add(output_matrix.release(), cc->InputTimestamp()); + } + return ::mediapipe::OkStatus(); + } + + private: + int num_dimensions_ = 0; +}; +REGISTER_CALCULATOR(LandmarksToFloatsCalculator); + +} // namespace mediapipe diff --git a/mediapipe/calculators/util/landmarks_to_floats_calculator.proto b/mediapipe/calculators/util/landmarks_to_floats_calculator.proto new file mode 100644 index 000000000..310251e75 --- /dev/null +++ b/mediapipe/calculators/util/landmarks_to_floats_calculator.proto @@ -0,0 +1,28 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
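As a quick illustration of how the LandmarksToFloatsCalculator above and the LandmarksToFloatsCalculatorOptions message below fit together, a graph node could be configured as follows. This is a sketch only; the stream names and the choice of num_dimensions are illustrative, not part of this change:

```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Builds a one-node config that copies NormalizedLandmark x/y/z values into a
// Matrix with one column per landmark. Stream names are placeholders.
::mediapipe::CalculatorGraphConfig MakeLandmarksToMatrixConfig() {
  return ::mediapipe::ParseTextProtoOrDie<::mediapipe::CalculatorGraphConfig>(
      R"(
        input_stream: "landmarks"
        node {
          calculator: "LandmarksToFloatsCalculator"
          input_stream: "NORM_LANDMARKS:landmarks"
          output_stream: "MATRIX:landmark_matrix"
          options: {
            [mediapipe.LandmarksToFloatsCalculatorOptions.ext] {
              num_dimensions: 3
            }
          }
        }
      )");
}
```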
+
+syntax = "proto2";
+
+package mediapipe;
+
+import "mediapipe/framework/calculator.proto";
+
+message LandmarksToFloatsCalculatorOptions {
+  extend CalculatorOptions {
+    optional LandmarksToFloatsCalculatorOptions ext = 274035660;
+  }
+
+  // Number of dimensions to convert. Must be within [1, 3].
+  optional int32 num_dimensions = 1 [default = 2];
+}
diff --git a/mediapipe/calculators/util/local_file_contents_calculator.cc b/mediapipe/calculators/util/local_file_contents_calculator.cc
new file mode 100644
index 000000000..9f8d17724
--- /dev/null
+++ b/mediapipe/calculators/util/local_file_contents_calculator.cc
@@ -0,0 +1,57 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+#include <utility>
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/file_helpers.h"
+#include "mediapipe/framework/port/status.h"
+
+namespace mediapipe {
+// The calculator takes the path to the local file as an input side packet and
+// outputs the contents of that file.
+//
+// Example config:
+// node {
+//   calculator: "LocalFileContentsCalculator"
+//   input_side_packet: "FILE_PATH:file_path"
+//   output_side_packet: "CONTENTS:contents"
+// }
+class LocalFileContentsCalculator : public CalculatorBase {
+ public:
+  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
+    cc->InputSidePackets().Tag("FILE_PATH").Set<std::string>();
+    cc->OutputSidePackets().Tag("CONTENTS").Set<std::string>();
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Open(CalculatorContext* cc) override {
+    std::string contents;
+    MP_RETURN_IF_ERROR(mediapipe::file::GetContents(
+        cc->InputSidePackets().Tag("FILE_PATH").Get<std::string>(),
+        &contents));
+    cc->OutputSidePackets()
+        .Tag("CONTENTS")
+        .Set(MakePacket<std::string>(std::move(contents)));
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext* cc) override {
+    return ::mediapipe::OkStatus();
+  }
+};
+
+REGISTER_CALCULATOR(LocalFileContentsCalculator);
+
+}  // namespace mediapipe
diff --git a/mediapipe/calculators/util/top_k_scores_calculator.cc b/mediapipe/calculators/util/top_k_scores_calculator.cc
index 18f2eec62..bc8d30f87 100644
--- a/mediapipe/calculators/util/top_k_scores_calculator.cc
+++ b/mediapipe/calculators/util/top_k_scores_calculator.cc
@@ -23,13 +23,14 @@
 #include "mediapipe/calculators/util/top_k_scores_calculator.pb.h"
 #include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/formats/classification.pb.h"
 #include "mediapipe/framework/port/ret_check.h"
 #include "mediapipe/framework/port/status.h"
 #include "mediapipe/framework/port/statusor.h"
 #include "mediapipe/util/resource_util.h"
-#if defined(MEDIAPIPE_LITE) || defined(__ANDROID__) || \
-    (defined(__APPLE__) && !TARGET_OS_OSX)
+#if defined(MEDIAPIPE_LITE) || defined(__EMSCRIPTEN__) || \
+    defined(__ANDROID__) || (defined(__APPLE__) && !TARGET_OS_OSX)
+#include "mediapipe/util/android/file/base/file.h"
+#include "mediapipe/util/android/file/base/helpers.h"
+#else
@@ -37,8 +38,10 @@
 #endif
 namespace mediapipe {
+
 // A calculator that takes a vector of scores and returns the indexes, scores,
-// labels of the top k elements.
+// labels of the top k elements, classification protos, and a summary
+// std::string (in CSV format).
 //
 // Usage example:
 // node {
@@ -47,6 +50,8 @@ namespace mediapipe {
 //   output_stream: "TOP_K_INDEXES:top_k_indexes"
 //   output_stream: "TOP_K_SCORES:top_k_scores"
 //   output_stream: "TOP_K_LABELS:top_k_labels"
+//   output_stream: "CLASSIFICATIONS:top_k_classes"
+//   output_stream: "SUMMARY:summary"
 //   options: {
 //     [mediapipe.TopKScoresCalculatorOptions.ext] {
 //       top_k: 5
@@ -69,6 +74,7 @@ class TopKScoresCalculator : public CalculatorBase {
   int top_k_ = -1;
   float threshold_ = 0.0;
   std::unordered_map<int, std::string> label_map_;
+  bool label_map_loaded_ = false;
 };
 REGISTER_CALCULATOR(TopKScoresCalculator);
@@ -84,6 +90,12 @@ REGISTER_CALCULATOR(TopKScoresCalculator);
   if (cc->Outputs().HasTag("TOP_K_LABELS")) {
     cc->Outputs().Tag("TOP_K_LABELS").Set<std::vector<std::string>>();
   }
+  if (cc->Outputs().HasTag("CLASSIFICATIONS")) {
+    cc->Outputs().Tag("CLASSIFICATIONS").Set<ClassificationList>();
+  }
+  if (cc->Outputs().HasTag("SUMMARY")) {
+    cc->Outputs().Tag("SUMMARY").Set<std::string>();
+  }
   return ::mediapipe::OkStatus();
 }
@@ -149,7 +161,7 @@ REGISTER_CALCULATOR(TopKScoresCalculator);
   reverse(top_k_indexes.begin(), top_k_indexes.end());
   reverse(top_k_scores.begin(), top_k_scores.end());
-  if (cc->Outputs().HasTag("TOP_K_LABELS")) {
+  if (label_map_loaded_) {
     for (int index : top_k_indexes) {
       top_k_labels.push_back(label_map_[index]);
     }
@@ -172,6 +184,35 @@ REGISTER_CALCULATOR(TopKScoresCalculator);
         .AddPacket(MakePacket<std::vector<std::string>>(top_k_labels)
                        .At(cc->InputTimestamp()));
   }
+
+  if (cc->Outputs().HasTag("SUMMARY")) {
+    std::vector<std::string> results;
+    for (int index = 0; index < top_k_indexes.size(); ++index) {
+      if (label_map_loaded_) {
+        results.push_back(
+            absl::StrCat(top_k_labels[index], ":", top_k_scores[index]));
+      } else {
+        results.push_back(
+            absl::StrCat(top_k_indexes[index], ":", top_k_scores[index]));
+      }
+    }
+    cc->Outputs().Tag("SUMMARY").AddPacket(
+        MakePacket<std::string>(absl::StrJoin(results, ","))
+            .At(cc->InputTimestamp()));
+  }
+
+  if (cc->Outputs().HasTag("CLASSIFICATIONS")) {
+    auto classification_list = absl::make_unique<ClassificationList>();
+    for (int index = 0; index < top_k_indexes.size(); ++index) {
+      Classification* classification =
+          classification_list->add_classification();
+      classification->set_index(top_k_indexes[index]);
+      classification->set_score(top_k_scores[index]);
+      if (label_map_loaded_) {
+        classification->set_label(top_k_labels[index]);
+      }
+    }
+    // Emit the assembled list; without this the CLASSIFICATIONS output stream
+    // would never receive a packet.
+    cc->Outputs()
+        .Tag("CLASSIFICATIONS")
+        .Add(classification_list.release(), cc->InputTimestamp());
+  }
   return ::mediapipe::OkStatus();
 }
@@ -188,6 +229,7 @@ REGISTER_CALCULATOR(TopKScoresCalculator);
   while (std::getline(stream, line)) {
     label_map_[i++] = line;
   }
+  label_map_loaded_ = true;
   return ::mediapipe::OkStatus();
 }
diff --git a/mediapipe/docs/android_archive_library.md b/mediapipe/docs/android_archive_library.md
new file mode 100644
index 000000000..8c7c42b91
--- /dev/null
+++ b/mediapipe/docs/android_archive_library.md
@@ -0,0 +1,130 @@
+## MediaPipe Android Archive Library
+
+***Experimental Only***
+
+The MediaPipe Android archive library is a convenient way to use MediaPipe with
+Android Studio and Gradle. MediaPipe doesn't publish a general AAR that can be
+used by all projects. Instead, developers need to add a mediapipe_aar() target
+to generate a custom AAR file for their own projects. This is necessary to
+include specific resources such as the MediaPipe calculators needed for each
+project.
+
+### Steps to build a MediaPipe AAR
+
+1.
Create a mediapipe_aar() target. + + In the MediaPipe directory, create a new mediapipe_aar() target in a BUILD + file. You need to figure out what calculators are used in the graph and + provide the calculator dependencies to the mediapipe_aar(). For example, to + build an AAR for [face detection gpu](./face_detection_mobile_gpu.md), you + can put the following code into + mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/BUILD. + + ``` + load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar") + + mediapipe_aar( + name = "mp_face_detection_aar", + calculators = ["//mediapipe/graphs/face_detection:mobile_calculators"], + ) + ``` + +2. Run the Bazel build command to generate the AAR. + + ```bash + bazel build -c opt --fat_apk_cpu=arm64-v8a,armeabi-v7a //path/to/the/aar/build/file:aar_name + ``` + + For the face detection AAR target we made in the step 1, run: + + ```bash + bazel build -c opt --fat_apk_cpu=arm64-v8a,armeabi-v7a \ + //mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mp_face_detection_aar + + # It should print: + # Target //mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example:mp_face_detection_aar up-to-date: + # bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/mp_face_detection_aar.aar + ``` + +3. (Optional) Save the AAR to your preferred location. + + ```bash + cp bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/mp_face_detection_aar.aar + /absolute/path/to/your/preferred/location + ``` + +### Steps to use a MediaPipe AAR in Android Studio with Gradle + +1. Start Android Studio and go to your project. + +2. Copy the AAR into app/libs. + + ```bash + cp bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/aar_example/mp_face_detection_aar.aar + /path/to/your/app/libs/ + ``` + + ![Screenshot](images/mobile/aar_location.png) + +3. Make app/src/main/assets and copy assets (graph, model, and etc) into + app/src/main/assets. + + Build the MediaPipe binary graph and copy the assets into + app/src/main/assets, e.g., for the face detection graph, you need to build + and copy + [the binary graph](https://github.com/google/mediapipe/blob/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/BUILD#L41), + [the tflite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite), + and + [the label map](https://github.com/google/mediapipe/blob/master/mediapipe/models/face_detection_front_labelmap.txt). + + ```bash + bazel build -c opt mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu:binary_graph + cp bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectiongpu/facedetectiongpu.binarypb /path/to/your/app/src/main/assets/ + cp mediapipe/models/face_detection_front.tflite /path/to/your/app/src/main/assets/ + cp mediapipe/models/face_detection_front_labelmap.txt /path/to/your/app/src/main/assets/ + ``` + + ![Screenshot](images/mobile/assets_location.png) + +4. Make app/src/main/jniLibs and copy OpenCV JNI libraries into + app/src/main/jniLibs. + + MediaPipe depends on OpenCV, you will need to copy the precompiled OpenCV so + files into app/src/main/jniLibs. 
You can download the official OpenCV + Android SDK from + [here](https://github.com/opencv/opencv/releases/download/4.1.0/opencv-4.1.0-android-sdk.zip) + and run: + + ```bash + cp -R ~/Downloads/OpenCV-android-sdk/sdk/native/libs/arm* /path/to/your/app/src/main/jniLibs/ + ``` + + ![Screenshot](images/mobile/android_studio_opencv_location.png) + +5. Modify app/build.gradle to add MediaPipe dependencies and MediaPipe AAR. + + ``` + dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar', '*.aar']) + implementation 'androidx.appcompat:appcompat:1.0.2' + implementation 'androidx.constraintlayout:constraintlayout:1.1.3' + testImplementation 'junit:junit:4.12' + androidTestImplementation 'androidx.test.ext:junit:1.1.0' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.1.1' + // MediaPipe deps + implementation 'com.google.flogger:flogger:0.3.1' + implementation 'com.google.flogger:flogger-system-backend:0.3.1' + implementation 'com.google.code.findbugs:jsr305:3.0.2' + implementation 'com.google.guava:guava:27.0.1-android' + implementation 'com.google.guava:guava:27.0.1-android' + // CameraX core library + def camerax_version = "1.0.0-alpha06" + implementation "androidx.camera:camera-core:$camerax_version" + implementation "androidx.camera:camera-camera2:$camerax_version" + } + ``` + +6. Follow our Android app examples to use MediaPipe in Android Studio for your + use case. If you are looking for an example, a working face detection + example can be found + [here](https://github.com/jiuqiant/mediapipe_aar_example). diff --git a/mediapipe/docs/examples.md b/mediapipe/docs/examples.md index 404024b5f..e9d27e0fc 100644 --- a/mediapipe/docs/examples.md +++ b/mediapipe/docs/examples.md @@ -96,8 +96,9 @@ using the MediaPipe C++ APIs. ### Feature Extration for YouTube-8M Challenge -[Feature Extration for YouTube-8M Challenge](./youtube_8m.md) shows how to use -MediaPipe to prepare training data for the YouTube-8M Challenge. +[Feature Extration and Model Inference for YouTube-8M Challenge](./youtube_8m.md) +shows how to use MediaPipe to prepare training data for the YouTube-8M Challenge +and do the model inference with the baseline model. ### Preparing Data Sets with MediaSequence diff --git a/mediapipe/docs/face_detection_desktop.md b/mediapipe/docs/face_detection_desktop.md index b95705262..f5e4c452c 100644 --- a/mediapipe/docs/face_detection_desktop.md +++ b/mediapipe/docs/face_detection_desktop.md @@ -36,10 +36,9 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ # INFO: 711 processes: 710 linux-sandbox, 1 local. # INFO: Build completed successfully, 734 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible -$ bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_cpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_cpu \ --calculator_graph_config_file=mediapipe/graphs/face_detection/face_detection_desktop_live.pbtxt ``` @@ -60,11 +59,10 @@ $ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \ # INFO: 711 processes: 710 linux-sandbox, 1 local. # INFO: Build completed successfully, 734 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible, # or GPU drivers not setup properly. 
-$ bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_gpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_detection/face_detection_gpu \ --calculator_graph_config_file=mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt ``` diff --git a/mediapipe/docs/hair_segmentation_desktop.md b/mediapipe/docs/hair_segmentation_desktop.md index 058902363..e5fd274d0 100644 --- a/mediapipe/docs/hair_segmentation_desktop.md +++ b/mediapipe/docs/hair_segmentation_desktop.md @@ -35,11 +35,10 @@ $ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \ #INFO: Streaming build results to: http://sponge2/37d5a184-293b-4e98-a43e-b22084db3142 #INFO: Build completed successfully, 12210 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible, # or GPU drivers not setup properly. -$ bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair_segmentation_gpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hair_segmentation/hair_segmentation_gpu \ --calculator_graph_config_file=mediapipe/graphs/hair_segmentation/hair_segmentation_mobile_gpu.pbtxt ``` diff --git a/mediapipe/docs/hand_tracking_desktop.md b/mediapipe/docs/hand_tracking_desktop.md index 6776a4710..3e8b10c8f 100644 --- a/mediapipe/docs/hand_tracking_desktop.md +++ b/mediapipe/docs/hand_tracking_desktop.md @@ -35,10 +35,9 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ #INFO: Streaming build results to: http://sponge2/360196b9-33ab-44b1-84a7-1022b5043307 #INFO: Build completed successfully, 12517 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible -$ bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_cpu \ --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_desktop_live.pbtxt ``` @@ -59,11 +58,10 @@ $ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS \ #INFO: Streaming build results to: http://sponge2/00c7f95f-6fbc-432d-8978-f5d361efca3b #INFO: Build completed successfully, 22455 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible, # or GPU drivers not setup properly. 
-$ bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/hand_tracking/hand_tracking_gpu \ --calculator_graph_config_file=mediapipe/graphs/hand_tracking/hand_tracking_mobile.pbtxt ``` diff --git a/mediapipe/docs/images/mobile/aar_location.png b/mediapipe/docs/images/mobile/aar_location.png new file mode 100644 index 000000000..f85e8219e Binary files /dev/null and b/mediapipe/docs/images/mobile/aar_location.png differ diff --git a/mediapipe/docs/images/mobile/android_studio_opencv_location.png b/mediapipe/docs/images/mobile/android_studio_opencv_location.png new file mode 100644 index 000000000..dbb26af1a Binary files /dev/null and b/mediapipe/docs/images/mobile/android_studio_opencv_location.png differ diff --git a/mediapipe/docs/images/mobile/assets_location.png b/mediapipe/docs/images/mobile/assets_location.png new file mode 100644 index 000000000..573b77f04 Binary files /dev/null and b/mediapipe/docs/images/mobile/assets_location.png differ diff --git a/mediapipe/docs/install.md b/mediapipe/docs/install.md index 99473811e..825f5f831 100644 --- a/mediapipe/docs/install.md +++ b/mediapipe/docs/install.md @@ -24,7 +24,8 @@ Choose your operating system: To build and run Android apps: - [Setting up Android SDK and NDK](#setting-up-android-sdk-and-ndk) -- [Setting up Android Studio with MediaPipe](#setting-up-android-studio-with-mediapipe) +- [Using MediaPipe with Gradle](#using-mediapipe-with-gradle) +- [Using MediaPipe with Bazel](#using-mediapipe-with-bazel) To build and run iOS apps: @@ -41,19 +42,11 @@ To build and run iOS apps: $ cd mediapipe ``` -2. Install Bazel (0.24.1 and above required). +2. Install Bazel (version between 0.24.1 and 0.29.1). - Option 1. Use package manager tool to install the latest version of Bazel. - - ```bash - $ sudo apt-get install bazel - - # Run 'bazel version' to check version of bazel installed - ``` - - Option 2. Follow Bazel's + Follow the official [documentation](https://docs.bazel.build/versions/master/install-ubuntu.html) - to install any version of Bazel manually. + to install Bazel manually. Note that MediaPipe doesn't support Bazel 1.0.0+ yet. 3. Install OpenCV and FFmpeg. @@ -75,10 +68,10 @@ To build and run iOS apps: [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) to manually build OpenCV from source code. - Note: You may need to modify [`WORKSAPCE`] and [`opencv_linux.BUILD`] to + Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSAPCE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] + rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] like the following: ```bash @@ -159,11 +152,11 @@ To build and run iOS apps: $ cd mediapipe ``` -2. Install Bazel (0.24.1 and above required). +2. Install Bazel (version between 0.24.1 and 0.29.1). - Follow Bazel's + Follow the official [documentation](https://docs.bazel.build/versions/master/install-redhat.html) - to install Bazel manually. + to install Bazel manually. Note that MediaPipe doesn't support Bazel 1.0.0+ yet. 3. Install OpenCV. @@ -178,10 +171,10 @@ To build and run iOS apps: Option 2. Build OpenCV from source code. 
- Note: You may need to modify [`WORKSAPCE`] and [`opencv_linux.BUILD`] to + Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSAPCE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] + rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] like the following: ```bash @@ -237,7 +230,7 @@ To build and run iOS apps: * Install [Homebrew](https://brew.sh). * Install [Xcode](https://developer.apple.com/xcode/) and its Command Line - Tools. + Tools by `xcode-select install`. 2. Checkout MediaPipe repository. @@ -247,19 +240,24 @@ To build and run iOS apps: $ cd mediapipe ``` -3. Install Bazel (0.24.1 and above required). +3. Install Bazel (version between 0.24.1 and 0.29.1). - Option 1. Use package manager tool to install the latest version of Bazel. + Option 1. Use package manager tool to install Bazel 0.29.1 ```bash - $ brew install bazel + # If Bazel 1.0.0+ was installed. + $ brew uninstall bazel + + # Install Bazel 0.29.1 + $ brew install https://raw.githubusercontent.com/bazelbuild/homebrew-tap/223ffb570c21c0a2af251afc6df9dec0214c6e74/Formula/bazel.rb + $ brew link bazel # Run 'bazel version' to check version of bazel installed ``` - Option 2. Follow Bazel's + Option 2. Follow the official [documentation](https://docs.bazel.build/versions/master/install-os-x.html#install-with-installer-mac-os-x) - to install any version of Bazel manually. + to install Bazel manually. Note that MediaPipe doesn't support Bazel 1.0.0+ yet. 4. Install OpenCV and FFmpeg. @@ -281,7 +279,7 @@ To build and run iOS apps: $ port install opencv ``` - Note: when using MacPorts, please edit the [`WORKSAPCE`], + Note: when using MacPorts, please edit the [`WORKSPACE`], [`opencv_macos.BUILD`], and [`ffmpeg_macos.BUILD`] files like the following: ```bash @@ -419,10 +417,10 @@ To build and run iOS apps: [documentation](https://docs.opencv.org/3.4.6/d7/d9f/tutorial_linux_install.html) to manually build OpenCV from source code. - Note: You may need to modify [`WORKSAPCE`] and [`opencv_linux.BUILD`] to + Note: You may need to modify [`WORKSPACE`] and [`opencv_linux.BUILD`] to point MediaPipe to your own OpenCV libraries, e.g., if OpenCV 4 is installed in "/usr/local/", you need to update the "linux_opencv" new_local_repository - rule in [`WORKSAPCE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] + rule in [`WORKSPACE`] and "opencv" cc_library rule in [`opencv_linux.BUILD`] like the following: ```bash @@ -589,10 +587,20 @@ Please verify all the necessary packages are installed. * Android SDK Tools 26.1.1 * Android NDK 17c or above -### Setting up Android Studio with MediaPipe +### Using MediaPipe with Gradle -The steps below use Android Studio 3.5 to build and install a MediaPipe example -app. +MediaPipe can be used within an existing project, such as a Gradle project, +using the MediaPipe AAR target defined in mediapipe_aar.bzl. Please see the +separate [MediaPipe Android Archive Library](./android_archive_library.md) +documentation. + +### Using MediaPipe with Bazel + +The MediaPipe project can be imported to Android Studio using the Bazel plugins. +This allows the MediaPipe examples and demos to be built and modified in Android +Studio. To incorporate MediaPipe into an existing Android Studio project, see: +"Using MediaPipe with Gradle". 
The steps below use Android Studio 3.5 to build +and install a MediaPipe example app. 1. Install and launch Android Studio 3.5. @@ -682,7 +690,7 @@ app. * Press the `[+]` button to add the new configuration. * Select `Run` to run the example app on the connected Android device. -[`WORKSAPCE`]: https://github.com/google/mediapipe/tree/master/WORKSPACE +[`WORKSPACE`]: https://github.com/google/mediapipe/tree/master/WORKSPACE [`opencv_linux.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_linux.BUILD [`opencv_macos.BUILD`]: https://github.com/google/mediapipe/tree/master/third_party/opencv_macos.BUILD [`ffmpeg_macos.BUILD`]:https://github.com/google/mediapipe/tree/master/third_party/ffmpeg_macos.BUILD diff --git a/mediapipe/docs/object_detection_desktop.md b/mediapipe/docs/object_detection_desktop.md index 63de4f1ef..ceb9da362 100644 --- a/mediapipe/docs/object_detection_desktop.md +++ b/mediapipe/docs/object_detection_desktop.md @@ -35,10 +35,9 @@ $ bazel build -c opt \ # INFO: 2675 processes: 2673 linux-sandbox, 2 local. # INFO: Build completed successfully, 2807 total actions -$ export GLOG_logtostderr=1 # Replace and . # You can find a test video in mediapipe/examples/desktop/object_detection. -$ bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tensorflow \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tensorflow \ --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tensorflow_graph.pbtxt \ --input_side_packets=input_video_path=,output_video_path= ``` @@ -200,10 +199,9 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ # INFO: 711 processes: 710 linux-sandbox, 1 local. # INFO: Build completed successfully, 734 total actions -$ export GLOG_logtostderr=1 # Replace and . # You can find a test video in mediapipe/examples/desktop/object_detection. -$ bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_tflite \ --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_tflite_graph.pbtxt \ --input_side_packets=input_video_path=,output_video_path= ``` @@ -224,10 +222,9 @@ $ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ #INFO: Streaming build results to: http://sponge2/1824d4cc-ba63-4350-bdc0-aacbd45b902b #INFO: Build completed successfully, 12154 total actions -$ export GLOG_logtostderr=1 # This will open up your webcam as long as it is connected and on # Any errors is likely due to your webcam being not accessible -$ bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu \ +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/object_detection/object_detection_cpu \ --calculator_graph_config_file=mediapipe/graphs/object_detection/object_detection_desktop_live.pbtxt ``` diff --git a/mediapipe/docs/youtube_8m.md b/mediapipe/docs/youtube_8m.md index dc6b26012..65346a6d3 100644 --- a/mediapipe/docs/youtube_8m.md +++ b/mediapipe/docs/youtube_8m.md @@ -1,9 +1,11 @@ -## Extracting Video Features for YouTube-8M Challenge +# Feature Extration and Model Inference for YouTube-8M Challenge MediaPipe is a useful and general framework for media processing that can assist with research, development, and deployment of ML models. This example focuses on -model development by demonstrating how to prepare training data for the -YouTube-8M Challenge. 
+model development by demonstrating how to prepare training data and do model +inference for the YouTube-8M Challenge. + +## Extracting Video Features for YouTube-8M Challenge [Youtube-8M Challenge](https://www.kaggle.com/c/youtube8m-2019) is an annual video classification challenge hosted by Google. Over the last two years, the @@ -29,14 +31,14 @@ videos. ### Steps to run the YouTube-8M feature extraction graph -1. Checkout the mediapipe repository +1. Checkout the mediapipe repository. ```bash git clone https://github.com/google/mediapipe.git cd mediapipe ``` -2. Download the PCA and model data +2. Download the PCA and model data. ```bash mkdir /tmp/mediapipe @@ -49,7 +51,7 @@ videos. tar -xvf /tmp/mediapipe/inception-2015-12-05.tgz ``` -3. Get the VGGish frozen graph +3. Get the VGGish frozen graph. Note: To run step 3 and step 4, you must have Python 2.7 or 3.5+ installed with the TensorFlow 1.14+ package installed. @@ -60,24 +62,103 @@ videos. python -m mediapipe.examples.desktop.youtube8m.generate_vggish_frozen_graph ``` -4. Generate a MediaSequence metadata from the input video +4. Generate a MediaSequence metadata from the input video. Note: the output file is /tmp/mediapipe/metadata.tfrecord ```bash + # change clip_end_time_sec to match the length of your video. python -m mediapipe.examples.desktop.youtube8m.generate_input_sequence_example \ - --path_to_input_video=/absolute/path/to/the/local/video/file + --path_to_input_video=/absolute/path/to/the/local/video/file \ + --clip_end_time_sec=120 ``` -5. Run the MediaPipe binary to extract the features +5. Run the MediaPipe binary to extract the features. ```bash bazel build -c opt \ --define MEDIAPIPE_DISABLE_GPU=1 --define no_aws_support=true \ mediapipe/examples/desktop/youtube8m:extract_yt8m_features - ./bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \ --calculator_graph_config_file=mediapipe/graphs/youtube8m/feature_extraction.pbtxt \ --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.tfrecord \ --output_side_packets=output_sequence_example=/tmp/mediapipe/output.tfrecord ``` + +## Model Inference for YouTube-8M Challenge + +MediaPipe can help you do model inference for YouTube-8M Challenge with both +local videos and the YouTube-8M dataset. To visualize +[the graph for local videos](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt) +and +[the graph for the YouTube-8M dataset](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt), +copy the text specification of the graph and paste it into +[MediaPipe Visualizer](https://viz.mediapipe.dev/). We use the baseline model +[(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) +in our example. But, the model inference pipeline is highly customizable. You +are welcome to add new calculators or use your own machine learning models to do +the inference for both local videos and the dataset + +### Steps to run the YouTube-8M model inference graph with Web Interface + +1. Copy the baseline model + [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) + to local. + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +2. 
Build the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + ``` + +3. Run the python web server. + + Note: pip install absl-py + + ```bash + python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd` + ``` + + Navigate to localhost:8008 in a web browser. + [Here](https://drive.google.com/file/d/19GSvdAAuAlACpBhHOaqMWZ_9p8bLUYKh/view?usp=sharing) + is a demo video showing the steps to use this web application. Also please + read + [youtube8m/README.md](https://github.com/google/mediapipe/tree/master/mediapipe/examples/desktop/youtube8m/README.md) + if you prefer to run the underlying model_inference binary in command line. + +### Steps to run the YouTube-8M model inference graph with a local video + +1. Make sure you have the output tfrecord from the feature extraction pipeline. + +2. Copy the baseline model + [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) + to local. + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +3. Build and run the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + + # segment_size is the number of seconds window of frames. + # overlap is the number of seconds adjacent segments share. + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \ + --calculator_graph_config_file=mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt \ + --input_side_packets=input_sequence_example_path=/tmp/mediapipe/output.tfrecord,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4 + ``` + +4. View the annotated video. diff --git a/mediapipe/examples/desktop/BUILD b/mediapipe/examples/desktop/BUILD index 3a35d724b..f579c49e5 100644 --- a/mediapipe/examples/desktop/BUILD +++ b/mediapipe/examples/desktop/BUILD @@ -27,7 +27,9 @@ cc_library( "//mediapipe/framework/port:file_helpers", "//mediapipe/framework/port:map_util", "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:ret_check", "//mediapipe/framework/port:status", + "//mediapipe/framework/port:statusor", "@com_google_absl//absl/strings", ], ) diff --git a/mediapipe/examples/desktop/simple_run_graph_main.cc b/mediapipe/examples/desktop/simple_run_graph_main.cc index c912837f8..ee54bf231 100644 --- a/mediapipe/examples/desktop/simple_run_graph_main.cc +++ b/mediapipe/examples/desktop/simple_run_graph_main.cc @@ -13,14 +13,23 @@ // limitations under the License. // // A simple main function to run a MediaPipe graph. +#include +#include +#include +#include +#include +#include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" #include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/port/commandlineflags.h" #include "mediapipe/framework/port/file_helpers.h" #include "mediapipe/framework/port/map_util.h" #include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/ret_check.h" #include "mediapipe/framework/port/status.h" +#include "mediapipe/framework/port/statusor.h" DEFINE_string( calculator_graph_config_file, "", @@ -31,14 +40,72 @@ DEFINE_string(input_side_packets, "", "for the CalculatorGraph. 
All values will be treated as the " "string type even if they represent doubles, floats, etc."); +// Local file output flags. +// Output stream +DEFINE_string(output_stream, "", + "The output stream to output to the local file in csv format."); +DEFINE_string(output_stream_file, "", + "The name of the local file to output all packets sent to " + "the stream specified with --output_stream. "); +DEFINE_bool(strip_timestamps, false, + "If true, only the packet contents (without timestamps) will be " + "written into the local file."); +// Output side packets +DEFINE_string(output_side_packets, "", + "A CSV of output side packets to output to local file."); +DEFINE_string(output_side_packets_file, "", + "The name of the local file to output all side packets specified " + "with --output_side_packets. "); + +::mediapipe::Status OutputStreamToLocalFile( + ::mediapipe::OutputStreamPoller& poller) { + std::ofstream file; + file.open(FLAGS_output_stream_file); + ::mediapipe::Packet packet; + while (poller.Next(&packet)) { + std::string output_data; + if (!FLAGS_strip_timestamps) { + absl::StrAppend(&output_data, packet.Timestamp().Value(), ","); + } + absl::StrAppend(&output_data, packet.Get(), "\n"); + file << output_data; + } + file.close(); + return ::mediapipe::OkStatus(); +} + +::mediapipe::Status OutputSidePacketsToLocalFile( + ::mediapipe::CalculatorGraph& graph) { + if (!FLAGS_output_side_packets.empty() && + !FLAGS_output_side_packets_file.empty()) { + std::ofstream file; + file.open(FLAGS_output_side_packets_file); + std::vector side_packet_names = + absl::StrSplit(FLAGS_output_side_packets, ','); + for (const std::string& side_packet_name : side_packet_names) { + ASSIGN_OR_RETURN(auto status_or_packet, + graph.GetOutputSidePacket(side_packet_name)); + file << absl::StrCat(side_packet_name, ":", + status_or_packet.Get(), "\n"); + } + file.close(); + } else { + RET_CHECK(FLAGS_output_side_packets.empty() && + FLAGS_output_side_packets_file.empty()) + << "--output_side_packets and --output_side_packets_file should be " + "specified in pair."; + } + return ::mediapipe::OkStatus(); +} + ::mediapipe::Status RunMPPGraph() { std::string calculator_graph_config_contents; - MP_RETURN_IF_ERROR(mediapipe::file::GetContents( + MP_RETURN_IF_ERROR(::mediapipe::file::GetContents( FLAGS_calculator_graph_config_file, &calculator_graph_config_contents)); LOG(INFO) << "Get calculator graph config contents: " << calculator_graph_config_contents; - mediapipe::CalculatorGraphConfig config = - mediapipe::ParseTextProtoOrDie( + ::mediapipe::CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie<::mediapipe::CalculatorGraphConfig>( calculator_graph_config_contents); std::map input_side_packets; std::vector kv_pairs = @@ -51,10 +118,23 @@ DEFINE_string(input_side_packets, "", ::mediapipe::MakePacket(name_and_value[1]); } LOG(INFO) << "Initialize the calculator graph."; - mediapipe::CalculatorGraph graph; + ::mediapipe::CalculatorGraph graph; MP_RETURN_IF_ERROR(graph.Initialize(config, input_side_packets)); - LOG(INFO) << "Start running the calculator graph."; - return graph.Run(); + if (!FLAGS_output_stream.empty() && !FLAGS_output_stream_file.empty()) { + ASSIGN_OR_RETURN(auto poller, + graph.AddOutputStreamPoller(FLAGS_output_stream)); + LOG(INFO) << "Start running the calculator graph."; + MP_RETURN_IF_ERROR(graph.StartRun({})); + MP_RETURN_IF_ERROR(OutputStreamToLocalFile(poller)); + } else { + RET_CHECK(FLAGS_output_stream.empty() && FLAGS_output_stream_file.empty()) + << "--output_stream and 
--output_stream_file should be specified in " + "pair."; + LOG(INFO) << "Start running the calculator graph."; + MP_RETURN_IF_ERROR(graph.StartRun({})); + } + MP_RETURN_IF_ERROR(graph.WaitUntilDone()); + return OutputSidePacketsToLocalFile(graph); } int main(int argc, char** argv) { diff --git a/mediapipe/examples/desktop/youtube8m/BUILD b/mediapipe/examples/desktop/youtube8m/BUILD index c25c5f50d..16b868bdc 100644 --- a/mediapipe/examples/desktop/youtube8m/BUILD +++ b/mediapipe/examples/desktop/youtube8m/BUILD @@ -33,3 +33,14 @@ cc_binary( "@org_tensorflow//tensorflow/core:direct_session", ], ) + +cc_binary( + name = "model_inference", + deps = [ + "//mediapipe/examples/desktop:simple_run_graph_main", + "//mediapipe/graphs/youtube8m:yt8m_inference_calculators_deps", + # TODO: Figure out the minimum set of the kernels needed by this example. + "@org_tensorflow//tensorflow/core:all_kernels", + "@org_tensorflow//tensorflow/core:direct_session", + ], +) diff --git a/mediapipe/examples/desktop/youtube8m/README.md b/mediapipe/examples/desktop/youtube8m/README.md index 2989a7927..8ad5bf482 100644 --- a/mediapipe/examples/desktop/youtube8m/README.md +++ b/mediapipe/examples/desktop/youtube8m/README.md @@ -1,13 +1,13 @@ ### Steps to run the YouTube-8M feature extraction graph -1. Checkout the mediapipe repository +1. Checkout the mediapipe repository. ```bash git clone https://github.com/google/mediapipe.git cd mediapipe ``` -2. Download the PCA and model data +2. Download the PCA and model data. ```bash mkdir /tmp/mediapipe @@ -20,7 +20,7 @@ tar -xvf /tmp/mediapipe/inception-2015-12-05.tgz ``` -3. Get the VGGish frozen graph +3. Get the VGGish frozen graph. Note: To run step 3 and step 4, you must have Python 2.7 or 3.5+ installed with the TensorFlow 1.14+ package installed. @@ -31,26 +31,114 @@ python -m mediapipe.examples.desktop.youtube8m.generate_vggish_frozen_graph ``` -4. Generate a MediaSequence metadata from the input video +4. Generate a MediaSequence metadata from the input video. Note: the output file is /tmp/mediapipe/metadata.tfrecord ```bash + # change clip_end_time_sec to match the length of your video. python -m mediapipe.examples.desktop.youtube8m.generate_input_sequence_example \ --path_to_input_video=/absolute/path/to/the/local/video/file \ - --clip_start_time_sec=0 \ - --clip_end_time_sec=10 + --clip_end_time_sec=120 ``` -5. Run the MediaPipe binary to extract the features +5. Run the MediaPipe binary to extract the features. ```bash bazel build -c opt \ --define MEDIAPIPE_DISABLE_GPU=1 --define no_aws_support=true \ mediapipe/examples/desktop/youtube8m:extract_yt8m_features - ./bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \ + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \ --calculator_graph_config_file=mediapipe/graphs/youtube8m/feature_extraction.pbtxt \ --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.tfrecord \ --output_side_packets=output_sequence_example=/tmp/mediapipe/output.tfrecord ``` + +### Steps to run the YouTube-8M inference graph with the YT8M dataset + +1. Download the YT8M dataset + + For example, download one shard of the training data: + + ```bash + curl http://us.data.yt8m.org/2/frame/train/trainpj.tfrecord --output /tmp/mediapipe/trainpj.tfrecord + ``` + +2. Copy the baseline model [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) to local. 
+ + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +3. Build and run the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \ + --calculator_graph_config_file=mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt \ + --input_side_packets=tfrecord_path=/tmp/mediapipe/trainpj.tfrecord,record_index=0,desired_segment_size=5 \ + --output_stream=annotation_summary \ + --output_stream_file=/tmp/summary \ + --output_side_packets=yt8m_id \ + --output_side_packets_file=/tmp/yt8m_id + ``` + +### Steps to run the YouTube-8M model inference graph with Web Interface + +1. Copy the baseline model [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) to local. + + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +2. Build the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + ``` + +3. Run the python web server. + + Note: pip install absl-py + + ```bash + python mediapipe/examples/desktop/youtube8m/viewer/server.py --root `pwd` + ``` + + Navigate to localhost:8008 in a web browser. + +### Steps to run the YouTube-8M model inference graph with a local video + +1. Make sure you have the output tfrecord from the feature extraction pipeline. + +2. Copy the baseline model [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view) to local. + + ```bash + curl -o /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz data.yt8m.org/models/baseline/saved_model.tar.gz + + tar -xvf /tmp/mediapipe/yt8m_baseline_saved_model.tar.gz -C /tmp/mediapipe + ``` + +3. Build and run the inference binary. + + ```bash + bazel build -c opt --define='MEDIAPIPE_DISABLE_GPU=1' \ + mediapipe/examples/desktop/youtube8m:model_inference + + # segment_size is the number of seconds window of frames. + # overlap is the number of seconds adjacent segments share. + GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \ + --calculator_graph_config_file=mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt \ + --input_side_packets=input_sequence_example_path=/tmp/mediapipe/output.tfrecord,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4 + ``` + +4. View the annotated video. diff --git a/mediapipe/examples/desktop/youtube8m/viewer/server.py b/mediapipe/examples/desktop/youtube8m/viewer/server.py new file mode 100644 index 000000000..febaad53d --- /dev/null +++ b/mediapipe/examples/desktop/youtube8m/viewer/server.py @@ -0,0 +1,262 @@ +"""Server for YouTube8M Model Inference Demo. + +Serves up both the static files for the website and provides a service that +fetches the video id and timestamp based labels for a video analyzed in a +tfrecord files. 
+ +""" +from __future__ import print_function +import json +import os +import re +import socket +import subprocess +import sys + +from absl import app +from absl import flags +import http.client +import http.server +from six.moves.urllib import parse + +FLAGS = flags.FLAGS +flags.DEFINE_bool("show_label_at_center", False, + "Show labels at the center of the segment.") +flags.DEFINE_integer("port", 8008, "Port that the API is served over.") +flags.DEFINE_string("tmp_dir", "/tmp/mediapipe", + "Temporary asset storage location.") +flags.DEFINE_string("root", "", "MediaPipe root directory.") +# binary, pbtxt, label_map paths are relative to 'root' path +flags.DEFINE_string( + "binary", + "bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference", + "Inference binary location.") +flags.DEFINE_string( + "pbtxt", + "mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt", + "Default pbtxt graph file.") +flags.DEFINE_string("label_map", "mediapipe/graphs/youtube8m/label_map.txt", + "Default label map text file.") + + +class HTTPServerV6(http.server.HTTPServer): + address_family = socket.AF_INET6 + + +class Youtube8MRequestHandler(http.server.SimpleHTTPRequestHandler): + """Static file server with /healthz support.""" + + def do_GET(self): + if self.path.startswith("/healthz"): + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.send_header("Content-length", 2) + self.end_headers() + self.wfile.write("ok") + if self.path.startswith("/video"): + parsed_params = parse.urlparse(self.path) + url_params = parse.parse_qs(parsed_params.query) + + tfrecord_path = "" + segment_size = 5 + + print(url_params) + if "file" in url_params: + tfrecord_path = url_params["file"][0] + if "segments" in url_params: + segment_size = int(url_params["segments"][0]) + + self.fetch(tfrecord_path, segment_size) + + else: + if self.path == "/": + self.path = "/index.html" + # Default to serve up a local file + self.path = "/static" + self.path + http.server.SimpleHTTPRequestHandler.do_GET(self) + + def report_error(self, msg): + """Simplifies sending out a string as a 500 http response.""" + self.send_response(500) + self.send_header("Content-type", "text/plain") + self.end_headers() + if sys.version_info[0] < 3: + self.wfile.write(str(msg).encode("utf-8")) + else: + self.wfile.write(bytes(msg, "utf-8")) + + def report_missing_files(self, files): + """Sends out 500 response with missing files.""" + accumulate = "" + for file_path in files: + if not os.path.exists(file_path): + accumulate = "%s '%s'" % (accumulate, file_path) + + if accumulate: + self.report_error("Could not find:%s" % accumulate) + return True + + return False + + def fetch(self, path, segment_size): + """Returns the video id and labels for a tfrecord at a provided index.""" + + print("Received request. File=", path, "Segment Size =", segment_size) + + if (self.report_missing_files([ + "%s/%s" % (FLAGS.root, FLAGS.pbtxt), + "%s/%s" % (FLAGS.root, FLAGS.binary), + "%s/%s" % (FLAGS.root, FLAGS.label_map) + ])): + return + + # Parse the youtube video id off the end of the link or as a standalone id. 
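    # For example, either "https://youtube.com/watch?v=huGVGe3Afng" or the bare id
    # "huGVGe3Afng" is captured below as ("hu", "GVGe3Afng"), which expands to the
    # metadata URL "data.yt8m.org/2/j/r/hu/huGVGe3Afng.js".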
+ filename_match = re.match( + "(?:.*youtube.*v=)?([a-zA-Z-0-9_]{2})([a-zA-Z-0-9_]+)", path) + tfrecord_url = filename_match.expand(r"data.yt8m.org/2/j/r/\1/\1\2.js") + + print("Trying to get tfrecord via", tfrecord_url) + + connection = http.client.HTTPConnection("data.yt8m.org") + connection.request("GET", tfrecord_url) + response = connection.getresponse() + + response_object = json.loads(response.read()) + filename = response_object["filename_raw"] + index = response_object["index"] + + print("TFRecord discovered: ", filename, ", index", index) + + output_file = r"%s/%s" % (FLAGS.tmp_dir, filename) + tfrecord_url = r"http://us.data.yt8m.org/2/frame/train/%s" % filename + + connection = http.client.HTTPConnection("us.data.yt8m.org") + connection.request("HEAD", + filename_match.expand(r"/2/frame/train/%s" % filename)) + response = connection.getresponse() + if response.getheader("Content-Type") != "application/octet-stream": + self.report_error("Filename '%s' is invalid." % path) + + print(output_file, "exists on yt8m.org. Did we fetch this before?") + + if not os.path.exists(output_file): + print(output_file, "doesn't exist locally, download it now.") + return_code = subprocess.call( + ["curl", "--output", output_file, tfrecord_url], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if return_code: + self.report_error("Could not retrieve contents from %s" % tfrecord_url) + return + else: + print(output_file, "exist locally, reuse it.") + + print("Run the graph...") + process = subprocess.Popen([ + "%s/%s" % (FLAGS.root, FLAGS.binary), + "--calculator_graph_config_file=%s/%s" % (FLAGS.root, FLAGS.pbtxt), + "--input_side_packets=tfrecord_path=%s" % output_file + + ",record_index=%d" % index + ",desired_segment_size=%d" % segment_size, + "--output_stream=annotation_summary", + "--output_stream_file=%s/labels" % FLAGS.tmp_dir, + "--output_side_packets=yt8m_id", + "--output_side_packets_file=%s/yt8m_id" % FLAGS.tmp_dir + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout_str, stderr_str = process.communicate() + process.wait() + + if stderr_str and "success" not in str(stderr_str).lower(): + self.report_error("Error executing server binary: \n%s" % stderr_str) + return + + f = open("%s/yt8m_id" % FLAGS.tmp_dir, "r") + contents = f.read() + print("yt8m_id is", contents[-5:-1]) + + curl_arg = "data.yt8m.org/2/j/i/%s/%s.js" % (contents[-5:-3], + contents[-5:-1]) + print("Grab labels from", curl_arg) + process = subprocess.Popen(["curl", curl_arg], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout = process.communicate() + process.wait() + + stdout_str = stdout[0].decode("utf-8") + + match = re.match(""".+"([^"]+)"[^"]+""", stdout_str) + final_results = { + "video_id": match.group(1), + "link": "https://www.youtube.com/watch?v=%s" % match.group(1), + "entries": [] + } + f = open("%s/labels" % FLAGS.tmp_dir, "r") + lines = f.readlines() + show_at_center = FLAGS.show_label_at_center + + print("%s/labels" % FLAGS.tmp_dir, "holds", len(lines), "entries") + for line in lines: + entry = {"labels": []} + final_results["entries"].append(entry) + first = True + for column in line.split(","): + if first: + subtract = segment_size / 2.0 if show_at_center else 0.0 + entry["time"] = float(int(column)) / 1000000.0 - subtract + first = False + else: + label_score = re.match("(.+):([0-9.]+).*", column) + if label_score: + score = float(label_score.group(2)) + entry["labels"].append({ + "label": label_score.group(1), + "score": score + }) + else: + print("empty score") + + 
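    # The assembled response has this shape (values are illustrative only):
    #   {
    #     "video_id": "huGVGe3Afng",
    #     "link": "https://www.youtube.com/watch?v=huGVGe3Afng",
    #     "entries": [
    #       {"time": 5.0, "labels": [{"label": "Guitar", "score": 0.92}, ...]},
    #       ...
    #     ]
    #   }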
response_json = json.dumps(final_results, indent=2, separators=(",", ": ")) + self.send_response(200) + self.send_header("Content-type", "application/json") + self.end_headers() + if sys.version_info[0] < 3: + self.wfile.write(str(response_json).encode("utf-8")) + else: + self.wfile.write(bytes(response_json, "utf-8")) + + +def update_pbtxt(): + """Update graph.pbtxt to use full path to label_map.txt.""" + edited_line = "" + lines = [] + with open("%s/%s" % (FLAGS.root, FLAGS.pbtxt), "r") as f: + lines = f.readlines() + for line in lines: + if "label_map_path" in line: + kv = line.split(":") + edited_line = kv[0] + (": \"%s/%s\"\n" % (FLAGS.root, FLAGS.label_map)) + with open("%s/%s" % (FLAGS.root, FLAGS.pbtxt), "w") as f: + for line in lines: + if "label_map_path" in line: + f.write(edited_line) + else: + f.write(line) + + +def main(unused_args): + dname = os.path.dirname(os.path.abspath(__file__)) + os.chdir(dname) + if not FLAGS.root: + print("Must specify MediaPipe root directory: --root `pwd`") + return + update_pbtxt() + port = FLAGS.port + print("Listening on port %s" % port) # pylint: disable=superfluous-parens + server = HTTPServerV6(("::", int(port)), Youtube8MRequestHandler) + server.serve_forever() + + +if __name__ == "__main__": + app.run(main) diff --git a/mediapipe/examples/desktop/youtube8m/viewer/static/index.html b/mediapipe/examples/desktop/youtube8m/viewer/static/index.html new file mode 100644 index 000000000..400aa0af0 --- /dev/null +++ b/mediapipe/examples/desktop/youtube8m/viewer/static/index.html @@ -0,0 +1,96 @@ + + + + MediaPipe: YouTube8M Model Inference Demo + + + + + + + + +
+ MediaPipe: YouTube8M Model Inference Demo
+ e.g., Both "https://youtube.com/watch?v=huGVGe3Afng" or "huGVGe3Afng" will work.
+ Labels
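The viewer page above is driven by main.js (next file), which requests /video?file=<YouTube URL or id>&segments=<n> from server.py and renders the returned labels against the embedded player. For orientation, here is a minimal sketch of the same request issued directly, assuming Python 3 and that server.py is already running locally on its default port (8008); the video id is only an example:

```python
import json
from urllib import parse, request

# Ask the viewer backend to analyze one video (a bare id or a full YouTube URL both work).
params = parse.urlencode({"file": "huGVGe3Afng", "segments": 5})
with request.urlopen("http://localhost:8008/video?" + params) as resp:
    result = json.loads(resp.read().decode("utf-8"))

# The response mirrors what main.js consumes: a video id/link plus per-segment labels.
print(result["video_id"], result["link"])
for entry in result["entries"]:
    labels = ", ".join(label["label"] for label in entry["labels"])
    print("t=%.1fs: %s" % (entry["time"], labels))
```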
+ + + + + + + + diff --git a/mediapipe/examples/desktop/youtube8m/viewer/static/main.js b/mediapipe/examples/desktop/youtube8m/viewer/static/main.js new file mode 100644 index 000000000..ad66e67ea --- /dev/null +++ b/mediapipe/examples/desktop/youtube8m/viewer/static/main.js @@ -0,0 +1,217 @@ +/** + * @license + * Copyright 2019 The MediaPipe Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +const STATE_PLAYER=0; +const STATE_COVER=1; +const STATE_SPINNER=2; + +/** +* Looks up the value of a url parameter. +* +* @param {string} param The name of the parameter. +* @return {?string} The parameter value or null if there is no such parameter. +*/ +var getUrlParameter = function(param) { + const url = decodeURIComponent(window.location.search.substring(1)); + const url_parts = url.split('&'); + for (var i = 0; i < url_parts.length; i++) { + const param_name = url_parts[i].split(/=(.*)/); + if (param_name[0] === param) { + return param_name[1] === undefined ? null : param_name[1]; + } + } +}; + +/** +* Sets the fields in the form to match the values of the URL parameters. +*/ +const updateFormFromURL = function() { + const form_elements = document.getElementById('form').elements; + const url = decodeURIComponent(window.location.search.substring(1)); + const url_parts = url.split('&'); + for (var i = 0; i < url_parts.length; i++) { + const p = url_parts[i].split(/=(.*)/); + if (p.length >= 2) { + if (form_elements[p[0]]) { + form_elements[p[0]].value = decodeURIComponent(p[1]); + } + } + } +}; + +let player = null; +let intervalID = undefined; +let entries = []; + +/** + * Constructs the embedded YouTube player. + */ +window.onYouTubeIframeAPIReady = () => { + player = new YT.Player('ytplayer', { + events: { + 'onReady': onPlayerReady, + 'onStateChange': onStateChange + } + }); +}; + + +/** + * Listens for YouTube video events. When video is playing, periodically checks + * the time signature and updates the feedback with labels. When video stops, + * shuts off interval timer to save cycles. + * @param {!Event} event YouTube API Event. + */ +function onStateChange(event) { + if (event.data === 1) { + // Youtube switched to playing. + intervalID = setInterval(function(){ + const currentTime = player.getCurrentTime(); + let winner = undefined; + let first = undefined; + for (entry of entries) { + if (!first) { + first = entry.labels; + } + if (entry.time < currentTime) { + winner = entry.labels; + } else { + break; + } + } + if (!winner) { + winner = first; + } + const threshold = + document.getElementById('form').elements['threshold'].value; + let message = ""; + for (var label of winner) { + if (label.score >= threshold) { + message = `${message}${label.label} (score: ${label.score})\n`; + } + } + $("textarea#feedback").val(message); + }); + } else { + if (intervalID) { + clearInterval(intervalID); + } + } +} + +/** + * Turns elements of the player on and off to reflect the state of the "app". + * @param {number} state One of STATE_COVER | STATE_SPINNER | STATE_PLAYER. 
+ */ +function showState(state) { + switch(state) { + case STATE_COVER: + $('#cover').show(); + $('#spinner').hide(); + $('#ytplayer').hide(); + break; + case STATE_SPINNER: + $('#cover').hide(); + $('#spinner').show(); + $('#ytplayer').hide(); + break; + case STATE_PLAYER: + default: + $('#cover').hide(); + $('#spinner').hide(); + $('#ytplayer').show(); + break; + } +} + +/** + * Hide error field and clear its message. + */ +function hideError() { + $('#error_msg').css("visibility", "hidden").text(''); +} + +/** + * Set the error to visible and set its message. + * @param {string} msg Error message as a string. + */ +function showError(msg) { + $('#error_msg').css("visibility", "visible").text(msg); +} + +/** + * Privides numeric feedback for the slider. + */ +function connectSlider() { + $('#threshold_label').text( + `Score Threshold (${$('#threshold')[0].value})`); + $('#threshold').on('input', () => { + $('#threshold_label').text( + `Score Threshold (${$('#threshold')[0].value})`); + }); + $('#segments_label').text( + `Segment Size (${$('#segments')[0].value})`); + $('#segments').on('input', () => { + $('#segments_label').text( + `Segment Size (${$('#segments')[0].value})`); + }); +} + +/** + * Retrieve video information from backend. + * @param {string} filePath name of a tfrecord file. + * @param {number} segments desired number of segments (1-300) + */ +function fetchVideo(filePath, segments) { + const url = "/video?file=" + filePath + "&segments=" + segments; + $.ajax({ + url: url, + success: function(result) { + const videoId = result["video_id"]; + player.loadVideoById(videoId); + entries = result['entries']; + showState(STATE_PLAYER); + }, + error: (err) => { + showState(STATE_COVER); + console.log(err); + showError(err.responseText); + }, + datatype: "json" + }); +} + +/** + * Called when the embedded YouTube player has finished loading. It loads the + * requested video into the player and calls the golden6_viewer API to retrieve + * the frame-level data for that video. + */ +function onPlayerReady() { + const filePath = getUrlParameter('file') || ""; + const segments = parseInt(getUrlParameter('segments')) || 0; + + updateFormFromURL(); + hideError(); + connectSlider(); + + if (!filePath) { + return; + } + + showState(STATE_SPINNER); + fetchVideo(filePath, segments); +} diff --git a/mediapipe/framework/BUILD b/mediapipe/framework/BUILD index 90a4f672c..1a273670e 100644 --- a/mediapipe/framework/BUILD +++ b/mediapipe/framework/BUILD @@ -688,6 +688,12 @@ cc_library( cc_library( name = "demangle", hdrs = ["demangle.h"], + defines = select({ + "//mediapipe/framework/profiler:android_release": [ + "MEDIAPIPE_HAS_CXA_DEMANGLE=0", + ], + "//conditions:default": [], + }), visibility = ["//visibility:public"], ) @@ -1713,3 +1719,10 @@ cc_test( "//mediapipe/framework/tool/testdata:dub_quad_test_subgraph", ], ) + +# Expose the proto source files for building mediapipe AAR. 
+filegroup( + name = "protos_src", + srcs = glob(["*.proto"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc index 1b8c3e9f2..b6144c0ae 100644 --- a/mediapipe/framework/calculator_graph_bounds_test.cc +++ b/mediapipe/framework/calculator_graph_bounds_test.cc @@ -756,7 +756,7 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { MP_ASSERT_OK(graph.WaitUntilDone()); } -// Shows that when fixed-size-input-stream-hanlder drops packets, +// Shows that when fixed-size-input-stream-handler drops packets, // no timetamp bounds are announced. TEST(CalculatorGraphBoundsTest, FixedSizeHandlerBounds) { // LambdaCalculator with FixedSizeInputStreamHandler will drop packets @@ -876,5 +876,93 @@ TEST(CalculatorGraphBoundsTest, FixedSizeHandlerBounds) { MP_ASSERT_OK(graph.WaitUntilDone()); } +// A Calculator that outputs only the last packet from its input stream. +class LastPacketCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->Inputs().Index(0).SetAny(); + cc->Outputs().Index(0).SetAny(); + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Open(CalculatorContext* cc) final { + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Process(CalculatorContext* cc) final { + cc->Outputs().Index(0).SetNextTimestampBound(cc->InputTimestamp()); + last_packet_ = cc->Inputs().Index(0).Value(); + return ::mediapipe::OkStatus(); + } + ::mediapipe::Status Close(CalculatorContext* cc) final { + cc->Outputs().Index(0).AddPacket(last_packet_); + return ::mediapipe::OkStatus(); + } + + private: + Packet last_packet_; +}; +REGISTER_CALCULATOR(LastPacketCalculator); + +// Shows that the last packet in an input stream can be detected. +TEST(CalculatorGraphBoundsTest, LastPacketCheck) { + // LastPacketCalculator emits only the last input stream packet. + // It emits a timestamp bound after the arrival of a successor input stream + // packet or input stream close. The output "last_output" shows the + // last packet, and "output" shows the timestamp bounds. + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(R"( + input_stream: 'input' + output_stream: 'output' + output_stream: 'last_output' + node { + calculator: 'PassThroughCalculator' + input_stream: 'input' + output_stream: 'input_2' + } + node { + calculator: 'LastPacketCalculator' + input_stream: 'input_2' + output_stream: 'last_packet' + } + node { + calculator: 'PassThroughCalculator' + input_stream: 'input' + input_stream: 'last_packet' + output_stream: 'output' + output_stream: 'last_output' + } + )"); + CalculatorGraph graph; + std::vector output_packets; + MP_ASSERT_OK(graph.Initialize(config)); + MP_ASSERT_OK(graph.ObserveOutputStream("output", [&](const Packet& p) { + output_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + std::vector last_output_packets; + MP_ASSERT_OK(graph.ObserveOutputStream("last_output", [&](const Packet& p) { + last_output_packets.push_back(p); + return ::mediapipe::OkStatus(); + })); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilIdle()); + + // Add four packets into the graph. 
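  // Because LastPacketCalculator only raises its output bound to the current
  // input timestamp, the downstream PassThroughCalculator can release the
  // packet at timestamp t only once the packet at t+1 arrives; "output" is
  // therefore expected to trail the inputs by one packet, and "last_output"
  // stays empty until the graph is closed.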
+ constexpr int kNumInputs = 4; + for (int i = 0; i < kNumInputs; ++i) { + Packet p = MakePacket(33).At(Timestamp(i)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(i, output_packets.size()); + EXPECT_EQ(0, last_output_packets.size()); + } + + // Shutdown the graph. + MP_ASSERT_OK(graph.CloseAllPacketSources()); + MP_ASSERT_OK(graph.WaitUntilIdle()); + EXPECT_EQ(kNumInputs, output_packets.size()); + EXPECT_EQ(1, last_output_packets.size()); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/framework/calculator_graph_side_packet_test.cc b/mediapipe/framework/calculator_graph_side_packet_test.cc index 166826ff1..01171a6c1 100644 --- a/mediapipe/framework/calculator_graph_side_packet_test.cc +++ b/mediapipe/framework/calculator_graph_side_packet_test.cc @@ -743,5 +743,66 @@ TEST(CalculatorGraph, GetOutputSidePacket) { } } +typedef std::string HugeModel; + +// Generates an output-side-packet once for each calculator-graph. +class OutputSidePacketCachedCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + cc->OutputSidePackets().Index(0).Set(); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) final { + cc->OutputSidePackets().Index(0).Set(MakePacket( + R"(An expensive side-packet created only once per graph)")); + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) final { + LOG(FATAL) << "Not reached."; + return ::mediapipe::OkStatus(); + } +}; +REGISTER_CALCULATOR(OutputSidePacketCachedCalculator); + +// Returns true if two packets hold the same data. +bool Equals(Packet p1, Packet p2) { + return packet_internal::GetHolder(p1) == packet_internal::GetHolder(p2); +} + +TEST(CalculatorGraph, OutputSidePacketCached) { + CalculatorGraphConfig config = + ::mediapipe::ParseTextProtoOrDie(R"( + node { + calculator: "OutputSidePacketCachedCalculator" + output_side_packet: "model" + } + node { + calculator: "SidePacketToStreamPacketCalculator" + input_side_packet: "model" + output_stream: "output" + } + )"); + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(config)); + std::vector output_packets; + MP_ASSERT_OK(graph.ObserveOutputStream( + "output", [&output_packets](const Packet& packet) { + output_packets.push_back(packet); + return ::mediapipe::OkStatus(); + })); + + // Run the graph three times. + for (int run = 0; run < 3; ++run) { + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.WaitUntilDone()); + } + ASSERT_EQ(3, output_packets.size()); + for (int run = 0; run < output_packets.size(); ++run) { + EXPECT_TRUE(Equals(output_packets[0], output_packets[run])); + } +} + } // namespace } // namespace mediapipe diff --git a/mediapipe/framework/calculator_node.cc b/mediapipe/framework/calculator_node.cc index f3cd90eea..d4a81ff9d 100644 --- a/mediapipe/framework/calculator_node.cc +++ b/mediapipe/framework/calculator_node.cc @@ -391,6 +391,38 @@ void CalculatorNode::SetMaxInputStreamQueueSize(int max_queue_size) { return ::mediapipe::OkStatus(); } +namespace { +// Returns the Packet sent to an OutputSidePacket, or an empty packet +// if none available. +const Packet GetPacket(const OutputSidePacket& out) { + auto impl = dynamic_cast(&out); + return (impl == nullptr) ? Packet() : impl->GetPacket(); +} + +// Resends the output-side-packets from the previous graph run. 
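// Because OutputSidePacketImpl::PrepareForRun() now retains the packet from
// the previous run (only its initialized_ flag is reset; see
// output_side_packet_impl.cc below), re-announcing it here hands the identical
// cached packet to all mirrors on every subsequent run.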
+::mediapipe::Status ResendSidePackets(CalculatorContext* cc) { + auto& outs = cc->OutputSidePackets(); + for (CollectionItemId id = outs.BeginId(); id < outs.EndId(); ++id) { + Packet packet = GetPacket(outs.Get(id)); + if (!packet.IsEmpty()) { + // OutputSidePacket::Set re-announces the side-packet to its mirrors. + outs.Get(id).Set(packet); + } + } + return ::mediapipe::OkStatus(); +} +} // namespace + +bool CalculatorNode::OutputsAreConstant(CalculatorContext* cc) { + if (cc->Inputs().NumEntries() > 0 || cc->Outputs().NumEntries() > 0) { + return false; + } + if (input_side_packet_handler_.InputSidePacketsChanged()) { + return false; + } + return true; +} + ::mediapipe::Status CalculatorNode::OpenNode() { VLOG(2) << "CalculatorNode::OpenNode() for " << DebugName(); @@ -407,8 +439,9 @@ void CalculatorNode::SetMaxInputStreamQueueSize(int max_queue_size) { default_context, Timestamp::Unstarted()); ::mediapipe::Status result; - - { + if (OutputsAreConstant(default_context)) { + result = ResendSidePackets(default_context); + } else { MEDIAPIPE_PROFILING(OPEN, default_context); LegacyCalculatorSupport::Scoped s(default_context); result = calculator_->Open(default_context); @@ -494,7 +527,10 @@ void CalculatorNode::CloseOutputStreams(OutputStreamShardSet* outputs) { ::mediapipe::Status result; - { + if (OutputsAreConstant(default_context)) { + // Do nothing. + result = ::mediapipe::OkStatus(); + } else { MEDIAPIPE_PROFILING(CLOSE, default_context); LegacyCalculatorSupport::Scoped s(default_context); result = calculator_->Close(default_context); @@ -770,7 +806,10 @@ std::string CalculatorNode::DebugName() const { VLOG(2) << "Calling Calculator::Process() for node: " << DebugName(); - { + if (OutputsAreConstant(calculator_context)) { + // Do nothing. + result = ::mediapipe::OkStatus(); + } else { MEDIAPIPE_PROFILING(PROCESS, calculator_context); LegacyCalculatorSupport::Scoped s( calculator_context); diff --git a/mediapipe/framework/calculator_node.h b/mediapipe/framework/calculator_node.h index fd17d4ada..f39636e5d 100644 --- a/mediapipe/framework/calculator_node.h +++ b/mediapipe/framework/calculator_node.h @@ -280,6 +280,9 @@ class CalculatorNode { // Get a std::string describing the input streams. std::string DebugInputStreamNames() const; + // Returns true if all outputs will be identical to the previous graph run. + bool OutputsAreConstant(CalculatorContext* cc); + // The calculator. std::unique_ptr calculator_; // Keeps data which a Calculator subclass needs access to. diff --git a/mediapipe/framework/collection.h b/mediapipe/framework/collection.h index b3f972b0a..448968be2 100644 --- a/mediapipe/framework/collection.h +++ b/mediapipe/framework/collection.h @@ -240,6 +240,22 @@ class Collection { return tag_map_->EndId(tag); } + // Equal Collections contain equal mappings and equal elements. 
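  // For collections of Packets (for example the input-side-packet sets
  // compared by InputSidePacketHandler::InputSidePacketsChanged()), the
  // element comparison below resolves to the pointer-style Packet equality
  // defined in packet.h.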
+ bool operator==(const Collection& other) const { + if (tag_map_->Mapping() != other.TagMap()->Mapping()) { + return false; + } + for (CollectionItemId id = BeginId(); id < EndId(); ++id) { + if (Get(id) != other.Get(id)) { + return false; + } + } + return true; + } + bool operator!=(const Collection& other) const { + return !(*this == other); + } + private: // An iterator which is identical to ItType** except that the // dereference operator (operator*) does a double dereference and diff --git a/mediapipe/framework/demangle.h b/mediapipe/framework/demangle.h index e9624c5ac..45ebd1691 100644 --- a/mediapipe/framework/demangle.h +++ b/mediapipe/framework/demangle.h @@ -15,23 +15,25 @@ #ifndef MEDIAPIPE_FRAMEWORK_DEMANGLE_H_ #define MEDIAPIPE_FRAMEWORK_DEMANGLE_H_ +#ifndef MEDIAPIPE_HAS_CXA_DEMANGLE // We only support some compilers that support __cxa_demangle. // TODO: Checks if Android NDK has fixed this issue or not. #if defined(__ANDROID__) && (defined(__i386__) || defined(__x86_64__)) -#define HAS_CXA_DEMANGLE 0 +#define MEDIAPIPE_HAS_CXA_DEMANGLE 0 #elif (__GNUC__ >= 4 || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4)) && \ !defined(__mips__) -#define HAS_CXA_DEMANGLE 1 +#define MEDIAPIPE_HAS_CXA_DEMANGLE 1 #elif defined(__clang__) && !defined(_MSC_VER) -#define HAS_CXA_DEMANGLE 1 +#define MEDIAPIPE_HAS_CXA_DEMANGLE 1 #else -#define HAS_CXA_DEMANGLE 0 +#define MEDIAPIPE_HAS_CXA_DEMANGLE 0 +#endif #endif #include #include -#if HAS_CXA_DEMANGLE +#if MEDIAPIPE_HAS_CXA_DEMANGLE #include #endif @@ -65,7 +67,7 @@ namespace mediapipe { inline std::string Demangle(const char* mangled) { int status = 0; char* demangled = nullptr; -#if HAS_CXA_DEMANGLE +#if MEDIAPIPE_HAS_CXA_DEMANGLE demangled = abi::__cxa_demangle(mangled, nullptr, nullptr, &status); #endif std::string out; diff --git a/mediapipe/framework/deps/BUILD b/mediapipe/framework/deps/BUILD index f3ca5dc1d..cc84a99e7 100644 --- a/mediapipe/framework/deps/BUILD +++ b/mediapipe/framework/deps/BUILD @@ -15,10 +15,9 @@ # Description: # The dependencies of mediapipe. -licenses(["notice"]) # Apache 2.0 - load("//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library") -load("//mediapipe/framework/port:build_config.bzl", "mediapipe_py_proto_library") + +licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:private"]) diff --git a/mediapipe/framework/formats/image_format.proto b/mediapipe/framework/formats/image_format.proto index a367f4b62..ea99dfee4 100644 --- a/mediapipe/framework/formats/image_format.proto +++ b/mediapipe/framework/formats/image_format.proto @@ -66,5 +66,9 @@ message ImageFormat { // LAB, interleaved: one byte for L, then one byte for a, then one // byte for b for each pixel. LAB8 = 10; + + // sBGRA, interleaved: one byte for B, one byte for G, one byte for R, + // one byte for alpha or unused. This is the N32 format for Skia. 
+ SBGRA = 11; } } diff --git a/mediapipe/framework/formats/image_frame.cc b/mediapipe/framework/formats/image_frame.cc index 996702ae2..338dfe165 100644 --- a/mediapipe/framework/formats/image_frame.cc +++ b/mediapipe/framework/formats/image_frame.cc @@ -279,6 +279,8 @@ int ImageFrame::NumberOfChannelsForFormat(ImageFormat::Format format) { return 1; case ImageFormat::LAB8: return 3; + case ImageFormat::SBGRA: + return 4; default: LOG(FATAL) << InvalidFormatString(format); } @@ -304,6 +306,8 @@ int ImageFrame::ChannelSizeForFormat(ImageFormat::Format format) { return sizeof(float); case ImageFormat::LAB8: return sizeof(uint8); + case ImageFormat::SBGRA: + return sizeof(uint8); default: LOG(FATAL) << InvalidFormatString(format); } @@ -329,6 +333,8 @@ int ImageFrame::ByteDepthForFormat(ImageFormat::Format format) { return 4; case ImageFormat::LAB8: return 1; + case ImageFormat::SBGRA: + return 1; default: LOG(FATAL) << InvalidFormatString(format); } diff --git a/mediapipe/framework/formats/image_frame_opencv.cc b/mediapipe/framework/formats/image_frame_opencv.cc index bf723cda3..bf8b908b3 100644 --- a/mediapipe/framework/formats/image_frame_opencv.cc +++ b/mediapipe/framework/formats/image_frame_opencv.cc @@ -59,6 +59,9 @@ int GetMatType(const mediapipe::ImageFormat::Format format) { case mediapipe::ImageFormat::LAB8: type = CV_8U; break; + case mediapipe::ImageFormat::SBGRA: + type = CV_8U; + break; default: // Invalid or unknown; Default to uchar. type = CV_8U; diff --git a/mediapipe/framework/formats/landmark.proto b/mediapipe/framework/formats/landmark.proto index cdc2ee151..220b3725d 100644 --- a/mediapipe/framework/formats/landmark.proto +++ b/mediapipe/framework/formats/landmark.proto @@ -32,3 +32,8 @@ message NormalizedLandmark { optional float y = 2; optional float z = 3; } + +// Group of NormalizedLandmark protos. +message NormalizedLandmarkList { + repeated NormalizedLandmark landmark = 1; +} diff --git a/mediapipe/framework/input_side_packet_handler.cc b/mediapipe/framework/input_side_packet_handler.cc index fb66f0694..ce43508d2 100644 --- a/mediapipe/framework/input_side_packet_handler.cc +++ b/mediapipe/framework/input_side_packet_handler.cc @@ -27,6 +27,7 @@ namespace mediapipe { std::function input_side_packets_ready_callback, std::function error_callback) { int missing_input_side_packet_count; + prev_input_side_packets_ = std::move(input_side_packets_); ASSIGN_OR_RETURN( input_side_packets_, tool::FillPacketSet(*input_side_packet_types, all_side_packets, @@ -41,6 +42,12 @@ namespace mediapipe { return ::mediapipe::OkStatus(); } +bool InputSidePacketHandler::InputSidePacketsChanged() { + return prev_input_side_packets_ == nullptr || + input_side_packets_ == nullptr || + *input_side_packets_ != *prev_input_side_packets_; +} + void InputSidePacketHandler::Set(CollectionItemId id, const Packet& packet) { ::mediapipe::Status status = SetInternal(id, packet); if (!status.ok()) { diff --git a/mediapipe/framework/input_side_packet_handler.h b/mediapipe/framework/input_side_packet_handler.h index 5112731da..ecfa2239e 100644 --- a/mediapipe/framework/input_side_packet_handler.h +++ b/mediapipe/framework/input_side_packet_handler.h @@ -52,6 +52,10 @@ class InputSidePacketHandler { const PacketSet& InputSidePackets() const { return *input_side_packets_; } + // Returns true if the set of input-side-packets has changed since the + // previous run. + bool InputSidePacketsChanged(); + // Returns the number of missing input side packets. 
int MissingInputSidePacketCount() const { return missing_input_side_packet_count_.load(std::memory_order_relaxed); @@ -68,6 +72,7 @@ class InputSidePacketHandler { const PacketTypeSet* input_side_packet_types_; std::unique_ptr input_side_packets_; + std::unique_ptr prev_input_side_packets_; std::atomic missing_input_side_packet_count_{0}; diff --git a/mediapipe/framework/output_side_packet_impl.cc b/mediapipe/framework/output_side_packet_impl.cc index 09cc294ff..f2771da5d 100644 --- a/mediapipe/framework/output_side_packet_impl.cc +++ b/mediapipe/framework/output_side_packet_impl.cc @@ -30,7 +30,7 @@ namespace mediapipe { void OutputSidePacketImpl::PrepareForRun( std::function error_callback) { error_callback_ = std::move(error_callback); - packet_ = Packet(); + initialized_ = false; } void OutputSidePacketImpl::Set(const Packet& packet) { @@ -47,7 +47,7 @@ void OutputSidePacketImpl::AddMirror( } ::mediapipe::Status OutputSidePacketImpl::SetInternal(const Packet& packet) { - if (!packet_.IsEmpty()) { + if (initialized_) { return ::mediapipe::AlreadyExistsErrorBuilder(MEDIAPIPE_LOC) << "Output side packet \"" << name_ << "\" was already set."; } @@ -72,6 +72,7 @@ void OutputSidePacketImpl::AddMirror( } packet_ = packet; + initialized_ = true; for (const auto& mirror : mirrors_) { mirror.input_side_packet_handler->Set(mirror.id, packet_); } diff --git a/mediapipe/framework/output_side_packet_impl.h b/mediapipe/framework/output_side_packet_impl.h index c654769c5..df9ac4082 100644 --- a/mediapipe/framework/output_side_packet_impl.h +++ b/mediapipe/framework/output_side_packet_impl.h @@ -80,6 +80,7 @@ class OutputSidePacketImpl : public OutputSidePacket { const PacketType* packet_type_; std::function error_callback_; Packet packet_; + bool initialized_ = false; std::vector mirrors_; }; diff --git a/mediapipe/framework/packet.h b/mediapipe/framework/packet.h index 8782d924c..8564abce6 100644 --- a/mediapipe/framework/packet.h +++ b/mediapipe/framework/packet.h @@ -653,6 +653,14 @@ Packet PointToForeign(const T* ptr) { return packet_internal::Create(new packet_internal::ForeignHolder(ptr)); } +// Equal Packets refer to the same memory contents, like equal pointers. +inline bool operator==(const Packet& p1, const Packet& p2) { + return packet_internal::GetHolder(p1) == packet_internal::GetHolder(p2); +} +inline bool operator!=(const Packet& p1, const Packet& p2) { + return !(p1 == p2); +} + } // namespace mediapipe #endif // MEDIAPIPE_FRAMEWORK_PACKET_H_ diff --git a/mediapipe/framework/port.h b/mediapipe/framework/port.h index c45a4546d..275f8ca98 100644 --- a/mediapipe/framework/port.h +++ b/mediapipe/framework/port.h @@ -28,4 +28,22 @@ #define MEDIAPIPE_MOBILE #endif +#if !defined(MEDIAPIPE_ANDROID) && defined(__ANDROID__) +#define MEDIAPIPE_ANDROID +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" // for TARGET_OS_* +#if !defined(MEDIAPIPE_IOS) && !TARGET_OS_OSX +#define MEDIAPIPE_IOS +#endif +#endif + +// These platforms do not support OpenGL ES Compute Shaders (v3.1 and up), +// but can still run OpenGL ES 3.0 and below. 
+#if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) && \ + (defined(__APPLE__) || defined(__EMSCRIPTEN__)) +#define MEDIAPIPE_DISABLE_GL_COMPUTE +#endif + #endif // MEDIAPIPE_FRAMEWORK_PORT_H_ diff --git a/mediapipe/framework/profiler/graph_profiler_test.cc b/mediapipe/framework/profiler/graph_profiler_test.cc index cf7717556..86c6a16c3 100644 --- a/mediapipe/framework/profiler/graph_profiler_test.cc +++ b/mediapipe/framework/profiler/graph_profiler_test.cc @@ -247,25 +247,45 @@ TEST_F(GraphProfilerTestPeer, InitializeConfig) { // Checks histogram_interval_size_usec and num_histogram_intervals. CalculatorProfile actual = GetCalculatorProfilesMap()->find(kDummyTestCalculatorName)->second; - ASSERT_EQ(actual.name(), kDummyTestCalculatorName); - ASSERT_FALSE(actual.has_open_runtime()); - ASSERT_FALSE(actual.has_close_runtime()); - - ASSERT_EQ(actual.process_runtime().interval_size_usec(), 1000); - ASSERT_EQ(actual.process_runtime().num_intervals(), 3); - - ASSERT_EQ(actual.process_input_latency().interval_size_usec(), 1000); - ASSERT_EQ(actual.process_input_latency().num_intervals(), 3); - - ASSERT_EQ(actual.process_output_latency().interval_size_usec(), 1000); - ASSERT_EQ(actual.process_output_latency().num_intervals(), 3); - - ASSERT_EQ(actual.input_stream_profiles().size(), 1); - ASSERT_EQ(actual.input_stream_profiles(0).name(), "input_stream"); - ASSERT_FALSE(actual.input_stream_profiles(0).back_edge()); - ASSERT_EQ(actual.input_stream_profiles(0).latency().interval_size_usec(), - 1000); - ASSERT_EQ(actual.input_stream_profiles(0).latency().num_intervals(), 3); + EXPECT_THAT(actual, EqualsProto(R"( + name: "DummyTestCalculator" + process_runtime { + total: 0 + interval_size_usec: 1000 + num_intervals: 3 + count: 0 + count: 0 + count: 0 + } + process_input_latency { + total: 0 + interval_size_usec: 1000 + num_intervals: 3 + count: 0 + count: 0 + count: 0 + } + process_output_latency { + total: 0 + interval_size_usec: 1000 + num_intervals: 3 + count: 0 + count: 0 + count: 0 + } + input_stream_profiles { + name: "input_stream" + back_edge: false + latency { + total: 0 + interval_size_usec: 1000 + num_intervals: 3 + count: 0 + count: 0 + count: 0 + } + } + )")); } // Tests that Initialize() uses the ProfilerConfig in the graph definition. @@ -291,16 +311,17 @@ TEST_F(GraphProfilerTestPeer, InitializeConfigWithoutStreamLatency) { // Checks histogram_interval_size_usec and num_histogram_intervals. 
CalculatorProfile actual = GetCalculatorProfilesMap()->find(kDummyTestCalculatorName)->second; - ASSERT_EQ(actual.name(), kDummyTestCalculatorName); - ASSERT_FALSE(actual.has_open_runtime()); - ASSERT_FALSE(actual.has_close_runtime()); - - ASSERT_EQ(actual.process_runtime().interval_size_usec(), 1000); - ASSERT_EQ(actual.process_runtime().num_intervals(), 3); - - ASSERT_FALSE(actual.has_process_input_latency()); - ASSERT_FALSE(actual.has_process_output_latency()); - ASSERT_EQ(actual.input_stream_profiles().size(), 0); + EXPECT_THAT(actual, EqualsProto(R"( + name: "DummyTestCalculator" + process_runtime { + total: 0 + interval_size_usec: 1000 + num_intervals: 3 + count: 0 + count: 0 + count: 0 + } + )")); } // Tests that Initialize() reads all the configs defined in the graph @@ -633,10 +654,11 @@ TEST_F(GraphProfilerTestPeer, SetOpenRuntime) { simulation_clock->ThreadFinish(); ASSERT_EQ(profiles.size(), 1); - ASSERT_EQ(profiles[0].open_runtime(), 100); - ASSERT_FALSE(profiles[0].has_close_runtime()); - ASSERT_THAT(profiles[0].process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); + EXPECT_THAT(profiles[0], Partially(EqualsProto(R"( + name: "DummyTestCalculator" + open_runtime: 100 + process_runtime { total: 0 } + )"))); // Checks packets_info_ map hasn't changed. ASSERT_EQ(GetPacketsInfoMap()->size(), 0); } @@ -688,14 +710,29 @@ TEST_F(GraphProfilerTestPeer, SetOpenRuntimeWithStreamLatency) { ASSERT_EQ(profiles.size(), 2); CalculatorProfile source_profile = GetProfileWithName(profiles, "source_calc"); - ASSERT_EQ(source_profile.open_runtime(), 150); - ASSERT_FALSE(source_profile.has_close_runtime()); - ASSERT_THAT(source_profile.process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); - ASSERT_THAT(source_profile.process_input_latency(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); - ASSERT_THAT(source_profile.process_output_latency(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); + + EXPECT_THAT(source_profile, EqualsProto(R"( + name: "source_calc" + open_runtime: 150 + process_runtime { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + process_input_latency { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + process_output_latency { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + )")); // Check packets_info_ map has been updated. 
ASSERT_EQ(GetPacketsInfoMap()->size(), 1); @@ -736,11 +773,16 @@ TEST_F(GraphProfilerTestPeer, SetCloseRuntime) { std::vector profiles = Profiles(); simulation_clock->ThreadFinish(); - ASSERT_EQ(profiles.size(), 1); - ASSERT_FALSE(profiles[0].open_runtime()); - ASSERT_EQ(profiles[0].close_runtime(), 100); - ASSERT_THAT(profiles[0].process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); + EXPECT_THAT(profiles[0], EqualsProto(R"( + name: "DummyTestCalculator" + close_runtime: 100 + process_runtime { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + )")); } // Tests that SetCloseRuntime() updates |close_runtime| and doesn't affect other @@ -789,11 +831,39 @@ TEST_F(GraphProfilerTestPeer, SetCloseRuntimeWithStreamLatency) { ASSERT_EQ(profiles.size(), 2); CalculatorProfile source_profile = GetProfileWithName(profiles, "source_calc"); - ASSERT_FALSE(source_profile.open_runtime()); - ASSERT_EQ(source_profile.close_runtime(), 100); - ASSERT_THAT(source_profile.process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); - ASSERT_EQ(GetPacketsInfoMap()->size(), 1); + + EXPECT_THAT(source_profile, EqualsProto(R"( + name: "source_calc" + close_runtime: 100 + process_runtime { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + process_input_latency { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + process_output_latency { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + input_stream_profiles { + name: "input_stream" + back_edge: false + latency { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 0 + } + } + )")); PacketInfo expected_packet_info = {0, /*production_time_usec=*/1000 + 100, /*source_process_start_usec=*/1000 + 0}; @@ -933,10 +1003,15 @@ TEST_F(GraphProfilerTestPeer, AddProcessSample) { simulation_clock->ThreadFinish(); ASSERT_EQ(profiles.size(), 1); - ASSERT_THAT(profiles[0].process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/150, {1})))); - ASSERT_FALSE(profiles[0].has_open_runtime()); - ASSERT_FALSE(profiles[0].has_close_runtime()); + EXPECT_THAT(profiles[0], EqualsProto(R"( + name: "DummyTestCalculator" + process_runtime { + total: 150 + interval_size_usec: 1000000 + num_intervals: 1 + count: 1 + } + )")); // Checks packets_info_ map hasn't changed. ASSERT_EQ(GetPacketsInfoMap()->size(), 0); } @@ -985,12 +1060,27 @@ TEST_F(GraphProfilerTestPeer, AddProcessSampleWithStreamLatency) { ASSERT_EQ(profiles.size(), 2); CalculatorProfile source_profile = GetProfileWithName(profiles, "source_calc"); - ASSERT_THAT(source_profile.process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/150, {1})))); - ASSERT_THAT(source_profile.process_input_latency(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {1})))); - ASSERT_THAT(source_profile.process_output_latency(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/150, {1})))); + + EXPECT_THAT(profiles[0], Partially(EqualsProto(R"( + process_runtime { + total: 150 + interval_size_usec: 1000000 + num_intervals: 1 + count: 1 + } + process_input_latency { + total: 0 + interval_size_usec: 1000000 + num_intervals: 1 + count: 1 + } + process_output_latency { + total: 150 + interval_size_usec: 1000000 + num_intervals: 1 + count: 1 + } + )"))); // Check packets_info_ map has been updated. 
ASSERT_EQ(GetPacketsInfoMap()->size(), 1); @@ -1019,22 +1109,24 @@ TEST_F(GraphProfilerTestPeer, AddProcessSampleWithStreamLatency) { CalculatorProfile consumer_profile = GetProfileWithName(profiles, "consumer_calc"); - ASSERT_THAT(consumer_profile.process_runtime(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/250, {1})))); - ASSERT_THAT(consumer_profile.process_input_latency(), - Partially(EqualsProto(CreateTimeHistogram( - /*total=*/2000 - when_source_started, {1})))); - ASSERT_THAT(consumer_profile.process_output_latency(), - Partially(EqualsProto(CreateTimeHistogram( - /*total=*/2000 + 250 - when_source_started, {1})))); - ASSERT_EQ(consumer_profile.input_stream_profiles().size(), 2); - // For "stream_0" should have not changed since it was empty. - ASSERT_THAT(consumer_profile.input_stream_profiles(0).latency(), - Partially(EqualsProto(CreateTimeHistogram(/*total=*/0, {0})))); - // For "stream_1" - ASSERT_THAT(consumer_profile.input_stream_profiles(1).latency(), - Partially(EqualsProto(CreateTimeHistogram( - /*total=*/2000 - when_source_finished, {1})))); + + // process input latency total = 2000 (end) - 1000 (when source started) = + // 1000 process output latency total = 2000 (end) + 250 - 1000 (when source + // started) = 1250 For "stream_0" should have not changed since it was empty. + // For "stream_1" = 2000 (end) - 1250 (when source finished) = 850 + EXPECT_THAT(consumer_profile, Partially(EqualsProto(R"( + name: "consumer_calc" + process_input_latency { total: 1000 } + process_output_latency { total: 1250 } + input_stream_profiles { + name: "stream_0" + latency { total: 0 } + } + input_stream_profiles { + name: "stream_1" + latency { total: 850 } + } + )"))); // Check packets_info_ map for PacketId({"stream_1", 100}) should not yet be // garbage collected. diff --git a/mediapipe/framework/profiler/trace_buffer.h b/mediapipe/framework/profiler/trace_buffer.h index 167bc2a89..c435d0d52 100644 --- a/mediapipe/framework/profiler/trace_buffer.h +++ b/mediapipe/framework/profiler/trace_buffer.h @@ -39,9 +39,20 @@ inline const void* GetPacketDataId(const HolderBase* holder) { struct TraceEvent { using EventType = GraphTrace::EventType; // GraphTrace::EventType constants, repeated here to match GraphProfilerStub. 
- static const EventType UNKNOWN, OPEN, PROCESS, CLOSE, NOT_READY, - READY_FOR_PROCESS, READY_FOR_CLOSE, THROTTLED, UNTHROTTLED, CPU_TASK_USER, - CPU_TASK_SYSTEM, GPU_TASK, DSP_TASK, TPU_TASK; + static constexpr EventType UNKNOWN = GraphTrace::UNKNOWN; + static constexpr EventType OPEN = GraphTrace::OPEN; + static constexpr EventType PROCESS = GraphTrace::PROCESS; + static constexpr EventType CLOSE = GraphTrace::CLOSE; + static constexpr EventType NOT_READY = GraphTrace::NOT_READY; + static constexpr EventType READY_FOR_PROCESS = GraphTrace::READY_FOR_PROCESS; + static constexpr EventType READY_FOR_CLOSE = GraphTrace::READY_FOR_CLOSE; + static constexpr EventType THROTTLED = GraphTrace::THROTTLED; + static constexpr EventType UNTHROTTLED = GraphTrace::UNTHROTTLED; + static constexpr EventType CPU_TASK_USER = GraphTrace::CPU_TASK_USER; + static constexpr EventType CPU_TASK_SYSTEM = GraphTrace::CPU_TASK_SYSTEM; + static constexpr EventType GPU_TASK = GraphTrace::GPU_TASK; + static constexpr EventType DSP_TASK = GraphTrace::DSP_TASK; + static constexpr EventType TPU_TASK = GraphTrace::TPU_TASK; absl::Time event_time; EventType event_type = UNKNOWN; bool is_finish = false; diff --git a/mediapipe/framework/profiler/trace_builder.cc b/mediapipe/framework/profiler/trace_builder.cc index e609e6dcb..197472b32 100644 --- a/mediapipe/framework/profiler/trace_builder.cc +++ b/mediapipe/framework/profiler/trace_builder.cc @@ -385,21 +385,21 @@ void TraceBuilder::CreateLog(const TraceBuffer& buffer, absl::Time begin_time, } void TraceBuilder::Clear() { impl_->Clear(); } -// Defined here since inline constants fail to link in android builds. -const TraceEvent::EventType // - TraceEvent::UNKNOWN = GraphTrace::UNKNOWN, - TraceEvent::OPEN = GraphTrace::OPEN, - TraceEvent::PROCESS = GraphTrace::PROCESS, - TraceEvent::CLOSE = GraphTrace::CLOSE, - TraceEvent::NOT_READY = GraphTrace::NOT_READY, - TraceEvent::READY_FOR_PROCESS = GraphTrace::READY_FOR_PROCESS, - TraceEvent::READY_FOR_CLOSE = GraphTrace::READY_FOR_CLOSE, - TraceEvent::THROTTLED = GraphTrace::THROTTLED, - TraceEvent::UNTHROTTLED = GraphTrace::UNTHROTTLED, - TraceEvent::CPU_TASK_USER = GraphTrace::CPU_TASK_USER, - TraceEvent::CPU_TASK_SYSTEM = GraphTrace::CPU_TASK_SYSTEM, - TraceEvent::GPU_TASK = GraphTrace::GPU_TASK, - TraceEvent::DSP_TASK = GraphTrace::DSP_TASK, - TraceEvent::TPU_TASK = GraphTrace::TPU_TASK; +// Defined here since constexpr requires out-of-class definition until C++17. +const TraceEvent::EventType // + TraceEvent::UNKNOWN, // + TraceEvent::OPEN, // + TraceEvent::PROCESS, // + TraceEvent::CLOSE, // + TraceEvent::NOT_READY, // + TraceEvent::READY_FOR_PROCESS, // + TraceEvent::READY_FOR_CLOSE, // + TraceEvent::THROTTLED, // + TraceEvent::UNTHROTTLED, // + TraceEvent::CPU_TASK_USER, // + TraceEvent::CPU_TASK_SYSTEM, // + TraceEvent::GPU_TASK, // + TraceEvent::DSP_TASK, // + TraceEvent::TPU_TASK; } // namespace mediapipe diff --git a/mediapipe/framework/tool/tag_map.h b/mediapipe/framework/tool/tag_map.h index bdc250924..e2ec97599 100644 --- a/mediapipe/framework/tool/tag_map.h +++ b/mediapipe/framework/tool/tag_map.h @@ -127,6 +127,11 @@ class TagMap { std::vector names_; }; +// Equal TagData structs define equal id ranges. 
+inline bool operator==(const TagMap::TagData& d1, const TagMap::TagData& d2) { + return d1.id == d2.id && d1.count == d2.count; +} + } // namespace tool } // namespace mediapipe diff --git a/mediapipe/framework/tool/template_expander.cc b/mediapipe/framework/tool/template_expander.cc index 2597dd597..e2de6e3e7 100644 --- a/mediapipe/framework/tool/template_expander.cc +++ b/mediapipe/framework/tool/template_expander.cc @@ -567,6 +567,10 @@ class TemplateExpanderImpl { result = AsDict(args); } else if (expr.op() == "list") { result = AsList(args); + } else if (expr.op() == "size") { + return AsArgument(static_cast( + args[0].has_dict() ? args[0].mutable_dict()->arg_size() + : args[0].mutable_element()->size())); } return result; } diff --git a/mediapipe/framework/tool/template_parser.cc b/mediapipe/framework/tool/template_parser.cc index 62380bf19..2954566e8 100644 --- a/mediapipe/framework/tool/template_parser.cc +++ b/mediapipe/framework/tool/template_parser.cc @@ -1318,8 +1318,8 @@ bool IsInfixOperator(const std::string& token) { // A function-style operator, including a for or if expression. bool IsFunctionOperator(const std::string& token) { static auto kTokens = new std::set{ - "min", "max", "for", "if", "!", - "concat", "lowercase", "uppercase", "dict", "list", + "min", "max", "for", "if", "!", "concat", + "lowercase", "uppercase", "size", "dict", "list", }; return kTokens->count(token) > 0; } diff --git a/mediapipe/gpu/gl_simple_shaders.h b/mediapipe/gpu/gl_simple_shaders.h index 3fed608ad..8bc612ddd 100644 --- a/mediapipe/gpu/gl_simple_shaders.h +++ b/mediapipe/gpu/gl_simple_shaders.h @@ -101,6 +101,10 @@ static const GLfloat kBasicTextureVertices[] = { 1.0f, 1.0f, // top right }; +// Places a texture on kBasicSquareVertices, flipped horizontally. +static const GLfloat kBasicTextureVerticesFlipX[] = { + V4(kBasicTextureVertices, 1, 0, 3, 2)}; + // Places a texture on kBasicSquareVertices, flipped vertically. 
static const GLfloat kBasicTextureVerticesFlipY[] = { V4(kBasicTextureVertices, 2, 3, 0, 1)}; diff --git a/mediapipe/graphs/youtube8m/BUILD b/mediapipe/graphs/youtube8m/BUILD index be0fff44c..c697d16c0 100644 --- a/mediapipe/graphs/youtube8m/BUILD +++ b/mediapipe/graphs/youtube8m/BUILD @@ -44,3 +44,30 @@ cc_library( "//mediapipe/calculators/video:opencv_video_decoder_calculator", ], ) + +cc_library( + name = "yt8m_inference_calculators_deps", + deps = [ + "//mediapipe/calculators/core:concatenate_vector_calculator", + "//mediapipe/calculators/core:dequantize_byte_array_calculator", + "//mediapipe/calculators/core:packet_cloner_calculator", + "//mediapipe/calculators/core:side_packet_to_stream_calculator", + "//mediapipe/calculators/core:string_to_int_calculator", + "//mediapipe/calculators/tensorflow:lapped_tensor_buffer_calculator", + "//mediapipe/calculators/tensorflow:string_to_sequence_example_calculator", + "//mediapipe/calculators/tensorflow:tensor_to_vector_float_calculator", + "//mediapipe/calculators/tensorflow:tensorflow_inference_calculator", + "//mediapipe/calculators/tensorflow:tensorflow_session_from_saved_model_calculator", + "//mediapipe/calculators/tensorflow:tfrecord_reader_calculator", + "//mediapipe/calculators/tensorflow:unpack_media_sequence_calculator", + "//mediapipe/calculators/tensorflow:unpack_yt8m_sequence_example_calculator", + "//mediapipe/calculators/tensorflow:vector_float_to_tensor_calculator", + "//mediapipe/calculators/tensorflow:vector_int_to_tensor_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:labels_to_render_data_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/calculators/util:top_k_scores_calculator", + "//mediapipe/calculators/video:opencv_video_decoder_calculator", + "//mediapipe/calculators/video:opencv_video_encoder_calculator", + ], +) diff --git a/mediapipe/graphs/youtube8m/label_map.txt b/mediapipe/graphs/youtube8m/label_map.txt new file mode 100644 index 000000000..8321ec772 --- /dev/null +++ b/mediapipe/graphs/youtube8m/label_map.txt @@ -0,0 +1,3862 @@ +Game +Video game +Vehicle +Concert +Musician +Cartoon +Performance art +Car +Dance +Guitar +String instrument +Food +Association football +Musical ensemble +Music video +Animal +Animation +Motorsport +Pet +Racing +Recipe +Mobile phone +Cooking +Smartphone +Gadget +Trailer (promotion) +Toy +Minecraft +Drum kit +Cuisine +Motorcycle +Piano +Dish (food) +Drum +Acoustic guitar +Action-adventure game +Call of Duty +Electric guitar +Drummer +Cosmetics +Keyboard instrument +Choir +Strategy video game +Fishing +Aircraft +Train +Airplane +Pianist +Sports car +Art +Hair +Rail transport +Basketball +Cycling +Orchestra +Motorcycling +Transport +Musical keyboard +Bicycle +Fish +Outdoor recreation +Disc jockey +Machine +Sports game +Radio-controlled model +Hairstyle +Fashion +Dog +Skateboarding +Fighting game +Basketball moves +Wedding +Skateboard +IPhone +Personal computer +Truck +Boat +Railroad car +Snare drum +American football +Drawing +Pokémon +Winter sport +Tractor +Naruto +Grand Theft Auto V +Cymbal +Horse +House +Festival +Engine +Highlight film +Boxing +World of Warcraft +Call of Duty: Black Ops II +Four-wheel drive +Bird +Violin +Skateboarding trick +Christmas +Weight training +Recreational fishing +Warcraft +Ice skating +Driving +Video game console +Microsoft Windows +Airline +Pokémon (video game series) +Landing +Combat +League of Legends +Vegetable +Model aircraft +Airliner +Samsung 
Galaxy +Sport utility vehicle +Electronic keyboard +Hockey +Radio-controlled aircraft +??? +Eye shadow +Cooking show +Dessert +Battlefield (series) +Slam dunk +Plant +Painting +Drifting (motorsport) +Rallying +Lego +Tablet computer +Call of Duty: Modern Warfare 2 +Comedy (drama) +Grand Theft Auto: San Andreas +Off-road vehicle +The Walt Disney Company +Locomotive +Takeoff +RuneScape +Puppy +Amusement park +Call of Duty: Modern Warfare 3 +Motocross +Dragon Ball +Airport +Photography +Call of Duty: Black Ops +Shoe +Radio-controlled car +Sonic the Hedgehog +Skatepark +Bride +First-person shooter +Accordion +Jet aircraft +Mascara +Halo (series) +Camera +Final Fantasy +Skiing +Gym +Aviation +Mountain bike +Marching band +??? +Extreme sport +FIFA 15 +Brass instrument +Sasuke Uchiha +Cat +Sedan (automobile) +Pickup truck +Meat +BMW +Parade +Cake +Supercar +Aquarium +Weather +Weapon +Nail (anatomy) +Surfing +PlayStation 3 +Room +Call of Duty 4: Modern Warfare +Helicopter +Laptop +Saxophone +Star Wars +Goku +Hotel +Xbox 360 +Arcade game +Doll +News presenter +Exhaust system +Volkswagen +Hatchback +Action figure +Computer +Carnival +Lipstick +Wii +Sonic the Hedgehog (character) +School +Ballet +Eye liner +Heavy equipment +IPad +Running +Baking +Rapid transit +Coupé +Road bicycle +Card game +Nail polish +Playing card +Bus +Counter-Strike (video game) +Gardening +Outline of meals +Nail art +Tank +??? +Bollywood +Tennis +Ship +BMX bike +Drink +Grand Theft Auto IV +Snowboarding +Mountain biking +Rouge (cosmetics) +Super Smash Bros. +??? +Street Fighter +Stadium +Underwater +Hunting +Kickflip +Metin2 +The Sims +Viola +Pony +PlayStation 4 +Television +??? +Beach +Manicure +Chocolate +Wood +Snow +Sneakers +??? +Roller coaster +Afro-textured hair +Timbales +Need for Speed +Robot +Paper +Gymnastics +Farm +Diatonic button accordion +Fighter aircraft +Sketch (drawing) +Mercedes-Benz +Chevrolet +Batman +Loudspeaker +Tool +Nike, Inc. +Race track +Ski +Underwater diving +Computer hardware +Garden +Paint +Cello +Digital camera +Scooter (motorcycle) +Motorboat +Harry Potter +??? +GoPro +Assassin's Creed +Fishing rod +Battlefield 3 +IPod +Nature +Dota 2 +Tree +My Little Pony +Dress +Xbox One +Train station +Firefighter +Jeep +Rail transport modelling +Resort +Flute +Touhou Project +Fruit +Chicken as food +Knife +Dashcam +Clash of Clans +Kitchen +Slide show +The Legend of Zelda +Fireworks +Swimming pool +Rugby football +Building +Kitten +Television advertisement +??? 
+Battlefield 4 +Horse racing +MapleStory +Subwoofer +Flour +IPod Touch +World of Tanks +Music festival +Comedian +Figurine +Kingdom Hearts +Manga +Wrestling +Trumpet +Xbox +Model (person) +Jumping +Dough +FIFA 13 +Pro Evolution Soccer +Resident Evil +Eye +Guitar Hero +Enduro +Home appliance +News program +Watch +Audi +Off-road racing +Ice dancing +Construction +Organ (music) +PlayStation Portable +Figure skating +Fiddle +WWE 2K +Climbing +Spider-Man +Braid +Muscle +The Elder Scrolls V: Skyrim +Nintendo 3DS +Fire +Human swimming +BMW Motorrad +One Piece +Wildlife +Apartment +Dressage +Scuba diving +Call of Duty: Ghosts +Eating +Kickboxing +Egg as food +Origami +The Elder Scrolls +Ford Mustang +Fishing lure +Light +Running back +Air force +M.U.G.E.N +Transformers +Living room +Soldier +Bag +Ballroom dance +Gohan +Kayak +Sheet music +Destiny (video game) +Wall +Church (building) +Sewing +Chipmunk +Surfboard +Concealer +Drag racing +Mega Man +Walt Disney World +Chicken +Parachuting +Classic car +Furniture +Jewellery +Recreational vehicle +Call of Duty: Advanced Warfare +Street Fighter IV +Sakura Haruno +Restaurant +Halo 3 +Wheelie +Mario Kart +Headphones +Factory +Yu-Gi-Oh! Trading Card Game +Speedometer +Circus +Muscle car +Bedroom +Tekken +Graffiti +River +Lighting +Guitar amplifier +Knitting +Call of Duty: Zombies +PlayStation +Radio-controlled helicopter +Cookware and bakeware +Trail +Camping +University +Indian cuisine +Multiplayer online battle arena +Ball +Nightclub +Book +Lego minifigure +PlayStation 2 +Dodge +Garry's Mod +Camera lens +Hockey puck +Barbie +Thomas the Tank Engine +Go-kart +Vegetarian cuisine +Monster High +Yacht +Collectible card game +Auto Race (Japanese sport) +Role-playing game +Madden NFL +Unidentified flying object +Longboard (skateboard) +Toddler +Digital single-lens reflex camera +Xbox (console) +Rail freight transport +Honda Civic +Convertible +The Sims 2 +Lamborghini +Printer (computing) +Cream +Parrot +Tire +Quadcopter +Littlest Pet Shop +Wii U +Planet +??? +The Sims 3 +Sony Xperia +Salad +Sailboat +Cruise ship +Unmanned aerial vehicle +Naruto: Ultimate Ninja +Barbecue +Mortal Kombat +Slot machine +Longboarding +Halo: Reach +Paragliding +Bread +Monster Hunter +Stitch (textile arts) +Dofus +StarCraft II: Wings of Liberty +Game controller +Gears of War +Mud bogging +Snowboard +Synthesia +Wig +Road bicycle racing +Wheel +Macintosh +Home improvement +Printing +Insect +Road +Parachute +Cattle +Hair coloring +IPhone 4S +Advertising +Potato +Runway +Van +Zoo +Handheld game console +Water +Rock Band +Volkswagen Golf +Bathroom +Stunt performer +Bleach (manga) +Metal Gear +Santa Claus +Hiking +Samsung Electronics +Runway (fashion) +Elevator +Cricket +Gran Turismo (series) +Fire engine +Kinder Surprise +Play-Doh +Grilling +Eyelash +Table tennis +Fiat Automobiles +Dragon +Lion +Nintendo Entertainment System +PlayStation (console) +Stallion +Ice skate +Baseball park +Flamenco +Steam engine +Plough +Farming Simulator +Soup +Snowmobile +Mare +Counter-Strike: Source +Sail +Squat (exercise) +Bass (fish) +Banjo +Harmonica +Quartet +Drum stick +IPhone 5 +Reptile +Prayer +T-shirt +Talent show +Rice +Roasting +Diablo III +CrossFire (video game) +Renault +Pizza +Trombone +Chevrolet Camaro +Barbell +Ryu (Street Fighter) +Clay +Beyblade +Lake +Sauce +??? 
+Cube +Forza (series) +Cookie +Taiko no Tatsujin +Mixtape +Medicine +Door +Monster +Call of Duty: World at War +Mud +Computer keyboard +Clarinet +Defense of the Ancients +Sora (Kingdom Hearts) +Computer monitor +Super Street Fighter IV +PlayStation Vita +Guild Wars +Album +Model car +Tenor saxophone +The Twilight Saga (film series) +Rubik's Cube +Sailor Moon +Teacher +Mixing console +Card manipulation +Combine harvester +Boeing 737 +Bull +Fish as food +Cheese +Concrete +Board game +Moped +Puzzle +Lego Star Wars +Poker +Portrait +Luigi +Dining room +Pokémon X and Y +Floor +Asus +Inuyasha +Livestock +Lawn mower +Tibia (video game) +Tabletop game +Iron Man +Tomato +Juice +Final Fantasy VII +Lip gloss +Super Smash Bros. Melee +Central processing unit +Sitcom +Cockpit +Emergency vehicle +FIFA 12 +Bodyboarding +Earth +The Lego Group +Ice cream +Microphone +Rallycross +Website +Table (furniture) +Ice +Magic: The Gathering +Ninja +Darth Vader +Saw +Mickey Mouse +Handbag +The King of Fighters +Ballet dancer +Samsung Galaxy Note series +Washing machine +Zee TV +Point Blank (2008 video game) +Gibson Les Paul +Dune buggy +DayZ (video game) +Television set +Dirt track racing +Edward Cullen +Beauty salon +Hetalia: Axis Powers +Vampire +Gliding +Batman: Arkham +Mountain +Rain +Shark +Waterfall +DarkOrbit +Bagpipes +Comics +Rock climbing +Skin +Arena +IPhone 4 +ARMA (series) +Super Smash Bros. for Nintendo 3DS and Wii U +Curry +Pasta +Halo 4 +Superman +Icing (food) +Google Nexus +Marathon +Deer +Guitar Hero III: Legends of Rock +Balloon +Goalkeeper (association football) +Red Bull +Nissan GT-R +Noodle +Fishing bait +Pencil +Plants vs. Zombies +Athlete +Computer case +Stretching +Terrier +Outer space +Textile +Mercedes-AMG +Hard disk drive +Biceps +Handball +Land Rover +Kamen Rider Series +Parakeet +Bear +Rim (wheel) +Chevrolet Corvette +Battery (electricity) +Milk +Roblox +BMW M3 +Christmas decoration +Moon +Microsoft Lumia +Combat Arms (video game) +Maize +Cargo +Headset (audio) +Bee +Helmet +Street art +Clown +Tattoo +Cupcake +Traxxas +Money +Hatsune Miku: Project DIVA +Bead +Angry Birds +Movieclips +Optimus Prime +MacBook +Mass Effect +Bowser (character) +Sega Genesis +Pachinko +Jedi +Jeep Wrangler +Dragon Ball Z: Budokai Tenkaichi +Tales (series) +Loader (equipment) +Water park +Beef +Sewing machine +Beer +Glass +Silage +Seafood +Gran Turismo 5 +Harp +Joker (comics) +Volkswagen Beetle +??? +BlackBerry +AdventureQuest Worlds +Bowling +Guild Wars 2 +Dragon Quest +Washing +Mermaid +Cue stick +Boot +Stir frying +Grand Theft Auto: Vice City +Penguin +Acrylic paint +Cocktail +Kingdom Hearts II +Coral +Borderlands 2 +Telephone +Gears of War (video game) +Far Cry +Tractor pulling +Rock Band (video game) +Crane (machine) +Updo +Stuffed toy +Lawn +Tekken (video game) +Airbus A320 family +IPhone 5S +Watercolor painting +Ten-pin bowling +Duck +Pokémon Trading Card Game +Oven +Subaru Impreza +Porsche 911 +Backpack +Carl Johnson (Grand Theft Auto) +German Shepherd +Turtle +Metal +Left 4 Dead +Ultralight aviation +Comic book +Batting (cricket) +Tram +Mower +Reef aquarium +??? 
+Swing (dance) +Lego City +Game Boy Advance +Diesel engine +Pitcher +Dance studio +Hamburger +Cake decorating +Left 4 Dead 2 +Bible +Candy +Vacuum cleaner +Pokémon Omega Ruby and Alpha Sapphire +Sowing +Roof +Donkey Kong +Trout +Coin +Tent +Digimon +Costume +Warface +Sandwich +BMW 3 Series +Star Wars: The Old Republic +Trampoline +Pipe organ +Latin dance +Aerobics +Aion: Upheaval +Supermoto +Netbook +Gift +Strum +Mitsubishi Lancer Evolution +Drum and bugle corps (modern) +Gramophone record +Gundam (mobile suit) +Euro Truck Simulator 2 +Tai chi +Teenage Mutant Ninja Turtles +Aerobatics +Wedding dress +Hair conditioner +Achievement (video gaming) +Boeing 777 +Shadow the Hedgehog +Boeing 747 +Simba +Silkroad Online +Kindergarten +Smartwatch +Computer mouse +Bell +Museum +Rabbit +Total War (series) +DVD +Devil May Cry +Face +Lathe +Five Nights at Freddy's +Logging +String quartet +Bridge +Super Mario Bros. +Fishing reel +Badminton +Clock +Stove +Wine +Subaru +Leather +IPad 2 +Terraria +Attack on Titan +Bottle +Kick +Police officer +Raw foodism +Video card +Alpine skiing +String (music) +StarCraft (video game) +Roadster (automobile) +Steak +Hearthstone (video game) +Solo dance +Foreign exchange market +God of War (series) +Hulk (comics) +Easter egg +Ceiling +Yo-kai Watch +Wakeboarding +Monster truck +McDonald's +Assassin's Creed III +Chopper (motorcycle) +Largemouth bass +Roller skating +Glider (aircraft) +Jacket +Marimba +Christmas tree +Sand +Afro +MacBook Pro +Booster pack +Dark Souls II +Bartender +Quarterback +Illustration +ARMA 2 +Star Trek +Itachi Uchiha +Hot rod +Saints Row +Freeza +Need for Speed: Most Wanted (2012 video game) +Hair twists +Super Mario World +Crash Bandicoot +Pork +Shampoo +Mask +Hair iron +Marvel vs. Capcom +Castlevania +Halo 2 +Battery charger +Tower defense +BBC +Kawasaki motorcycles +Link (The Legend of Zelda) +Muffler +Nintendo 64 +Marriage proposal +Fingerboard (skateboard) +Beehive +Pokémon HeartGold and SoulSilver +Bowling ball +Tower of Saviors +Artificial nails +Final Fantasy XIII +Chair +Hijab +Juggling +Nissan Skyline +Anpanman +Car wash +Kite +Diablo (video game) +Resident Evil 4 +Candy Crush Saga +Rocket +Video game arcade cabinet +Whale +Glider (sailplane) +Flooring +Kingdom Hearts (video game) +??? +Fast food +Mandolin +Metal detector +Cinema 4D +Ash Ketchum +Router (computing) +Yamaha YZF-R1 +Uncharted +DC Comics +Egg +Lexus +Ollie (skateboarding) +Hamster +Chainsaw +Galaxy +Embroidery +Suite (hotel) +Brush +Electronic drum +Gran Turismo 6 +NBA 2K15 +Dolphin +Salmon +Window +Drill +Pen +Backpacking (wilderness) +Torte +Web page +Dreadlocks +Hot Wheels +Brake +Tuba +Volcano +Ibiza +Dragon Age +Mini +Perfect World (video game) +Knot +Tails (character) +Thunderstorm +Video camera +Smoothie +Crossover (automobile) +Condominium +Desert +Pump +Strawberry +Coffeemaker +The Legend of Zelda: Ocarina of Time +Tarot +Architecture +Portal (video game) +Dynasty Warriors +Lightning McQueen +Pirates of the Caribbean (film series) +Tile +Battlefield: Bad Company 2 +Sketch comedy +Aikido +V8 engine +Sailor Moon (character) +Lamborghini Aventador +Carp fishing +Kirby (series) +Banana +Police car +Laser lighting display +Necklace +??? 
+WWE '13 +Mini (marque) +Tanki Online +Oil +Radio-controlled boat +Dinosaur +Pie +President of the United States +NBA 2K14 +Labrador Retriever +Blender +Plarail +Captain America +Electric locomotive +Street racing +Need for Speed: Most Wanted (2005 video game) +Canoe +Golf club +Sheep +Bar +CDJ +Lace +Gold +Glove +Halo: Combat Evolved +Alphabet +Fender Telecaster +IPhone 3GS +Beadwork +Personal water craft +Dietary supplement +James Bond +Ragnarok Online +French braid +Road racing +Star +Dean Winchester +Snake +Seed +Christmas lights +Plaster +Trunks (Dragon Ball) +Forage harvester +Cartoon Network +Honda CBR series +Battlefield Hardline +Tekken 6 +Glitter +Ford Focus +Roland V-Drums +Ski-Doo +Tyrannosaurus +New Super Mario Bros. +Cue sports +Rainbow Loom +Samsung Galaxy S III +Glasses +Italian cuisine +RollerCoaster Tycoon 3 +Pig +Lock (security device) +The Lord of the Rings (film series) +Military parade +Elephant +Pull-up (exercise) +Eyelash extensions +Ring (jewellery) +Minivan +Coca-Cola +Mural +Love song +Portal 2 +Mortal Kombat (2011 video game) +Yarn +Pokémon Ruby and Sapphire +Dragon Nest +Japanese cuisine +Resident Evil 5 +Jeans +Map +Pikachu +Sun +Pond +Bulldog +Greenhouse +Škoda Auto +Baby transport +Apple +The Doctor (Doctor Who) +Turbine +Naruto: Ultimate Ninja Storm +Watch Dogs +VHS +Ariel (Disney) +Sculpture +Bulldozer +Transformice +Sushi +Home run +Fountain +Slopestyle +Fullmetal Alchemist +Ultimate Marvel vs. Capcom 3 +Automotive lighting +Lightsaber +Chevrolet Silverado +Honey +Wangan Midnight +Sword +Toilet +Super Mario Galaxy +Akuma (Street Fighter) +Shiva +Bed +Toy train +Manufacturing +Ram Trucks +Stuffing +Biscuit +Kia Motors +Spa +Samsung Galaxy S II +Demolition +Airbus A330 +Breakfast +Airbus A380 +Pancake +Kawasaki Ninja +Mitsubishi Lancer +Mushroom +Grand Theft Auto: The Lost and Damned +Microsoft Flight Simulator +Spacecraft +Logo +Stock car racing +Goat +Pool (cue sports) +Assassin's Creed (video game) +Majin Boo +Vespa +??? +Samsung Galaxy S4 +Assassin's Creed IV: Black Flag +Batman: Arkham City +Monkey +Death Note +WWE 2K15 +Pumpkin +Shopping mall +Rose +Cola +Minnie Mouse +Caporales +Jet Ski +World of Warcraft: Wrath of the Lich King +Winter +Prom +Karaoke box +Minibike +RFactor +Art exhibition +Plush +Chocolate cake +Ford F-Series +Soap +Knuckles the Echidna +Dump truck +Giant panda +Dance Dance Revolution +Princess +Street food +Flashlight +Animal Crossing +Pilates +Pipe band +Toyota Land Cruiser +Lara Croft +Jumbotron +Ferrari F430 +Cell (Dragon Ball) +BMW 3 Series (E36) +Injustice: Gods Among Us +Dumbbell +Samsung Galaxy Tab series +Bodyweight exercise +Penalty kick (association football) +Lizard +City +Bionicle +Kirby (character) +WWE 2K14 +Pokémon Battle Revolution +Sonic the Hedgehog (1991 video game) +Alliance of Valiant Arms +Racket (sports equipment) +K-1 +Acer Inc. +Recorder (musical instrument) +Earring +National park +The Elder Scrolls IV: Oblivion +Audi R8 +Clothes dryer +Military band +Silver +Warcraft III: Reign of Chaos +Classroom +Samsung Galaxy S5 +Black cat +Scarf +Kratos (God of War) +Skylanders +Super Robot Wars +Electric car +Video lesson +Smoking (cooking) +Antenna (radio) +Sonic Generations +Butter +Chess +Hello Kitty +Goldfish +Carrot +Blu-ray +Squirrel +Balloon (aeronautics) +Microwave oven +Range Rover +Wool +TalesRunner +IPad Mini +Pokémon Emerald +Inflatable boat +Bull riding +Football boot +Gears of War 2 +Bugatti Veyron +Airbrush +Brick +Avengers (comics) +Plants vs. 
Zombies 2: It's About Time +United States Navy +Ball (association football) +Volkswagen Gol +Yo-yo +Forza Motorsport 4 +Logitech +Shirt +Golden Retriever +Alarm device +Water slide +Paramotor +Fondant icing +Acrobatic gymnastics +Coach (sport) +The Witcher 3: Wild Hunt +Tabla +Kinect +Zee Bangla +??? +Cabinetry +Quilt +Claw crane +Spyro (series) +Yoshi +Tekken Tag Tournament 2 +Diamond +Samsung Galaxy S series +BMW 3 Series (E46) +Tiger +Number +Traffic +Metalworking +Haruhi Suzumiya +Gown +Luxury yacht +Yuna (Final Fantasy) +Station wagon +Softball +The Legend of Zelda: Twilight Princess HD +Dungeon Fighter Online +Plasticine +LG Optimus series +Source (game engine) +Battlefield 2 +BMW 3 Series (E30) +Ink +Half-Life 2 +Hitman (series) +Inline skates +Remote control +Mercedes-Benz C-Class +The Sims 4 +Harlem Shake (meme) +Magic Kingdom +Dune +Prince of Persia +Final Fantasy XIV +Marvel Universe +Draco Malfoy +Ram Pickup +DC Universe Online +Assassin's Creed II +Mars +Xylophone +Dragon Age: Inquisition +Game Boy +Carpet +Roxas (Kingdom Hearts) +Balance beam +Mass Effect 2 +Dragon Ball Xenoverse +Call of Duty: Black Ops – Zombies +Cadillac +Guinea pig +The Hobbit (film series) +Need for Speed: World +Pastry +Chapel +Rayman +Armour +Mouse +Assassin's Creed: Brotherhood +Lord Voldemort +Magnet +The Sims (video game) +Rubber band +Grocery store +Reborn doll +Ford GT +WWE '12 +PlanetSide 2 +Jaguar Cars +Volvo Cars +Jeep Cherokee (SJ) +Homer Simpson +USB flash drive +Torero +Persona (series) +Model railroad layout +Buttercream +Serve (tennis) +Ferrari 458 +Honda Accord +Chevrolet Impala +Command & Conquer +Warframe +Chrysler (brand) +Standup paddleboarding +Pretty Cure +Campsite +Final Fantasy VIII +Audi A4 +Sailing ship +Rafting +Custom car +Belle (Disney) +Rowing (sport) +Jeep Grand Cherokee +Wire +BMW M5 +Hula hoop +Pinball +Spaghetti +Monster Hunter Freedom Unite +Far Cry 4 +Pro Evolution Soccer 2015 +Test Drive (series) +Motorcycle helmet +Router (woodworking) +Cave +Cheesecake +Birthday cake +Suzuki Jimny +New Super Mario Bros. Wii +Ezio Auditore da Firenze +Fisherman +Mime artist +Roller skates +Pump It Up (video game series) +Dissidia Final Fantasy +Supercharger +Gemstone +Titanfall +Downhill +Medal +Garbage truck +Forehand +Heroes of Newerth +Plastic +??? +Astronaut +Guitar Hero World Tour +ArcheAge +Lowrider +Police dog +Toyota Corolla +Ford Fiesta +Helmet camera +Cabal Online +Assassin's Creed Unity +Ceramic +Kidō Senshi Gundam: Senjō no Kizuna +Hot air balloon +Shower +Donald Duck +Multi Theft Auto +Rock Band 3 +Porsche 911 GT3 +Stick figure +Sled +Lemon +Frog +Mexican Creole hairless pig +Forklift +Dog agility +Kettlebell +Shelby Mustang +Candle +Bowling (cricket) +Kick (football) +Electric vehicle +Oboe +Desktop computer +Wing Chun +Statue +DayZ (mod) +Eagle +Fire station +Nike Air Max +Rage (video game) +Woodturning +Fireplace +Volkswagen Jetta +Madison Square Garden +Fly tying +Spore (2008 video game) +Hammond organ +Sam Winchester +The Pink Panther +Saints Row: The Third +Cherry blossom +Doraemon +WWE action figures +Marvel vs. 
Capcom 3: Fate of Two Worlds +Bugatti Automobiles +Fire Emblem +Border Collie +Aircraft carrier +Snow blower +Culinary art +Ken Masters +Seafight +Sport bike +Dentist +Easter egg (media) +Joystick +Tuna +Crysis 2 +Audi Quattro +Academy Awards +Ponytail +Ramen +Hummer +Fishing tackle +Final Fantasy X-2 +Coupon +Porsche Carrera +Wood carving +Rocksmith +Wallet +Refrigerator +Koi +Battlefield Heroes +Phonograph +Onion +Biceps curl +Trainz +Hat +Jubeat +Nissan Skyline GT-R +Mattel +GameCube +LittleBigPlanet 2 +Epiphone +Inazuma Eleven +Soft tennis +Killer whale +Hair straightening +Merienda +The Witcher (video game) +Skate (video game) +Live for Speed +Rooster +Chihuahua (dog) +Triangle +Land Rover Defender +Marvel Legends +Trousers +SD Gundam Capsule Fighter +Ratchet & Clank +Doughnut +Hatsune Miku: Project DIVA F +Bouzouki +Domestic canary +Half-Life (video game) +Raven (comics) +Black Butler +Mario Kart 8 +Chili pepper +BMW 5 Series +Hail +Ouran High School Host Club +Brain +Chinese cuisine +Playmobil +Model building +Ribbon +Pit bike +Sonic Unleashed +Solar panel +Orange (fruit) +Otis Elevator Company +Mu Online +Hang gliding +Path of Exile +Animal Crossing: New Leaf +Steel guitar +Sword Art Online +Lego Ninjago +Paddle +Second Life +Aikatsu! +IPhone 5C +Gothic (series) +Batman: Arkham Asylum +Carburetor +Crab +Espresso machine +The Phantom of the Opera (1986 musical) +Hellsing +Spider +Super Mario Galaxy 2 +Duel Masters Trading Card Game +Drywall +Laundry +United States Air Force +Assassin's Creed: Revelations +Corel +Omelette +Composer +Ford Escort (Europe) +Grape +Honda CB600F +Tea +Elmo +Temple +Need for Speed: Carbon +Catamaran +Perfect World (company) +Skate 3 +Missile +Infomercial +Chevrolet Chevelle +Airport terminal +Crysis (video game) +StepMania +Red Dead Redemption +Atari +Couch +The Idolmaster +Beatmania IIDX +Big wave surfing +Tokyo Mew Mew +Wheat +Warhammer Fantasy Battle +Rock (geology) +Snowplow +Submarine +Doctor Eggman +Wood flooring +Bangs (hair) +Yamaha YZF-R6 +Pontiac Firebird +Red Dead +Field hockey +Vineyard +Waterfowl hunting +Domestic pigeon +Toyota Hilux +CNET +Preacher +Sonic Adventure +Lamborghini Murciélago +Marinera +Screen printing +Crazyracing Kartrider +The Legend of Zelda: Majora's Mask +Sunglasses +Log cabin +Fungus +Wedding photography +Flag +Devil May Cry 4 +Cappuccino +Flamenco guitar +Projector +Rock dove +The Elder Scrolls Online +LittleBigPlanet (2008 video game) +Digital video recorder +Djembe +Vending machine +Mehndi +Telescope +Flyff +Pattern (sewing) +Stairs +Nissan 350Z +Cell (biology) +Need for Speed: Underground 2 +Incandescent light bulb +Gallon +Greeting card +Balloon modelling +Sensor +Realm of the Mad God +Nest +Writing +Logic Pro +Opel Astra +Campervan +Cooked rice +Muffin +Wind power +Hedgehog +Soft drink +Calculator +Harness racing +Buick +Beast (Disney) +Destroyer +Point guard +Forza Horizon +Mercedes-Benz SLS AMG +Supermarket +Catfish +Final Fantasy XI +The Last of Us +Battleship +Dodge Challenger +Peter Pan +Metal Gear Solid 4: Guns of the Patriots +Toyota 86 +Bakery +Compact disc +Backhoe +Saddle +Total Drama Island +Erhu +Bumblebee (Transformers) +Cajón +Beatmania +Ice rink +Child safety seat +Honda S2000 +Samsung Galaxy Note II +Higurashi When They Cry +Union Pacific Railroad +BMW 3 Series (E90) +V6 engine +BlazBlue +Rottweiler +Necktie +Image scanner +White-tailed deer +TV4 (Sweden) +Bishop +Need for Speed: Hot Pursuit (2010 video game) +Princess Peach +Rust (video game) +Doom (1993 video game) +Fender Custom Shop +Smite (video 
game) +Nissan Silvia +??? +Pudding +Sephiroth (Final Fantasy) +Irish dance +MacBook Air +Commodore 64 +IMac +Space Shuttle +Automobile repair shop +Collie +Dragon Age: Origins +Sangokushi Taisen +Calligraphy +Black belt (martial arts) +??? +Valve +Crisis Core: Final Fantasy VII +Two-stroke engine +Killzone (series) +Full moon +Hunter × Hunter +New York City Subway +Latte +Mercedes-Benz S-Class +Tetris +Samurai +Predator (alien) +Arabian horse +Mercedes-Benz E-Class +Spinach +Dōjinshi +Polar bear +Body piercing +Amazon Kindle +Biology +Key (lock) +Mobile Suit Gundam: Extreme Vs. +Rappelz +Bobber (motorcycle) +Toy balloon +Mexican cuisine +Rope +Taco +Taxicab +Infestation: Survivor Stories +Clutch +PlayStation Network +Garage (residential) +Milkshake +Cloud Strife +Honda Integra +Eintopf +Primary school +Kingdom Hearts Birth by Sleep +Resident Evil (1996 video game) +Foal +GameSpot +Castle +Human hair color +Scorpion (Mortal Kombat) +Poultry +Poodle +Vans +Forza Horizon 2 +Zero (Mega Man) +Toyota Camry +Chemical reaction +Test Drive Unlimited 2 +Bacon +Mario Party +18 Wheels of Steel +Goose +Sausage +Compost +Cucumber +French horn +Analog synthesizer +Siamese fighting fish +??? +Las Vegas Strip +Crysis 3 +School bus +Oculus Rift +Carnival Cruise Line +Honda CBR600RR +Pokémon Red and Blue +Autobot +Christ (title) +Cockatiel +Ace Combat +Mazda MX-5 +Countertop +Safari +Final Fantasy XIV: A Realm Reborn +Track (rail transport) +Ganon +Two-wheel tractor +??? +Watermelon +Paper plane +Rainbow trout +??? +Tony Hawk's (series) +Korean cuisine +Lip balm +Angry Birds (video game) +Lead guitar +Pug +Monster Hunter Tri +Playground +God of War III +Herd +Niko Bellic +Bungee jumping +Soil +Subway Surfers +Hindu temple +Audi A6 +Hogwarts +Eggplant +Mabinogi (video game) +Sugar +Makeup brush +Rocksmith 2014 +Ocean +Asphalt (series) +Dental braces +Bob cut +Nissan 240SX +Cement +Sharpening +Leopard +United States Army +Tom and Jerry +Xbox 360 controller +Dragon Ball: Raging Blast 2 +Winnie the Pooh (franchise) +Trophy +Inazuma Eleven (manga) +Owl +Street Fighter II: The World Warrior +Golf ball +Floyd Mayweather Jr. vs. Manny Pacquiao +Belt (clothing) +Slender: The Eight Pages +Test Drive Unlimited +Super Mario Bros. 3 +Power supply +Retail +Venom (comics) +IPad (3rd generation) +Teddy bear +Denim +Baseball bat +Halo 3: ODST +Train Simulator (Dovetail Games) +Bowhunting +Lotus Cars +Pineapple +Boeing 737 Next Generation +Audi A3 +Dreamcast +City-building game +Diablo II +Suzuki Hayabusa +Gamepad +Electrical wiring +Kitchen stove +Yamaha Aerox +Monster Hunter Portable 3rd +BMX racing +Katara (Avatar: The Last Airbender) +HP Pavilion (computer) +Emirates (airline) +Amiga +Touchscreen +Winter storm +Driver (video game series) +Pac-Man +Fantage +Land Rover Discovery +Flash (photography) +Human back +Intermodal container +Infiniti +Guilty Gear +Animal shelter +Butterfly +Piccolo (Dragon Ball) +Bicycle frame +Boeing 787 Dreamliner +Toontown Online +Renault Mégane +Age of Empires +Canyon +Ski jumping +Lumber +Carousel +Phantasy Star Online 2 +Dodge Viper +Madden NFL 13 +A-18 Hornet +String trimmer +Mattress +Mixer (cooking) +Sub-Zero (Mortal Kombat) +Ford Ranger (North America) +ESPN +ABS-CBN News and Current Affairs +Synchronised swimming +G-Shock +??? +Angel +Champion +Horse show +??? 
+Rurouni Kenshin +Halo 5: Guardians +Coconut +Deep frying +Dollhouse +Campus +Volkswagen Golf Mk6 +Curtain +Mountain pass +Dojo +Boiler +PRS Guitars +Diesel locomotive +Monster Hunter 4 +French Bulldog +Prince (Prince of Persia) +Fixed-gear bicycle +Ninja Gaiden +Samsung Galaxy Note 3 +Opel Corsa +Jack Sparrow +Boeing 767 +Lexus IS +Tales of Symphonia +Autumn +Inline skating +Filter (aquarium) +Naruto Shippuden: Ultimate Ninja Storm Generations +Garmon +Flower bouquet +SimCity +Gravy +Bully (video game) +French fries +Kawasaki Ninja 250R +Rock fishing +Batman: Arkham Origins +Ceiling fan +Audi TT +Space Marines (Warhammer 40,000) +Acer Aspire +D.Gray-man +Duct tape +Electromagnetic coil +Heroes of the Storm +Tom Clancy's Ghost Recon +Sponge cake +Steelpan +Modem +The King of Fighters 2002 +Dying Light +Need for Speed: Shift +Riot Games +Rainbow +Bean +Chevrolet Opala +Reborn! +Floral design +Megatron +Kawasaki Ninja ZX-6R +Agriculture +Cottage +Television presenter +Metal Gear Solid V: The Phantom Pain +Juicing +BioShock +Plymouth (automobile) +Crêpe +Fist of the North Star +The Legend of Zelda: The Wind Waker +X-Men +Piston +Deck (building) +Nativity scene +Sega Saturn +Stardoll +Just Dance (video game) +Chun-Li +BMW R1200GS +LG G3 +Fisheye lens +Dragon Ball: Raging Blast +Big Boss (Metal Gear) +Dam +Gel +JBL +Dachshund +Bane (comics) +E-reader +The Lord of the Rings Online +Ferb Fletcher +Yeast +Monastery +Vampire Knight +Vodka +IPhone 3G +Tricycle +Metal Slug (series) +Steel +LED lamp +Geometry Dash +Dominoes +Gibson Les Paul Custom +Street Fighter III: 3rd Strike +Hay +Honda CR-X +Spray painting +Flip Video +Bald eagle +God of War II +Clay animation +Tomato sauce +Clone trooper +Beagle +Popcorn +Rubber stamp +Clannad (visual novel) +Fried rice +Moto G (1st generation) +Toyota Prius +Mega Man Battle Network +Doom II: Hell on Earth +Grand Theft Auto: Vice City Stories +Deadpool +Phantasy Star +Lock picking +Sugar paste +Chevrolet Caprice +??? +Herb +The Legend of Zelda: Skyward Sword +Domesticated turkey +Final Fantasy VI +BMW S1000RR +Mitsubishi Pajero +Mazda3 +IKEA +Chevrolet S-10 +Paper Mario +India TV +Tow truck +Orochimaru (Naruto) +Ape +Line (geometry) +Kawasaki Ninja ZX-10R +Aerosol spray +Power supply unit (computer) +Zucchini +Doberman Pinscher +Wolfenstein (series) +Contortion +Fertilizer +Cooler Master +Highway +Chocolate brownie +Street Fighter III +Tsubasa: Reservoir Chronicle +Parking +Olaf (Disney) +Frets on Fire +Multi-function printer +Suzuki GSX-R1000 +Lush (company) +Hang (instrument) +Nexus 7 (2012) +Skyscraper +Gorilla +Ōendan +Puff pastry +Crossbow +Forza Motorsport 5 +Uncharted 2: Among Thieves +Pokémon Mystery Dungeon +Closet +??? 
+Daytona International Speedway +VTEC +Cheerleading +Slot car +Garden railway +Albert Wesker +Naruto Shippuden: Ultimate Ninja Storm 2 +Sewing needle +Trials (series) +Sheriff Woody +K +Straw +Mitsubishi Eclipse +Frisbee +TrackMania +Manure +Chocolate chip +Cart +Borderlands: The Pre-Sequel +Diving +Wood-burning stove +Medal game +Chrono Trigger +Sherlock Holmes +Library +Volkswagen Golf Mk2 +Guzheng +Malinois dog +Goofy +Pedal steel guitar +Virtua Fighter 5 +Lego Marvel Super Heroes +Kantai Collection +Electric violin +Firewood +Devil May Cry 3: Dante's Awakening +Digital painting +Flair bartending +Boxer (dog) +Melon +Low-carbohydrate diet +Škoda Octavia +The Crew (video game) +Unicycle +GAZ +Gummy bear +Marker pen +Need for Speed: The Run +Dead Space (2008 video game) +Duke Nukem +Dirt 3 +Movie theater +Final Fantasy XIII-2 +Comet +WWE SmackDown vs. Raw 2010 +Gran Turismo 4 +Star Wars: Battlefront II +Lamb and mutton +Ant +Loki (comics) +Percy the Small Engine +Villain +Plumbing +Avocado +BioShock Infinite +Dormitory +Mango +Lucky Star (manga) +Shadow the Hedgehog (video game) +Cabbage +Peanut butter +Didgeridoo +Hard Rock Cafe +Donkey Kong Country +Amazon.com +Star Wars Battlefront (2015 video game) +Harpsichord +Aston Martin Vantage (2005) +Suzuki Swift +Crocodile +Jet engine +Sonic the Hedgehog 2 +Delta Air Lines +Harry Potter and the Deathly Hallows +Trunk (car) +Zangief +Brave Frontier +Chuck E. Cheese's +Iori Yagami +Robotics +Kebab +Cheeseburger +Hatsune Miku: Project DIVA F 2nd +Humbucker +Camcorder +Mega Man X (video game) +Landscape +Shih Tzu +Volkswagen Golf Mk4 +Pollution +Guppy +Coffeehouse +Killer Instinct +Crusher +Allods Online +??? +Boeing 757 +Eclipse +Meatball +Saints Row 2 +Roulette +Grand Theft Auto: Liberty City Stories +Walleye +Walmart +Bearing (mechanical) +Forest +Forever 21 +Canvas +Rat rod +Soulcalibur V +Sonic the Hedgehog (2006 video game) +Multirotor +??? +LG G2 +Moisturizer +Halo: The Master Chief Collection +SEAT León +Skylanders: Swap Force +Pan flute +Chevrolet Tahoe +Metal Gear Online +Fiat 126 +Mount & Blade: Warband +Kennel +Vibraphone +Satellite +Yamaha Raptor 700R +Sonic & Knuckles +Honda Fit +Caridea +Armored Core +Bull Terrier +Firefighting +Catwoman +Octopus +Fencing +Sitar +Limousine +Nintendo DSi +HTC One (M8) +McDonnell Douglas F-15 Eagle +Rat +GoldenEye 007 (1997 video game) +Gasoline +Ken (doll) +Quadracycle +Dead or Alive (series) +Microsoft Surface +Scooby-Doo +Landscape painting +Toyota Land Cruiser Prado +Hair removal +Sink +Mount & Blade +BMW 5 Series (E39) +Mewtwo +Mambo (music) +The Witcher 2: Assassins of Kings +North American P-51 Mustang +Alien (creature in Alien franchise) +Cloud +Forge +Christian Church +Tom Clancy's Rainbow Six +Mirror +Chevrolet Big-Block engine +Chevrolet Corvette (C6) +Abarth +Mazda RX-8 +Pendant +Metal Gear Solid 3: Snake Eater +Buffet +Haunted house +Cockatoo +Royal Air Force +The Embodiment of Scarlet Devil +LG G series +Fishing vessel +DualShock +Sonic Heroes +Drawer (furniture) +BMW 1 Series +Werewolf +DatPiff +Koi pond +Toyota Celica +Twelve-string guitar +Potato chip +Stargate +Killer Instinct (2013 video game) +Caramel +Sprite (computer graphics) +NHL 14 +Ham +Sky +Sweater +Chocolate chip cookie +stay night +Text (literary theory) +Skate 2 +Engraving +Final Fantasy XV +Cornrows +Light Yagami +Floristry +Sly Cooper +Volkswagen Golf Mk5 +Snowman +??? 
+Vox (musical equipment) +Happy Farm +Orc +Suit (clothing) +PC game +Ace Online +Saints Row IV +Slingshot +Dead Island +Ratchet (Ratchet & Clank) +Gears of War: Judgment +Dragon Quest X +Furby +Crayon Shin-chan +Soprano saxophone +Tifa Lockhart +European perch +Patio +Fried chicken +Sawmill +Mirror's Edge +Canon PowerShot +Guitar Hero: Warriors of Rock +Rome: Total War +Hummer H2 +Radar +Final Fantasy IV +Table saw +Barista +BMW 7 Series +Camel +Windows Media Video +Felt +Audi S4 +Cowboy +Molding (process) +Contact lens +Fiat Punto +The Hobbit +Indoor cycling +Sunset +??? +Persian cat +Hitman: Absolution +Battlefield: Bad Company +Eren Yeager +Sinterklaas +Crash Bandicoot (video game) +Midnight Club: Los Angeles +Metal Gear Rising: Revengeance +Hand-to-hand combat +Avon Products +Log splitter +Stormtrooper (Star Wars) +Epic Rap Battles of History +Shed +Walking +Belt (mechanical) +Hot dog +Sock +Chicken coop +Humpback whale +Character (arts) +Peugeot 106 +Toast +Princess Jasmine +Exercise ball +Fox +Green Lantern +Looney Tunes +Wedding ring +Tap (valve) +Charizard +Mii +Rolls-Royce Limited +Copic +Mega Man Zero (video game) +Jak and Daxter +Priston Tale +Glacier +IPod Nano +Banknote +Mario & Sonic at the Olympic Games +Hero Factory +Bamboo +Fillet (cut) +Stencil +Winch +Dogfight +Treadmill +Bassoon +Staffordshire Bull Terrier +Cardboard +Epiphone Les Paul +Compact Cassette +Gelatin +White House +Suitcase +MX vs. ATV +Clank (Ratchet & Clank) +Beach volleyball +Loadout +Batter (cooking) +Zack Fair +Cliff +Baggage +Cream cheese +Lantern +Naruto: Clash of Ninja +Treasure +Raccoon +Mini 4WD +Robotic vacuum cleaner +Gate +Ribs (food) +Oatmeal +Water filter +Super Mario Sunshine +Animal Crossing: City Folk +Driver's license +Asus ZenFone +American black bear +Little Red Riding Hood +??? +Stable +Gashapon +Need for Speed: Underground +Dishwasher +Frying pan +Schutzhund +Mario Kart 7 +Disney Infinity +Saab Automobile +F-Zero +Halloween costume +Thor (Marvel Comics) +Foam +Tokyo Ghoul +Chevrolet Monte Carlo +Flush toilet +Axe +Worms (series) +Marble +Driver's education +Madden NFL 12 +Pressure washing +Christmas ornament +Buffalo wing +Duct (flow) +Indiana Jones +Chart +Yoshi's Island +Subaru Forester +Scar (The Lion King) +Mousse +Lalaloopsy +Micropterus +Gibson SG +Express train +Citroën C4 +Submission wrestling +Broccoli +Donkey Kong Country 2: Diddy's Kong Quest +Barrel organ +Mega Man 2 +Dragon boat +New Super Mario Bros. U +Gecko +Pillow +Kemenche +Porsche Cayenne +??? +Shift 2: Unleashed +Bomberman +Dungeons & Dragons +BeamNG.drive +AdventureQuest +Mario Kart 64 +Disc brake +Bloons Tower Defense +Forza Motorsport 3 +Guitar Center +Super Smash Bros. 
(video game) +Fiat Uno +Printed circuit board +Porcelain +E-book +Macaroni +Lego Friends +Max Payne 3 +StarCraft II: Heart of the Swarm +Medal of Honor: Warfighter +Kamaz +Air France +Porsche Carrera GT +Black Rock Shooter +Rosary +Halo Wars +Car dealership +Toys "R" Us +Total War: Rome II +Need for Speed: ProStreet +Mansion +Cheetah +Marshmallow +Shorts +Unturned +Charango +Lithium polymer battery +Sea turtle +Vatican City +Starbucks +Emergency vehicle lighting +Volkswagen Golf Mk1 +Lupin the Third +Pearl +Wii Sports +Hero +Chrysler 300 +GMC (automobile) +Charm bracelet +Kamen Rider Battle: Ganbaride +Ys (series) +Asus Eee Pad Transformer +BMW 5 Series (E60) +Ford Mustang SVT Cobra +Autocross +Royal icing +Laboratory +Peugeot 206 +Maltese (dog) +Soulcalibur IV +Wardrobe +Garlic +Tugboat +Luke Skywalker +Electronic circuit +Coat (clothing) +Passenger +??? +Cactus +Ford Crown Victoria +Elfen Lied +Circular saw +Radha +Welsh Corgi +Eiffel Tower +Softail +Bajo sexto +Lobster +Colt (horse) +Solar eclipse +Greyhound +Pepsi +Black Widow (Natasha Romanova) +Virtua Fighter +Filly +Canning +Fat +Goth subculture +Slow cooker +Lightning (Final Fantasy) +Water polo +Apple pie +Inkjet printing +Mercedes-Benz SLK-Class +Bandsaw +Cammy +Fight Night (EA video game series) +Tortoise +Multicooker +Ferret +Dipping sauce +Circle +Rocket launch +Pembroke Welsh Corgi +Cold porcelain +Battlefield Play4Free +ThinkPad +BMW X6 +??? +Sony Xperia Z +Selfie +Mahjong +Cherry +IPod Touch (5th generation) +Colin McRae: Dirt 2 +Tekken 5 +Shawl +Ultron +Guitar pick +Elk +Sunrise +Amusement arcade +Hammock +Decoupage +Mug +Sander +Autogyro +Woodchipper +Texas Instruments +Baby Alive +Tarantula +Shrub +Donkey Kong (video game) +Coating +Steirische Harmonika +Racing wheel +Raphael (Teenage Mutant Ninja Turtles) +Bank +Opel Vectra +Skull +Sand art and play +Birth +Lasagne +Infinity Ward +Philippine cuisine +Custard +Lettuce +Megami Tensei +Flappy Bird +Sleeping Dogs (video game) +Fender Jazz Bass +Devil Kings +Blouse +Notebook +Aloe vera +Funko +Lelouch Lamperouge +Macramé +Casserole +Capacitor +I Wanna Be the Guy +Hose +Subaru Legacy +Star Citizen +Sabian +Ventriloquism +Call of Duty (video game) +Kindle Fire +Starfire (Koriand'r) +Zeus +Microscope +Basket +Coyote +Bart Simpson +Volvo FH +Spinnerbait +Honda CR-V +Sony Xperia Z1 +Satan +Mercedes-Benz Sprinter +Team roping +Jeep Cherokee (XJ) +Friendship bracelet +Leonardo (Teenage Mutant Ninja Turtles) +Single track (mountain biking) +Chickpea +Vegetable carving +??? +Spark plug +Akita (dog) +Canoeing +Recumbent bicycle +Boom Beach +Puppetry +Sport stacking +Kendama +Punching bag +Staples Center +Marvel vs. 
Capcom 2: New Age of Heroes +Apple TV +Davul +Scratchcard +Disgaea +Larva +Used car +DmC: Devil May Cry +Kyo Kusanagi +Mega Man (video game) +K'Nex +Burger King +Dungeon crawl +Pro Evolution Soccer 2009 +Blueberry +Village +Convenience store +Golf cart +BMW M6 +Fiber +Resistance (series) +Picture frame +Trouble in Terrorist Town +Volkswagen Type 2 +Domestic pig +Grand Tourer Injection +Alucard (Hellsing) +Aerith Gainsborough +Batmobile +Gummi candy +Cauliflower +Marlin +Gold medal +Shin Megami Tensei: Persona 3 +Table football +Shikamaru Nara +Truggy +Ford Explorer +Chevrolet Cruze +American Airlines +Jupiter +Galaxy Nexus +KFC +Spec Ops: The Line +Rigs of Rods +EA Sports UFC +Plastic bottle +Hubble Space Telescope +Barn +Hand +Star Wars: Battlefront (2004 video game) +Digimon Masters +Gibson ES-335 +Waffle +Paper model +Ressha Sentai ToQger +Gas tungsten arc welding +Pavement (architecture) +Sonic & Sega All-Stars Racing +??? +Palace +Stealth game +God of War (2005 video game) +Mazda6 +Dragon Age II +Warhammer Online: Age of Reckoning +Switch +Grizzly bear +??? +H.A.V.E. Online +Lowlands (festival) +Wok +Window blind +Nokia N8 +Android Wear +V10 engine +Toyota Tundra +Marble (toy) +Alligator +Screencast +Range Rover Sport +Moose +Polo +Laminate flooring +BVE Trainsim +Baby sling +Garage door +Compact car +Dishonored +Parrot AR.Drone +Giraffe +Need for Speed Rivals +McLaren 12C +Pork ribs +Track cycling +Don't Starve +Marvel: Avengers Alliance +Popeye +Ford Mondeo +HTC One (M7) +Pyramid +Asphalt +Beetle +Canon EOS 600D +Oldsmobile Cutlass +Suzuki GSX-R750 +Audi A8 +World of Warcraft: The Burning Crusade +Homing pigeon +NHL 15 +Touring motorcycle +Goblin +Nissan 370Z +Metro: Last Light +Skylanders: Giants +Ran Online +Gear +Mercedes-Benz G-Class +Travian +Burnout Paradise +Tag team +Electric motorcycles and scooters +Kazuya Mishima +Serious Sam +Nexus 7 (2013) +Super Paper Mario +Doodle +Gelatin dessert +Andalusian horse +Warrior +Ferrari 360 +DVD player +WildStar (video game) +Hyundai Genesis +Chutney +Pizzica +Dead Rising 2 +Potter's wheel +Yoda +Cylinder (engine) +M. Bison +Metal Gear Solid: Peace Walker +Masonry +Edward Elric +Split (gymnastics) +Mario Kart DS +Ghost Rider +Grand Theft Auto: Episodes from Liberty City +F1 2012 (video game) +Cookie Monster +Red hair +Nami (One Piece) +Canon EF lens mount +Finger +Asteroid +Nissan Navara +Riddler +Traffic light +Nikon Coolpix series +Dragonica +Broth +Metal Gear Solid 2: Sons of Liberty +Samsung Galaxy Y +Wedding cake +Half-pipe +Gothic II +Vehicle horn +Motor oil +Credit card +Resident Evil 2 +British Airways +Great Dane +Stain +Super Mario 3D World +Yamaha YZ125 +Atari 2600 +Rover (space exploration) +Cayman +Ragdoll +Basement +Betta +Mobile home +Heroes of Might and Magic +Photograph +Wreath +Universe of The Legend of Zelda +Lamborghini Diablo +Albus Dumbledore +BlackBerry Bold +Prototype 2 +Soybean +Hurdling +Spock +Sony Xperia Z2 +Monopoly (game) +Fruit preserves +SimCity (2013 video game) +Cutlet +Volkswagen Touareg +Aerosol paint +Risotto +Toyota 4Runner +Driveclub +Moshing +Total War: Shogun 2 +Elf +Hot tub +President +NHL 13 +Rudolph the Red-Nosed Reindeer +Bugs Bunny +Mario & Luigi: Superstar Saga +Tulip +Paper Mario: The Thousand-Year Door +Hammer +EarthBound +Meta Knight +La Tale +Shadow of the Colossus +GLaDOS +Hunting dog +BioShock 2 +Supercars Championship +Orbit +God of War: Ascension +Bloons +Ney +Toyota MR2 +Cam +??? +Zoom lens +H&M +Hovercraft +Sanshin +Instant noodle +Luigi's Mansion +Tales of Vesperia +Dekotora +??? 
+Talking Tom and Friends +Baseball glove +Ale +Meringue +Canon EOS 7D +Shaolin Kung Fu +Hawk +Donkey Kong Country Returns +The Salvation Army +Brown trout +Sugarcane +Cake pop +Suzuki Bandit series +Green tea +Warehouse +Appalachian dulcimer +Kermit the Frog +Unicorn +Fountain pen +Acer Iconia +Master System +Robocraft +Merlin +Sweet potato +Alice's Adventures in Wonderland +Solar flare +DigiTech +Saturn +Flash (comics) +Reindeer +Justice League +Line Rider +Runes of Magic +Chevrolet Suburban +Michael Myers (Halloween) +Need for Speed: Undercover +Wand +Chevrolet Malibu +Coal +Antena 3 (Spain) +Driver: San Francisco +Font +Stingray +Thermostat +Toph Beifong +Vert ramp +Ridge Racer +Goat Simulator +Lineage (video game) +CNBC +Juri (Street Fighter) +TARDIS +Pigeon racing +Lap steel guitar +Shovel +Mosaic +Monster Retsuden Oreca Battle +Pair skating +Wallpaper +The Simpsons: Tapped Out +The Elder Scrolls III: Morrowind +Padel (sport) +Fender (vehicle) +Furnace +Nissan Altima +Cornet +Škoda Fabia +Lockheed Martin F-35 Lightning II +Electribe +Alesis +Motorola Razr +Halo: Combat Evolved Anniversary +Darksiders +Neo Geo (system) +Snail +Milking +Pluto (Disney) +Peanut +Verona Arena +Chubby Bunny +Jerry Mouse +Corvette Stingray (concept car) +Cigarette +Cube World +??? +Cybertron +Dacia Duster +Pastel +Transformer +Split screen (computer graphics) +Sukhoi Su-27 +Gabrielle (Xena: Warrior Princess) +Opel Kadett +Nokia Lumia 920 +Twin-turbo +Jiraiya (Naruto) +The Legend of Zelda: A Link to the Past +Crappie +Rechargeable battery +??? +Super Mario 3D Land +??? +DragonFable +Aragorn +Crash Bandicoot 2: Cortex Strikes Back +Southwest Airlines +Multi-tool +Passport +Porsche Panamera +Airship +Tuxedo Mask +Tom Clancy's Ghost Recon: Future Soldier +Melty Blood +Beam (structure) +Gas metal arc welding +Audi Q7 +Bell pepper +Chewing gum +Drinking water +Heat pump +Kenshiro +Patrick Drake and Robin Scorpio +Miniature wargaming +Kawasaki Ninja 650R +Captain Falcon +J-Stars Victory VS +Imperishable Night +Citrus +Drift trike +Optical illusion +Command & Conquer: Red Alert 3 +Suzuka Circuit +Mayonnaise +Quake III Arena +Keychain +God Mode +Ford Bronco +Crocodilia +Black and white +Llanero +Monorail +Nova +G.I. Joe +S.T.A.L.K.E.R.: Call of Pripyat +Perfect Cherry Blossom +Wine tasting +Olive +Ultra Series +Beat 'em up +Jellyfish +Lego Legends of Chima +Sauna +Tom Clancy's Splinter Cell: Blacklist +Starscream +Aang +Misty (Pokémon) +IPad Air +Ice pop +Lute +Jigsaw puzzle +Baritone saxophone +BMW Z4 +Mana (series) +Motorized bicycle +Dalmatian (dog) +Bose Corporation +Burton Snowboards +Kingdom Hearts: Chain of Memories +Mass Rapid Transit (Singapore) +Boombox +Napkin +Chimpanzee +Guitar Hero: Metallica +Radar detector +Honda NSX +Empire: Total War +Darts +Light fixture +Super Mario Bros. 
2 +Temple Run +Kristoff (Disney) +Adrenalyn XL +Tatra (company) +Mini-Z +Tin can +Market garden +Mercedes-Benz Actros +Hug +Whipped cream +Wasp +Oni +Princess Daisy +Constellation +HTC One X +Fender Precision Bass +Prawn +Christmas card +Handbell +Coconut milk +Toshiba Satellite +Riven +Referee +Dragon's Dogma +Dalek +Folding bicycle +2 Days +Kimono +Seiko +Hippopotamus +Resident Evil: Revelations +Billboard (magazine) +Padlock +Butterfly stroke +Mashed potato +Yuan Zai (giant panda) +Aurora +Mop +Tubing (recreation) +Clothes iron +Order & Chaos Online +Zebra +Crème caramel +Warhammer 40,000: Dawn of War +Tom Clancy's Splinter Cell: Conviction +Wakfu +Stitch (Lilo & Stitch) +Calf +Cars 2 (video game) +Crayfish +Engagement ring +Infamous Second Son +Jukebox +Biryani +DJ Hero +Super GT +Chameleon +Oyster +Warcraft III: The Frozen Throne +Dynasty Warriors 7 +Postage stamp +Derek Shepherd +Plotter +Amnesia: The Dark Descent +Jinn +Rayman Legends +Tinker Bell +Patchwork +Doom 3 +Wat +Paiste +Mercedes-Benz CLS-Class +Liquid +GameTrailers +Pep squad +Clam +SaGa (series) +Nollie +Company of Heroes +Green Arrow +Naruto Uzumaki +DeWalt +Putter +Family +Transistor +SOCOM (series) +Pea +Social media +Aliens vs. Predator (2010 video game) +HTC HD2 +Ducati Monster +Aggressive inline skating +Maserati GranTurismo +PortAventura World +Lego Batman: The Videogame +Energy drink +Turban +Pokémon Yellow +Alaskan Malamute +Monica's Gang +Suzuki Vitara +Black Desert Online +Zara (retailer) +Just Dance 2015 +Maid Sama! +Disguise +Kidney +Water well +Farmer +Toyota RAV4 +Night +DJMax +Richter-tuned harmonica +Real Racing 3 +Solid Snake +United States dollar +F1 2010 (video game) +Samsung Galaxy Ace +Trials Evolution +Cadillac CTS +Daihatsu +Balcony +Xperia Play +Rookie +Timing belt (camshaft) +Monster Energy +Ork (Warhammer 40,000) +Toyota JZ engine +Drive-through +Spektrum RC +Hyundai Sonata +Chinchilla +Wii Sports Resort +Interchange (road) +Whitewater slalom +Ticket (admission) +Bayonetta +Salsa (sauce) +PlayStation All-Stars Battle Royale +Lego Minecraft +??? +Mule +Starbound +Scissors +Asparagus +Sony NEX-5 +Electrical connector +Rayquaza +Eight-ball +Steel-string acoustic guitar +Strap +Times Square +Bus driver +SEAT Ibiza +Converse (shoe company) +Atlantic bluefin tuna +Mercedes-Benz W124 +??? +Goggles +Kawasaki Z1000 +Shrimp and prawn as food +Garnier +Semi-trailer +Cod +Carpet cleaning +Lost Planet +Sonic the Hedgehog CD +Final Fantasy V +F1 2013 (video game) +Modelling clay +Audi Sportback concept +WWE All Stars +Mitsubishi Outlander +Punch-Out!! +Disney Infinity: Marvel Super Heroes +Mulch +Willy Wonka +Dead Space 3 +Eurofighter Typhoon +H1Z1: Just Survive +Fakie +Super Mario RPG +Dance Central 3 +Puppet +Cursor (user interface) +Prince of Persia: Warrior Within +Ultimate Mortal Kombat 3 +Macross +Upholstery +The Binding of Isaac (video game) +Deathstroke +The King of Fighters '98 +Dragon Ball Z: Battle of Z +Theatre organ +Valve Corporation +Age of Conan +GameStop +Unreal Tournament +Metroid Prime +Annie (musical) +Cinderella (Disney character) +Eric Cartman +The Prince of Tennis +Kia Sportage +Vase +Nightwing +Wing +Gouken +Loft +Ferris wheel +Newspaper +Cash +A Certain Magical Index +Pretty Rhythm +Marionette +Swing (seat) +He-Man +Cook (profession) +Bentley Continental GT +Shaman King +Hakuōki +Essential oil +Balalaika +Baja 1000 +Hummingbird +PSA HDi engine +Nissan Sentra +??? 
+Infamous (video game) +Game Boy Color +343 Industries +Six Flags Magic Mountain +Woozworld +It's a Small World +Star Fox 64 +Xenoblade Chronicles +TurboGrafx-16 +Tesla coil +HTC Evo 4G +Super Metroid +Label +Gothic (video game) +Samsung Galaxy Gear +??? +Viola caipira +Space Engineers +Yamaha MT-09 +Mortal Kombat: Armageddon +Angry Birds Star Wars +Aerography (arts) +Python (genus) +Hyundai Elantra +MG Cars +Tesla Model S +Castlevania: Symphony of the Night +Body armor +Bone +Tekken 5: Dark Resurrection +Kimchi +Wedding invitation +Porsche 930 +Whey protein +Winery +Honda Integra DC5 +Hatter (Alice's Adventures in Wonderland) +Double Dutch (jump rope) +Cort Guitars +One-man band +Dentures +Tupperware +The Lion King (musical) +BlackBerry Z10 +Kingdom Hearts III +Zipper +Leaf +Samsung Galaxy Note 10.1 +Bansuri +BMW 5 Series (F10) +Australian Shepherd +Crash Bandicoot: Warped +Pou (video game) +Tilapia +Peugeot 205 +AC Cobra +Tin whistle +Tooth brushing +Battlefield 1942 +Virginia Tech +Quarry +Amphibious ATV +Dome +Portable stove +Sound system (Jamaican) +Suikoden +Lunar eclipse +Tiramisu +Inazuma Eleven GO (video game) +Nissan 300ZX +Neverwinter (video game) +Axle +Altaïr Ibn-La'Ahad +Radiator +Resident Evil (2002 video game) +Prince of Persia: The Sands of Time +Crop circle +Rhinoceros +??? +Bookcase +Common quail +The Hunger Games +Mercedes-Benz A-Class +Sarah Walker (Chuck) +Cinnamon +Hiru TV +Bread roll +Magician (fantasy) +Lotion +Killzone 3 +Cadillac Escalade +Silhouette +Swan +Lemonade +Trabant +Mojito +Fossil +Macy's +Silk +Puma SE +Nissan Maxima +Battlefield 2142 +Twisted Metal +Olive oil +Wii Remote +Universal Studios Hollywood +Berserk (manga) +Wellington boot +Tomb Raider: Anniversary +Almond +Audi RS 6 +Ladder +Fire Emblem Awakening +Stained glass +Tape recorder +Emerald +Ford Fusion (Americas) +Iguana +Might and Magic +Pluto +Mazda Raceway Laguna Seca +Air Force 1 (shoe) +Pub +Oshun +Honda K engine +Nerd +Renault 5 +F1 2011 (video game) +Windscreen wiper +Lex Luthor +Track racing +Escalator +Charlie Brown +Chauffeur +Soba +Window film +Bowl +Alarm clock +Pokémon Mystery Dungeon: Explorers of Time and Explorers of Darkness +Roomba +Honda Shadow +Lightning Returns: Final Fantasy XIII +LATAM Brasil +Top +American Bulldog +Legoland +Caterpillar +Windows Phone 8 +Automated teller machine +Samsung Galaxy S III Mini +Portrait photography +Office +Para Para +Hockey stick +Singapore Airlines +Volvo S60 +Udon +Chevrolet K5 bazelr +Bath & Body Works +Segway PT +Castlevania: Lords of Shadow +Mario Kart: Double Dash +Mew (Pokémon) +Walkman +Mentos +Jilbāb +Canter and gallop +Cinderella +Skylanders: Trap Team +Lego Duplo +Morgan le Fay +Decal +Handycam +Women's Tennis Association +Yeti +Multi-valve +Pokémon Stadium +Matryoshka doll +Lexus LFA +Keirin +??? +Honda Prelude +Burrito +Midna +Shuriken +New Super Mario Bros. 
2 +Nebula +BlackBerry PlayBook +Typography +Hare +Mohawk hairstyle +Onsen +Jet pack +Wagon +Just Dance 3 +Nissan S30 +Noah's Ark +Ronald McDonald +Bombardier Dash 8 +Raspberry +Hair dryer +The Simpsons: Hit & Run +Still life +Ice climbing +Lada Riva +Port +Compound bow +Resident Evil 3: Nemesis +R2-D2 +Sand animation +ABS-CBN (television network) +Leica Camera +Final Fantasy (video game) +Arkham Asylum +Dynasty Warriors 8 +Text messaging +Nursery (room) +Donkey Kong 64 +Star Wars Jedi Knight: Jedi Academy +Typing +Mapex Drums +Granado Espada +Calendar +UFC Undisputed 3 +Airbag +DMC World DJ Championships +Gingerbread +Rayman Origins +Lamborghini Reventón +Trials Fusion +Mafia (video game) +Paso Fino +??? +Sport kite +Taco Bell +Envelope +Mazdaspeed3 +Transformers: Generation 1 +Empanada +Mega Man 3 +Transformers: Fall of Cybertron +Rosalina (character) +Mosquito +Volkswagen Tiguan +Metal Gear Solid V: Ground Zeroes +Marmalade +Pandeiro +Miss Saigon +Yosemite National Park +Dutch Warmblood +Pre-flight safety demonstration +Citroën Saxo +Mack Trucks +Medley swimming +??? +Spindle (tool) +Greek cuisine +Hyundai Santa Fe +Chili con carne +Poster +Kawasaki Ninja 300 +Baby food +Grand Theft Auto (Game Boy Advance) +Sim racing +Chromebook +Peter Griffin +Stainless steel +Beverage can +Pixie cut +Chevrolet SS (concept car) +Chokehold +Bullion +Super Mario Kart +The Sims FreePlay +Giant Bicycles +Sgt. Frog +Age of Empires II +Abadá +Kingdom Hearts HD 1.5 Remix +Blackjack +Canon EOS 60D +Filling station +Plywood +Pheasant +Wilson Sporting Goods +Comb +Lighthouse +Rock and Roll Hall of Fame +Tōshirō Hitsugaya +Tales of the Abyss +Maze +Resident Evil: Operation Raccoon City +Cimbalom +??? +Monkey Island (series) +Civilization V +Venus +Peugeot 207 +The Amazing Spider-Man (2012 video game) +Chrono Cross +New Balance +Dassault Rafale +Daredevil (Marvel Comics character) +Silent Hill 2 +Beanie (seamed cap) +Nut (fruit) +Jill Valentine +Scion tC +Percy Jackson +Lord of the Dance (musical) +Far Cry (video game) +Star Wars: The Force Unleashed II +Memory card +Motorola Droid +Skylanders: Spyro's Adventure +Yamaha DT125 +Audi Q5 +Jaguar +Jaguar XJ +Animal Crossing: Wild World +Cockroach +Wetsuit +Funny Car +FarmVille +The Sims 3: Pets +Peel (fruit) +Melting +Aurora (Disney character) +Dry ice +Star Ocean +Duke Nukem Forever +Toribash +Yamaha YZ250 +Tekken 3 +Orihime Inoue +Spyro: Year of the Dragon +Eight-string guitar +Sonic Riders +Penny (The Big Bang Theory) +Honda XR series +Neodymium magnet toys +Leatherman +Maximum Destruction +Super Mario 64 DS +Unreal Tournament 3 +Health club +Chrysler Hemi engine +The North Face +CBS News +Pentium +Cannon +London Fashion Week +Military tactics +Smallmouth bass +Leopard gecko +Top (clothing) +Fable III +Panasonic Lumix DMC-GH4 +Sikorsky UH-60 Black Hawk +Blue Dragon +Loudspeaker enclosure +Ōkami +Tribal Wars +Hot chocolate +Beetroot +??? +Nokia N97 +Blue Exorcist +??? +Sonic and the Black Knight +Headscarf +Plasma display +Woody Woodpecker +??? +Beyblade: Shogun Steel +29er (bicycle) +QR code +Dyson (company) +Yanmar +Gladiator +Nissan Pathfinder +Nissan X-Trail +Autofocus +King Dedede +Zoo Tycoon 2 +Wheat tortilla +Team Rocket +Classical ballet +New York City Police Department +Heihachi Mishima +Crochet hook +Pencil case +Gods Eater Burst +??? 
+DS 3 +Periodic table +General Electric +Nissan Juke +Lollipop +Jaguar F-Type +MechWarrior Online +Dodge Neon SRT-4 +Fried egg +Revell +Indoor soccer +Gratin +Punisher +Washburn Guitars +Caster board +Eldar (Warhammer 40,000) +Final Fantasy Type-0 +NBA 2K10 +The Lord of the Rings: The Battle for Middle-earth II +Texas Longhorns +3D television +Scorpion +Warhammer 40,000: Dawn of War II +Burpee (exercise) +The Order: 1886 +Poptropica +Tomb Raider: Legend +Pelmeni +Bánh +PriPara +Legacy of Kain +Bowser Jr. +Yonex +Humanoid robot +Sony Ericsson Xperia X10 +Rain gutter +FIFA Street (2012 video game) +Castle Crashers +Meteoroid +Macaroni and cheese +Sega CD +Mac Mini +Tales of Xillia +Sonic Lost World +Orphanage +Siku Toys +Lego Batman 3: Beyond Gotham +Daenerys Targaryen +Orangutan +Town +Command & Conquer: Generals +Samurai Shodown +ZX Spectrum +Quake Live +Weighing scale +Dead Frontier +Wolfenstein: The New Order +Colin McRae: Dirt +Square dance +Assassin's Creed Rogue +Airboat +Uncharted: Drake's Fortune +Diddy Kong +Yamaha Motif +Theremin +Rilakkuma +Tie-dye +Flip-flops +Cylinder +Gothic 3 +Unreal (1998 video game) +Beyond: Two Souls +Umbrella +Dream Club +Gradius +Nexus One +Nokia N900 +Tamagotchi +Husband +Sleeping bag +Look-alike +Papaya +Mother 3 +The Beatles: Rock Band +Prince of Persia: The Two Thrones +??? +Darth Maul +Knife sharpening +Meteor shower +Flugelhorn +One Piece: Pirate Warriors +Asterix +Talk box +With Your Destiny +Alan Wake +Barcode +Recurve bow +Diaper bag +Ferrari F12berlinetta +Taskbar +Mortar (masonry) +Toner (skin care) +Freddy Krueger +Marriott International +Mass Effect (video game) +Hawkeye (comics) +Killing Floor (video game) +Chibiusa +Screenshot +Pear +Injury +Kia Sorento +Shredder (Teenage Mutant Ninja Turtles) +Lifeguard +Kei car +Fight Night Champion +Terra (comics) +Gamblerz diff --git a/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt b/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt new file mode 100644 index 000000000..3b598a534 --- /dev/null +++ b/mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt @@ -0,0 +1,178 @@ +input_side_packet: "input_sequence_example_path" +input_side_packet: "input_video_path" +input_side_packet: "output_video_path" +input_side_packet: "segment_size" +input_side_packet: "overlap" + +node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:input_sequence_example_path" + output_side_packet: "CONTENTS:input_sequence_example" +} + +node { + calculator: "StringToSequenceExampleCalculator" + input_side_packet: "STRING:input_sequence_example" + output_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example" +} + +node { + calculator: "UnpackMediaSequenceCalculator" + input_side_packet: "SEQUENCE_EXAMPLE:parsed_sequence_example" + output_stream: "FLOAT_FEATURE_RGB:rgb_feature_vector" + output_stream: "FLOAT_FEATURE_AUDIO:audio_feature_vector" +} + +node { + calculator: "ConcatenateFloatVectorCalculator" + input_stream: "rgb_feature_vector" + input_stream: "audio_feature_vector" + output_stream: "feature_vector" +} + +node { + calculator: "VectorFloatToTensorCalculator" + input_stream: "feature_vector" + output_stream: "feature_tensor" +} + +node { + calculator: "StringToInt32Calculator" + input_side_packet: "segment_size" + output_side_packet: "segment_size_int" +} + +node { + calculator: "StringToInt32Calculator" + input_side_packet: "overlap" + output_side_packet: "overlap_int" +} + +node { + calculator: "LappedTensorBufferCalculator" + input_stream: "feature_tensor" + 
output_stream: "lapped_feature_tensor" + input_side_packet: "BUFFER_SIZE:segment_size_int" + input_side_packet: "OVERLAP:overlap_int" + node_options: { + [type.googleapis.com/mediapipe.LappedTensorBufferCalculatorOptions] { + add_batch_dim_to_tensors: true + } + } +} + +node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "segment_size_int" + output_stream: "AT_ZERO:segment_size_int_stream" +} + +node { + calculator: "VectorIntToTensorCalculator" + input_stream: "SINGLE_INT:segment_size_int_stream" + output_stream: "TENSOR_OUT:segment_size_tensor" +} + +node { + calculator: "PacketClonerCalculator" + input_stream: "segment_size_tensor" + input_stream: "lapped_feature_tensor" + output_stream: "synced_segment_size_tensor" +} + +node { + calculator: "TensorFlowSessionFromSavedModelCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: { + saved_model_path: "/tmp/mediapipe/saved_model" + } + } +} + +node: { + calculator: "TensorFlowInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "NUM_FRAMES:synced_segment_size_tensor" + input_stream: "RGB_AND_AUDIO:lapped_feature_tensor" + output_stream: "PREDICTIONS:prediction_tensor" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: { + batch_size: 32 + } + } +} + +node { + calculator: "TensorToVectorFloatCalculator" + input_stream: "prediction_tensor" + output_stream: "prediction_vector" +} + +node { + calculator: "TopKScoresCalculator" + input_stream: "SCORES:prediction_vector" + output_stream: "TOP_K_INDEXES:top_k_indexes" + output_stream: "TOP_K_SCORES:top_k_scores" + output_stream: "TOP_K_LABELS:top_k_labels" + node_options: { + [type.googleapis.com/mediapipe.TopKScoresCalculatorOptions]: { + top_k: 3 + label_map_path: "mediapipe/graphs/youtube8m/label_map.txt" + } + } +} + +node { + calculator: "OpenCvVideoDecoderCalculator" + input_side_packet: "INPUT_FILE_PATH:input_video_path" + output_stream: "VIDEO:input_video" + output_stream: "VIDEO_PRESTREAM:input_video_header" +} + +node { + calculator: "LabelsToRenderDataCalculator" + input_stream: "LABELS:top_k_labels" + input_stream: "SCORES:top_k_scores" + input_stream: "VIDEO_PRESTREAM:input_video_header" + output_stream: "RENDER_DATA:render_data" + node_options: { + [type.googleapis.com/mediapipe.LabelsToRenderDataCalculatorOptions]: { + color { r: 255 g: 0 b: 0 } + color { r: 0 g: 255 b: 0 } + color { r: 0 g: 0 b: 255 } + thickness: 2.0 + font_height_px: 20 + max_num_labels: 3 + location: TOP_LEFT + } + } +} + +node { + calculator: "PacketClonerCalculator" + input_stream: "render_data" + input_stream: "input_video" + output_stream: "synchronized_render_data" +} + +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "INPUT_FRAME:input_video" + input_stream: "synchronized_render_data" + output_stream: "OUTPUT_FRAME:output_video" +} + +node { + calculator: "OpenCvVideoEncoderCalculator" + input_stream: "VIDEO:output_video" + input_stream: "VIDEO_PRESTREAM:input_video_header" + input_side_packet: "OUTPUT_FILE_PATH:output_video_path" + node_options: { + [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: { + codec: "avc1" + video_format: "mp4" + } + } +} + diff --git a/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt b/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt new file mode 100644 index 000000000..38a02570b --- /dev/null +++ 
b/mediapipe/graphs/youtube8m/yt8m_dataset_model_inference.pbtxt @@ -0,0 +1,139 @@ +input_side_packet: "desired_segment_size" +input_side_packet: "record_index" +input_side_packet: "tfrecord_path" +output_side_packet: "yt8m_id" +output_stream: "annotation_summary" + +node { + calculator: "StringToInt32Calculator" + input_side_packet: "record_index" + output_side_packet: "record_index_int" +} + +node { + calculator: "StringToInt32Calculator" + input_side_packet: "desired_segment_size" + output_side_packet: "desired_segment_size_int" +} + +node { + calculator: "TFRecordReaderCalculator" + input_side_packet: "TFRECORD_PATH:tfrecord_path" + input_side_packet: "RECORD_INDEX:record_index_int" + output_side_packet: "SEQUENCE_EXAMPLE:yt8m_sequence_example" +} + +node { + calculator: "UnpackYt8mSequenceExampleCalculator" + input_side_packet: "YT8M_SEQUENCE_EXAMPLE:yt8m_sequence_example" + input_side_packet: "DESIRED_SEGMENT_SIZE:desired_segment_size_int" + output_side_packet: "YT8M_ID:yt8m_id" + output_side_packet: "SEGMENT_SIZE:segment_size" + output_side_packet: "LAPPED_TENSOR_BUFFER_CALCULATOR_OPTIONS:lapped_tensor_buffer_calculator_options" + output_stream: "QUANTIZED_RGB_FEATURE:quantized_rgb_feature" + output_stream: "QUANTIZED_AUDIO_FEATURE:quantized_audio_feature" +} + +node { + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:quantized_rgb_feature" + output_stream: "FLOAT_VECTOR:rgb_feature_vector" + node_options: { + [type.googleapis.com/mediapipe.DequantizeByteArrayCalculatorOptions]: { + max_quantized_value: 2 + min_quantized_value: -2 + } + } +} + +node { + calculator: "DequantizeByteArrayCalculator" + input_stream: "ENCODED:quantized_audio_feature" + output_stream: "FLOAT_VECTOR:audio_feature_vector" + node_options: { + [type.googleapis.com/mediapipe.DequantizeByteArrayCalculatorOptions]: { + max_quantized_value: 2 + min_quantized_value: -2 + } + } +} + +node { + calculator: "ConcatenateFloatVectorCalculator" + input_stream: "rgb_feature_vector" + input_stream: "audio_feature_vector" + output_stream: "feature_vector" +} + +node { + calculator: "VectorFloatToTensorCalculator" + input_stream: "feature_vector" + output_stream: "feature_tensor" +} + +node { + calculator: "LappedTensorBufferCalculator" + input_stream: "feature_tensor" + input_side_packet: "CALCULATOR_OPTIONS:lapped_tensor_buffer_calculator_options" + output_stream: "lapped_feature_tensor" +} + +node { + calculator: "SidePacketToStreamCalculator" + input_side_packet: "segment_size" + output_stream: "AT_ZERO:segment_size_int_stream" +} + +node { + calculator: "VectorIntToTensorCalculator" + input_stream: "SINGLE_INT:segment_size_int_stream" + output_stream: "TENSOR_OUT:segment_size_tensor" +} + +node { + calculator: "PacketClonerCalculator" + input_stream: "segment_size_tensor" + input_stream: "lapped_feature_tensor" + output_stream: "synced_segment_size_tensor" +} + +node { + calculator: "TensorFlowSessionFromSavedModelCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowSessionFromSavedModelCalculatorOptions]: { + saved_model_path: "/tmp/mediapipe/saved_model" + } + } +} + +node: { + calculator: "TensorFlowInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "NUM_FRAMES:synced_segment_size_tensor" + input_stream: "RGB_AND_AUDIO:lapped_feature_tensor" + output_stream: "PREDICTIONS:prediction_tensor" + node_options: { + [type.googleapis.com/mediapipe.TensorFlowInferenceCalculatorOptions]: { + batch_size: 32 + } + } +} + 
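+# The remaining nodes post-process the model output: the prediction tensor is
+# converted back to a float vector, and the top 9 scores are mapped to labels
+# via label_map.txt and emitted on the "annotation_summary" output stream.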
+node { + calculator: "TensorToVectorFloatCalculator" + input_stream: "prediction_tensor" + output_stream: "prediction_vector" +} + +node { + calculator: "TopKScoresCalculator" + input_stream: "SCORES:prediction_vector" + output_stream: "SUMMARY:annotation_summary" + node_options: { + [type.googleapis.com/mediapipe.TopKScoresCalculatorOptions]: { + top_k: 9 + label_map_path: "mediapipe/graphs/youtube8m/label_map.txt" + } + } +} diff --git a/mediapipe/java/com/google/mediapipe/BUILD b/mediapipe/java/com/google/mediapipe/BUILD new file mode 100644 index 000000000..82e2f52c2 --- /dev/null +++ b/mediapipe/java/com/google/mediapipe/BUILD @@ -0,0 +1,15 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http:#www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) # Apache 2.0 diff --git a/mediapipe/java/com/google/mediapipe/components/BUILD b/mediapipe/java/com/google/mediapipe/components/BUILD index 80b65e3d4..7fd808387 100644 --- a/mediapipe/java/com/google/mediapipe/components/BUILD +++ b/mediapipe/java/com/google/mediapipe/components/BUILD @@ -68,3 +68,10 @@ android_library( "@com_google_guava_android//jar", ], ) + +# Expose the java source files for building mediapipe AAR. +filegroup( + name = "java_src", + srcs = glob(["*.java"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java b/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java index 122f598ea..0d34e23e3 100644 --- a/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java +++ b/mediapipe/java/com/google/mediapipe/components/ExternalTextureConverter.java @@ -150,6 +150,7 @@ public class ExternalTextureConverter implements TextureFrameProducer { private ExternalTextureRenderer renderer = null; private long timestampOffset = 0; private long previousTimestamp = 0; + private boolean previousTimestampValid = false; protected int destinationWidth = 0; protected int destinationHeight = 0; @@ -335,11 +336,12 @@ public class ExternalTextureConverter implements TextureFrameProducer { // ensures that surface texture has the up-to-date timestamp. (Also adjust |timestampOffset| // to ensure that timestamps increase monotonically.) 
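+      // Worked example (values assumed for illustration): if the previous emitted timestamp
+      // was 1000us, the current timestampOffset is 0, and the new raw texture timestamp is
+      // 990us, the offset below becomes 11us and this frame is stamped 1001us, keeping
+      // timestamps strictly increasing. The adjustment is skipped until at least one frame
+      // has been emitted (i.e. until previousTimestampValid is true).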
long textureTimestamp = surfaceTexture.getTimestamp() / NANOS_PER_MICRO; - if (textureTimestamp + timestampOffset <= previousTimestamp) { + if (previousTimestampValid && textureTimestamp + timestampOffset <= previousTimestamp) { timestampOffset = previousTimestamp + 1 - textureTimestamp; } outputFrame.setTimestamp(textureTimestamp + timestampOffset); previousTimestamp = outputFrame.getTimestamp(); + previousTimestampValid = true; } private void waitUntilReleased(AppTextureFrame frame) { diff --git a/mediapipe/java/com/google/mediapipe/framework/BUILD b/mediapipe/java/com/google/mediapipe/framework/BUILD index e6ad76ed9..5e582ebff 100644 --- a/mediapipe/java/com/google/mediapipe/framework/BUILD +++ b/mediapipe/java/com/google/mediapipe/framework/BUILD @@ -82,3 +82,10 @@ android_library( "@com_google_guava_android//jar", ], ) + +# Expose the java source files for building mediapipe AAR. +filegroup( + name = "java_src", + srcs = glob(["*.java"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/java/com/google/mediapipe/glutil/BUILD b/mediapipe/java/com/google/mediapipe/glutil/BUILD index fc378b4eb..4ad0d16d9 100644 --- a/mediapipe/java/com/google/mediapipe/glutil/BUILD +++ b/mediapipe/java/com/google/mediapipe/glutil/BUILD @@ -30,3 +30,10 @@ android_library( "@com_google_guava_android//jar", ], ) + +# Expose the java source files for building mediapipe AAR. +filegroup( + name = "java_src", + srcs = glob(["**/*.java"]), + visibility = ["//mediapipe:__subpackages__"], +) diff --git a/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl new file mode 100644 index 000000000..eaf4612cf --- /dev/null +++ b/mediapipe/java/com/google/mediapipe/mediapipe_aar.bzl @@ -0,0 +1,157 @@ +"""Generate MediaPipe AAR including different variants of .so in jni folder. + +Usage: + +Create a new mediapipe_aar() target in a BUILD file. For example, +putting the following code into mediapipe/examples/android/aar_demo/BUILD. + +``` +load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar") + +mediapipe_aar( + name = "my_aar", + calculators = ["//mediapipe/calculators/core:pass_through_calculator"], +) +``` + +Then, run the following Bazel command to generate the AAR. + +``` +$ bazel build -c opt --fat_apk_cpu=arm64-v8a,armeabi-v7a mediapipe/examples/android/aar_demo:my_aar +``` + +Finally, import the AAR into Android Studio. + +""" + +load("@build_bazel_rules_android//android:rules.bzl", "android_binary", "android_library") + +def mediapipe_aar(name, calculators = []): + """Generate MediaPipe AAR. + + Args: + name: the name of the AAR. + calculators: the calculator libraries to be compiled into the .so. 
+ """ + native.cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ] + calculators, + ) + + native.cc_library( + name = name + "_mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, + ) + + native.genrule( + name = name + "_aar_manifest_generator", + outs = ["AndroidManifest.xml"], + cmd = """ +cat > $(OUTS) < + + + + +""", + ) + + native.genrule( + name = name + "_calculator_proto_java_src_generator", + srcs = [ + "//mediapipe/framework:protos_src", + "@com_google_protobuf_javalite//:well_known_protos", + ], + outs = ["CalculatorProto.java"], + cmd = "$(location @com_google_protobuf_javalite//:protoc) " + + "--plugin=protoc-gen-javalite=$(location @com_google_protobuf_javalite//:protoc_gen_javalite) " + + "--proto_path=. --proto_path=$(GENDIR) " + + "--proto_path=$$(pwd)/external/com_google_protobuf_javalite/src " + + "--javalite_out=$$(dirname $(location CalculatorProto.java)) mediapipe/framework/calculator.proto && " + + "mv $$(dirname $(location CalculatorProto.java))/com/google/mediapipe/proto/CalculatorProto.java $$(dirname $(location CalculatorProto.java))", + tools = [ + "@com_google_protobuf_javalite//:protoc", + "@com_google_protobuf_javalite//:protoc_gen_javalite", + ], + ) + + android_library( + name = name + "_android_lib", + srcs = [ + "//mediapipe/java/com/google/mediapipe/components:java_src", + "//mediapipe/java/com/google/mediapipe/framework:java_src", + "//mediapipe/java/com/google/mediapipe/glutil:java_src", + "CalculatorProto.java", + ], + manifest = "AndroidManifest.xml", + proguard_specs = ["//mediapipe/java/com/google/mediapipe/framework:proguard.pgcfg"], + deps = [ + ":" + name + "_mediapipe_jni_lib", + "//mediapipe/framework:calculator_java_proto_lite", + "//mediapipe/framework:calculator_profile_java_proto_lite", + "//mediapipe/framework/tool:calculator_graph_template_java_proto_lite", + "//third_party:androidx_annotation", + "//third_party:androidx_appcompat", + "//third_party:androidx_core", + "//third_party:androidx_legacy_support_v4", + "//third_party:camerax_core", + "//third_party:camera2", + "@com_google_code_findbugs//jar", + "@com_google_common_flogger//jar", + "@com_google_common_flogger_system_backend//jar", + "@com_google_guava_android//jar", + "@androidx_lifecycle//jar", + ], + ) + + _aar_with_jni(name, name + "_android_lib") + +def _aar_with_jni(name, android_library): + # Generate dummy AndroidManifest.xml for dummy apk usage + # (dummy apk is generated by _dummy_app target below) + native.genrule( + name = name + "_binary_manifest_generator", + outs = [name + "_generated_AndroidManifest.xml"], + cmd = """ +cat > $(OUTS) < + + +EOF +""", + ) + + # Generate dummy apk including .so files. + # We extract out .so files and throw away the apk. 
+ android_binary( + name = name + "_dummy_app", + manifest = name + "_generated_AndroidManifest.xml", + custom_package = "dummy.package.for.so", + deps = [android_library], + ) + + native.genrule( + name = name, + srcs = [android_library + ".aar", name + "_dummy_app_unsigned.apk"], + outs = [name + ".aar"], + tags = ["manual"], + cmd = """ +cp $(location {}.aar) $(location :{}.aar) +chmod +w $(location :{}.aar) +origdir=$$PWD +cd $$(mktemp -d) +unzip $$origdir/$(location :{}_dummy_app_unsigned.apk) "lib/*" +cp -r lib jni +zip -r $$origdir/$(location :{}.aar) jni/*/*.so +""".format(android_library, name, name, name, name), + ) diff --git a/mediapipe/util/sequence/README.md b/mediapipe/util/sequence/README.md index 244ba82ed..18b795618 100644 --- a/mediapipe/util/sequence/README.md +++ b/mediapipe/util/sequence/README.md @@ -466,6 +466,7 @@ tasks and tracking (or class) fields for tracking information. |-----|------|------------------------|-------------| |`CLASS_SEGMENTATION/image/encoded`|feature list bytes|`add_class_segmentation_encoded` / `AddClassSegmentationEncoded`|The encoded image of class labels at each timestep.| |`CLASS_SEGMENTATION/image/timestamp`|feature list int|`add_class_segmentation_timestamp` / `AddClassSegmentationTimestamp`|The timestamp in microseconds for the class labels.| +|`CLASS_SEGMENTATION/image/multi_encoded`|feature list bytes list|`add_class_segmentation_multi_encoded` / `AddClassSegmentationMultiEncoded`|Storing multiple segmentation masks in case they overlap.| |`CLASS_SEGMENTATION/image/format`|context bytes|`set_class_segmentation_format` / `SetClassSegmentationFormat`|The encoding format of the class label images.| |`CLASS_SEGMENTATION/image/height`|context int|`set_class_segmentation_height` / `SetClassSegmentationHeight`|The height of the image in pixels.| |`CLASS_SEGMENTATION/image/width`|context int|`set_class_segmentation_width` / `SetClassSegmentationWidth`|The width of the image in pixels.| @@ -477,6 +478,7 @@ tasks and tracking (or class) fields for tracking information. 
|-----|------|------------------------|-------------|
|`INSTANCE_SEGMENTATION/image/ encoded`|feature list bytes|`add_instance_segmentation_encoded` / `AddInstanceSegmentationEncoded`|The encoded image of object instance labels at each timestep.|
|`INSTANCE_SEGMENTATION/image/ timestamp`|feature list int|`add_instance_segmentation_timestamp` / `AddInstanceSegmentationTimestamp`|The timestamp in microseconds for the object instance labels.|
+|`INSTANCE_SEGMENTATION/image/multi_encoded`|feature list bytes list|`add_instance_segmentation_multi_encoded` / `AddInstanceSegmentationMultiEncoded`|Storing multiple segmentation masks in case they overlap.|
|`INSTANCE_SEGMENTATION/image/ format`|context bytes|`set_instance_segmentation_format` / `SetInstanceSegmentationFormat`|The encoding format of the object instance labels.|
|`INSTANCE_SEGMENTATION/image/ height`|context int|`set_instance_segmentation_height` / `SetInstanceSegmentationHeight`|The height of the image in pixels.|
|`INSTANCE_SEGMENTATION/image/ width`|context int|`set_instance_segmentation_width` / `SetInstanceSegmentationWidth`|The width of the image in pixels.|
diff --git a/mediapipe/util/sequence/media_sequence.py b/mediapipe/util/sequence/media_sequence.py
index 3191cffef..fc1f15d32 100644
--- a/mediapipe/util/sequence/media_sequence.py
+++ b/mediapipe/util/sequence/media_sequence.py
@@ -489,7 +489,9 @@ def _create_image_with_prefix(name, prefix):
                               prefix=prefix, module_dict=globals())
   msu.create_int_feature_list(name + "_timestamp", IMAGE_TIMESTAMP_KEY,
                               prefix=prefix, module_dict=globals())
-
+  msu.create_bytes_list_feature_list(name + "_multi_encoded",
+                                     IMAGE_MULTI_ENCODED_KEY, prefix=prefix,
+                                     module_dict=globals())
 FORWARD_FLOW_PREFIX = "FORWARD_FLOW"
 CLASS_SEGMENTATION_PREFIX = "CLASS_SEGMENTATION"
 INSTANCE_SEGMENTATION_PREFIX = "INSTANCE_SEGMENTATION"
diff --git a/mediapipe/util/sequence/media_sequence_test.py b/mediapipe/util/sequence/media_sequence_test.py
index 6c4846c4b..3a634c486 100644
--- a/mediapipe/util/sequence/media_sequence_test.py
+++ b/mediapipe/util/sequence/media_sequence_test.py
@@ -78,8 +78,10 @@ class MediaSequenceTest(tf.test.TestCase):
     ms.set_bbox_parts((b"HEAD", b"TOE"), example)
     # feature lists
     ms.add_image_encoded(b"test", example)
+    ms.add_image_multi_encoded([b"test", b"test"], example)
     ms.add_image_timestamp(47, example)
     ms.add_forward_flow_encoded(b"test", example)
+    ms.add_forward_flow_multi_encoded([b"test", b"test"], example)
     ms.add_forward_flow_timestamp(47, example)
     ms.add_bbox_ymin((0.47, 0.49), example)
     ms.add_bbox_xmin((0.47, 0.49), example)
@@ -109,7 +111,9 @@ class MediaSequenceTest(tf.test.TestCase):
     ms.add_predicted_bbox_class_string((b"test", b"strings"), example)
     ms.add_predicted_bbox_timestamp(47, example)
     ms.add_class_segmentation_encoded(b"test", example)
+    ms.add_class_segmentation_multi_encoded([b"test", b"test"], example)
     ms.add_instance_segmentation_encoded(b"test", example)
+    ms.add_instance_segmentation_multi_encoded([b"test", b"test"], example)
     ms.add_class_segmentation_timestamp(47, example)
     ms.set_bbox_embedding_dimensions_per_region((47, 49), example)
     ms.set_bbox_embedding_format(b"test", example)
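To make the new feature lists concrete, the sketch below builds a `tf.train.SequenceExample` with the `multi_encoded` accessors added in this change. Only the accessor names and import path come from the diff above; the format value, image dimensions, mask bytes, and timestamp are placeholders chosen for illustration.

```python
import tensorflow as tf
from mediapipe.util.sequence import media_sequence as ms

example = tf.train.SequenceExample()

# Context metadata for the class segmentation images (values are placeholders).
ms.set_class_segmentation_format(b"PNG", example)
ms.set_class_segmentation_height(480, example)
ms.set_class_segmentation_width(640, example)

# At a timestep where several masks overlap, store all of them with the new
# multi_encoded feature list instead of a single encoded image.
overlapping_masks = [b"<encoded mask 1>", b"<encoded mask 2>"]  # placeholder bytes
ms.add_class_segmentation_multi_encoded(overlapping_masks, example)
ms.add_class_segmentation_timestamp(2000000, example)  # microseconds, placeholder
```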