diff --git a/mediapipe/calculators/audio/spectrogram_calculator.cc b/mediapipe/calculators/audio/spectrogram_calculator.cc
index 5f7f20c06..56a6338f9 100644
--- a/mediapipe/calculators/audio/spectrogram_calculator.cc
+++ b/mediapipe/calculators/audio/spectrogram_calculator.cc
@@ -113,8 +113,15 @@ class SpectrogramCalculator : public CalculatorBase {
   ::mediapipe::Status Close(CalculatorContext* cc) override;
 
  private:
-  Timestamp CurrentOutputTimestamp() {
-    // Current output timestamp is the *center* of the next frame to be
+  Timestamp CurrentOutputTimestamp(CalculatorContext* cc) {
+    if (use_local_timestamp_) {
+      return cc->InputTimestamp();
+    }
+    return CumulativeOutputTimestamp();
+  }
+
+  Timestamp CumulativeOutputTimestamp() {
+    // Cumulative output timestamp is the *center* of the next frame to be
     // emitted, hence delayed by half a window duration compared to relevant
     // input timestamp.
     return initial_input_timestamp_ +
@@ -141,6 +148,7 @@ class SpectrogramCalculator : public CalculatorBase {
       const OutputMatrixType postprocess_output_fn(const OutputMatrixType&),
       CalculatorContext* cc);
 
+  bool use_local_timestamp_;
   double input_sample_rate_;
   bool pad_final_packet_;
   int frame_duration_samples_;
@@ -173,6 +181,8 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
   SpectrogramCalculatorOptions spectrogram_options =
      cc->Options<SpectrogramCalculatorOptions>();
 
+  use_local_timestamp_ = spectrogram_options.use_local_timestamp();
+
   if (spectrogram_options.frame_duration_seconds() <= 0.0) {
     ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC)
         << "Invalid or missing frame_duration_seconds.\n"
@@ -351,11 +361,11 @@ template
       << "Inconsistent number of spectrogram channels.";
   if (allow_multichannel_input_) {
     cc->Outputs().Index(0).Add(spectrogram_matrices.release(),
-                               CurrentOutputTimestamp());
+                               CurrentOutputTimestamp(cc));
   } else {
     cc->Outputs().Index(0).Add(
         new OutputMatrixType(spectrogram_matrices->at(0)),
-        CurrentOutputTimestamp());
+        CurrentOutputTimestamp(cc));
   }
   cumulative_completed_frames_ += output_vectors.size();
 }
diff --git a/mediapipe/calculators/audio/spectrogram_calculator.proto b/mediapipe/calculators/audio/spectrogram_calculator.proto
index af3ad9d19..b721117d4 100644
--- a/mediapipe/calculators/audio/spectrogram_calculator.proto
+++ b/mediapipe/calculators/audio/spectrogram_calculator.proto
@@ -66,4 +66,11 @@ message SpectrogramCalculatorOptions {
   // uniformly regardless of output type (i.e., even dBs are multiplied, not
   // offset).
   optional double output_scale = 7 [default = 1.0];
+
+  // If use_local_timestamp is true, the output packet's timestamp is based on
+  // the last sample of the packet and is inferred from the latest input
+  // packet's timestamp. If false, the output packet's timestamp is based on
+  // cumulative timestamping, which is inferred from the initial input
+  // timestamp and the cumulative number of samples.
+  optional bool use_local_timestamp = 8 [default = false];
 }
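For reference, a minimal sketch of what the new option looks like in a graph config. The node below is illustrative only: the stream names and the frame_duration_seconds value are made up, and the options block assumes the usual MediaPipe options-extension pattern for SpectrogramCalculatorOptions.

```
node {
  calculator: "SpectrogramCalculator"
  input_stream: "audio_samples"
  output_stream: "spectrogram"
  options {
    [mediapipe.SpectrogramCalculatorOptions.ext] {
      frame_duration_seconds: 0.025
      use_local_timestamp: true
    }
  }
}
```

With use_local_timestamp: true each output packet reuses the timestamp of the input packet that produced it; with the default false the calculator keeps the existing cumulative timestamps, which sit at the center of each emitted frame and are therefore delayed by half a window relative to the input.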
diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD
index 4abb4c4b2..6324cce1a 100644
--- a/mediapipe/calculators/core/BUILD
+++ b/mediapipe/calculators/core/BUILD
@@ -761,3 +761,29 @@ cc_test(
         "//mediapipe/framework/port:status",
     ],
 )
+
+cc_library(
+    name = "stream_to_side_packet_calculator",
+    srcs = ["stream_to_side_packet_calculator.cc"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//mediapipe/framework:calculator_framework",
+        "//mediapipe/framework:timestamp",
+        "//mediapipe/framework/port:status",
+    ],
+    alwayslink = 1,
+)
+
+cc_test(
+    name = "stream_to_side_packet_calculator_test",
+    srcs = ["stream_to_side_packet_calculator_test.cc"],
+    deps = [
+        ":stream_to_side_packet_calculator",
+        "//mediapipe/framework:calculator_runner",
+        "//mediapipe/framework:packet",
+        "//mediapipe/framework:timestamp",
+        "//mediapipe/framework/port:gtest_main",
+        "//mediapipe/framework/port:status",
+        "@com_google_absl//absl/memory",
+    ],
+)
diff --git a/mediapipe/calculators/core/stream_to_side_packet_calculator.cc b/mediapipe/calculators/core/stream_to_side_packet_calculator.cc
new file mode 100644
index 000000000..07bb8c852
--- /dev/null
+++ b/mediapipe/calculators/core/stream_to_side_packet_calculator.cc
@@ -0,0 +1,48 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "mediapipe/framework/calculator_framework.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/timestamp.h"
+
+namespace mediapipe {
+
+// A calculator that takes a packet of an input stream and converts it to an
+// output side packet. This calculator only works under the assumption that
+// the input stream only has a single packet passing through.
+//
+// Example config:
+// node {
+//   calculator: "StreamToSidePacketCalculator"
+//   input_stream: "stream"
+//   output_side_packet: "side_packet"
+// }
+class StreamToSidePacketCalculator : public mediapipe::CalculatorBase {
+ public:
+  static mediapipe::Status GetContract(mediapipe::CalculatorContract* cc) {
+    cc->Inputs().Index(0).SetAny();
+    cc->OutputSidePackets().Index(0).SetAny();
+    return mediapipe::OkStatus();
+  }
+
+  mediapipe::Status Process(mediapipe::CalculatorContext* cc) override {
+    mediapipe::Packet& packet = cc->Inputs().Index(0).Value();
+    cc->OutputSidePackets().Index(0).Set(
+        packet.At(mediapipe::Timestamp::Unset()));
+    return mediapipe::OkStatus();
+  }
+};
+REGISTER_CALCULATOR(StreamToSidePacketCalculator);
+
+}  // namespace mediapipe
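As a sketch of how the new calculator is meant to be wired up, the config below forwards the single packet on a stream into a side packet for a downstream node. Only the StreamToSidePacketCalculator node reflects this change; the consumer node and all stream/side-packet names are hypothetical.

```
node {
  calculator: "StreamToSidePacketCalculator"
  input_stream: "file_path"
  output_side_packet: "file_path_side_packet"
}
node {
  # Hypothetical consumer; any calculator that declares an input side packet
  # in its contract could be used here.
  calculator: "SomeFileReadingCalculator"
  input_side_packet: "file_path_side_packet"
  output_stream: "contents"
}
```

Because the side packet is set from Process(), a graph whose input stream never delivers a packet leaves it unset, and a second packet tries to set it again; the tests below pin down exactly those two failure modes (kUnavailable and kAlreadyExists).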
diff --git a/mediapipe/calculators/core/stream_to_side_packet_calculator_test.cc b/mediapipe/calculators/core/stream_to_side_packet_calculator_test.cc
new file mode 100644
index 000000000..12f417c58
--- /dev/null
+++ b/mediapipe/calculators/core/stream_to_side_packet_calculator_test.cc
@@ -0,0 +1,67 @@
+// Copyright 2019 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <memory>
+#include <string>
+
+#include "absl/memory/memory.h"
+#include "mediapipe/framework/calculator_runner.h"
+#include "mediapipe/framework/packet.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/status.h"
+#include "mediapipe/framework/port/status_matchers.h"
+#include "mediapipe/framework/timestamp.h"
+
+namespace mediapipe {
+
+using ::testing::Test;
+
+class StreamToSidePacketCalculatorTest : public Test {
+ protected:
+  StreamToSidePacketCalculatorTest() {
+    const char kConfig[] = R"(
+        calculator: "StreamToSidePacketCalculator"
+        input_stream: "stream"
+        output_side_packet: "side_packet"
+      )";
+    runner_ = absl::make_unique<CalculatorRunner>(kConfig);
+  }
+
+  std::unique_ptr<CalculatorRunner> runner_;
+};
+
+TEST_F(StreamToSidePacketCalculatorTest,
+       StreamToSidePacketCalculatorWithEmptyStreamFails) {
+  EXPECT_EQ(runner_->Run().code(), mediapipe::StatusCode::kUnavailable);
+}
+
+TEST_F(StreamToSidePacketCalculatorTest,
+       StreamToSidePacketCalculatorWithSinglePacketCreatesSidePacket) {
+  runner_->MutableInputs()->Index(0).packets.push_back(
+      Adopt(new std::string("test")).At(Timestamp(1)));
+  MP_ASSERT_OK(runner_->Run());
+  EXPECT_EQ(runner_->OutputSidePackets().Index(0).Get<std::string>(), "test");
+}
+
+TEST_F(StreamToSidePacketCalculatorTest,
+       StreamToSidePacketCalculatorWithMultiplePacketsFails) {
+  runner_->MutableInputs()->Index(0).packets.push_back(
+      Adopt(new std::string("test1")).At(Timestamp(1)));
+  runner_->MutableInputs()->Index(0).packets.push_back(
+      Adopt(new std::string("test2")).At(Timestamp(2)));
+  EXPECT_EQ(runner_->Run().code(), mediapipe::StatusCode::kAlreadyExists);
+}
+
+}  // namespace mediapipe
diff --git a/mediapipe/docs/android_archive_library.md b/mediapipe/docs/android_archive_library.md
index 8c7c42b91..5351e58f9 100644
--- a/mediapipe/docs/android_archive_library.md
+++ b/mediapipe/docs/android_archive_library.md
@@ -117,6 +117,7 @@ project.
     implementation 'com.google.code.findbugs:jsr305:3.0.2'
     implementation 'com.google.guava:guava:27.0.1-android'
     implementation 'com.google.guava:guava:27.0.1-android'
+    implementation 'com.google.protobuf:protobuf-lite:3.0.0'
     // CameraX core library
     def camerax_version = "1.0.0-alpha06"
     implementation "androidx.camera:camera-core:$camerax_version"
diff --git a/mediapipe/docs/install.md b/mediapipe/docs/install.md
index 825f5f831..fb3803cfa 100644
--- a/mediapipe/docs/install.md
+++ b/mediapipe/docs/install.md
@@ -579,6 +579,11 @@ export ANDROID_HOME=<path to the Android SDK>
 export ANDROID_NDK_HOME=<path to the Android NDK>
 ```
 
+In order to use MediaPipe on earlier Android versions, MediaPipe needs to switch
+to a lower Android API level. You can achieve this by specifying `api_level =
+<api level integer>` in android_ndk_repository() and/or android_sdk_repository()
+in the [`WORKSPACE`] file.
+
 Please verify all the necessary packages are installed.
 
 * Android SDK Platform API Level 28 or 29
diff --git a/mediapipe/docs/youtube_8m.md b/mediapipe/docs/youtube_8m.md
index 65346a6d3..045c05845 100644
--- a/mediapipe/docs/youtube_8m.md
+++ b/mediapipe/docs/youtube_8m.md
@@ -64,7 +64,7 @@ videos.
 
 4. Generate a MediaSequence metadata from the input video.
 
-   Note: the output file is /tmp/mediapipe/metadata.tfrecord
+   Note: the output file is /tmp/mediapipe/metadata.pb
 
    ```bash
    # change clip_end_time_sec to match the length of your video.
@@ -82,8 +82,17 @@ videos.
    GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \
      --calculator_graph_config_file=mediapipe/graphs/youtube8m/feature_extraction.pbtxt \
-     --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.tfrecord \
-     --output_side_packets=output_sequence_example=/tmp/mediapipe/output.tfrecord
+     --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.pb \
+     --output_side_packets=output_sequence_example=/tmp/mediapipe/features.pb
+   ```
+
+6. [Optional] Read the features.pb in Python.
+
+   ```
+   import tensorflow as tf
+
+   sequence_example = open('/tmp/mediapipe/features.pb', 'rb').read()
+   print(tf.train.SequenceExample.FromString(sequence_example))
    ```
 
 ## Model Inference for YouTube-8M Challenge
@@ -136,7 +145,7 @@ the inference for both local videos and the dataset
 
 ### Steps to run the YouTube-8M model inference graph with a local video
 
-1. Make sure you have the output tfrecord from the feature extraction pipeline.
+1. Make sure you have the features.pb from the feature extraction pipeline.
 
 2. Copy the baseline model
    [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view)
@@ -158,7 +167,7 @@
    # overlap is the number of seconds adjacent segments share.
    GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \
      --calculator_graph_config_file=mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt \
-     --input_side_packets=input_sequence_example_path=/tmp/mediapipe/output.tfrecord,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4
+     --input_side_packets=input_sequence_example_path=/tmp/mediapipe/features.pb,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4
    ```
 
 4. View the annotated video.
diff --git a/mediapipe/examples/desktop/youtube8m/README.md b/mediapipe/examples/desktop/youtube8m/README.md
index a5baaed65..6668c0612 100644
--- a/mediapipe/examples/desktop/youtube8m/README.md
+++ b/mediapipe/examples/desktop/youtube8m/README.md
@@ -33,7 +33,7 @@
 
 4. Generate a MediaSequence metadata from the input video.
 
-   Note: the output file is /tmp/mediapipe/metadata.tfrecord
+   Note: the output file is /tmp/mediapipe/metadata.pb
 
    ```bash
    # change clip_end_time_sec to match the length of your video.
@@ -51,8 +51,17 @@
    GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/extract_yt8m_features \
      --calculator_graph_config_file=mediapipe/graphs/youtube8m/feature_extraction.pbtxt \
-     --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.tfrecord \
-     --output_side_packets=output_sequence_example=/tmp/mediapipe/output.tfrecord
+     --input_side_packets=input_sequence_example=/tmp/mediapipe/metadata.pb \
+     --output_side_packets=output_sequence_example=/tmp/mediapipe/features.pb
+   ```
+
+6. [Optional] Read the features.pb in Python.
+
+   ```
+   import tensorflow as tf
+
+   sequence_example = open('/tmp/mediapipe/features.pb', 'rb').read()
+   print(tf.train.SequenceExample.FromString(sequence_example))
    ```
 
 ### Steps to run the YouTube-8M inference graph with the YT8M dataset
@@ -118,7 +127,7 @@
 
 ### Steps to run the YouTube-8M model inference graph with a local video
 
-1. Make sure you have the output tfrecord from the feature extraction pipeline.
+1. Make sure you have the features.pb from the feature extraction pipeline.
 
 2. Copy the baseline model
    [(model card)](https://drive.google.com/file/d/1xTCi9-Nm9dt2KIk8WR0dDFrIssWawyXy/view)
    to local.
@@ -138,7 +147,7 @@
    # overlap is the number of seconds adjacent segments share.
    GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/youtube8m/model_inference \
      --calculator_graph_config_file=mediapipe/graphs/youtube8m/local_video_model_inference.pbtxt \
-     --input_side_packets=input_sequence_example_path=/tmp/mediapipe/output.tfrecord,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4
+     --input_side_packets=input_sequence_example_path=/tmp/mediapipe/features.pb,input_video_path=/absolute/path/to/the/local/video/file,output_video_path=/tmp/mediapipe/annotated_video.mp4,segment_size=5,overlap=4
    ```
 
 4. View the annotated video.
diff --git a/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py b/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py
index 7438a5134..3a6b98181 100644
--- a/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py
+++ b/mediapipe/examples/desktop/youtube8m/generate_input_sequence_example.py
@@ -53,7 +53,7 @@ def main(argv):
       flags.FLAGS.clip_start_time_sec * SECONDS_TO_MICROSECONDS, metadata)
   ms.set_clip_end_timestamp(
       flags.FLAGS.clip_end_time_sec * SECONDS_TO_MICROSECONDS, metadata)
-  with open('/tmp/mediapipe/metadata.tfrecord', 'wb') as writer:
+  with open('/tmp/mediapipe/metadata.pb', 'wb') as writer:
     writer.write(metadata.SerializeToString())