diff --git a/README.md b/README.md
index 420eea905..b458d5a4f 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,10 @@ Object Detection
 [MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
 [YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
 
+See also
+[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
+for ML models released in MediaPipe.
+
 ## MediaPipe on the Web
 
 MediaPipe on the Web is an effort to run the same ML solutions built for mobile
@@ -89,7 +93,8 @@ run code search using
 
 ## Publications
 
-*   [Face AR with MediaPipe Face Mesh](https://mediapipe.page.link/face-geometry-blog) in Google Developers Blog
+*   [MediaPipe 3D Face Transform](https://mediapipe.page.link/face-geometry-blog)
+    in Google Developers Blog
 *   [Instant Motion Tracking With MediaPipe](https://developers.googleblog.com/2020/08/instant-motion-tracking-with-mediapipe.html)
     in Google Developers Blog
 *   [BlazePose - On-device Real-time Body Pose Tracking](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
diff --git a/docs/index.md b/docs/index.md
index f4d743b11..06fc76dd5 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -50,6 +50,10 @@ Object Detection
 [MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
 [YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
 
+See also
+[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
+for ML models released in MediaPipe.
+
 ## MediaPipe on the Web
 
 MediaPipe on the Web is an effort to run the same ML solutions built for mobile
@@ -89,7 +93,8 @@ run code search using
 
 ## Publications
 
-*   [Face AR with MediaPipe Face Mesh](https://mediapipe.page.link/face-geometry-blog) in Google Developers Blog
+*   [MediaPipe 3D Face Transform](https://mediapipe.page.link/face-geometry-blog)
+    in Google Developers Blog
 *   [Instant Motion Tracking With MediaPipe](https://developers.googleblog.com/2020/08/instant-motion-tracking-with-mediapipe.html)
     in Google Developers Blog
 *   [BlazePose - On-device Real-time Body Pose Tracking](https://ai.googleblog.com/2020/08/on-device-real-time-body-pose-tracking.html)
diff --git a/docs/solutions/face_mesh.md b/docs/solutions/face_mesh.md
index ed187ba09..76726c208 100644
--- a/docs/solutions/face_mesh.md
+++ b/docs/solutions/face_mesh.md
@@ -277,7 +277,7 @@ only works for a single face. For visual reference, please refer to *Fig. 4*.
 *   TensorFlow Blog:
     [Face and hand tracking in the browser with MediaPipe and TensorFlow.js](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html)
 *   Google Developers Blog:
-    [Face AR with MediaPipe Face Mesh](https://mediapipe.page.link/face-geometry-blog)
+    [MediaPipe 3D Face Transform](https://mediapipe.page.link/face-geometry-blog)
 *   Paper:
     [Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs](https://arxiv.org/abs/1907.06724)
     ([poster](https://docs.google.com/presentation/d/1-LWwOMO9TzEVdrZ1CS1ndJzciRHfYDJfbSxH_ke_JRg/present?slide=id.g5986dd4b4c_4_212))
diff --git a/docs/solutions/models.md b/docs/solutions/models.md
index ac105f733..2ba18a750 100644
--- a/docs/solutions/models.md
+++ b/docs/solutions/models.md
@@ -5,7 +5,7 @@ parent: Solutions
 nav_order: 30
 ---
 
-# Models and Model Cards
+# MediaPipe Models and Model Cards
 {: .no_toc }
 
 1. TOC
diff --git a/docs/solutions/solutions.md b/docs/solutions/solutions.md
index 6a852b751..66e64ef75 100644
--- a/docs/solutions/solutions.md
+++ b/docs/solutions/solutions.md
@@ -32,3 +32,7 @@ has_toc: false
 [AutoFlip](https://google.github.io/mediapipe/solutions/autoflip) | | | ✅ | | |
 [MediaSequence](https://google.github.io/mediapipe/solutions/media_sequence) | | | ✅ | | |
 [YouTube 8M](https://google.github.io/mediapipe/solutions/youtube_8m) | | | ✅ | | |
+
+See also
+[MediaPipe Models and Model Cards](https://google.github.io/mediapipe/solutions/models)
+for ML models released in MediaPipe.
diff --git a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc
index 36c3da7e7..68e58ebac 100644
--- a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc
+++ b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.cc
@@ -42,16 +42,16 @@ namespace tf = tensorflow;
 // a flag controls whether a new first dimension is inserted before
 // concatenation.
 //
-// Currently, the number of tensors output will be buffer_size less than the
-// number of input tensors because no padding is implemented and only full
-// buffers are output.
+// The number of tensors output will be buffer_size less than the
+// number of input tensors unless padding is set to a non-zero value in the
+// options proto.
 //
 // The timestamp of the output batch will match the timestamp of the first
 // tensor in that batch by default, e.g. when buffer_size frames are added,
 // the output tensor will have the timestamp of the first input. This
 // behavior can be adjusted by the timestamp_offset option.
 //
-// Example config:
+// Example config without padding:
 // node {
 //   calculator: "LappedTensorBufferCalculator"
 //   input_stream: "input_tensor"
 //   output_stream: "output_tensor"
 //   options {
 //     [mediapipe.LappedTensorBufferCalculatorOptions.ext] {
 //       buffer_size: 100
 //       overlap: 50
 //       add_batch_dim_to_tensors: true
 //     }
 //   }
 // }
@@ -64,26 +64,50 @@ namespace tf = tensorflow;
+//
+// Example config with padding and timestamp output:
+// node {
+//   calculator: "LappedTensorBufferCalculator"
+//   input_stream: "input_tensor"
+//   output_stream: "output_tensor"
+//   output_stream: "output_timestamp"
+//   options {
+//     [mediapipe.LappedTensorBufferCalculatorOptions.ext] {
+//       buffer_size: 100
+//       overlap: 50
+//       add_batch_dim_to_tensors: true
+//       timestamp_offset: 25
+//       padding: 25
+//     }
+//   }
+// }
+
 class LappedTensorBufferCalculator : public CalculatorBase {
  public:
   static ::mediapipe::Status GetContract(CalculatorContract* cc);
 
   ::mediapipe::Status Open(CalculatorContext* cc) override;
   ::mediapipe::Status Process(CalculatorContext* cc) override;
+  ::mediapipe::Status Close(CalculatorContext* cc) override;
 
  private:
-  // Adds a batch dimension to the input tensor if specified in the calculator
-  // options.
+  // Adds a batch dimension to the input tensor if specified in the
+  // calculator options.
   ::mediapipe::Status AddBatchDimension(tf::Tensor* input_tensor);
+  // Sends the current buffer downstream.
+  ::mediapipe::Status ProcessBuffer(CalculatorContext* cc);
 
   int steps_until_output_;
   int buffer_size_;
   int overlap_;
   int timestamp_offset_;
+  bool initialized_;
+
   std::unique_ptr<CircularBuffer<Timestamp>> timestamp_buffer_;
   std::unique_ptr<CircularBuffer<tf::Tensor>> buffer_;
   LappedTensorBufferCalculatorOptions options_;
 };
+
 REGISTER_CALCULATOR(LappedTensorBufferCalculator);
 
 ::mediapipe::Status LappedTensorBufferCalculator::GetContract(
@@ -93,8 +117,8 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator);
   cc->Inputs().Index(0).Set<tf::Tensor>(
       // tensorflow::Tensor stream.
   );
-  RET_CHECK_EQ(cc->Outputs().NumEntries(), 1)
-      << "Only one output stream is supported.";
+  RET_CHECK_LE(cc->Outputs().NumEntries(), 2)
+      << "Only one or two output streams are supported.";
 
   if (cc->InputSidePackets().HasTag(kBufferSize)) {
     cc->InputSidePackets().Tag(kBufferSize).Set<int>();
   }
@@ -108,11 +132,15 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator);
   if (cc->InputSidePackets().HasTag(kCalculatorOptions)) {
     cc->InputSidePackets()
         .Tag(kCalculatorOptions)
-        .Set<CalculatorOptions>();
+        .Set<LappedTensorBufferCalculatorOptions>();
   }
   cc->Outputs().Index(0).Set<tf::Tensor>(
       // Output tensorflow::Tensor stream with possibly overlapping steps.
   );
+  // Output timestamp stream with possibly overlapping steps.
+  if (cc->Outputs().NumEntries() > 1) {
+    cc->Outputs().Index(1).Set<std::vector<Timestamp>>();
+  }
   return ::mediapipe::OkStatus();
 }
 
@@ -141,10 +169,13 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator);
       << "Negative timestamp_offset is not allowed.";
   RET_CHECK_LT(timestamp_offset_, buffer_size_)
       << "output_frame_num_offset has to be less than buffer_size.";
+  RET_CHECK_LT(options_.padding(), buffer_size_)
+      << "padding option must be smaller than buffer size.";
   timestamp_buffer_ =
       absl::make_unique<CircularBuffer<Timestamp>>(buffer_size_);
   buffer_ = absl::make_unique<CircularBuffer<tf::Tensor>>(buffer_size_);
-  steps_until_output_ = buffer_size_;
+  steps_until_output_ = buffer_size_ - options_.padding();
+  initialized_ = false;
   return ::mediapipe::OkStatus();
 }
 
@@ -156,23 +187,36 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator);
   if (options_.add_batch_dim_to_tensors()) {
     RET_CHECK_OK(AddBatchDimension(&input_tensor));
   }
+  // Pad frames at the beginning with the first frame.
+  if (!initialized_) {
+    for (int i = 0; i < options_.padding(); ++i) {
+      buffer_->push_back(input_tensor);
+      timestamp_buffer_->push_back(cc->InputTimestamp());
+    }
+    initialized_ = true;
+  }
   buffer_->push_back(input_tensor);
   timestamp_buffer_->push_back(cc->InputTimestamp());
   --steps_until_output_;
 
   if (steps_until_output_ <= 0) {
-    auto concatenated = ::absl::make_unique<tf::Tensor>();
-
-    const tf::Status concat_status = tf::tensor::Concat(
-        std::vector<tf::Tensor>(buffer_->begin(), buffer_->end()),
-        concatenated.get());
-    RET_CHECK(concat_status.ok()) << concat_status.ToString();
-
-    cc->Outputs().Index(0).Add(concatenated.release(),
-                               timestamp_buffer_->Get(timestamp_offset_));
-
-    steps_until_output_ = buffer_size_ - overlap_;
+    MP_RETURN_IF_ERROR(ProcessBuffer(cc));
   }
+
+  return ::mediapipe::OkStatus();
+}
+
+::mediapipe::Status LappedTensorBufferCalculator::Close(CalculatorContext* cc) {
+  if (!initialized_ || options_.padding() == 0) {
+    return ::mediapipe::OkStatus();
+  }
+  // Pad frames at the end with the last buffered frame and flush the final
+  // batch.
+  int last_frame = buffer_size_ - steps_until_output_ - 1;
+  const auto& pad_frame = buffer_->Get(last_frame);
+  for (int i = 0; i < steps_until_output_ + options_.padding(); ++i) {
+    buffer_->push_back(pad_frame);
+    timestamp_buffer_->push_back(cc->InputTimestamp());
+  }
+  MP_RETURN_IF_ERROR(ProcessBuffer(cc));
 
   return ::mediapipe::OkStatus();
 }
@@ -190,4 +234,29 @@ REGISTER_CALCULATOR(LappedTensorBufferCalculator);
   return ::mediapipe::OkStatus();
 }
 
+// Concatenates the buffered tensors into a batch and sends it (plus the
+// matching timestamps, if a second output stream is present) downstream.
+::mediapipe::Status LappedTensorBufferCalculator::ProcessBuffer(
+    CalculatorContext* cc) {
+  auto concatenated = ::absl::make_unique<tf::Tensor>();
+  const tf::Status concat_status = tf::tensor::Concat(
+      std::vector<tf::Tensor>(buffer_->begin(), buffer_->end()),
+      concatenated.get());
+  RET_CHECK(concat_status.ok()) << concat_status.ToString();
+  // Output concatenated tensor.
+  cc->Outputs().Index(0).Add(concatenated.release(),
+                             timestamp_buffer_->Get(timestamp_offset_));
+  if (cc->Outputs().NumEntries() > 1) {
+    auto output_timestamp = ::absl::make_unique<std::vector<Timestamp>>();
+    // Output timestamp vector.
+    *output_timestamp = std::vector<Timestamp>(timestamp_buffer_->begin(),
+                                               timestamp_buffer_->end());
+    RET_CHECK_EQ(output_timestamp->size(), buffer_size_)
+        << "Output timestamp size is not correct.";
+    cc->Outputs().Index(1).Add(output_timestamp.release(),
+                               timestamp_buffer_->Get(timestamp_offset_));
+  }
+  steps_until_output_ = buffer_size_ - overlap_;
+  return ::mediapipe::OkStatus();
+}
+
 }  // namespace mediapipe
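Note on the emission schedule implemented above: the first batch fires once buffer_size - padding real inputs have arrived (the front of the buffer is pre-filled with padding copies of the first frame), each subsequent batch fires every buffer_size - overlap inputs, and when padding > 0 Close() flushes one final batch padded with the last buffered frame. The following standalone sketch of that arithmetic is illustrative only; the helper name ExpectedNumBatches is hypothetical and not part of the calculator's API.

#include <cassert>

// Sketch: how many batches LappedTensorBufferCalculator emits for a given
// input length, assuming num_inputs >= buffer_size - padding so the first
// window fills. Illustrative only, not MediaPipe API.
int ExpectedNumBatches(int num_inputs, int buffer_size, int overlap,
                       int padding) {
  const int step = buffer_size - overlap;
  // First batch after (buffer_size - padding) inputs, then one every step.
  int batches = 1 + (num_inputs - (buffer_size - padding)) / step;
  // Close() flushes one extra end-padded batch whenever padding is enabled.
  if (padding > 0) ++batches;
  return batches;
}

int main() {
  // Values used by the new unit test below: 20 inputs, buffer 12, overlap 6.
  assert(ExpectedNumBatches(20, 12, 6, /*padding=*/0) == 2);
  // With padding 3, the first batch fires after 9 inputs and Close() adds one.
  assert(ExpectedNumBatches(20, 12, 6, /*padding=*/3) == 3);
  return 0;
}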
diff --git a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.proto b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.proto
index 543c65368..bcd14985b 100644
--- a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.proto
+++ b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.proto
@@ -45,4 +45,8 @@ message LappedTensorBufferCalculatorOptions {
   // This is useful for aligning the timestamp to be centered on the input
   // range.
   optional int32 timestamp_offset = 4 [default = 0];
+
+  // Amount of padding (repeating of first/last value) to add to the beginning
+  // and end of the input stream.
+  optional int32 padding = 5;
 }
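For reference, the new field is set like any other generated proto2 accessor. A minimal sketch, assuming the generated header path below, that mirrors the padded example config in the calculator's header comment:

#include "mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator.pb.h"

// Illustrative only: populate the options with the same values as the
// "Example config with padding and timestamp output" shown earlier.
mediapipe::LappedTensorBufferCalculatorOptions MakePaddedOptions() {
  mediapipe::LappedTensorBufferCalculatorOptions options;
  options.set_buffer_size(100);
  options.set_overlap(50);
  options.set_add_batch_dim_to_tensors(true);
  options.set_timestamp_offset(25);
  options.set_padding(25);  // New field: repeat 25 frames at each end.
  return options;
}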
diff --git a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator_test.cc b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator_test.cc
index 71cc6d1da..f34d81ccb 100644
--- a/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator_test.cc
+++ b/mediapipe/calculators/tensorflow/lapped_tensor_buffer_calculator_test.cc
@@ -31,11 +31,15 @@ namespace tf = ::tensorflow;
 class LappedTensorBufferCalculatorTest : public ::testing::Test {
  protected:
   void SetUpCalculator(int buffer_size, int overlap, bool add_dim,
-                       int timestamp_offset) {
+                       int timestamp_offset, int padding,
+                       bool timestamp_output) {
     CalculatorGraphConfig::Node config;
     config.set_calculator("LappedTensorBufferCalculator");
     config.add_input_stream("input_tensor");
     config.add_output_stream("output_tensor");
+    if (timestamp_output) {
+      config.add_output_stream("output_timestamp");
+    }
     auto options = config.mutable_options()->MutableExtension(
         LappedTensorBufferCalculatorOptions::ext);
     options->set_buffer_size(buffer_size);
@@ -44,13 +48,14 @@ class LappedTensorBufferCalculatorTest : public ::testing::Test {
       options->set_add_batch_dim_to_tensors(true);
     }
     options->set_timestamp_offset(timestamp_offset);
+    options->set_padding(padding);
     runner_ = ::absl::make_unique<CalculatorRunner>(config);
   }
   std::unique_ptr<CalculatorRunner> runner_;
 };
 
 TEST_F(LappedTensorBufferCalculatorTest, OneToOne) {
-  SetUpCalculator(1, 0, false, 0);
+  SetUpCalculator(1, 0, false, 0, 0, false);
   int num_timesteps = 3;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
         tensorflow::DT_FLOAT, tensorflow::TensorShape({1}));
     input->tensor<float, 1>()(0) = i;
     runner_->MutableInputs()->Index(0).packets.push_back(
         Adopt(input.release()).At(Timestamp(i)));
   }
@@ -74,7 +79,7 @@ TEST_F(LappedTensorBufferCalculatorTest, OneToTwo) {
   int buffer_size = 2;
   int overlap = 1;
   bool add_dim = false;
-  SetUpCalculator(buffer_size, overlap, add_dim, 0);
+  SetUpCalculator(buffer_size, overlap, add_dim, 0, 0, false);
   int num_timesteps = 3;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -100,7 +105,7 @@ TEST_F(LappedTensorBufferCalculatorTest, OneToThree) {
   int buffer_size = 3;
   int overlap = 2;
   bool add_dim = false;
-  SetUpCalculator(buffer_size, overlap, add_dim, 0);
+  SetUpCalculator(buffer_size, overlap, add_dim, 0, 0, false);
   int num_timesteps = 3;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -126,7 +131,7 @@ TEST_F(LappedTensorBufferCalculatorTest, OneToThreeSkip) {
   int buffer_size = 3;
   int overlap = 1;
   bool add_dim = false;
-  SetUpCalculator(buffer_size, overlap, add_dim, 0);
+  SetUpCalculator(buffer_size, overlap, add_dim, 0, 0, false);
   int num_timesteps = 3;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -152,7 +157,7 @@ TEST_F(LappedTensorBufferCalculatorTest, OneToThreeBatch) {
   int buffer_size = 3;
   int overlap = 2;
   bool add_dim = true;
-  SetUpCalculator(buffer_size, overlap, add_dim, 0);
+  SetUpCalculator(buffer_size, overlap, add_dim, 0, 0, false);
   int num_timesteps = 3;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -180,7 +185,7 @@ TEST_F(LappedTensorBufferCalculatorTest, NegativeTimestampOffsetFails) {
   int overlap = 15;
   bool add_dim = true;
   int timestamp_offset = -7;
-  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset);
+  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset, 0, false);
   int num_timesteps = 20;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -197,7 +202,7 @@ TEST_F(LappedTensorBufferCalculatorTest, OutOfRangeTimestampOffsetFails) {
   int overlap = 15;
   bool add_dim = true;
   int timestamp_offset = buffer_size;
-  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset);
+  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset, 0, false);
   int num_timesteps = 20;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -214,7 +219,7 @@ TEST_F(LappedTensorBufferCalculatorTest, OneToThreeBatchTimestampOffset) {
   int overlap = 15;
   bool add_dim = true;
   int timestamp_offset = 7;
-  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset);
+  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset, 0, false);
   int num_timesteps = 20;
   for (int i = 0; i < num_timesteps; ++i) {
     auto input = ::absl::make_unique<tf::Tensor>(
@@ -236,5 +241,37 @@ TEST_F(LappedTensorBufferCalculatorTest, OneToThreeBatchTimestampOffset) {
   }
 }
 
+TEST_F(LappedTensorBufferCalculatorTest,
+       OneToThreeBatchTimestampOffsetPadding) {
+  int buffer_size = 12;
+  int overlap = 6;
+  bool add_dim = true;
+  int timestamp_offset = 3;
+  int padding = 0;
+  SetUpCalculator(buffer_size, overlap, add_dim, timestamp_offset, padding,
+                  true);
+  int num_timesteps = 20;
+  for (int i = 0; i < num_timesteps; ++i) {
+    auto input = ::absl::make_unique<tf::Tensor>(
+        tensorflow::DT_FLOAT, tensorflow::TensorShape({1}));
+    input->tensor<float, 1>()(0) = i;
+    runner_->MutableInputs()->Index(0).packets.push_back(
+        Adopt(input.release()).At(Timestamp(i)));
+  }
+  ASSERT_TRUE(runner_->Run().ok());
+
+  const int output_size = num_timesteps / buffer_size + 1;
+  const std::vector<Packet>& output_packets =
+      runner_->Outputs().Index(0).packets;
+  ASSERT_EQ(output_size, output_packets.size());
+  for (int i = 0; i < output_size; ++i) {
+    int64 value = output_packets[i].Timestamp().Value();
+    ASSERT_EQ(i * overlap + timestamp_offset, value);
+  }
+  const std::vector<Packet>& output_timestamps =
+      runner_->Outputs().Index(1).packets;
+  ASSERT_EQ(output_size, output_timestamps.size());
+}
+
 }  // namespace
 }  // namespace mediapipe
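To close the loop on the new second output stream, here is a hedged sketch of a downstream consumer (a hypothetical calculator, not part of this change), showing the packet type, std::vector<Timestamp>, that the test above also checks:

#include <vector>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/logging.h"

namespace mediapipe {

// Hypothetical consumer: logs the input range covered by each batch reported
// on LappedTensorBufferCalculator's timestamp output stream.
class TimestampRangeLoggerCalculator : public CalculatorBase {
 public:
  static ::mediapipe::Status GetContract(CalculatorContract* cc) {
    cc->Inputs().Index(0).Set<std::vector<Timestamp>>();
    return ::mediapipe::OkStatus();
  }

  ::mediapipe::Status Process(CalculatorContext* cc) override {
    const auto& stamps = cc->Inputs().Index(0).Get<std::vector<Timestamp>>();
    if (!stamps.empty()) {
      LOG(INFO) << "Batch spans [" << stamps.front() << ", " << stamps.back()
                << "]";
    }
    return ::mediapipe::OkStatus();
  }
};
REGISTER_CALCULATOR(TimestampRangeLoggerCalculator);

}  // namespace mediapipe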