diff --git a/README.md b/README.md index 1630df000..6fbca03ca 100644 --- a/README.md +++ b/README.md @@ -9,26 +9,32 @@ ## ML Solutions in MediaPipe -* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html) +* [Face Detection](mediapipe/docs/face_detection_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html) +* [Face Mesh](mediapipe/docs/face_mesh_mobile_gpu.md) +* [Hand Detection](mediapipe/docs/hand_detection_mobile_gpu.md) +* [Hand Tracking](mediapipe/docs/hand_tracking_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html) * [Multi-hand Tracking](mediapipe/docs/multi_hand_tracking_mobile_gpu.md) -* [Hand Tracking](mediapipe/docs/hand_tracking_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html) -* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [[Web Demo]](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html) +* [Hair Segmentation](mediapipe/docs/hair_segmentation_mobile_gpu.md) [(web demo)](https://viz.mediapipe.dev/runner/demos/hair_segmentation/hair_segmentation.html) * [Object Detection](mediapipe/docs/object_detection_mobile_gpu.md) * [Object Detection and Tracking](mediapipe/docs/object_tracking_mobile_gpu.md) * [Objectron: 3D Object Detection and Tracking](mediapipe/docs/objectron_mobile_gpu.md) -* [AutoFlip](mediapipe/docs/autoflip.md) +* [AutoFlip: Intelligent Video Reframing](mediapipe/docs/autoflip.md) ![face_detection](mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif) -![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif) -![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif) +![face_mesh](mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif) +![hand_tracking](mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif) +![multi-hand_tracking](mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu_small.gif) ![hair_segmentation](mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif) +![object_detection](mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif) ![object_tracking](mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif) +![objectron_shoes](mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif) +![objectron_chair](mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif) ## Installation Follow these [instructions](mediapipe/docs/install.md). ## Getting started -See mobile, desktop and Google Coral [examples](mediapipe/docs/examples.md). +See mobile, desktop, web and Google Coral [examples](mediapipe/docs/examples.md). Check out some web demos [[Edge detection]](https://viz.mediapipe.dev/runner/demos/edge_detection/edge_detection.html) [[Face detection]](https://viz.mediapipe.dev/runner/demos/face_detection/face_detection.html) [[Hand Tracking]](https://viz.mediapipe.dev/runner/demos/hand_tracking/hand_tracking.html) @@ -40,10 +46,14 @@ Check out the [Examples page](https://mediapipe.readthedocs.io/en/latest/example ## Visualizing MediaPipe graphs A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.dev/). Please also see instructions [here](mediapipe/docs/visualizer.md). 
+## Google Open Source Code search +Search MediaPipe Github repository using [Google Open Source code search](https://t.co/LSZnbMUUnT?amp=1) + ## Videos * [YouTube Channel](https://www.youtube.com/channel/UCObqmpuSMx-usADtL_qdMAw) ## Publications +* [Alfred Camera: Smart camera features using MediaPipe](https://developers.googleblog.com/2020/03/alfred-camera-smart-camera-features-using-mediapipe.html) * [MediaPipe Objectron: Real-time 3D Object Detection on Mobile Devices](https://mediapipe.page.link/objectron-aiblog) * [AutoFlip: An Open Source Framework for Intelligent Video Reframing](https://mediapipe.page.link/autoflip) * [Google Developer Blog: MediaPipe on the Web](https://mediapipe.page.link/webdevblog) @@ -52,6 +62,7 @@ A web-based visualizer is hosted on [viz.mediapipe.dev](https://viz.mediapipe.de * [MediaPipe: A Framework for Building Perception Pipelines](https://arxiv.org/abs/1906.08172) ## Events +* [MediaPipe Seattle Meetup, Google Building Waterside, 13 Feb 2020](https://mediapipe.page.link/seattle2020) * [AI Nextcon 2020, 12-16 Feb 2020, Seattle](http://aisea20.xnextcon.com/) * [MediaPipe Madrid Meetup, 16 Dec 2019](https://www.meetup.com/Madrid-AI-Developers-Group/events/266329088/) * [MediaPipe London Meetup, Google 123 Building, 12 Dec 2019](https://www.meetup.com/London-AI-Tech-Talk/events/266329038) diff --git a/mediapipe/calculators/audio/spectrogram_calculator.cc b/mediapipe/calculators/audio/spectrogram_calculator.cc index 56a6338f9..7bac73ff7 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator.cc +++ b/mediapipe/calculators/audio/spectrogram_calculator.cc @@ -184,23 +184,14 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518; use_local_timestamp_ = spectrogram_options.use_local_timestamp(); if (spectrogram_options.frame_duration_seconds() <= 0.0) { - ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) - << "Invalid or missing frame_duration_seconds.\n" - "frame_duration_seconds: " - << spectrogram_options.frame_overlap_seconds(); + // TODO: return an error. } if (spectrogram_options.frame_overlap_seconds() >= spectrogram_options.frame_duration_seconds()) { - ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) - << "Invalid frame_overlap_seconds.\nframe_overlap_seconds: " - << spectrogram_options.frame_overlap_seconds() - << "\nframe_duration_seconds: " - << spectrogram_options.frame_duration_seconds(); + // TODO: return an error. } if (spectrogram_options.frame_overlap_seconds() < 0.0) { - ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) - << "Frame_overlap_seconds is < 0.0.\nframe_overlap_seconds: " - << spectrogram_options.frame_overlap_seconds(); + // TODO: return an error. } TimeSeriesHeader input_header; @@ -212,9 +203,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518; if (!spectrogram_options.allow_multichannel_input() && num_input_channels_ != 1) { - ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) - << "The current setting only supports single-channel input. Please set " - "allow_multichannel_input.\n"; + // TODO: return an error. 
} frame_duration_samples_ = @@ -293,10 +282,7 @@ const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518; const Matrix& input_stream = cc->Inputs().Index(0).Get<Matrix>(); if (input_stream.rows() != num_input_channels_) { - ::mediapipe::InvalidArgumentErrorBuilder(MEDIAPIPE_LOC) - << "Number of input channels do not correspond to the number of rows " - << "in the input matrix: " << num_input_channels_ << "channels vs " - << input_stream.rows() << " rows"; + // TODO: return an error. } cumulative_input_samples_ += input_stream.cols(); diff --git a/mediapipe/calculators/core/BUILD b/mediapipe/calculators/core/BUILD index 1c9910fc7..367194f5a 100644 --- a/mediapipe/calculators/core/BUILD +++ b/mediapipe/calculators/core/BUILD @@ -815,6 +815,38 @@ cc_test( ], ) +cc_library( + name = "split_normalized_landmark_list_calculator", + srcs = ["split_normalized_landmark_list_calculator.cc"], + visibility = ["//visibility:public"], + deps = [ + ":split_vector_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:ret_check", + "//mediapipe/framework/port:status", + "//mediapipe/util:resource_util", + ], + alwayslink = 1, +) + +cc_test( + name = "split_normalized_landmark_list_calculator_test", + srcs = ["split_normalized_landmark_list_calculator_test.cc"], + deps = [ + ":split_normalized_landmark_list_calculator", + ":split_vector_calculator_cc_proto", + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework:calculator_runner", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/framework/port:integral_types", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/tool:validate_type", + ], +) + cc_library( name = "dequantize_byte_array_calculator", srcs = ["dequantize_byte_array_calculator.cc"], diff --git a/mediapipe/calculators/core/constant_side_packet_calculator.cc b/mediapipe/calculators/core/constant_side_packet_calculator.cc index 55bfb5acd..2a60a2fd1 100644 --- a/mediapipe/calculators/core/constant_side_packet_calculator.cc +++ b/mediapipe/calculators/core/constant_side_packet_calculator.cc @@ -51,8 +51,8 @@ namespace mediapipe { class ConstantSidePacketCalculator : public CalculatorBase { public: static ::mediapipe::Status GetContract(CalculatorContract* cc) { - const auto& options = cc->Options().GetExtension( - ::mediapipe::ConstantSidePacketCalculatorOptions::ext); + const auto& options = + cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>(); RET_CHECK_EQ(cc->OutputSidePackets().NumEntries(kPacketTag), options.packet_size()) << "Number of output side packets has to be same as number of packets " @@ -80,8 +80,8 @@ class ConstantSidePacketCalculator : public CalculatorBase { } ::mediapipe::Status Open(CalculatorContext* cc) override { - const auto& options = cc->Options().GetExtension( - ::mediapipe::ConstantSidePacketCalculatorOptions::ext); + const auto& options = + cc->Options<::mediapipe::ConstantSidePacketCalculatorOptions>(); int index = 0; for (CollectionItemId id = cc->OutputSidePackets().BeginId(kPacketTag); id != cc->OutputSidePackets().EndId(kPacketTag); ++id, ++index) { diff --git a/mediapipe/calculators/core/split_normalized_landmark_list_calculator.cc b/mediapipe/calculators/core/split_normalized_landmark_list_calculator.cc new file mode 100644 index 000000000..e95173e0c --- /dev/null +++
b/mediapipe/calculators/core/split_normalized_landmark_list_calculator.cc @@ -0,0 +1,165 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT +#define MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ // NOLINT + +#include "mediapipe/calculators/core/split_vector_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "mediapipe/framework/port/ret_check.h" +#include "mediapipe/util/resource_util.h" + +namespace mediapipe { + +// Splits an input packet with NormalizedLandmarkList into +// multiple NormalizedLandmarkList output packets using the [begin, end) ranges +// specified in SplitVectorCalculatorOptions. If the option "element_only" is +// set to true, all ranges should be of size 1 and all outputs will be elements +// of type NormalizedLandmark. If "element_only" is false, ranges can be +// non-zero in size and all outputs will be of type NormalizedLandmarkList. +// If the option "combine_outputs" is set to true, only one output stream can be +// specified and all ranges of elements will be combined into one +// NormalizedLandmarkList. +class SplitNormalizedLandmarkListCalculator : public CalculatorBase { + public: + static ::mediapipe::Status GetContract(CalculatorContract* cc) { + RET_CHECK(cc->Inputs().NumEntries() == 1); + RET_CHECK(cc->Outputs().NumEntries() != 0); + + cc->Inputs().Index(0).Set<NormalizedLandmarkList>(); + + const auto& options = + cc->Options<::mediapipe::SplitVectorCalculatorOptions>(); + + if (options.combine_outputs()) { + RET_CHECK_EQ(cc->Outputs().NumEntries(), 1); + cc->Outputs().Index(0).Set<NormalizedLandmarkList>(); + for (int i = 0; i < options.ranges_size() - 1; ++i) { + for (int j = i + 1; j < options.ranges_size(); ++j) { + const auto& range_0 = options.ranges(i); + const auto& range_1 = options.ranges(j); + if ((range_0.begin() >= range_1.begin() && + range_0.begin() < range_1.end()) || + (range_1.begin() >= range_0.begin() && + range_1.begin() < range_0.end())) { + return ::mediapipe::InvalidArgumentError( + "Ranges must be non-overlapping when using combine_outputs " + "option."); + } + } + } + } else { + if (cc->Outputs().NumEntries() != options.ranges_size()) { + return ::mediapipe::InvalidArgumentError( + "The number of output streams should match the number of ranges " + "specified in the CalculatorOptions."); + } + + // Set the output types for each output stream.
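+      // Each range below must satisfy 0 <= begin < end. With element_only set, a range must cover exactly one landmark and its stream emits a single NormalizedLandmark; otherwise the stream emits a NormalizedLandmarkList.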
+ for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { + if (options.ranges(i).begin() < 0 || options.ranges(i).end() < 0 || + options.ranges(i).begin() >= options.ranges(i).end()) { + return ::mediapipe::InvalidArgumentError( + "Indices should be non-negative and begin index should be less " + "than the end index."); + } + if (options.element_only()) { + if (options.ranges(i).end() - options.ranges(i).begin() != 1) { + return ::mediapipe::InvalidArgumentError( + "Since element_only is true, all ranges should be of size 1."); + } + cc->Outputs().Index(i).Set<NormalizedLandmark>(); + } else { + cc->Outputs().Index(i).Set<NormalizedLandmarkList>(); + } + } + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Open(CalculatorContext* cc) override { + cc->SetOffset(TimestampDiff(0)); + + const auto& options = + cc->Options<::mediapipe::SplitVectorCalculatorOptions>(); + + element_only_ = options.element_only(); + combine_outputs_ = options.combine_outputs(); + + for (const auto& range : options.ranges()) { + ranges_.push_back({range.begin(), range.end()}); + max_range_end_ = std::max(max_range_end_, range.end()); + total_elements_ += range.end() - range.begin(); + } + + return ::mediapipe::OkStatus(); + } + + ::mediapipe::Status Process(CalculatorContext* cc) override { + const NormalizedLandmarkList& input = + cc->Inputs().Index(0).Get<NormalizedLandmarkList>(); + RET_CHECK_GE(input.landmark_size(), max_range_end_); + + if (combine_outputs_) { + NormalizedLandmarkList output; + for (int i = 0; i < ranges_.size(); ++i) { + for (int j = ranges_[i].first; j < ranges_[i].second; ++j) { + const NormalizedLandmark& input_landmark = input.landmark(j); + *output.add_landmark() = input_landmark; + } + } + RET_CHECK_EQ(output.landmark_size(), total_elements_); + cc->Outputs().Index(0).AddPacket( + MakePacket<NormalizedLandmarkList>(output).At(cc->InputTimestamp())); + } else { + if (element_only_) { + for (int i = 0; i < ranges_.size(); ++i) { + cc->Outputs().Index(i).AddPacket( + MakePacket<NormalizedLandmark>(input.landmark(ranges_[i].first)) + .At(cc->InputTimestamp())); + } + } else { + for (int i = 0; i < ranges_.size(); ++i) { + NormalizedLandmarkList output; + for (int j = ranges_[i].first; j < ranges_[i].second; ++j) { + const NormalizedLandmark& input_landmark = input.landmark(j); + *output.add_landmark() = input_landmark; + } + cc->Outputs().Index(i).AddPacket( + MakePacket<NormalizedLandmarkList>(output).At( + cc->InputTimestamp())); + } + } + } + + return ::mediapipe::OkStatus(); + } + + private: + std::vector<std::pair<int32, int32>> ranges_; + int32 max_range_end_ = -1; + int32 total_elements_ = 0; + bool element_only_ = false; + bool combine_outputs_ = false; +}; + +REGISTER_CALCULATOR(SplitNormalizedLandmarkListCalculator); + +} // namespace mediapipe + +// NOLINTNEXTLINE +#endif // MEDIAPIPE_CALCULATORS_CORE_SPLIT_NORMALIZED_LANDMARK_LIST_CALCULATOR_H_ diff --git a/mediapipe/calculators/core/split_normalized_landmark_list_calculator_test.cc b/mediapipe/calculators/core/split_normalized_landmark_list_calculator_test.cc new file mode 100644 index 000000000..ce02dcd8a --- /dev/null +++ b/mediapipe/calculators/core/split_normalized_landmark_list_calculator_test.cc @@ -0,0 +1,404 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <memory> +#include <string> +#include <vector> + +#include "mediapipe/calculators/core/split_vector_calculator.pb.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_runner.h" +#include "mediapipe/framework/deps/file_path.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/port/gmock.h" +#include "mediapipe/framework/port/gtest.h" +#include "mediapipe/framework/port/integral_types.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status_matchers.h" // NOLINT +#include "mediapipe/framework/tool/validate_type.h" + +namespace mediapipe { + +constexpr float kLocationVal = 3; + +class SplitNormalizedLandmarkListCalculatorTest : public ::testing::Test { + protected: + void TearDown() { expected_landmarks_.reset(); } + + void PrepareNormalizedLandmarkList(int list_size) { + // Prepare input landmark list. + input_landmarks_ = absl::make_unique<NormalizedLandmarkList>(); + expected_landmarks_ = absl::make_unique<NormalizedLandmarkList>(); + for (int i = 0; i < list_size; ++i) { + NormalizedLandmark* landmark = input_landmarks_->add_landmark(); + landmark->set_x(i * kLocationVal); + landmark->set_y(i * kLocationVal); + landmark->set_z(i * kLocationVal); + // Save the landmarks for comparison after the graph runs. + *expected_landmarks_->add_landmark() = *landmark; + } + } + + void ValidateListOutput(std::vector<Packet>& output_packets, + int expected_elements, int input_begin_index) { + ASSERT_EQ(1, output_packets.size()); + const NormalizedLandmarkList& output_landmarks = + output_packets[0].Get<NormalizedLandmarkList>(); + ASSERT_EQ(expected_elements, output_landmarks.landmark_size()); + + for (int i = 0; i < expected_elements; ++i) { + const NormalizedLandmark& expected_landmark = + expected_landmarks_->landmark(input_begin_index + i); + const NormalizedLandmark& result = output_landmarks.landmark(i); + EXPECT_FLOAT_EQ(expected_landmark.x(), result.x()); + EXPECT_FLOAT_EQ(expected_landmark.y(), result.y()); + EXPECT_FLOAT_EQ(expected_landmark.z(), result.z()); + } + } + + void ValidateCombinedListOutput(std::vector<Packet>& output_packets, + int expected_elements, + std::vector<int>& input_begin_indices, + std::vector<int>& input_end_indices) { + ASSERT_EQ(1, output_packets.size()); + ASSERT_EQ(input_begin_indices.size(), input_end_indices.size()); + const NormalizedLandmarkList& output_landmarks = + output_packets[0].Get<NormalizedLandmarkList>(); + ASSERT_EQ(expected_elements, output_landmarks.landmark_size()); + const int num_ranges = input_begin_indices.size(); + + int element_id = 0; + for (int range_id = 0; range_id < num_ranges; ++range_id) { + for (int i = input_begin_indices[range_id]; + i < input_end_indices[range_id]; ++i) { + const NormalizedLandmark& expected_landmark = + expected_landmarks_->landmark(i); + const NormalizedLandmark& result = + output_landmarks.landmark(element_id); + EXPECT_FLOAT_EQ(expected_landmark.x(), result.x()); + EXPECT_FLOAT_EQ(expected_landmark.y(), result.y()); + EXPECT_FLOAT_EQ(expected_landmark.z(), result.z()); + element_id++; + } + } + } + + void ValidateElementOutput(std::vector<Packet>& output_packets, + int input_begin_index) { + ASSERT_EQ(1,
output_packets.size()); + + const NormalizedLandmark& output_landmark = + output_packets[0].Get<NormalizedLandmark>(); + ASSERT_TRUE(output_landmark.IsInitialized()); + + const NormalizedLandmark& expected_landmark = + expected_landmarks_->landmark(input_begin_index); + + EXPECT_FLOAT_EQ(expected_landmark.x(), output_landmark.x()); + EXPECT_FLOAT_EQ(expected_landmark.y(), output_landmark.y()); + EXPECT_FLOAT_EQ(expected_landmark.z(), output_landmark.z()); + } + + std::unique_ptr<NormalizedLandmarkList> input_landmarks_ = nullptr; + std::unique_ptr<NormalizedLandmarkList> expected_landmarks_ = nullptr; + std::unique_ptr<CalculatorRunner> runner_ = nullptr; +}; + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTest) { + PrepareNormalizedLandmarkList(/*list_size=*/5); + ASSERT_NE(input_landmarks_, nullptr); + + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + output_stream: "range_1" + output_stream: "range_2" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 4 } + ranges: { begin: 4 end: 5 } + } + } + } + )"); + std::vector<Packet> range_0_packets; + tool::AddVectorSink("range_0", &graph_config, &range_0_packets); + std::vector<Packet> range_1_packets; + tool::AddVectorSink("range_1", &graph_config, &range_1_packets); + std::vector<Packet> range_2_packets; + tool::AddVectorSink("range_2", &graph_config, &range_2_packets); + + // Run the graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0)))); + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + + ValidateListOutput(range_0_packets, /*expected_elements=*/1, + /*input_begin_index=*/0); + ValidateListOutput(range_1_packets, /*expected_elements=*/3, + /*input_begin_index=*/1); + ValidateListOutput(range_2_packets, /*expected_elements=*/1, + /*input_begin_index=*/4); + + // Fully close the graph at the end. + MP_ASSERT_OK(graph.CloseInputStream("landmarks_in")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, InvalidRangeTest) { + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 0 } + } + } + } + )"); + + // Run the graph. + CalculatorGraph graph; + // The graph should fail running because of an invalid range (begin == end). + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, + InvalidOutputStreamCountTest) { + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + output_stream: "range_1" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + } + } + } + )"); + + // Run the graph.
+ CalculatorGraph graph; + // The graph should fail running because the number of output streams does not + // match the number of range elements in the options. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, + InvalidCombineOutputsMultipleOutputsTest) { + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + output_stream: "range_1" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + combine_outputs: true + } + } + } + )"); + + // Run the graph. + CalculatorGraph graph; + // The graph should fail running because only one output stream is allowed + // when the combine_outputs option is set. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, + InvalidOverlappingRangesTest) { + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 3 } + ranges: { begin: 1 end: 4 } + combine_outputs: true + } + } + } + )"); + + // Run the graph. + CalculatorGraph graph; + // The graph should fail running because there are overlapping ranges. + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTestElementOnly) { + PrepareNormalizedLandmarkList(/*list_size=*/5); + ASSERT_NE(input_landmarks_, nullptr); + + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + output_stream: "range_1" + output_stream: "range_2" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 4 end: 5 } + element_only: true + } + } + } + )"); + std::vector<Packet> range_0_packets; + tool::AddVectorSink("range_0", &graph_config, &range_0_packets); + std::vector<Packet> range_1_packets; + tool::AddVectorSink("range_1", &graph_config, &range_1_packets); + std::vector<Packet> range_2_packets; + tool::AddVectorSink("range_2", &graph_config, &range_2_packets); + + // Run the graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0)))); + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + + ValidateElementOutput(range_0_packets, + /*input_begin_index=*/0); + ValidateElementOutput(range_1_packets, + /*input_begin_index=*/2); + ValidateElementOutput(range_2_packets, + /*input_begin_index=*/4); + + // Fully close the graph at the end.
+ MP_ASSERT_OK(graph.CloseInputStream("landmarks_in")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, SmokeTestCombiningOutputs) { + PrepareNormalizedLandmarkList(/*list_size=*/5); + ASSERT_NE(input_landmarks_, nullptr); + + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 2 end: 3 } + ranges: { begin: 4 end: 5 } + combine_outputs: true + } + } + } + )"); + std::vector<Packet> range_0_packets; + tool::AddVectorSink("range_0", &graph_config, &range_0_packets); + + // Run the graph. + CalculatorGraph graph; + MP_ASSERT_OK(graph.Initialize(graph_config)); + MP_ASSERT_OK(graph.StartRun({})); + MP_ASSERT_OK(graph.AddPacketToInputStream( + "landmarks_in", Adopt(input_landmarks_.release()).At(Timestamp(0)))); + // Wait until the calculator finishes processing. + MP_ASSERT_OK(graph.WaitUntilIdle()); + + std::vector<int> input_begin_indices = {0, 2, 4}; + std::vector<int> input_end_indices = {1, 3, 5}; + ValidateCombinedListOutput(range_0_packets, /*expected_elements=*/3, + input_begin_indices, input_end_indices); + + // Fully close the graph at the end. + MP_ASSERT_OK(graph.CloseInputStream("landmarks_in")); + MP_ASSERT_OK(graph.WaitUntilDone()); +} + +TEST_F(SplitNormalizedLandmarkListCalculatorTest, + ElementOnlyDisablesVectorOutputs) { + // Prepare a graph to use the SplitNormalizedLandmarkListCalculator. + CalculatorGraphConfig graph_config = + ::mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>( + R"( + input_stream: "landmarks_in" + node { + calculator: "SplitNormalizedLandmarkListCalculator" + input_stream: "landmarks_in" + output_stream: "range_0" + output_stream: "range_1" + output_stream: "range_2" + options { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 4 } + ranges: { begin: 4 end: 5 } + element_only: true + } + } + } + )"); + + // Run the graph.
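+  // Initialize() should fail: element_only requires every range to have size
+  // 1, but the second range ({ begin: 1 end: 4 }) covers three landmarks.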
+ CalculatorGraph graph; + ASSERT_FALSE(graph.Initialize(graph_config).ok()); +} + +} // namespace mediapipe diff --git a/mediapipe/calculators/image/image_transformation_calculator.cc b/mediapipe/calculators/image/image_transformation_calculator.cc index 683e82511..34d618614 100644 --- a/mediapipe/calculators/image/image_transformation_calculator.cc +++ b/mediapipe/calculators/image/image_transformation_calculator.cc @@ -376,13 +376,12 @@ REGISTER_CALCULATOR(ImageTransformationCalculator); ::mediapipe::Status ImageTransformationCalculator::RenderCpu( CalculatorContext* cc) { - int input_width = cc->Inputs().Tag("IMAGE").Get<ImageFrame>().Width(); - int input_height = cc->Inputs().Tag("IMAGE").Get<ImageFrame>().Height(); - const auto& input_img = cc->Inputs().Tag("IMAGE").Get<ImageFrame>(); cv::Mat input_mat = formats::MatView(&input_img); cv::Mat scaled_mat; + const int input_width = input_img.Width(); + const int input_height = input_img.Height(); if (!output_height_ || !output_width_) { output_height_ = input_height; output_width_ = input_width; @@ -455,8 +454,9 @@ REGISTER_CALCULATOR(ImageTransformationCalculator); ::mediapipe::Status ImageTransformationCalculator::RenderGpu( CalculatorContext* cc) { #if !defined(MEDIAPIPE_DISABLE_GPU) - int input_width = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>().width(); - int input_height = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>().height(); + const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>(); + const int input_width = input.width(); + const int input_height = input.height(); int output_width; int output_height; @@ -472,7 +472,6 @@ REGISTER_CALCULATOR(ImageTransformationCalculator); .Add(padding.release(), cc->InputTimestamp()); } - const auto& input = cc->Inputs().Tag("IMAGE_GPU").Get<mediapipe::GpuBuffer>(); QuadRenderer* renderer = nullptr; GlTexture src1; diff --git a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc index 0957e35a6..cf5635d3a 100644 --- a/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc +++ b/mediapipe/calculators/tensorflow/pack_media_sequence_calculator.cc @@ -244,7 +244,7 @@ class PackMediaSequenceCalculator : public CalculatorBase { ::mediapipe::Status VerifySequence() { std::string error_msg = "Missing features - "; bool all_present = true; - for (auto iter : features_present_) { + for (const auto& iter : features_present_) { if (!iter.second) { all_present = false; absl::StrAppend(&error_msg, iter.first, ", "); diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc index 6100ddceb..b54976478 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_calculator.cc @@ -126,7 +126,7 @@ class TensorFlowSessionFromSavedModelCalculator : public CalculatorBase { // Set user specified tags properly. // If no tags specified will use tensorflow::kSavedModelTagServe by default.
std::unordered_set<std::string> tags_set; - for (std::string tag : options.saved_model_tag()) { + for (const std::string& tag : options.saved_model_tag()) { tags_set.insert(tag); } if (tags_set.empty()) { diff --git a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc index 4e956e759..aeb69822b 100644 --- a/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc +++ b/mediapipe/calculators/tensorflow/tensorflow_session_from_saved_model_generator.cc @@ -121,7 +121,7 @@ class TensorFlowSessionFromSavedModelGenerator : public PacketGenerator { // Set user specified tags properly. // If no tags specified will use tensorflow::kSavedModelTagServe by default. std::unordered_set<std::string> tags_set; - for (std::string tag : options.saved_model_tag()) { + for (const std::string& tag : options.saved_model_tag()) { tags_set.insert(tag); } if (tags_set.empty()) { diff --git a/mediapipe/calculators/tflite/tflite_inference_calculator.cc b/mediapipe/calculators/tflite/tflite_inference_calculator.cc index 954cfea9f..af6572c02 100644 --- a/mediapipe/calculators/tflite/tflite_inference_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_inference_calculator.cc @@ -513,6 +513,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); if (gpu_inference_) { #if !defined(MEDIAPIPE_DISABLE_GL_COMPUTE) MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status { + interpreter_ = nullptr; delegate_ = nullptr; for (int i = 0; i < gpu_data_in_.size(); ++i) { gpu_data_in_[i].reset(); @@ -523,6 +524,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); return ::mediapipe::OkStatus(); })); #elif defined(MEDIAPIPE_IOS) + interpreter_ = nullptr; delegate_ = nullptr; for (int i = 0; i < gpu_data_in_.size(); ++i) { gpu_data_in_[i].reset(); @@ -532,6 +534,7 @@ REGISTER_CALCULATOR(TfLiteInferenceCalculator); } #endif } else { + interpreter_ = nullptr; delegate_ = nullptr; } } diff --git a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc index 8d946ced5..732adb26b 100644 --- a/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc +++ b/mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc @@ -99,7 +99,7 @@ void ConvertAnchorsToRawValues(const std::vector<Anchor>& anchors, int num_boxes, float* raw_anchors) { CHECK_EQ(anchors.size(), num_boxes); int box = 0; - for (auto anchor : anchors) { + for (const auto& anchor : anchors) { raw_anchors[box * kNumCoordsPerBox + 0] = anchor.y_center(); raw_anchors[box * kNumCoordsPerBox + 1] = anchor.x_center(); raw_anchors[box * kNumCoordsPerBox + 2] = anchor.h(); diff --git a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc index 4d6d980bf..7d9cd5740 100644 --- a/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc +++ b/mediapipe/calculators/util/detection_label_id_to_text_calculator.cc @@ -71,16 +71,23 @@ REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator); const auto& options = cc->Options<::mediapipe::DetectionLabelIdToTextCalculatorOptions>(); - std::string string_path; - ASSIGN_OR_RETURN(string_path, PathToResourceAsFile(options.label_map_path())); - std::string label_map_string; - MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string)); + if (options.has_label_map_path()) { + std::string string_path; +
ASSIGN_OR_RETURN(string_path, + PathToResourceAsFile(options.label_map_path())); + std::string label_map_string; + MP_RETURN_IF_ERROR(file::GetContents(string_path, &label_map_string)); - std::istringstream stream(label_map_string); - std::string line; - int i = 0; - while (std::getline(stream, line)) { - label_map_[i++] = line; + std::istringstream stream(label_map_string); + std::string line; + int i = 0; + while (std::getline(stream, line)) { + label_map_[i++] = line; + } + } else { + for (int i = 0; i < options.label_size(); ++i) { + label_map_[i] = options.label(i); + } } return ::mediapipe::OkStatus(); } diff --git a/mediapipe/calculators/util/detection_label_id_to_text_calculator.proto b/mediapipe/calculators/util/detection_label_id_to_text_calculator.proto index 0486d1d0a..b722b41c2 100644 --- a/mediapipe/calculators/util/detection_label_id_to_text_calculator.proto +++ b/mediapipe/calculators/util/detection_label_id_to_text_calculator.proto @@ -25,4 +25,10 @@ message DetectionLabelIdToTextCalculatorOptions { // Path to a label map file for getting the actual name of detected classes. optional string label_map_path = 1; + + // Alternative way to specify label map + // label: "label for id 0" + // label: "label for id 1" + // ... + repeated string label = 2; } diff --git a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc index d83df435d..a099e81f3 100644 --- a/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc +++ b/mediapipe/calculators/util/landmarks_to_render_data_calculator.cc @@ -186,6 +186,7 @@ class LandmarksToRenderDataCalculator : public CalculatorBase { private: LandmarksToRenderDataCalculatorOptions options_; + std::vector<int> landmark_connections_; }; REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); @@ -217,6 +218,14 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); cc->SetOffset(TimestampDiff(0)); options_ = cc->Options<LandmarksToRenderDataCalculatorOptions>(); + // Parse landmarks connections to a vector. + RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) + << "Number of entries in landmark connections must be a multiple of 2"; + + for (int i = 0; i < options_.landmark_connections_size(); ++i) { + landmark_connections_.push_back(options_.landmark_connections(i)); + } + return ::mediapipe::OkStatus(); } @@ -236,14 +245,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); thickness *= render_scale; } - // Parse landmarks connections to a vector. - RET_CHECK_EQ(options_.landmark_connections_size() % 2, 0) - << "Number of entries in landmark connections must be a multiple of 2"; - std::vector<int> landmark_connections; - for (int i = 0; i < options_.landmark_connections_size(); i += 1) { - landmark_connections.push_back(options_.landmark_connections(i)); - } - if (cc->Inputs().HasTag(kLandmarksTag)) { const LandmarkList& landmarks = cc->Inputs().Tag(kLandmarksTag).Get<LandmarkList>(); @@ -252,6 +253,15 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); } // Only change rendering if there are actually z values other than 0.
visualize_depth &= ((z_max - z_min) > 1e-3); + if (visualize_depth) { + AddConnectionsWithDepth( + landmarks, landmark_connections_, thickness, /*normalized=*/false, + z_min, z_max, render_data.get()); + } else { + AddConnections( + landmarks, landmark_connections_, options_.connection_color(), + thickness, /*normalized=*/false, render_data.get()); + } for (int i = 0; i < landmarks.landmark_size(); ++i) { const Landmark& landmark = landmarks.landmark(i); auto* landmark_data_render = AddPointRenderData( @@ -265,15 +275,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); landmark_data->set_x(landmark.x()); landmark_data->set_y(landmark.y()); } - if (visualize_depth) { - AddConnectionsWithDepth( - landmarks, landmark_connections, thickness, /*normalized=*/false, - z_min, z_max, render_data.get()); - } else { - AddConnections( - landmarks, landmark_connections, options_.connection_color(), - thickness, /*normalized=*/false, render_data.get()); - } } if (cc->Inputs().HasTag(kNormLandmarksTag)) { @@ -285,6 +286,15 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); } // Only change rendering if there are actually z values other than 0. visualize_depth &= ((z_max - z_min) > 1e-3); + if (visualize_depth) { + AddConnectionsWithDepth( + landmarks, landmark_connections_, thickness, /*normalized=*/true, + z_min, z_max, render_data.get()); + } else { + AddConnections( + landmarks, landmark_connections_, options_.connection_color(), + thickness, /*normalized=*/true, render_data.get()); + } for (int i = 0; i < landmarks.landmark_size(); ++i) { const NormalizedLandmark& landmark = landmarks.landmark(i); auto* landmark_data_render = AddPointRenderData( @@ -298,15 +308,6 @@ REGISTER_CALCULATOR(LandmarksToRenderDataCalculator); landmark_data->set_x(landmark.x()); landmark_data->set_y(landmark.y()); } - if (visualize_depth) { - AddConnectionsWithDepth( - landmarks, landmark_connections, thickness, /*normalized=*/true, - z_min, z_max, render_data.get()); - } else { - AddConnections( - landmarks, landmark_connections, options_.connection_color(), - thickness, /*normalized=*/true, render_data.get()); - } } cc->Outputs() diff --git a/mediapipe/docs/examples.md b/mediapipe/docs/examples.md index 506564573..39ce4b06f 100644 --- a/mediapipe/docs/examples.md +++ b/mediapipe/docs/examples.md @@ -73,6 +73,18 @@ can be easily adapted to run on CPU v.s. GPU. * [Android](./face_detection_mobile_cpu.md) * [iOS](./face_detection_mobile_cpu.md) +### Face Mesh with GPU + +[Face Mesh with GPU](./face_mesh_mobile_gpu.md) illustrates how to run the +MediaPipe Face Mesh pipeline to perform 3D face landmark estimation in real-time +on mobile devices, utilizing GPU acceleration. The pipeline is based on +["Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs"](https://arxiv.org/abs/1907.06724), +and details of the underlying ML models are described in the +[model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view). + +* [Android](./face_mesh_mobile_gpu.md) +* [iOS](./face_mesh_mobile_gpu.md) + ### Hand Detection with GPU [Hand Detection with GPU](./hand_detection_mobile_gpu.md) illustrates how to use @@ -84,7 +96,7 @@ MediaPipe with a TFLite model for hand detection in a GPU-accelerated pipeline. ### Hand Tracking with GPU [Hand Tracking with GPU](./hand_tracking_mobile_gpu.md) illustrates how to use -MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline. +MediaPipe with TFLite models for hand tracking in a GPU-accelerated pipeline. 
* [Android](./hand_tracking_mobile_gpu.md) * [iOS](./hand_tracking_mobile_gpu.md) @@ -92,7 +104,7 @@ MediaPipe with a TFLite model for hand tracking in a GPU-accelerated pipeline. ### Multi-Hand Tracking with GPU [Multi-Hand Tracking with GPU](./multi_hand_tracking_mobile_gpu.md) illustrates -how to use MediaPipe with a TFLite model for multi-hand tracking in a +how to use MediaPipe with TFLite models for multi-hand tracking in a GPU-accelerated pipeline. * [Android](./multi_hand_tracking_mobile_gpu.md) @@ -150,11 +162,20 @@ GPU with live video from a webcam. * [Desktop GPU](./face_detection_desktop.md) * [Desktop CPU](./face_detection_desktop.md) +### Face Mesh on Desktop with Webcam + +[Face Mesh on Desktop with Webcam](./face_mesh_desktop.md) shows how to run the +MediaPipe Face Mesh pipeline to perform 3D face landmark estimation in real-time +on desktop with webcam input. + +* [Desktop GPU](./face_mesh_desktop.md) +* [Desktop CPU](./face_mesh_desktop.md) + ### Hand Tracking on Desktop with Webcam [Hand Tracking on Desktop with Webcam](./hand_tracking_desktop.md) shows how to -use MediaPipe with a TFLite model for hand tracking on desktop using CPU or GPU +use MediaPipe with TFLite models for hand tracking on desktop using CPU or GPU with live video from a webcam. * [Desktop GPU](./hand_tracking_desktop.md) @@ -163,8 +184,8 @@ with live video from a webcam. ### Multi-Hand Tracking on Desktop with Webcam [Multi-Hand Tracking on Desktop with Webcam](./multi_hand_tracking_desktop.md) -shows how to use MediaPipe with a TFLite model for multi-hand tracking on -desktop using CPU or GPU with live video from a webcam. +shows how to use MediaPipe with TFLite models for multi-hand tracking on desktop +using CPU or GPU with live video from a webcam. * [Desktop GPU](./multi_hand_tracking_desktop.md) * [Desktop CPU](./multi_hand_tracking_desktop.md) diff --git a/mediapipe/docs/face_detection_mobile_cpu.md b/mediapipe/docs/face_detection_mobile_cpu.md index e6b1d91b8..a68aee1c5 100644 --- a/mediapipe/docs/face_detection_mobile_cpu.md +++ b/mediapipe/docs/face_detection_mobile_cpu.md @@ -4,6 +4,8 @@ This doc focuses on the [example graph](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_detection/face_detection_mobile_cpu.pbtxt) that performs face detection with TensorFlow Lite on CPU. +![face_detection_android_gpu_gif](images/mobile/face_detection_android_gpu.gif) + ## Android [Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facedetectioncpu) diff --git a/mediapipe/docs/face_mesh_desktop.md b/mediapipe/docs/face_mesh_desktop.md new file mode 100644 index 000000000..0b6877aa0 --- /dev/null +++ b/mediapipe/docs/face_mesh_desktop.md @@ -0,0 +1,58 @@ +## Face Mesh on Desktop with Webcam + +This doc focuses on running the **MediaPipe Face Mesh** pipeline to perform 3D +face landmark estimation in real-time on desktop with webcam input. The pipeline +internally incorporates TensorFlow Lite models. To know more about the models, +please refer to the model +[README file](https://github.com/google/mediapipe/tree/master/mediapipe/models/README.md#face-mesh). +Moreover, if you are interested in running the same pipeline on Android/iOS, +please see [Face Mesh on Android/iOS](face_mesh_mobile_gpu.md). + +- [Face Mesh on Desktop with Webcam (CPU)](#face-mesh-on-desktop-with-webcam-cpu) + +- [Face Mesh on Desktop with Webcam (GPU)](#face-mesh-on-desktop-with-webcam-gpu) + +Note: Desktop GPU works only on Linux. 
Mesa drivers need to be installed. Please +see +[step 4 of "Installing on Debian and Ubuntu" in the installation guide](./install.md). + +Note: If MediaPipe depends on OpenCV 2, please see the [known issues with OpenCV 2](#known-issues-with-opencv-2) section. + +### Face Mesh on Desktop with Webcam (CPU) + +To build and run Face Mesh on desktop with webcam (CPU), run: + +```bash +$ bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 \ + mediapipe/examples/desktop/face_mesh:face_mesh_cpu + +# It should print: +# Target //mediapipe/examples/desktop/face_mesh:face_mesh_cpu up-to-date: +# bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_cpu + +# This will open up your webcam as long as it is connected. Errors are likely +# due to your webcam being not accessible. +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_cpu \ + --calculator_graph_config_file=mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt +``` + +### Face Mesh on Desktop with Webcam (GPU) + +Note: please first [check that your GPU is supported](gpu.md#desktop-gpu-linux). + +To build and run Face Mesh on desktop with webcam (GPU), run: + +```bash +# This works only for linux currently +$ bazel build -c opt --copt -DMESA_EGL_NO_X11_HEADERS --copt -DEGL_NO_X11 \ + mediapipe/examples/desktop/face_mesh:face_mesh_gpu + +# It should print: +# Target //mediapipe/examples/desktop/face_mesh:face_mesh_gpu up-to-date: +# bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_gpu + +# This will open up your webcam as long as it is connected. Errors are likely +# due to your webcam being not accessible, or GPU drivers not setup properly. +$ GLOG_logtostderr=1 bazel-bin/mediapipe/examples/desktop/face_mesh/face_mesh_gpu \ + --calculator_graph_config_file=mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt +``` diff --git a/mediapipe/docs/face_mesh_mobile_gpu.md b/mediapipe/docs/face_mesh_mobile_gpu.md new file mode 100644 index 000000000..f85cdabcd --- /dev/null +++ b/mediapipe/docs/face_mesh_mobile_gpu.md @@ -0,0 +1,90 @@ +# Face Mesh (GPU) + +This example focuses on running the **MediaPipe Face Mesh** pipeline on mobile +devices to perform 3D face landmark estimation in real-time, utilizing GPU +acceleration. The pipeline internally incorporates TensorFlow Lite models. To +know more about the models, please refer to the model +[README file](https://github.com/google/mediapipe/tree/master/mediapipe/models/README.md#face-mesh). +The pipeline is related to the +[face detection example](./face_detection_mobile_gpu.md) as it internally +utilizes face detection and performs landmark estimation only within the +detected region. + +![face_mesh_android_gpu.gif](images/mobile/face_mesh_android_gpu.gif) + +**MediaPipe Face Mesh** generates 468 3D face landmarks in real-time on mobile +devices. In the visualization above, the red dots represent the landmarks, and +the green lines connecting landmarks illustrate the contours around the eyes, +eyebrows, lips and the entire face. + +## Android + +[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu) + +A prebuilt arm64 APK can be +[downloaded here](https://drive.google.com/open?id=1pUmd7CXCL_onYMbsZo5p91cH0oNnR4gi). 
+ +To build the app yourself, run: + +```bash +bazel build -c opt --config=android_arm64 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu +``` + +Once the app is built, install it on Android device with: + +```bash +adb install bazel-bin/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/facemeshgpu.apk +``` + +## iOS + +[Source](https://github.com/google/mediapipe/tree/master/mediapipe/examples/ios/facemeshgpu). + +See the general [instructions](./mediapipe_ios_setup.md) for building iOS +examples and generating an Xcode project. This will be the FaceMeshGpuApp +target. + +To build on the command line: + +```bash +bazel build -c opt --config=ios_arm64 mediapipe/examples/ios/facemeshgpu:FaceMeshGpuApp +``` + +## Graph + +The face mesh [main graph](#main-graph) utilizes a +[face landmark subgraph](#face-landmark-subgraph) from the +[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark), +and renders using a dedicated [face renderer subgraph](#face-renderer-subgraph). + +The subgraphs show up in the main graph visualization as nodes colored in +purple, and the subgraph itself can also be visualized just like a regular +graph. For more information on how to visualize a graph that includes subgraphs, +see the Visualizing Subgraphs section in the +[visualizer documentation](./visualizer.md). + +### Main Graph + +![face_mesh_mobile_graph](images/mobile/face_mesh_mobile.png) + +[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt) + +### Face Landmark Subgraph + +The +[face landmark module](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark) +contains several subgraphs that can be used to detect and track face landmarks. +In particular, in this example the +[FaceLandmarkFrontGPU](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt) +subgraph, suitable for images from front-facing cameras (i.e., selfie images) +and utilizing GPU acceleration, is selected. 
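To make the subgraph mechanism concrete, the sketch below shows how a registered subgraph such as FaceLandmarkFrontGpu can be referenced like any other calculator when a graph config is built in C++. This is only an illustrative sketch of the wiring, not the module's exact interface: the IMAGE/LANDMARKS tags and stream names are assumed placeholders, and the authoritative configuration is the source pbtxt linked below.

```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Sketch only: a registered subgraph is referenced by name like a calculator.
// The IMAGE/LANDMARKS tags and the stream names are illustrative placeholders.
mediapipe::CalculatorGraphConfig BuildFaceMeshSketchConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"(
    input_stream: "input_video"
    output_stream: "multi_face_landmarks"
    node {
      calculator: "FaceLandmarkFrontGpu"
      input_stream: "IMAGE:input_video"
      output_stream: "LANDMARKS:multi_face_landmarks"
    }
  )");
}
```

A config built this way can be passed directly to CalculatorGraph::Initialize, just like the graph configs used in the calculator tests above.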
+ +![face_landmark_front_gpu_subgraph](images/mobile/face_landmark_front_gpu_subgraph.png) + +[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt) + +### Face Renderer Subgraph + +![face_renderer_gpu_subgraph](images/mobile/face_renderer_gpu_subgraph.png) + +[Source pbtxt file](https://github.com/google/mediapipe/tree/master/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt) diff --git a/mediapipe/docs/images/logo_horizontal_black.png b/mediapipe/docs/images/logo_horizontal_black.png new file mode 100644 index 000000000..89f708fd0 Binary files /dev/null and b/mediapipe/docs/images/logo_horizontal_black.png differ diff --git a/mediapipe/docs/images/logo_horizontal_white.png b/mediapipe/docs/images/logo_horizontal_white.png new file mode 100644 index 000000000..bd0e6d9ef Binary files /dev/null and b/mediapipe/docs/images/logo_horizontal_white.png differ diff --git a/mediapipe/docs/images/mediapipe_small.png b/mediapipe/docs/images/mediapipe_small.png index 85c284129..368e2b651 100644 Binary files a/mediapipe/docs/images/mediapipe_small.png and b/mediapipe/docs/images/mediapipe_small.png differ diff --git a/mediapipe/docs/images/mobile/face_detection_android_gpu.gif b/mediapipe/docs/images/mobile/face_detection_android_gpu.gif index 983595e68..75d9228b3 100644 Binary files a/mediapipe/docs/images/mobile/face_detection_android_gpu.gif and b/mediapipe/docs/images/mobile/face_detection_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif b/mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif index 08a89aa0a..0476602a3 100644 Binary files a/mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif and b/mediapipe/docs/images/mobile/face_detection_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/face_landmark_front_gpu_subgraph.png b/mediapipe/docs/images/mobile/face_landmark_front_gpu_subgraph.png new file mode 100644 index 000000000..a97b3da0b Binary files /dev/null and b/mediapipe/docs/images/mobile/face_landmark_front_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/face_mesh_android_gpu.gif b/mediapipe/docs/images/mobile/face_mesh_android_gpu.gif new file mode 100644 index 000000000..cdba62021 Binary files /dev/null and b/mediapipe/docs/images/mobile/face_mesh_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif b/mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif new file mode 100644 index 000000000..5ab431ef5 Binary files /dev/null and b/mediapipe/docs/images/mobile/face_mesh_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/face_mesh_mobile.png b/mediapipe/docs/images/mobile/face_mesh_mobile.png new file mode 100644 index 000000000..0a109d617 Binary files /dev/null and b/mediapipe/docs/images/mobile/face_mesh_mobile.png differ diff --git a/mediapipe/docs/images/mobile/face_renderer_gpu_subgraph.png b/mediapipe/docs/images/mobile/face_renderer_gpu_subgraph.png new file mode 100644 index 000000000..c53d854bd Binary files /dev/null and b/mediapipe/docs/images/mobile/face_renderer_gpu_subgraph.png differ diff --git a/mediapipe/docs/images/mobile/hair_segmentation_android_gpu.gif b/mediapipe/docs/images/mobile/hair_segmentation_android_gpu.gif index fa727e429..565f1849a 100644 Binary files a/mediapipe/docs/images/mobile/hair_segmentation_android_gpu.gif and b/mediapipe/docs/images/mobile/hair_segmentation_android_gpu.gif 
differ diff --git a/mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif b/mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif index 6669abb84..737ef1506 100644 Binary files a/mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif and b/mediapipe/docs/images/mobile/hair_segmentation_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/hand_detection_android_gpu.gif b/mediapipe/docs/images/mobile/hand_detection_android_gpu.gif index 86f6f91f8..38e32becf 100644 Binary files a/mediapipe/docs/images/mobile/hand_detection_android_gpu.gif and b/mediapipe/docs/images/mobile/hand_detection_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/hand_detection_android_gpu_small.gif b/mediapipe/docs/images/mobile/hand_detection_android_gpu_small.gif new file mode 100644 index 000000000..bd61268fa Binary files /dev/null and b/mediapipe/docs/images/mobile/hand_detection_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu.gif b/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu.gif index 271cc47f9..60a95d438 100644 Binary files a/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu.gif and b/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif b/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif deleted file mode 100644 index 7f3983d7e..000000000 Binary files a/mediapipe/docs/images/mobile/hand_tracking_3d_android_gpu_small.gif and /dev/null differ diff --git a/mediapipe/docs/images/mobile/hand_tracking_android_gpu.gif b/mediapipe/docs/images/mobile/hand_tracking_android_gpu.gif index 675f15121..b40e2986b 100644 Binary files a/mediapipe/docs/images/mobile/hand_tracking_android_gpu.gif and b/mediapipe/docs/images/mobile/hand_tracking_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif b/mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif index 2627b763d..c657edae0 100644 Binary files a/mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif and b/mediapipe/docs/images/mobile/hand_tracking_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif b/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif index fbdd8f573..6aae8abca 100644 Binary files a/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif and b/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu_small.gif b/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu_small.gif new file mode 100644 index 000000000..24c101829 Binary files /dev/null and b/mediapipe/docs/images/mobile/multi_hand_tracking_3d_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif b/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif index 2cc920c86..1e20dd082 100644 Binary files a/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif and b/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif b/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif deleted file mode 100644 index f844fc59b..000000000 Binary files 
a/mediapipe/docs/images/mobile/multi_hand_tracking_android_gpu_small.gif and /dev/null differ diff --git a/mediapipe/docs/images/mobile/object_detection_android_cpu.gif b/mediapipe/docs/images/mobile/object_detection_android_cpu.gif index fc5eb7fd6..66c07d6ca 100644 Binary files a/mediapipe/docs/images/mobile/object_detection_android_cpu.gif and b/mediapipe/docs/images/mobile/object_detection_android_cpu.gif differ diff --git a/mediapipe/docs/images/mobile/object_detection_android_gpu.gif b/mediapipe/docs/images/mobile/object_detection_android_gpu.gif index 76be3a5c2..25e75f862 100644 Binary files a/mediapipe/docs/images/mobile/object_detection_android_gpu.gif and b/mediapipe/docs/images/mobile/object_detection_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif b/mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif index c21822213..db55678ba 100644 Binary files a/mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif and b/mediapipe/docs/images/mobile/object_detection_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif b/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif index cb8b4f6e3..ed6f84ce7 100644 Binary files a/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif and b/mediapipe/docs/images/mobile/object_tracking_android_gpu.gif differ diff --git a/mediapipe/docs/images/mobile/object_tracking_android_gpu_detection_only.gif b/mediapipe/docs/images/mobile/object_tracking_android_gpu_detection_only.gif new file mode 100644 index 000000000..b2c68520e Binary files /dev/null and b/mediapipe/docs/images/mobile/object_tracking_android_gpu_detection_only.gif differ diff --git a/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif b/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif index 9fd53144f..db070efa2 100644 Binary files a/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif and b/mediapipe/docs/images/mobile/object_tracking_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif b/mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif new file mode 100644 index 000000000..bef4c5b18 Binary files /dev/null and b/mediapipe/docs/images/mobile/objectron_chair_android_gpu_small.gif differ diff --git a/mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif b/mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif new file mode 100644 index 000000000..611f85dbe Binary files /dev/null and b/mediapipe/docs/images/mobile/objectron_shoe_android_gpu_small.gif differ diff --git a/mediapipe/docs/object_detection_desktop.md b/mediapipe/docs/object_detection_desktop.md index 6ad872927..bcb5ebefe 100644 --- a/mediapipe/docs/object_detection_desktop.md +++ b/mediapipe/docs/object_detection_desktop.md @@ -12,7 +12,7 @@ We show the object detection demo with both TensorFlow model and TensorFlow Lite - [TensorFlow Object Detection Demo](#tensorflow-object-detection-demo) - [TensorFlow Lite Object Detection Demo](#tensorflow-lite-object-detection-demo) -- [TensorFlow Lite Object Detection Demo with Webcam (CPU)](#tensorflow-lite-object-detection-demo) +- [TensorFlow Lite Object Detection Demo with Webcam (CPU)](#tensorflow-lite-object-detection-demo-with-webcam-cpu) Note: If MediaPipe depends on OpenCV 2, please see the [known issues with OpenCV 2](#known-issues-with-opencv-2) section. 
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/AndroidManifest.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/AndroidManifest.xml new file mode 100644 index 000000000..fe7cd9cca --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/AndroidManifest.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD new file mode 100644 index 000000000..1bf3b56b0 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/BUILD @@ -0,0 +1,82 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/face_mesh:mobile_calculators", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +# Maps the binary graph to an alias (e.g., the app name) for convenience so that the alias can be +# easily incorporated into the app via, for example, +# MainActivity.BINARY_GRAPH_NAME = "appname.binarypb". 
+genrule( + name = "binary_graph", + srcs = ["//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph"], + outs = ["facemeshgpu.binarypb"], + cmd = "cp $< $@", +) + +android_library( + name = "mediapipe_lib", + srcs = glob(["*.java"]), + assets = [ + ":binary_graph", + "//mediapipe/modules/face_landmark:face_landmark.tflite", + "//mediapipe/modules/face_detection:face_detection_front.tflite", + ], + assets_dir = "", + manifest = "AndroidManifest.xml", + resource_files = glob(["res/**"]), + deps = [ + ":mediapipe_jni_lib", + "//mediapipe/framework/formats:landmark_java_proto_lite", + "//mediapipe/java/com/google/mediapipe/components:android_camerax_helper", + "//mediapipe/java/com/google/mediapipe/components:android_components", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + "//mediapipe/java/com/google/mediapipe/glutil", + "//third_party:androidx_appcompat", + "//third_party:androidx_constraint_layout", + "//third_party:androidx_legacy_support_v4", + "//third_party:androidx_recyclerview", + "//third_party:opencv", + "@maven//:androidx_concurrent_concurrent_futures", + "@maven//:androidx_lifecycle_lifecycle_common", + "@maven//:com_google_guava_guava", + ], +) + +android_binary( + name = "facemeshgpu", + manifest = "AndroidManifest.xml", + manifest_values = {"applicationId": "com.google.mediapipe.apps.facemeshgpu"}, + multidex = "native", + deps = [ + ":mediapipe_lib", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java new file mode 100644 index 000000000..8cf90eee5 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/MainActivity.java @@ -0,0 +1,232 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.google.mediapipe.apps.facemeshgpu; + +import android.graphics.SurfaceTexture; +import android.os.Bundle; +import androidx.appcompat.app.AppCompatActivity; +import android.util.Log; +import android.util.Size; +import android.view.SurfaceHolder; +import android.view.SurfaceView; +import android.view.View; +import android.view.ViewGroup; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; +import com.google.mediapipe.components.CameraHelper; +import com.google.mediapipe.components.CameraXPreviewHelper; +import com.google.mediapipe.components.ExternalTextureConverter; +import com.google.mediapipe.components.FrameProcessor; +import com.google.mediapipe.components.PermissionHelper; +import com.google.mediapipe.framework.AndroidAssetUtil; +import com.google.mediapipe.framework.AndroidPacketCreator; +import com.google.mediapipe.framework.Packet; +import com.google.mediapipe.framework.PacketGetter; +import com.google.mediapipe.glutil.EglManager; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Main activity of MediaPipe example apps. */ +public class MainActivity extends AppCompatActivity { + private static final String TAG = "MainActivity"; + + private static final String BINARY_GRAPH_NAME = "facemeshgpu.binarypb"; + private static final String INPUT_VIDEO_STREAM_NAME = "input_video"; + private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces"; + private static final String OUTPUT_VIDEO_STREAM_NAME = "output_video"; + private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks"; + private static final CameraHelper.CameraFacing CAMERA_FACING = CameraHelper.CameraFacing.FRONT; + + // Max number of faces to detect/process. + private static final int NUM_FACES = 1; + + // Flips the camera-preview frames vertically before sending them into FrameProcessor to be + // processed in a MediaPipe graph, and flips the processed frames back when they are displayed. + // This is needed because OpenGL represents images assuming the image origin is at the bottom-left + // corner, whereas MediaPipe in general assumes the image origin is at top-left. + private static final boolean FLIP_FRAMES_VERTICALLY = true; + + static { + // Load all native libraries needed by the app. + System.loadLibrary("mediapipe_jni"); + System.loadLibrary("opencv_java3"); + } + + // {@link SurfaceTexture} where the camera-preview frames can be accessed. + private SurfaceTexture previewFrameTexture; + // {@link SurfaceView} that displays the camera-preview frames processed by a MediaPipe graph. + private SurfaceView previewDisplayView; + + // Creates and manages an {@link EGLContext}. + private EglManager eglManager; + // Sends camera-preview frames into a MediaPipe graph for processing, and displays the processed + // frames onto a {@link Surface}. + private FrameProcessor processor; + // Converts the GL_TEXTURE_EXTERNAL_OES texture from Android camera into a regular texture to be + // consumed by {@link FrameProcessor} and the underlying MediaPipe graph. + private ExternalTextureConverter converter; + + // Handles camera access via the {@link CameraX} Jetpack support library. 
+ private CameraXPreviewHelper cameraHelper; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + + previewDisplayView = new SurfaceView(this); + setupPreviewDisplayView(); + + // Initialize asset manager so that MediaPipe native libraries can access the app assets, e.g., + // binary graphs. + AndroidAssetUtil.initializeNativeAssetManager(this); + + eglManager = new EglManager(null); + processor = + new FrameProcessor( + this, + eglManager.getNativeContext(), + BINARY_GRAPH_NAME, + INPUT_VIDEO_STREAM_NAME, + OUTPUT_VIDEO_STREAM_NAME); + processor.getVideoSurfaceOutput().setFlipY(FLIP_FRAMES_VERTICALLY); + + AndroidPacketCreator packetCreator = processor.getPacketCreator(); + Map inputSidePackets = new HashMap<>(); + inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES)); + processor.setInputSidePackets(inputSidePackets); + + processor.addPacketCallback( + OUTPUT_LANDMARKS_STREAM_NAME, + (packet) -> { + Log.d(TAG, "Received multi face landmarks packet."); + List multiFaceLandmarks = + PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser()); + Log.d( + TAG, + "[TS:" + + packet.getTimestamp() + + "] " + + getMultiFaceLandmarksDebugString(multiFaceLandmarks)); + }); + + PermissionHelper.checkAndRequestCameraPermissions(this); + } + + @Override + protected void onResume() { + super.onResume(); + converter = new ExternalTextureConverter(eglManager.getContext()); + converter.setFlipY(FLIP_FRAMES_VERTICALLY); + converter.setConsumer(processor); + if (PermissionHelper.cameraPermissionsGranted(this)) { + startCamera(); + } + } + + @Override + protected void onPause() { + super.onPause(); + converter.close(); + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults); + PermissionHelper.onRequestPermissionsResult(requestCode, permissions, grantResults); + } + + private void setupPreviewDisplayView() { + previewDisplayView.setVisibility(View.GONE); + ViewGroup viewGroup = findViewById(R.id.preview_display_layout); + viewGroup.addView(previewDisplayView); + + previewDisplayView + .getHolder() + .addCallback( + new SurfaceHolder.Callback() { + @Override + public void surfaceCreated(SurfaceHolder holder) { + processor.getVideoSurfaceOutput().setSurface(holder.getSurface()); + } + + @Override + public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) { + // (Re-)Compute the ideal size of the camera-preview display (the area that the + // camera-preview frames get rendered onto, potentially with scaling and rotation) + // based on the size of the SurfaceView that contains the display. + Size viewSize = new Size(width, height); + Size displaySize = cameraHelper.computeDisplaySizeFromViewSize(viewSize); + boolean isCameraRotated = cameraHelper.isCameraRotated(); + + // Connect the converter to the camera-preview frames as its input (via + // previewFrameTexture), and configure the output width and height as the computed + // display size. + converter.setSurfaceTextureAndAttachToGLContext( + previewFrameTexture, + isCameraRotated ? displaySize.getHeight() : displaySize.getWidth(), + isCameraRotated ? 
displaySize.getWidth() : displaySize.getHeight()); + } + + @Override + public void surfaceDestroyed(SurfaceHolder holder) { + processor.getVideoSurfaceOutput().setSurface(null); + } + }); + } + + private void startCamera() { + cameraHelper = new CameraXPreviewHelper(); + cameraHelper.setOnCameraStartedListener( + surfaceTexture -> { + previewFrameTexture = surfaceTexture; + // Make the display view visible to start showing the preview. This triggers the + // SurfaceHolder.Callback added to (the holder of) previewDisplayView. + previewDisplayView.setVisibility(View.VISIBLE); + }); + cameraHelper.startCamera(this, CAMERA_FACING, /*surfaceTexture=*/ null); + } + + private static String getMultiFaceLandmarksDebugString( + List multiFaceLandmarks) { + if (multiFaceLandmarks.isEmpty()) { + return "No face landmarks"; + } + String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n"; + int faceIndex = 0; + for (NormalizedLandmarkList landmarks : multiFaceLandmarks) { + multiFaceLandmarksStr += + "\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n"; + int landmarkIndex = 0; + for (NormalizedLandmark landmark : landmarks.getLandmarkList()) { + multiFaceLandmarksStr += + "\t\tLandmark [" + + landmarkIndex + + "]: (" + + landmark.getX() + + ", " + + landmark.getY() + + ", " + + landmark.getZ() + + ")\n"; + ++landmarkIndex; + } + ++faceIndex; + } + return multiFaceLandmarksStr; + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/layout/activity_main.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/layout/activity_main.xml new file mode 100644 index 000000000..c19d7e628 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/layout/activity_main.xml @@ -0,0 +1,20 @@ + + + + + + + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/colors.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/colors.xml new file mode 100644 index 000000000..69b22338c --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/colors.xml @@ -0,0 +1,6 @@ + + + #008577 + #00574B + #D81B60 + diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/strings.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/strings.xml new file mode 100644 index 000000000..cdc0cf5c9 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/strings.xml @@ -0,0 +1,4 @@ + + Face Mesh GPU + Please grant camera permissions. 
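For readers coming from the desktop side: the stream and side-packet names hard-coded in MainActivity above ("input_video", "num_faces", "multi_face_landmarks") are properties of the face mesh graph itself, so the same graph can be driven from C++ through the regular CalculatorGraph API. The sketch below is illustrative only and is not part of this change; LoadFaceMeshGraphConfigText() is a hypothetical stand-in for however the caller obtains the graph's text proto, it assumes a graph variant that, like the mobile graph used here, exposes num_faces as an input side packet, and a frame with no visible face may simply produce no landmark packet.

```cpp
#include <cstdio>
#include <string>
#include <vector>

#include "absl/memory/memory.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/image_frame.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status.h"

// Hypothetical helper: returns the face mesh graph config as a text proto.
std::string LoadFaceMeshGraphConfigText();

::mediapipe::Status RunFaceMeshOnce() {
  auto config =
      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
          LoadFaceMeshGraphConfigText());

  mediapipe::CalculatorGraph graph;
  MP_RETURN_IF_ERROR(graph.Initialize(config));

  // Same names the Android app uses above.
  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller poller,
                   graph.AddOutputStreamPoller("multi_face_landmarks"));
  MP_RETURN_IF_ERROR(
      graph.StartRun({{"num_faces", mediapipe::MakePacket<int>(1)}}));

  // Feed a single (uninitialized) RGB frame at timestamp 0 just to show the
  // call sequence; a real caller would loop over camera or video frames.
  auto frame = absl::make_unique<mediapipe::ImageFrame>(
      mediapipe::ImageFormat::SRGB, /*width=*/640, /*height=*/480);
  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
      "input_video",
      mediapipe::Adopt(frame.release()).At(mediapipe::Timestamp(0))));
  MP_RETURN_IF_ERROR(graph.CloseInputStream("input_video"));

  mediapipe::Packet packet;
  while (poller.Next(&packet)) {
    const auto& faces =
        packet.Get<std::vector<mediapipe::NormalizedLandmarkList>>();
    std::printf("faces: %zu\n", faces.size());
  }
  return graph.WaitUntilDone();
}
```

The Android app performs the equivalent steps through FrameProcessor, an input side packet created with createInt32(), and PacketGetter.getProtoVector() in the callback shown above.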
+ diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/styles.xml b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/styles.xml new file mode 100644 index 000000000..5885930df --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu/res/values/styles.xml @@ -0,0 +1,11 @@ + + + + + + diff --git a/mediapipe/examples/desktop/autoflip/autoflip_messages.proto b/mediapipe/examples/desktop/autoflip/autoflip_messages.proto index 492817ae1..e77a05f18 100644 --- a/mediapipe/examples/desktop/autoflip/autoflip_messages.proto +++ b/mediapipe/examples/desktop/autoflip/autoflip_messages.proto @@ -150,4 +150,34 @@ message ConversionOptions { optional int32 target_height = 2; } -// TODO: Move other autoflip messages into this area. +// Self-contained message that provides all needed information to render +// autoflip with an external renderer. One of these messages is required for +// each frame of the video. +message ExternalRenderFrame { + // Rectangle using opencv standard. + message Rect { + optional float x = 1; + optional float y = 2; + optional float width = 3; + optional float height = 4; + } + // RGB color [0...255] + message Color { + optional int32 r = 1; + optional int32 g = 2; + optional int32 b = 3; + } + // Rect that must be cropped out of the input frame. It is in the + // original dimensions of the input video. The first step to render this + // frame is to crop this rect from the input frame. + optional Rect crop_from_location = 1; + // The placement location where the above rect is placed on the output frame. + // This will always have the same aspect ratio as the above rect but scaling + // may be required. + optional Rect render_to_location = 2; + // If render_to_location is smaller than the output dimensions of the frame, + // fill the rest of the frame with this color. + optional Color padding_color = 3; + // Timestamp in microseconds of this frame. + optional uint64 timestamp_us = 4; +} diff --git a/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.cc b/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.cc index f8b8c8e6f..a81059a28 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.cc @@ -44,11 +44,19 @@ constexpr char kInputExternalSettings[] = "EXTERNAL_SETTINGS"; // TargetSizeType::MAXIMIZE_TARGET_DIMENSION constexpr char kAspectRatio[] = "EXTERNAL_ASPECT_RATIO"; +// Output the cropped frames, as well as visualization of crop regions and focus +// points. Note that, KEY_FRAME_CROP_REGION_VIZ_FRAMES and +// SALIENT_POINT_FRAME_VIZ_FRAMES can only be enabled when CROPPED_FRAMES is +// enabled. 
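Before the output-tag constants introduced by the comment above, it is worth making the ExternalRenderFrame contract concrete. The sketch below is illustrative only and not part of this patch: it applies a single message to an input frame with OpenCV, following the steps in the field comments, namely crop crop_from_location out of the input, scale it into render_to_location, and fill the rest of the output with padding_color. RenderExternally and its output-size parameters are hypothetical, and the generated proto header path is inferred from the .proto file's location.

```cpp
#include "mediapipe/examples/desktop/autoflip/autoflip_messages.pb.h"
#include "mediapipe/framework/port/opencv_core_inc.h"
#include "mediapipe/framework/port/opencv_imgproc_inc.h"

namespace mediapipe {
namespace autoflip {

// Hypothetical helper: renders one output frame from `input` as described by
// `msg`. `output_width`/`output_height` are the caller's output dimensions.
cv::Mat RenderExternally(const ExternalRenderFrame& msg, const cv::Mat& input,
                         int output_width, int output_height) {
  // Start from a canvas filled with the padding color; it remains visible
  // only where render_to_location does not cover the output frame.
  // (Channel order must match `input`; the message stores RGB.)
  const auto& pad = msg.padding_color();
  cv::Mat output(output_height, output_width, input.type(),
                 cv::Scalar(pad.r(), pad.g(), pad.b()));

  // Step 1: crop crop_from_location out of the input frame.
  const auto& from = msg.crop_from_location();
  const cv::Rect from_rect(
      static_cast<int>(from.x()), static_cast<int>(from.y()),
      static_cast<int>(from.width()), static_cast<int>(from.height()));
  const cv::Mat cropped = input(from_rect);

  // Step 2: scale it into render_to_location (same aspect ratio, per the
  // message comments) and paste it onto the canvas.
  const auto& to = msg.render_to_location();
  const cv::Rect to_rect(
      static_cast<int>(to.x()), static_cast<int>(to.y()),
      static_cast<int>(to.width()), static_cast<int>(to.height()));
  cv::Mat scaled;
  cv::resize(cropped, scaled, to_rect.size());
  scaled.copyTo(output(to_rect));
  return output;
}

}  // namespace autoflip
}  // namespace mediapipe
```

Note that the message stores RGB components, while OpenCV frames are frequently BGR, so a real renderer must match the channel order of its own frames.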
constexpr char kOutputCroppedFrames[] = "CROPPED_FRAMES"; constexpr char kOutputKeyFrameCropViz[] = "KEY_FRAME_CROP_REGION_VIZ_FRAMES"; constexpr char kOutputFocusPointFrameViz[] = "SALIENT_POINT_FRAME_VIZ_FRAMES"; constexpr char kOutputSummary[] = "CROPPING_SUMMARY"; +// External rendering outputs +constexpr char kExternalRenderingPerFrame[] = "EXTERNAL_RENDERING_PER_FRAME"; +constexpr char kExternalRenderingFullVid[] = "EXTERNAL_RENDERING_FULL_VID"; + ::mediapipe::Status SceneCroppingCalculator::GetContract( ::mediapipe::CalculatorContract* cc) { if (cc->InputSidePackets().HasTag(kInputExternalSettings)) { @@ -67,16 +75,36 @@ constexpr char kOutputSummary[] = "CROPPING_SUMMARY"; } cc->Inputs().Tag(kInputShotBoundaries).Set(); - cc->Outputs().Tag(kOutputCroppedFrames).Set(); + if (cc->Outputs().HasTag(kOutputCroppedFrames)) { + cc->Outputs().Tag(kOutputCroppedFrames).Set(); + } if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) { + RET_CHECK(cc->Outputs().HasTag(kOutputCroppedFrames)) + << "KEY_FRAME_CROP_REGION_VIZ_FRAMES can only be used when " + "CROPPED_FRAMES is specified."; cc->Outputs().Tag(kOutputKeyFrameCropViz).Set(); } if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) { + RET_CHECK(cc->Outputs().HasTag(kOutputCroppedFrames)) + << "SALIENT_POINT_FRAME_VIZ_FRAMES can only be used when " + "CROPPED_FRAMES is specified."; cc->Outputs().Tag(kOutputFocusPointFrameViz).Set(); } if (cc->Outputs().HasTag(kOutputSummary)) { cc->Outputs().Tag(kOutputSummary).Set(); } + if (cc->Outputs().HasTag(kExternalRenderingPerFrame)) { + cc->Outputs().Tag(kExternalRenderingPerFrame).Set(); + } + if (cc->Outputs().HasTag(kExternalRenderingFullVid)) { + cc->Outputs() + .Tag(kExternalRenderingFullVid) + .Set>(); + } + RET_CHECK(cc->Outputs().HasTag(kExternalRenderingPerFrame) || + cc->Outputs().HasTag(kExternalRenderingFullVid) || + cc->Outputs().HasTag(kOutputCroppedFrames)) + << "At leaset one output stream must be specified"; return ::mediapipe::OkStatus(); } @@ -104,6 +132,11 @@ constexpr char kOutputSummary[] = "CROPPING_SUMMARY"; if (cc->Outputs().HasTag(kOutputSummary)) { summary_ = absl::make_unique(); } + if (cc->Outputs().HasTag(kExternalRenderingFullVid)) { + external_render_list_ = + absl::make_unique>(); + } + should_perform_frame_cropping_ = cc->Outputs().HasTag(kOutputCroppedFrames); return ::mediapipe::OkStatus(); } @@ -127,6 +160,28 @@ namespace { *aspect_ratio = width_ratio / height_ratio; return ::mediapipe::OkStatus(); } +void ConstructExternalRenderMessage( + const cv::Rect& crop_from_location, const cv::Rect& render_to_location, + const cv::Scalar& padding_color, const uint64 timestamp_us, + ExternalRenderFrame* external_render_message) { + auto crop_from_message = + external_render_message->mutable_crop_from_location(); + crop_from_message->set_x(crop_from_location.x); + crop_from_message->set_y(crop_from_location.y); + crop_from_message->set_width(crop_from_location.width); + crop_from_message->set_height(crop_from_location.height); + auto render_to_message = + external_render_message->mutable_render_to_location(); + render_to_message->set_x(render_to_location.x); + render_to_message->set_y(render_to_location.y); + render_to_message->set_width(render_to_location.width); + render_to_message->set_height(render_to_location.height); + auto padding_color_message = external_render_message->mutable_padding_color(); + padding_color_message->set_r(padding_color[0]); + padding_color_message->set_g(padding_color[1]); + padding_color_message->set_b(padding_color[2]); + 
external_render_message->set_timestamp_us(timestamp_us); +} } // namespace ::mediapipe::Status SceneCroppingCalculator::Process( @@ -230,8 +285,9 @@ namespace { is_end_of_scene = cc->Inputs().Tag(kInputShotBoundaries).Get(); } const bool force_buffer_flush = - scene_frames_.size() >= options_.max_scene_size(); - if (!scene_frames_.empty() && (is_end_of_scene || force_buffer_flush)) { + scene_frame_timestamps_.size() >= options_.max_scene_size(); + if (!scene_frame_timestamps_.empty() && + (is_end_of_scene || force_buffer_flush)) { MP_RETURN_IF_ERROR(ProcessScene(is_end_of_scene, cc)); } @@ -240,11 +296,14 @@ namespace { LOG_EVERY_N(ERROR, 10) << "------------------------ (Breathing) Time(s): " << cc->Inputs().Tag(kInputVideoFrames).Value().Timestamp().Seconds(); - const auto& frame = cc->Inputs().Tag(kInputVideoFrames).Get(); - const cv::Mat frame_mat = formats::MatView(&frame); - cv::Mat copy_mat; - frame_mat.copyTo(copy_mat); - scene_frames_.push_back(copy_mat); + // Only buffer frames if |should_perform_frame_cropping_| is true. + if (should_perform_frame_cropping_) { + const auto& frame = cc->Inputs().Tag(kInputVideoFrames).Get(); + const cv::Mat frame_mat = formats::MatView(&frame); + cv::Mat copy_mat; + frame_mat.copyTo(copy_mat); + scene_frames_or_empty_.push_back(copy_mat); + } scene_frame_timestamps_.push_back(cc->InputTimestamp().Value()); is_key_frames_.push_back( !cc->Inputs().Tag(kInputDetections).Value().IsEmpty()); @@ -274,7 +333,7 @@ namespace { ::mediapipe::Status SceneCroppingCalculator::Close( ::mediapipe::CalculatorContext* cc) { - if (!scene_frames_.empty()) { + if (!scene_frame_timestamps_.empty()) { MP_RETURN_IF_ERROR(ProcessScene(/* is_end_of_scene = */ true, cc)); } if (cc->Outputs().HasTag(kOutputSummary)) { @@ -282,16 +341,25 @@ namespace { .Tag(kOutputSummary) .Add(summary_.release(), Timestamp::PostStream()); } + if (cc->Outputs().HasTag(kExternalRenderingFullVid)) { + cc->Outputs() + .Tag(kExternalRenderingFullVid) + .Add(external_render_list_.release(), Timestamp::PostStream()); + } return ::mediapipe::OkStatus(); } -::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders() { - int top_border_size = 0, bottom_border_size = 0; +// TODO: split this function into two, one for calculating the border +// sizes, the other for the actual removal of borders from the frames. +::mediapipe::Status SceneCroppingCalculator::RemoveStaticBorders( + int* top_border_size, int* bottom_border_size) { + *top_border_size = 0; + *bottom_border_size = 0; MP_RETURN_IF_ERROR(ComputeSceneStaticBordersSize( - static_features_, &top_border_size, &bottom_border_size)); + static_features_, top_border_size, bottom_border_size)); const double scale = static_cast(frame_height_) / key_frame_height_; - top_border_distance_ = std::round(scale * top_border_size); - const int bottom_border_distance = std::round(scale * bottom_border_size); + top_border_distance_ = std::round(scale * *top_border_size); + const int bottom_border_distance = std::round(scale * *bottom_border_size); effective_frame_height_ = frame_height_ - top_border_distance_ - bottom_border_distance; @@ -301,10 +369,10 @@ namespace { // Remove borders from frames. 
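  // (Scale example: if detection features were computed on 640x360 key frames
  // while the scene frames are 1920x1080, scale == 3.0, so a 12-pixel border
  // measured at key-frame resolution is removed as a 36-pixel border here.)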
cv::Rect roi(0, top_border_distance_, frame_width_, effective_frame_height_); - for (int i = 0; i < scene_frames_.size(); ++i) { + for (int i = 0; i < scene_frames_or_empty_.size(); ++i) { cv::Mat tmp; - scene_frames_[i](roi).copyTo(tmp); - scene_frames_[i] = tmp; + scene_frames_or_empty_[i](roi).copyTo(tmp); + scene_frames_or_empty_[i] = tmp; } // Adjust detection bounding boxes. for (int i = 0; i < key_frame_infos_.size(); ++i) { @@ -373,7 +441,9 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() { FilterKeyFrameInfo(); // Removes any static borders. - MP_RETURN_IF_ERROR(RemoveStaticBorders()); + int top_static_border_size, bottom_static_border_size; + MP_RETURN_IF_ERROR( + RemoveStaticBorders(&top_static_border_size, &bottom_static_border_size)); // Decides if solid background color padding is possible and sets up color // interpolation functions in CIELAB. Uses linear interpolation by default. @@ -409,21 +479,32 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() { // Crops scene frames. std::vector cropped_frames; + std::vector crop_from_locations; + + auto* cropped_frames_ptr = + should_perform_frame_cropping_ ? &cropped_frames : nullptr; + MP_RETURN_IF_ERROR(scene_cropper_->CropFrames( - scene_summary, scene_frames_, focus_point_frames, - prior_focus_point_frames_, &cropped_frames)); + scene_summary, scene_frame_timestamps_.size(), scene_frames_or_empty_, + focus_point_frames, prior_focus_point_frames_, top_static_border_size, + bottom_static_border_size, &crop_from_locations, cropped_frames_ptr)); // Formats and outputs cropped frames. bool apply_padding = false; float vertical_fill_precent; - MP_RETURN_IF_ERROR(FormatAndOutputCroppedFrames( - cropped_frames, &apply_padding, &vertical_fill_precent, cc)); - + std::vector render_to_locations; + cv::Scalar padding_color; + if (should_perform_frame_cropping_) { + MP_RETURN_IF_ERROR(FormatAndOutputCroppedFrames( + cropped_frames, &render_to_locations, &apply_padding, &padding_color, + &vertical_fill_precent, cc)); + } // Caches prior FocusPointFrames if this was not the end of a scene. 
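  // (When the buffer was flushed because it reached max_scene_size() rather
  // than at a real shot boundary, these trailing FocusPointFrames seed the
  // next ProcessScene call so that the crop path stays smooth across the
  // artificial split.)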
prior_focus_point_frames_.clear(); if (!is_end_of_scene) { - const int start = std::max(0, static_cast(scene_frames_.size()) - - options_.prior_frame_buffer_size()); + const int start = + std::max(0, static_cast(scene_frame_timestamps_.size()) - + options_.prior_frame_buffer_size()); for (int i = start; i < num_key_frames; ++i) { prior_focus_point_frames_.push_back(focus_point_frames[i]); } @@ -449,8 +530,31 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() { scene_summary->set_is_padded(apply_padding); } + if (cc->Outputs().HasTag(kExternalRenderingPerFrame)) { + for (int i = 0; i < scene_frame_timestamps_.size(); i++) { + auto external_render_message = absl::make_unique(); + ConstructExternalRenderMessage( + crop_from_locations[i], render_to_locations[i], padding_color, + scene_frame_timestamps_[i], external_render_message.get()); + cc->Outputs() + .Tag(kExternalRenderingPerFrame) + .Add(external_render_message.release(), + Timestamp(scene_frame_timestamps_[i])); + } + } + + if (cc->Outputs().HasTag(kExternalRenderingFullVid)) { + for (int i = 0; i < scene_frame_timestamps_.size(); i++) { + ExternalRenderFrame render_frame; + ConstructExternalRenderMessage(crop_from_locations[i], + render_to_locations[i], padding_color, + scene_frame_timestamps_[i], &render_frame); + external_render_list_->push_back(render_frame); + } + } + key_frame_infos_.clear(); - scene_frames_.clear(); + scene_frames_or_empty_.clear(); scene_frame_timestamps_.clear(); is_key_frames_.clear(); static_features_.clear(); @@ -459,8 +563,10 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() { } ::mediapipe::Status SceneCroppingCalculator::FormatAndOutputCroppedFrames( - const std::vector& cropped_frames, bool* apply_padding, - float* vertical_fill_precent, CalculatorContext* cc) { + const std::vector& cropped_frames, + std::vector* render_to_locations, bool* apply_padding, + cv::Scalar* padding_color, float* vertical_fill_precent, + CalculatorContext* cc) { RET_CHECK(apply_padding) << "Has padding boolean is null."; if (cropped_frames.empty()) { return ::mediapipe::OkStatus(); @@ -493,10 +599,22 @@ void SceneCroppingCalculator::FilterKeyFrameInfo() { << " target height = " << target_height_; } + // Compute the "render to" location. This is where the rect taken from the + // input video gets pasted on the output frame. For use with external + // rendering solutions. + const int num_frames = cropped_frames.size(); + for (int i = 0; i < num_frames; i++) { + if (*apply_padding) { + render_to_locations->push_back(padder_->ComputeOutputLocation()); + } else { + render_to_locations->push_back( + cv::Rect(0, 0, target_width_, target_height_)); + } + } + // Resizes cropped frames, pads frames, and output frames. 
cv::Scalar* background_color = nullptr; cv::Scalar interpolated_color; - const int num_frames = cropped_frames.size(); for (int i = 0; i < num_frames; ++i) { const int64 time_ms = scene_frame_timestamps_[i]; const Timestamp timestamp(time_ms); @@ -561,9 +679,9 @@ mediapipe::Status SceneCroppingCalculator::OutputVizFrames( if (cc->Outputs().HasTag(kOutputKeyFrameCropViz)) { std::vector> viz_frames; MP_RETURN_IF_ERROR(DrawDetectionsAndCropRegions( - scene_frames_, is_key_frames_, key_frame_infos_, key_frame_crop_results, - frame_format_, &viz_frames)); - for (int i = 0; i < scene_frames_.size(); ++i) { + scene_frames_or_empty_, is_key_frames_, key_frame_infos_, + key_frame_crop_results, frame_format_, &viz_frames)); + for (int i = 0; i < scene_frames_or_empty_.size(); ++i) { cc->Outputs() .Tag(kOutputKeyFrameCropViz) .Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i])); @@ -572,9 +690,10 @@ mediapipe::Status SceneCroppingCalculator::OutputVizFrames( if (cc->Outputs().HasTag(kOutputFocusPointFrameViz)) { std::vector> viz_frames; MP_RETURN_IF_ERROR(DrawFocusPointAndCropWindow( - scene_frames_, focus_point_frames, options_.viz_overlay_opacity(), - crop_window_width, crop_window_height, frame_format_, &viz_frames)); - for (int i = 0; i < scene_frames_.size(); ++i) { + scene_frames_or_empty_, focus_point_frames, + options_.viz_overlay_opacity(), crop_window_width, crop_window_height, + frame_format_, &viz_frames)); + for (int i = 0; i < scene_frames_or_empty_.size(); ++i) { cc->Outputs() .Tag(kOutputFocusPointFrameViz) .Add(viz_frames[i].release(), Timestamp(scene_frame_timestamps_[i])); diff --git a/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.h b/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.h index f910935bf..f467b3698 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.h +++ b/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator.h @@ -79,8 +79,10 @@ namespace autoflip { // Indicators for shot boundaries (output of shot boundary detection). // - optional tag KEY_FRAMES (type ImageFrame): // Key frames on which features are detected. This is only used to set the -// detection features frame size, and when it is omitted, the features frame -// size is assumed to be the original scene frame size. +// detection features frame size. Alternatively, set +// video_feature_width/video_features_height within the options proto to +// define this value. When neither is set, the features frame size is +// assumed to be the original scene frame size. // // Output streams: // - required tag CROPPED_FRAMES (type ImageFrame): @@ -95,6 +97,12 @@ namespace autoflip { // - optional tag CROPPING_SUMMARY (type VideoCroppingSummary): // Debug summary information for the video. Only generates one packet when // calculator closes. +// - optional tag EXTERNAL_RENDERING_PER_FRAME (type ExternalRenderFrame) +// Provides a per-frame message that can be used to render autoflip using an +// external renderer. +// - optional tag EXTERNAL_RENDERING_FULL_VID (type Vector) +// Provides an end-stream message that can be used to render autoflip using +// an external renderer. // // Example config: // node { @@ -134,8 +142,11 @@ class SceneCroppingCalculator : public CalculatorBase { ::mediapipe::Status Close(::mediapipe::CalculatorContext* cc) override; private: - // Removes any static borders from the scene frames before cropping. 
- ::mediapipe::Status RemoveStaticBorders(); + // Removes any static borders from the scene frames before cropping. The + // arguments |top_border_size| and |bottom_border_size| report the size of the + // removed borders. + ::mediapipe::Status RemoveStaticBorders(int* top_border_size, + int* bottom_border_size); // Initializes a FrameCropRegionComputer given input and target frame sizes. ::mediapipe::Status InitializeFrameCropRegionComputer(); @@ -158,8 +169,10 @@ class SceneCroppingCalculator : public CalculatorBase { // solid background from static features if possible, otherwise uses blurred // background. Sets apply_padding to true if the scene is padded. ::mediapipe::Status FormatAndOutputCroppedFrames( - const std::vector& cropped_frames, bool* apply_padding, - float* vertical_fill_precent, CalculatorContext* cc); + const std::vector& cropped_frames, + std::vector* render_to_locations, bool* apply_padding, + cv::Scalar* padding_color, float* vertical_fill_precent, + CalculatorContext* cc); // Draws and outputs visualization frames if those streams are present. ::mediapipe::Status OutputVizFrames( @@ -193,7 +206,11 @@ class SceneCroppingCalculator : public CalculatorBase { // Buffered frames, timestamps, and indicators for key frames in the current // scene (size = number of input video frames). - std::vector scene_frames_; + // Note: scene_frames_or_empty_ may be empty if the actual cropping operation + // of frames is turned off, e.g. when |should_perform_frame_cropping_| is + // false, so rely on scene_frame_timestamps_.size() to query the number of + // accumulated timestamps rather than scene_frames_or_empty_.size(). + std::vector scene_frames_or_empty_; std::vector scene_frame_timestamps_; std::vector is_key_frames_; @@ -242,6 +259,17 @@ class SceneCroppingCalculator : public CalculatorBase { // Optional diagnostic summary output emitted in Close(). std::unique_ptr summary_ = nullptr; + + // Optional list of external rendering messages for each processed frame. + std::unique_ptr> external_render_list_; + + // Determines whether to perform real cropping on input frames. This flag is + // useful when the user only needs to compute cropping windows, in which case + // setting this flag to false can avoid buffering as well as cropping frames. + // This can significantly reduce memory usage and speed up processing. Some + // debugging visualization inevitably will be disabled because of this flag + // too. 
+ bool should_perform_frame_cropping_ = false; }; } // namespace autoflip } // namespace mediapipe diff --git a/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator_test.cc b/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator_test.cc index bb7ee2333..7c9f5009f 100644 --- a/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator_test.cc +++ b/mediapipe/examples/desktop/autoflip/calculators/scene_cropping_calculator_test.cc @@ -68,6 +68,22 @@ constexpr char kNoKeyFrameConfig[] = R"( } })"; +constexpr char kDebugConfigNoCroppedFrame[] = R"( + calculator: "SceneCroppingCalculator" + input_stream: "VIDEO_FRAMES:camera_frames_org" + input_stream: "KEY_FRAMES:down_sampled_frames" + input_stream: "DETECTION_FEATURES:salient_regions" + input_stream: "STATIC_FEATURES:border_features" + input_stream: "SHOT_BOUNDARIES:shot_boundary_frames" + output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames" + output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames" + options: { + [mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: { + target_width: $0 + target_height: $1 + } + })"; + constexpr char kDebugConfig[] = R"( calculator: "SceneCroppingCalculator" input_stream: "VIDEO_FRAMES:camera_frames_org" @@ -79,6 +95,8 @@ constexpr char kDebugConfig[] = R"( output_stream: "KEY_FRAME_CROP_REGION_VIZ_FRAMES:key_frame_crop_viz_frames" output_stream: "SALIENT_POINT_FRAME_VIZ_FRAMES:salient_point_viz_frames" output_stream: "CROPPING_SUMMARY:cropping_summaries" + output_stream: "EXTERNAL_RENDERING_PER_FRAME:external_rendering_per_frame" + output_stream: "EXTERNAL_RENDERING_FULL_VID:external_rendering_full_vid" options: { [mediapipe.autoflip.SceneCroppingCalculatorOptions.ext]: { target_width: $0 @@ -257,6 +275,17 @@ TEST(SceneCroppingCalculatorTest, ChecksPriorFrameBufferSize) { HasSubstr("Prior frame buffer size is negative.")); } +TEST(SceneCroppingCalculatorTest, ChecksDebugConfigWithoutCroppedFrame) { + const CalculatorGraphConfig::Node config = + ParseTextProtoOrDie(absl::Substitute( + kDebugConfigNoCroppedFrame, kTargetWidth, kTargetHeight, + kTargetSizeType, 0, kPriorFrameBufferSize)); + auto runner = absl::make_unique(config); + const auto status = runner->Run(); + EXPECT_FALSE(status.ok()); + EXPECT_THAT(status.ToString(), HasSubstr("can only be used when")); +} + // Checks that the calculator crops scene frames when there is no input key // frames stream. 
TEST(SceneCroppingCalculatorTest, HandlesNoKeyFrames) { @@ -299,14 +328,34 @@ TEST(SceneCroppingCalculatorTest, OutputsDebugStreams) { EXPECT_TRUE(outputs.HasTag("KEY_FRAME_CROP_REGION_VIZ_FRAMES")); EXPECT_TRUE(outputs.HasTag("SALIENT_POINT_FRAME_VIZ_FRAMES")); EXPECT_TRUE(outputs.HasTag("CROPPING_SUMMARY")); + EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_PER_FRAME")); + EXPECT_TRUE(outputs.HasTag("EXTERNAL_RENDERING_FULL_VID")); const auto& crop_region_viz_frames_outputs = outputs.Tag("KEY_FRAME_CROP_REGION_VIZ_FRAMES").packets; const auto& salient_point_viz_frames_outputs = outputs.Tag("SALIENT_POINT_FRAME_VIZ_FRAMES").packets; const auto& summary_output = outputs.Tag("CROPPING_SUMMARY").packets; + const auto& ext_render_per_frame = + outputs.Tag("EXTERNAL_RENDERING_PER_FRAME").packets; + const auto& ext_render_full_vid = + outputs.Tag("EXTERNAL_RENDERING_FULL_VID").packets; EXPECT_EQ(crop_region_viz_frames_outputs.size(), num_frames); EXPECT_EQ(salient_point_viz_frames_outputs.size(), num_frames); EXPECT_EQ(summary_output.size(), 1); + EXPECT_EQ(ext_render_per_frame.size(), num_frames); + EXPECT_EQ(ext_render_full_vid.size(), 1); + EXPECT_EQ(ext_render_per_frame[0].Get().timestamp_us(), + 0); + EXPECT_EQ(ext_render_full_vid[0] + .Get>()[0] + .timestamp_us(), + 0); + EXPECT_EQ(ext_render_per_frame[1].Get().timestamp_us(), + 20000); + EXPECT_EQ(ext_render_full_vid[0] + .Get>()[1] + .timestamp_us(), + 20000); for (int i = 0; i < num_frames; ++i) { const auto& crop_region_viz_frame = diff --git a/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.cc b/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.cc index ef9f80f37..3da821f08 100644 --- a/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.cc +++ b/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.cc @@ -173,5 +173,28 @@ PaddingEffectGenerator::PaddingEffectGenerator(const int input_width, return ::mediapipe::OkStatus(); } +cv::Rect PaddingEffectGenerator::ComputeOutputLocation() { + const int effective_input_width = + is_vertical_padding_ ? input_width_ : input_height_; + const int effective_input_height = + is_vertical_padding_ ? input_height_ : input_width_; + const int effective_output_width = + is_vertical_padding_ ? output_width_ : output_height_; + const int effective_output_height = + is_vertical_padding_ ? output_height_ : output_width_; + + // Step 3 from "process" call above, compute foreground location. + const int foreground_height = + effective_input_height * effective_output_width / effective_input_width; + const int x = 0; + const int y = (effective_output_height - foreground_height) / 2; + const int width = effective_output_width; + const int height = foreground_height; + + cv::Rect region_to_embed_foreground(x, y, width, height); + + return region_to_embed_foreground; +} + } // namespace autoflip } // namespace mediapipe diff --git a/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h b/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h index 445dadd00..679f01a68 100644 --- a/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h +++ b/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator.h @@ -55,6 +55,10 @@ class PaddingEffectGenerator { ImageFrame* output_frame, const cv::Scalar* background_color_in_rgb = nullptr); + // Compute the "render location" on the output frame where the "crop from" + // location is to be placed. For use with external rendering soutions. 
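  // For example, padding a 1920x1080 input to a 1:1 (1080x1080) output keeps
  // the full 1080-pixel output width, scales the foreground to
  // 1080 * 1080 / 1920 = 607 rows, and centers it at y = (1080 - 607) / 2 = 236,
  // i.e. cv::Rect(0, 236, 1080, 607), which is exactly what the new
  // ComputeOutputLocation unit test expects.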
+ cv::Rect ComputeOutputLocation(); + private: double target_aspect_ratio_; int input_width_ = -1; diff --git a/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator_test.cc b/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator_test.cc index 6a1a5e7af..0bf5c0960 100644 --- a/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator_test.cc +++ b/mediapipe/examples/desktop/autoflip/quality/padding_effect_generator_test.cc @@ -182,6 +182,16 @@ TEST(PaddingEffectGeneratorTest, ScaleToMultipleOfTwo) { EXPECT_EQ(result_frame.Width(), expect_width); EXPECT_EQ(result_frame.Height(), expect_height); } + +TEST(PaddingEffectGeneratorTest, ComputeOutputLocation) { + PaddingEffectGenerator generator(1920, 1080, 1.0); + + auto result_rect = generator.ComputeOutputLocation(); + EXPECT_EQ(result_rect.x, 0); + EXPECT_EQ(result_rect.y, 236); + EXPECT_EQ(result_rect.width, 1080); + EXPECT_EQ(result_rect.height, 607); +} } // namespace } // namespace autoflip } // namespace mediapipe diff --git a/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc b/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc index 65eaa2bf8..d70b7d677 100644 --- a/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc +++ b/mediapipe/examples/desktop/autoflip/quality/scene_cropper.cc @@ -25,14 +25,13 @@ namespace mediapipe { namespace autoflip { ::mediapipe::Status SceneCropper::CropFrames( - const SceneKeyFrameCropSummary& scene_summary, - const std::vector& scene_frames, + const SceneKeyFrameCropSummary& scene_summary, const int num_scene_frames, + const std::vector& scene_frames_or_empty, const std::vector& focus_point_frames, const std::vector& prior_focus_point_frames, + int top_static_border_size, int bottom_static_border_size, + std::vector* crop_from_location, std::vector* cropped_frames) const { - RET_CHECK_NE(cropped_frames, nullptr) << "Output cropped frames is null."; - - const int num_scene_frames = scene_frames.size(); RET_CHECK_GT(num_scene_frames, 0) << "No scene frames."; RET_CHECK_EQ(focus_point_frames.size(), num_scene_frames) << "Wrong size of FocusPointFrames."; @@ -69,15 +68,36 @@ namespace autoflip { xform = affine_opencv; } + // If no cropped_frames is passed in, return directly. + if (!cropped_frames) { + return ::mediapipe::OkStatus(); + } + RET_CHECK(!scene_frames_or_empty.empty()) + << "If |cropped_frames| != nullptr, scene_frames_or_empty must not be " + "empty."; // Prepares cropped frames. cropped_frames->resize(num_scene_frames); for (int i = 0; i < num_scene_frames; ++i) { - (*cropped_frames)[i] = - cv::Mat::zeros(crop_height, crop_width, scene_frames[i].type()); + (*cropped_frames)[i] = cv::Mat::zeros(crop_height, crop_width, + scene_frames_or_empty[i].type()); } - return AffineRetarget(cv::Size(crop_width, crop_height), scene_frames, - scene_frame_xforms, cropped_frames); + // Store the "crop from" location on the input frame for use with an external + // renderer. 
+ for (int i = 0; i < num_scene_frames; i++) { + const int left = scene_frame_xforms[i].at(0, 2); + const int right = left + crop_width; + const int top = top_static_border_size; + const int bottom = + top_static_border_size + + (crop_height - top_static_border_size - bottom_static_border_size); + crop_from_location->push_back( + cv::Rect(left, top, right - left, bottom - top)); + } + + return AffineRetarget(cv::Size(crop_width, crop_height), + scene_frames_or_empty, scene_frame_xforms, + cropped_frames); } } // namespace autoflip diff --git a/mediapipe/examples/desktop/autoflip/quality/scene_cropper.h b/mediapipe/examples/desktop/autoflip/quality/scene_cropper.h index 49c7293a0..0235eb1b2 100644 --- a/mediapipe/examples/desktop/autoflip/quality/scene_cropper.h +++ b/mediapipe/examples/desktop/autoflip/quality/scene_cropper.h @@ -48,14 +48,19 @@ class SceneCropper { SceneCropper() {} ~SceneCropper() {} - // Crops scene frames given SceneKeyFrameCropSummary, FocusPointFrames, and - // any prior FocusPointFrames (to ensure smoothness when there was no actual - // scene change). + // Computes transformation matrix given SceneKeyFrameCropSummary, + // FocusPointFrames, and any prior FocusPointFrames (to ensure smoothness when + // there was no actual scene change). Optionally crops the input frames based + // on the transform matrix if |cropped_frames| is not nullptr and + // |scene_frames_or_empty| isn't empty. + // TODO: split this function into two separate functions. ::mediapipe::Status CropFrames( - const SceneKeyFrameCropSummary& scene_summary, - const std::vector& scene_frames, + const SceneKeyFrameCropSummary& scene_summary, const int num_scene_frames, + const std::vector& scene_frames_or_empty, const std::vector& focus_point_frames, const std::vector& prior_focus_point_frames, + int top_static_border_size, int bottom_static_border_size, + std::vector* all_scene_frame_xforms, std::vector* cropped_frames) const; }; diff --git a/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc b/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc index 8efdb7d35..6c7dc3e41 100644 --- a/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc +++ b/mediapipe/examples/desktop/autoflip/quality/scene_cropper_test.cc @@ -71,24 +71,16 @@ std::vector GetDefaultFocusPointFrames() { return GetFocusPointFrames(kNumSceneFrames); } -// Checks that CropFrames checks output pointer is not null. -TEST(SceneCropperTest, CropFramesChecksOutputNotNull) { - SceneCropper scene_cropper; - const auto status = scene_cropper.CropFrames( - GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(), - GetDefaultFocusPointFrames(), GetFocusPointFrames(0), nullptr); - EXPECT_FALSE(status.ok()); - EXPECT_THAT(status.ToString(), HasSubstr("Output cropped frames is null.")); -} - // Checks that CropFrames checks that scene frames size is positive. 
TEST(SceneCropperTest, CropFramesChecksSceneFramesSize) { SceneCropper scene_cropper; std::vector scene_frames(0); std::vector cropped_frames; + std::vector crop_from_locations; const auto status = scene_cropper.CropFrames( - GetDefaultSceneKeyFrameCropSummary(), scene_frames, - GetDefaultFocusPointFrames(), GetFocusPointFrames(0), &cropped_frames); + GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames, + GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0, + &crop_from_locations, &cropped_frames); EXPECT_FALSE(status.ok()); EXPECT_THAT(status.ToString(), HasSubstr("No scene frames.")); } @@ -97,10 +89,12 @@ TEST(SceneCropperTest, CropFramesChecksSceneFramesSize) { TEST(SceneCropperTest, CropFramesChecksFocusPointFramesSize) { SceneCropper scene_cropper; std::vector cropped_frames; + std::vector crop_from_locations; + const auto& scene_frames = GetDefaultSceneFrames(); const auto status = scene_cropper.CropFrames( - GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(), - GetFocusPointFrames(kNumSceneFrames - 1), GetFocusPointFrames(0), - &cropped_frames); + GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames, + GetFocusPointFrames(kNumSceneFrames - 1), GetFocusPointFrames(0), 0, 0, + &crop_from_locations, &cropped_frames); EXPECT_FALSE(status.ok()); EXPECT_THAT(status.ToString(), HasSubstr("Wrong size of FocusPointFrames")); } @@ -111,9 +105,12 @@ TEST(SceneCropperTest, CropFramesChecksCropSizePositive) { scene_summary.set_crop_window_width(-1); SceneCropper scene_cropper; std::vector cropped_frames; + std::vector crop_from_locations; + const auto& scene_frames = GetDefaultSceneFrames(); const auto status = scene_cropper.CropFrames( - scene_summary, GetDefaultSceneFrames(), GetDefaultFocusPointFrames(), - GetFocusPointFrames(0), &cropped_frames); + scene_summary, scene_frames.size(), scene_frames, + GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0, + &crop_from_locations, &cropped_frames); EXPECT_FALSE(status.ok()); EXPECT_THAT(status.ToString(), HasSubstr("Crop width is non-positive.")); } @@ -124,9 +121,12 @@ TEST(SceneCropperTest, InitializesRetargeterChecksCropSizeNotExceedFrameSize) { scene_summary.set_crop_window_height(kSceneHeight + 1); SceneCropper scene_cropper; std::vector cropped_frames; + std::vector crop_from_locations; + const auto& scene_frames = GetDefaultSceneFrames(); const auto status = scene_cropper.CropFrames( - scene_summary, GetDefaultSceneFrames(), GetDefaultFocusPointFrames(), - GetFocusPointFrames(0), &cropped_frames); + scene_summary, scene_frames.size(), scene_frames, + GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0, + &crop_from_locations, &cropped_frames); EXPECT_FALSE(status.ok()); EXPECT_THAT(status.ToString(), HasSubstr("Crop height exceeds frame height.")); @@ -136,9 +136,12 @@ TEST(SceneCropperTest, InitializesRetargeterChecksCropSizeNotExceedFrameSize) { TEST(SceneCropperTest, CropFramesWorksWithoutPriorFocusPointFrames) { SceneCropper scene_cropper; std::vector cropped_frames; + std::vector crop_from_locations; + const auto& scene_frames = GetDefaultSceneFrames(); MP_ASSERT_OK(scene_cropper.CropFrames( - GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(), - GetDefaultFocusPointFrames(), GetFocusPointFrames(0), &cropped_frames)); + GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames, + GetDefaultFocusPointFrames(), GetFocusPointFrames(0), 0, 0, + &crop_from_locations, &cropped_frames)); ASSERT_EQ(cropped_frames.size(), kNumSceneFrames); for (int 
i = 0; i < kNumSceneFrames; ++i) { EXPECT_EQ(cropped_frames[i].rows, kCropHeight); @@ -150,9 +153,12 @@ TEST(SceneCropperTest, CropFramesWorksWithoutPriorFocusPointFrames) { TEST(SceneCropperTest, CropFramesWorksWithPriorFocusPointFrames) { SceneCropper scene_cropper; std::vector cropped_frames; + std::vector crop_from_locations; + const auto& scene_frames = GetDefaultSceneFrames(); MP_EXPECT_OK(scene_cropper.CropFrames( - GetDefaultSceneKeyFrameCropSummary(), GetDefaultSceneFrames(), - GetDefaultFocusPointFrames(), GetFocusPointFrames(3), &cropped_frames)); + GetDefaultSceneKeyFrameCropSummary(), scene_frames.size(), scene_frames, + GetDefaultFocusPointFrames(), GetFocusPointFrames(3), 0, 0, + &crop_from_locations, &cropped_frames)); EXPECT_EQ(cropped_frames.size(), kNumSceneFrames); for (int i = 0; i < kNumSceneFrames; ++i) { EXPECT_EQ(cropped_frames[i].rows, kCropHeight); diff --git a/mediapipe/examples/desktop/face_mesh/BUILD b/mediapipe/examples/desktop/face_mesh/BUILD new file mode 100644 index 000000000..268d590ef --- /dev/null +++ b/mediapipe/examples/desktop/face_mesh/BUILD @@ -0,0 +1,42 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//mediapipe/examples:__subpackages__"]) + +cc_binary( + name = "face_mesh_tflite", + deps = [ + "//mediapipe/examples/desktop:simple_run_graph_main", + "//mediapipe/graphs/face_mesh:desktop_calculators", + ], +) + +cc_binary( + name = "face_mesh_cpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main", + "//mediapipe/graphs/face_mesh:desktop_live_calculators", + ], +) + +# Linux only +cc_binary( + name = "face_mesh_gpu", + deps = [ + "//mediapipe/examples/desktop:demo_run_graph_main_gpu", + "//mediapipe/graphs/face_mesh:desktop_live_gpu_calculators", + ], +) diff --git a/mediapipe/examples/ios/facemeshgpu/AppDelegate.h b/mediapipe/examples/ios/facemeshgpu/AppDelegate.h new file mode 100644 index 000000000..6b0377ef2 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/AppDelegate.h @@ -0,0 +1,21 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import + +@interface AppDelegate : UIResponder + +@property(strong, nonatomic) UIWindow *window; + +@end diff --git a/mediapipe/examples/ios/facemeshgpu/AppDelegate.m b/mediapipe/examples/ios/facemeshgpu/AppDelegate.m new file mode 100644 index 000000000..9e1b7ff0e --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/AppDelegate.m @@ -0,0 +1,59 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "AppDelegate.h" + +@interface AppDelegate () + +@end + +@implementation AppDelegate + +- (BOOL)application:(UIApplication *)application + didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + // Override point for customization after application launch. + return YES; +} + +- (void)applicationWillResignActive:(UIApplication *)application { + // Sent when the application is about to move from active to inactive state. This can occur for + // certain types of temporary interruptions (such as an incoming phone call or SMS message) or + // when the user quits the application and it begins the transition to the background state. Use + // this method to pause ongoing tasks, disable timers, and invalidate graphics rendering + // callbacks. Games should use this method to pause the game. +} + +- (void)applicationDidEnterBackground:(UIApplication *)application { + // Use this method to release shared resources, save user data, invalidate timers, and store + // enough application state information to restore your application to its current state in case + // it is terminated later. If your application supports background execution, this method is + // called instead of applicationWillTerminate: when the user quits. +} + +- (void)applicationWillEnterForeground:(UIApplication *)application { + // Called as part of the transition from the background to the active state; here you can undo + // many of the changes made on entering the background. +} + +- (void)applicationDidBecomeActive:(UIApplication *)application { + // Restart any tasks that were paused (or not yet started) while the application was inactive. If + // the application was previously in the background, optionally refresh the user interface. +} + +- (void)applicationWillTerminate:(UIApplication *)application { + // Called when the application is about to terminate. Save data if appropriate. See also + // applicationDidEnterBackground:. 
+} + +@end diff --git a/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/AppIcon.appiconset/Contents.json b/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 000000000..a1895a242 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,99 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "3x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "83.5x83.5", + "scale" : "2x" + }, + { + "idiom" : "ios-marketing", + "size" : "1024x1024", + "scale" : "1x" + } + ], + "info" : { + "version" : 1, + "author" : "xcode" + } +} + diff --git a/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/Contents.json b/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/Contents.json new file mode 100644 index 000000000..7afcdfaf8 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/Assets.xcassets/Contents.json @@ -0,0 +1,7 @@ +{ + "info" : { + "version" : 1, + "author" : "xcode" + } +} + diff --git a/mediapipe/examples/ios/facemeshgpu/BUILD b/mediapipe/examples/ios/facemeshgpu/BUILD new file mode 100644 index 000000000..5c9df3feb --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/BUILD @@ -0,0 +1,76 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
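+ +# This package builds the FaceMeshGpu iOS example: an ios_application bundle wrapping an +# objc_library that links the MediaPipe Objective-C API, the face_mesh mobile graph, and the +# face detection/landmark TFLite models listed in the data attribute below.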
+ +load( + "@build_bazel_rules_apple//apple:ios.bzl", + "ios_application", +) + +licenses(["notice"]) # Apache 2.0 + +MIN_IOS_VERSION = "10.0" + +ios_application( + name = "FaceMeshGpuApp", + bundle_id = "com.google.mediapipe.FaceMeshGpu", + families = [ + "iphone", + "ipad", + ], + infoplists = ["Info.plist"], + minimum_os_version = MIN_IOS_VERSION, + provisioning_profile = "//mediapipe/examples/ios:provisioning_profile", + deps = [ + ":FaceMeshGpuAppLibrary", + "@ios_opencv//:OpencvFramework", + ], +) + +objc_library( + name = "FaceMeshGpuAppLibrary", + srcs = [ + "AppDelegate.m", + "ViewController.mm", + "main.m", + ], + hdrs = [ + "AppDelegate.h", + "ViewController.h", + ], + data = [ + "Base.lproj/LaunchScreen.storyboard", + "Base.lproj/Main.storyboard", + "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu_binary_graph", + "//mediapipe/modules/face_detection:face_detection_front.tflite", + "//mediapipe/modules/face_landmark:face_landmark.tflite", + ], + sdk_frameworks = [ + "AVFoundation", + "CoreGraphics", + "CoreMedia", + "UIKit", + ], + deps = [ + "//mediapipe/objc:mediapipe_framework_ios", + "//mediapipe/objc:mediapipe_input_sources_ios", + "//mediapipe/objc:mediapipe_layer_renderer", + ] + select({ + "//mediapipe:ios_i386": [], + "//mediapipe:ios_x86_64": [], + "//conditions:default": [ + "//mediapipe/graphs/face_mesh:mobile_calculators", + "//mediapipe/framework/formats:landmark_cc_proto", + ], + }), +) diff --git a/mediapipe/examples/ios/facemeshgpu/Base.lproj/LaunchScreen.storyboard b/mediapipe/examples/ios/facemeshgpu/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 000000000..bfa361294 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/examples/ios/facemeshgpu/Base.lproj/Main.storyboard b/mediapipe/examples/ios/facemeshgpu/Base.lproj/Main.storyboard new file mode 100644 index 000000000..e3bd912a4 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/Base.lproj/Main.storyboard @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mediapipe/examples/ios/facemeshgpu/Info.plist b/mediapipe/examples/ios/facemeshgpu/Info.plist new file mode 100644 index 000000000..30db14c62 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/Info.plist @@ -0,0 +1,42 @@ + + + + + NSCameraUsageDescription + This app uses the camera to demonstrate live video processing. + CFBundleDevelopmentRegion + en + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + UILaunchStoryboardName + LaunchScreen + UIMainStoryboardFile + Main + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + + + diff --git a/mediapipe/examples/ios/facemeshgpu/ViewController.h b/mediapipe/examples/ios/facemeshgpu/ViewController.h new file mode 100644 index 000000000..e0a5a6367 --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/ViewController.h @@ -0,0 +1,19 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import <UIKit/UIKit.h> + +@interface ViewController : UIViewController + +@end diff --git a/mediapipe/examples/ios/facemeshgpu/ViewController.mm b/mediapipe/examples/ios/facemeshgpu/ViewController.mm new file mode 100644 index 000000000..c2beca30c --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/ViewController.mm @@ -0,0 +1,210 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "ViewController.h" + +#import "mediapipe/objc/MPPCameraInputSource.h" +#import "mediapipe/objc/MPPGraph.h" +#import "mediapipe/objc/MPPLayerRenderer.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +static NSString* const kGraphName = @"face_mesh_mobile_gpu"; + +static const char* kInputStream = "input_video"; +static const char* kNumFacesInputSidePacket = "num_faces"; +static const char* kOutputStream = "output_video"; +static const char* kLandmarksOutputStream = "multi_face_landmarks"; +static const char* kVideoQueueLabel = "com.google.mediapipe.example.videoQueue"; + +// Max number of faces to detect/process. +static const int kNumFaces = 1; + +@interface ViewController () <MPPGraphDelegate, MPPInputSourceDelegate> + +// The MediaPipe graph currently in use. Initialized in viewDidLoad, started in viewWillAppear: and +// sent video frames on _videoQueue. +@property(nonatomic) MPPGraph* mediapipeGraph; + +@end + +@implementation ViewController { + /// Handles camera access via AVCaptureSession library. + MPPCameraInputSource* _cameraSource; + + /// Inform the user when camera is unavailable. + IBOutlet UILabel* _noCameraLabel; + /// Display the camera preview frames. + IBOutlet UIView* _liveView; + /// Render frames in a layer. + MPPLayerRenderer* _renderer; + + /// Process camera frames on this queue. + dispatch_queue_t _videoQueue; +} + +#pragma mark - Cleanup methods + +- (void)dealloc { + self.mediapipeGraph.delegate = nil; + [self.mediapipeGraph cancel]; + // Ignore errors since we're cleaning up. + [self.mediapipeGraph closeAllInputStreamsWithError:nil]; + [self.mediapipeGraph waitUntilDoneWithError:nil]; +} + +#pragma mark - MediaPipe graph methods + ++ (MPPGraph*)loadGraphFromResource:(NSString*)resource { + // Load the graph config resource.
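+  // The resource is the compiled face_mesh_mobile_gpu.binarypb bundled with the app (produced by +  // the mediapipe_binary_graph rule in mediapipe/graphs/face_mesh/BUILD); it is parsed below into +  // a mediapipe::CalculatorGraphConfig before the MPPGraph wrapper is created.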
+ NSError* configLoadError = nil; + NSBundle* bundle = [NSBundle bundleForClass:[self class]]; + if (!resource || resource.length == 0) { + return nil; + } + NSURL* graphURL = [bundle URLForResource:resource withExtension:@"binarypb"]; + NSData* data = [NSData dataWithContentsOfURL:graphURL options:0 error:&configLoadError]; + if (!data) { + NSLog(@"Failed to load MediaPipe graph config: %@", configLoadError); + return nil; + } + + // Parse the graph config resource into mediapipe::CalculatorGraphConfig proto object. + mediapipe::CalculatorGraphConfig config; + config.ParseFromArray(data.bytes, data.length); + + // Create MediaPipe graph with mediapipe::CalculatorGraphConfig proto object. + MPPGraph* newGraph = [[MPPGraph alloc] initWithGraphConfig:config]; + [newGraph addFrameOutputStream:kOutputStream outputPacketType:MPPPacketTypePixelBuffer]; + [newGraph addFrameOutputStream:kLandmarksOutputStream outputPacketType:MPPPacketTypeRaw]; + [newGraph setSidePacket:(mediapipe::MakePacket(kNumFaces)) named:kNumFacesInputSidePacket]; + return newGraph; +} + +#pragma mark - UIViewController methods + +- (void)viewDidLoad { + [super viewDidLoad]; + + _renderer = [[MPPLayerRenderer alloc] init]; + _renderer.layer.frame = _liveView.layer.bounds; + [_liveView.layer addSublayer:_renderer.layer]; + _renderer.frameScaleMode = MPPFrameScaleModeFillAndCrop; + // When using the front camera, mirror the input for a more natural look. + _renderer.mirrored = YES; + + dispatch_queue_attr_t qosAttribute = dispatch_queue_attr_make_with_qos_class( + DISPATCH_QUEUE_SERIAL, QOS_CLASS_USER_INTERACTIVE, /*relative_priority=*/0); + _videoQueue = dispatch_queue_create(kVideoQueueLabel, qosAttribute); + + _cameraSource = [[MPPCameraInputSource alloc] init]; + [_cameraSource setDelegate:self queue:_videoQueue]; + _cameraSource.sessionPreset = AVCaptureSessionPresetHigh; + _cameraSource.cameraPosition = AVCaptureDevicePositionFront; + // The frame's native format is rotated with respect to the portrait orientation. + _cameraSource.orientation = AVCaptureVideoOrientationPortrait; + + self.mediapipeGraph = [[self class] loadGraphFromResource:kGraphName]; + self.mediapipeGraph.delegate = self; + // Set maxFramesInFlight to a small value to avoid memory contention for real-time processing. + self.mediapipeGraph.maxFramesInFlight = 2; +} + +// In this application, there is only one ViewController which has no navigation to other view +// controllers, and there is only one View with live display showing the result of running the +// MediaPipe graph on the live video feed. If more view controllers are needed later, the graph +// setup/teardown and camera start/stop logic should be updated appropriately in response to the +// appearance/disappearance of this ViewController, as viewWillAppear: can be invoked multiple times +// depending on the application navigation flow in that case. +- (void)viewWillAppear:(BOOL)animated { + [super viewWillAppear:animated]; + + [_cameraSource requestCameraAccessWithCompletionHandler:^void(BOOL granted) { + if (granted) { + [self startGraphAndCamera]; + dispatch_async(dispatch_get_main_queue(), ^{ + _noCameraLabel.hidden = YES; + }); + } + }]; +} + +- (void)startGraphAndCamera { + // Start running self.mediapipeGraph. + NSError* error; + if (![self.mediapipeGraph startWithError:&error]) { + NSLog(@"Failed to start graph: %@", error); + } + + // Start fetching frames from the camera. 
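+  // The camera is started on the dedicated serial _videoQueue; MPPCameraInputSource then delivers +  // frames to processVideoFrame:timestamp:fromSource: on that same queue, which forwards them +  // into the graph's "input_video" stream.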
+ dispatch_async(_videoQueue, ^{ + [_cameraSource start]; + }); +} + +#pragma mark - MPPGraphDelegate methods + +// Receives CVPixelBufferRef from the MediaPipe graph. Invoked on a MediaPipe worker thread. +- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPixelBuffer:(CVPixelBufferRef)pixelBuffer + fromStream:(const std::string&)streamName { + if (streamName == kOutputStream) { + // Display the captured image on the screen. + CVPixelBufferRetain(pixelBuffer); + dispatch_async(dispatch_get_main_queue(), ^{ + [_renderer renderPixelBuffer:pixelBuffer]; + CVPixelBufferRelease(pixelBuffer); + }); + } +} + +// Receives a raw packet from the MediaPipe graph. Invoked on a MediaPipe worker thread. +- (void)mediapipeGraph:(MPPGraph*)graph + didOutputPacket:(const ::mediapipe::Packet&)packet + fromStream:(const std::string&)streamName { + if (streamName == kLandmarksOutputStream) { + if (packet.IsEmpty()) { + NSLog(@"[TS:%lld] No face landmarks", packet.Timestamp().Value()); + return; + } + const auto& multi_face_landmarks = packet.Get>(); + NSLog(@"[TS:%lld] Number of face instances with landmarks: %lu", packet.Timestamp().Value(), + multi_face_landmarks.size()); + for (int face_index = 0; face_index < multi_face_landmarks.size(); ++face_index) { + const auto& landmarks = multi_face_landmarks[face_index]; + NSLog(@"\tNumber of landmarks for face[%d]: %d", face_index, landmarks.landmark_size()); + for (int i = 0; i < landmarks.landmark_size(); ++i) { + NSLog(@"\t\tLandmark[%d]: (%f, %f, %f)", i, landmarks.landmark(i).x(), + landmarks.landmark(i).y(), landmarks.landmark(i).z()); + } + } + } +} + +#pragma mark - MPPInputSourceDelegate methods + +// Must be invoked on _videoQueue. +- (void)processVideoFrame:(CVPixelBufferRef)imageBuffer + timestamp:(CMTime)timestamp + fromSource:(MPPInputSource*)source { + if (source != _cameraSource) { + NSLog(@"Unknown source: %@", source); + return; + } + [self.mediapipeGraph sendPixelBuffer:imageBuffer + intoStream:kInputStream + packetType:MPPPacketTypePixelBuffer]; +} + +@end diff --git a/mediapipe/examples/ios/facemeshgpu/main.m b/mediapipe/examples/ios/facemeshgpu/main.m new file mode 100644 index 000000000..7ffe5ea5d --- /dev/null +++ b/mediapipe/examples/ios/facemeshgpu/main.m @@ -0,0 +1,22 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
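+ +// Standard UIKit entry point; all MediaPipe-specific setup for the FaceMeshGpu example lives in +// ViewController.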
+ +#import +#import "AppDelegate.h" + +int main(int argc, char * argv[]) { + @autoreleasepool { + return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); + } +} diff --git a/mediapipe/framework/calculator_graph_bounds_test.cc b/mediapipe/framework/calculator_graph_bounds_test.cc index 4de8ffb7b..8f1d2faa0 100644 --- a/mediapipe/framework/calculator_graph_bounds_test.cc +++ b/mediapipe/framework/calculator_graph_bounds_test.cc @@ -28,6 +28,8 @@ namespace mediapipe { namespace { +constexpr int kIntTestValue = 33; + typedef std::function<::mediapipe::Status(CalculatorContext* cc)> CalculatorContextFunction; @@ -617,8 +619,9 @@ TEST(CalculatorGraphBoundsTest, ImmediateHandlerBounds) { MP_ASSERT_OK(graph.WaitUntilIdle()); // Add four packets into the graph. - for (int i = 0; i < 4; ++i) { - Packet p = MakePacket(33).At(Timestamp(i)); + constexpr int kNumInputs = 4; + for (int i = 0; i < kNumInputs; ++i) { + Packet p = MakePacket(kIntTestValue).At(Timestamp(i)); MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); } @@ -709,7 +712,7 @@ REGISTER_CALCULATOR(FuturePacketCalculator); // produces no output packets. TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) { // OffsetBoundCalculator produces only timestamp bounds. - // The PassthroughCalculator delivers an output packet whenever the + // The PassThroughCalculator delivers an output packet whenever the // OffsetBoundCalculator delivers a timestamp bound. CalculatorGraphConfig config = ::mediapipe::ParseTextProtoOrDie(R"( @@ -740,7 +743,7 @@ TEST(CalculatorGraphBoundsTest, OffsetBoundPropagation) { // Add four packets into the graph. constexpr int kNumInputs = 4; for (int i = 0; i < kNumInputs; ++i) { - Packet p = MakePacket(33).At(Timestamp(i)); + Packet p = MakePacket(kIntTestValue).At(Timestamp(i)); MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); } @@ -791,12 +794,15 @@ TEST(CalculatorGraphBoundsTest, BoundWithoutInputPackets) { // Add four packets into the graph. constexpr int kNumInputs = 4; for (int i = 0; i < kNumInputs; ++i) { - Packet p = MakePacket(33).At(Timestamp(i)); + Packet p = MakePacket(kIntTestValue).At(Timestamp(i)); MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); MP_ASSERT_OK(graph.WaitUntilIdle()); } - // No packets arrive, because updated timestamp bounds do not invoke + // No packets arrive, because FuturePacketCalculator produces 4 packets but + // OffsetBoundCalculator relays only the 4 timestamps without any packets, and + // BoundToPacketCalculator does not process timestamps using + // SetProcessTimestampBounds. Thus, the graph does not invoke // BoundToPacketCalculator::Process. MP_ASSERT_OK(graph.WaitUntilIdle()); EXPECT_EQ(output_packets.size(), 0); @@ -1138,6 +1144,8 @@ class ProcessBoundToPacketCalculator : public CalculatorBase { ::mediapipe::Status Process(CalculatorContext* cc) final { for (int i = 0; i < cc->Outputs().NumEntries(); ++i) { Timestamp t = cc->Inputs().Index(i).Value().Timestamp(); + // Create a new packet for each input stream with a new timestamp bound, + // as long as the new timestamp satisfies the output timestamp bound. 
if (t == cc->InputTimestamp() && t >= cc->Outputs().Index(i).NextTimestampBound()) { cc->Outputs().Index(i).Add(new auto(t), t); @@ -1168,6 +1176,8 @@ class ImmediatePassthroughCalculator : public CalculatorBase { if (!cc->Inputs().Index(i).IsEmpty()) { cc->Outputs().Index(i).AddPacket(cc->Inputs().Index(i).Value()); } else { + // Update the output stream "i" nextTimestampBound to the timestamp at + // which a packet may next be available in input stream "i". Timestamp input_bound = cc->Inputs().Index(i).Value().Timestamp().NextAllowedInStream(); if (cc->Outputs().Index(i).NextTimestampBound() < input_bound) { @@ -1219,33 +1229,22 @@ void TestProcessForEmptyInputs(const std::string& input_stream_handler) { MP_ASSERT_OK(graph.StartRun({})); MP_ASSERT_OK(graph.WaitUntilIdle()); - // Add four packets into the graph. + // Add four packets into the graph at ts {0, 10, 20, 30}. constexpr int kFutureMicros = FuturePacketCalculator::kOutputFutureMicros; - Packet p; - p = MakePacket(33).At(Timestamp(0)); - MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); - MP_ASSERT_OK(graph.WaitUntilIdle()); + constexpr int kNumInputs = 4; + std::vector expected; + for (int i = 0; i < kNumInputs; ++i) { + const int ts = i * 10; + Packet p = MakePacket(kIntTestValue).At(Timestamp(ts)); + MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); + MP_ASSERT_OK(graph.WaitUntilIdle()); - p = MakePacket(33).At(Timestamp(10)); - MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); - MP_ASSERT_OK(graph.WaitUntilIdle()); - - p = MakePacket(33).At(Timestamp(20)); - MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); - MP_ASSERT_OK(graph.WaitUntilIdle()); - - p = MakePacket(33).At(Timestamp(30)); - MP_ASSERT_OK(graph.AddPacketToInputStream("input", p)); - MP_ASSERT_OK(graph.WaitUntilIdle()); + expected.emplace_back(Timestamp(ts + kFutureMicros)); + } // Packets arrive. MP_ASSERT_OK(graph.WaitUntilIdle()); - EXPECT_EQ(bounds_ts_packets.size(), 4); - - std::vector expected = { - Timestamp(0 + kFutureMicros), Timestamp(10 + kFutureMicros), - Timestamp(20 + kFutureMicros), Timestamp(30 + kFutureMicros)}; - EXPECT_EQ(GetContents(bounds_ts_packets), expected); + EXPECT_EQ(bounds_ts_packets.size(), kNumInputs); // Shutdown the graph. MP_ASSERT_OK(graph.CloseAllPacketSources()); @@ -1335,34 +1334,41 @@ TEST(CalculatorGraphBoundsTest, ProcessTimestampBounds_Passthrough) { MP_ASSERT_OK(graph.WaitUntilIdle()); // Add four packets to input_0. - for (int i = 0; i < 4; ++i) { - Packet p = MakePacket(33).At(Timestamp(i * 10)); + constexpr int kNumInputs0 = 4; + std::vector expected_output_0; + for (int i = 0; i < kNumInputs0; ++i) { + const int ts = i * 10; + Packet p = MakePacket(kIntTestValue).At(Timestamp(ts)); MP_ASSERT_OK(graph.AddPacketToInputStream("input_0", p)); MP_ASSERT_OK(graph.WaitUntilIdle()); + + expected_output_0.emplace_back(Timestamp(ts)); } // Packets arrive. MP_ASSERT_OK(graph.WaitUntilIdle()); - EXPECT_EQ(output_0_packets.size(), 4); + EXPECT_EQ(output_0_packets.size(), kNumInputs0); + // No packets were pushed in "input_1". EXPECT_EQ(output_1_packets.size(), 0); - std::vector expected = // - {Timestamp(0), Timestamp(10), Timestamp(20), Timestamp(30)}; - EXPECT_EQ(GetContents(output_0_packets), expected); + EXPECT_EQ(GetContents(output_0_packets), expected_output_0); - // Add two timestamp bounds to bound_1. - for (int i = 0; i < 2; ++i) { - Packet p = MakePacket(33).At(Timestamp(10 + i * 10)); + // Add two timestamp bounds to "input_1" and update "bound_1" at {10, 20}. 
+ constexpr int kNumInputs1 = 2; + std::vector expected_output_1; + for (int i = 0; i < kNumInputs1; ++i) { + const int ts = 10 + i * 10; + Packet p = MakePacket(kIntTestValue).At(Timestamp(ts)); MP_ASSERT_OK(graph.AddPacketToInputStream("input_1", p)); MP_ASSERT_OK(graph.WaitUntilIdle()); + + expected_output_1.emplace_back(Timestamp(ts)); } // Bounds arrive. MP_ASSERT_OK(graph.WaitUntilIdle()); - EXPECT_EQ(output_0_packets.size(), 4); - EXPECT_EQ(output_1_packets.size(), 2); - expected = // - {Timestamp(10), Timestamp(20)}; - EXPECT_EQ(GetContents(output_1_packets), expected); + EXPECT_EQ(output_0_packets.size(), kNumInputs0); + EXPECT_EQ(output_1_packets.size(), kNumInputs1); + EXPECT_EQ(GetContents(output_1_packets), expected_output_1); // Shutdown the graph. MP_ASSERT_OK(graph.CloseAllPacketSources()); diff --git a/mediapipe/framework/profiler/BUILD b/mediapipe/framework/profiler/BUILD index f3d0d0ddc..e266fb867 100644 --- a/mediapipe/framework/profiler/BUILD +++ b/mediapipe/framework/profiler/BUILD @@ -186,6 +186,7 @@ cc_library( "//mediapipe/framework:packet", "//mediapipe/framework:timestamp", "//mediapipe/framework/port:integral_types", + "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/time", ], ) diff --git a/mediapipe/framework/profiler/trace_builder.cc b/mediapipe/framework/profiler/trace_builder.cc index c33e14b8f..ff20f9c91 100644 --- a/mediapipe/framework/profiler/trace_builder.cc +++ b/mediapipe/framework/profiler/trace_builder.cc @@ -24,6 +24,7 @@ #include #include +#include "absl/container/node_hash_map.h" #include "mediapipe/framework/calculator_profile.pb.h" #include "mediapipe/framework/packet.h" #include "mediapipe/framework/port/integral_types.h" @@ -130,10 +131,10 @@ class AddressIdMap { return pointer_id_map_[id] = next_id++; } void clear() { pointer_id_map_.clear(); } - const std::unordered_map& map() { return pointer_id_map_; } + const absl::node_hash_map& map() { return pointer_id_map_; } private: - std::unordered_map pointer_id_map_; + absl::node_hash_map pointer_id_map_; int32 next_id = 0; }; diff --git a/mediapipe/framework/test_calculators.cc b/mediapipe/framework/test_calculators.cc index f3d1f0c79..8bcf59baf 100644 --- a/mediapipe/framework/test_calculators.cc +++ b/mediapipe/framework/test_calculators.cc @@ -568,7 +568,7 @@ class LambdaCalculator : public CalculatorBase { if (cc->InputSidePackets().HasTag("") > 0) { cc->InputSidePackets().Tag("").Set(); } - for (std::string tag : {"OPEN", "PROCESS", "CLOSE"}) { + for (const std::string& tag : {"OPEN", "PROCESS", "CLOSE"}) { if (cc->InputSidePackets().HasTag(tag)) { cc->InputSidePackets().Tag(tag).Set(); } diff --git a/mediapipe/framework/tool/subgraph_expansion.cc b/mediapipe/framework/tool/subgraph_expansion.cc index 9b9a50fb5..67487a582 100644 --- a/mediapipe/framework/tool/subgraph_expansion.cc +++ b/mediapipe/framework/tool/subgraph_expansion.cc @@ -150,7 +150,7 @@ static ::mediapipe::Status PrefixNames(std::string prefix, const proto_ns::RepeatedPtrField& dst_streams) { ASSIGN_OR_RETURN(auto src_map, tool::TagMap::Create(src_streams)); ASSIGN_OR_RETURN(auto dst_map, tool::TagMap::Create(dst_streams)); - for (auto it : dst_map->Mapping()) { + for (const auto& it : dst_map->Mapping()) { const std::string& tag = it.first; const TagMap::TagData* src_tag_data = ::mediapipe::FindOrNull(src_map->Mapping(), tag); diff --git a/mediapipe/graphs/face_mesh/BUILD b/mediapipe/graphs/face_mesh/BUILD new file mode 100644 index 000000000..961f2907b --- /dev/null +++ 
b/mediapipe/graphs/face_mesh/BUILD @@ -0,0 +1,69 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_binary_graph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "desktop_calculators", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/video:opencv_video_decoder_calculator", + "//mediapipe/calculators/video:opencv_video_encoder_calculator", + "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu", + "//mediapipe/modules/face_landmark:face_landmark_front_cpu", + ], +) + +cc_library( + name = "desktop_live_calculators", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_cpu", + "//mediapipe/modules/face_landmark:face_landmark_front_cpu", + ], +) + +cc_library( + name = "desktop_live_gpu_calculators", + deps = [ + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu", + "//mediapipe/modules/face_landmark:face_landmark_front_gpu", + ], +) + +cc_library( + name = "mobile_calculators", + deps = [ + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/graphs/face_mesh/subgraphs:face_renderer_gpu", + "//mediapipe/modules/face_landmark:face_landmark_front_gpu", + ], +) + +mediapipe_binary_graph( + name = "face_mesh_mobile_gpu_binary_graph", + graph = "face_mesh_mobile.pbtxt", + output_name = "face_mesh_mobile_gpu.binarypb", + deps = [":mobile_calculators"], +) diff --git a/mediapipe/graphs/face_mesh/face_mesh_desktop.pbtxt b/mediapipe/graphs/face_mesh/face_mesh_desktop.pbtxt new file mode 100644 index 000000000..c3aa3945d --- /dev/null +++ b/mediapipe/graphs/face_mesh/face_mesh_desktop.pbtxt @@ -0,0 +1,67 @@ +# MediaPipe graph that performs face mesh on desktop with TensorFlow Lite +# on CPU. + +# Path to the input video file. (string) +input_side_packet: "input_video_path" +# Path to the output video file. (string) +input_side_packet: "output_video_path" + +# max_queue_size limits the number of packets enqueued on any input stream +# by throttling inputs to the graph. This makes the graph only process one +# frame per time. +max_queue_size: 1 + +# Decodes an input video file into images and a video header. +node { + calculator: "OpenCvVideoDecoderCalculator" + input_side_packet: "INPUT_FILE_PATH:input_video_path" + output_stream: "VIDEO:input_video" + output_stream: "VIDEO_PRESTREAM:input_video_header" +} + +# Defines side packets for further use in the graph. 
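+# Here a single int side packet ("num_faces") caps the number of faces that the +# FaceLandmarkFrontCpu subgraph detects and tracks; this desktop graph fixes it at 1.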
+node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:num_faces" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: 1 } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontCpu" + input_stream: "IMAGE:input_video" + input_side_packet: "NUM_FACES:num_faces" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Subgraph that renders face-landmark annotation onto the input video. +node { + calculator: "FaceRendererCpu" + input_stream: "IMAGE:input_video" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video" +} + +# Encodes the annotated images into a video file, adopting properties specified +# in the input video header, e.g., video framerate. +node { + calculator: "OpenCvVideoEncoderCalculator" + input_stream: "VIDEO:output_video" + input_stream: "VIDEO_PRESTREAM:input_video_header" + input_side_packet: "OUTPUT_FILE_PATH:output_video_path" + node_options: { + [type.googleapis.com/mediapipe.OpenCvVideoEncoderCalculatorOptions]: { + codec: "avc1" + video_format: "mp4" + } + } +} diff --git a/mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt b/mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt new file mode 100644 index 000000000..57654436a --- /dev/null +++ b/mediapipe/graphs/face_mesh/face_mesh_desktop_live.pbtxt @@ -0,0 +1,63 @@ +# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU. + +# Input image. (ImageFrame) +input_stream: "input_video" + +# Output image with rendered results. (ImageFrame) +output_stream: "output_video" +# Collection of detected/processed faces, each represented as a list of +# landmarks. (std::vector) +output_stream: "multi_face_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:num_faces" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: 1 } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. 
+node { + calculator: "FaceLandmarkFrontCpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Subgraph that renders face-landmark annotation onto the input image. +node { + calculator: "FaceRendererCpu" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt b/mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt new file mode 100644 index 000000000..cfa75c2c7 --- /dev/null +++ b/mediapipe/graphs/face_mesh/face_mesh_desktop_live_gpu.pbtxt @@ -0,0 +1,63 @@ +# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU. + +# Input image. (GpuBuffer) +input_stream: "input_video" + +# Output image with rendered results. (GpuBuffer) +output_stream: "output_video" +# Collection of detected/processed faces, each represented as a list of +# landmarks. (std::vector) +output_stream: "multi_face_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:num_faces" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: 1 } + } + } +} + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontGpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Subgraph that renders face-landmark annotation onto the input image. 
+node { + calculator: "FaceRendererGpu" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt b/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt new file mode 100644 index 000000000..bf176765a --- /dev/null +++ b/mediapipe/graphs/face_mesh/face_mesh_mobile.pbtxt @@ -0,0 +1,55 @@ +# MediaPipe graph that performs face mesh with TensorFlow Lite on GPU. + +# GPU buffer. (GpuBuffer) +input_stream: "input_video" + +# Max number of faces to detect/process. (int) +input_side_packet: "num_faces" + +# Output image with rendered results. (GpuBuffer) +output_stream: "output_video" +# Collection of detected/processed faces, each represented as a list of +# landmarks. (std::vector) +output_stream: "multi_face_landmarks" + +# Throttles the images flowing downstream for flow control. It passes through +# the very first incoming image unaltered, and waits for downstream nodes +# (calculators and subgraphs) in the graph to finish their tasks before it +# passes through another image. All images that come in while waiting are +# dropped, limiting the number of in-flight images in most part of the graph to +# 1. This prevents the downstream nodes from queuing up incoming images and data +# excessively, which leads to increased latency and memory usage, unwanted in +# real-time mobile applications. It also eliminates unnecessarily computation, +# e.g., the output produced by a node may get dropped downstream if the +# subsequent nodes are still busy processing previous inputs. +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:output_video" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontGpu" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" +} + +# Subgraph that renders face-landmark annotation onto the input image. +node { + calculator: "FaceRendererGpu" + input_stream: "IMAGE:throttled_input_video" + input_stream: "LANDMARKS:multi_face_landmarks" + input_stream: "NORM_RECTS:face_rects_from_landmarks" + input_stream: "DETECTIONS:face_detections" + output_stream: "IMAGE:output_video" +} diff --git a/mediapipe/graphs/face_mesh/subgraphs/BUILD b/mediapipe/graphs/face_mesh/subgraphs/BUILD new file mode 100644 index 000000000..7de55bd0b --- /dev/null +++ b/mediapipe/graphs/face_mesh/subgraphs/BUILD @@ -0,0 +1,51 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "renderer_calculators", + deps = [ + "//mediapipe/calculators/core:split_normalized_landmark_list_calculator", + "//mediapipe/calculators/util:annotation_overlay_calculator", + "//mediapipe/calculators/util:detections_to_render_data_calculator", + "//mediapipe/calculators/util:landmarks_to_render_data_calculator", + "//mediapipe/calculators/util:rect_to_render_data_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_renderer_gpu", + graph = "face_renderer_gpu.pbtxt", + register_as = "FaceRendererGpu", + deps = [ + ":renderer_calculators", + ], +) + +mediapipe_simple_subgraph( + name = "face_renderer_cpu", + graph = "face_renderer_cpu.pbtxt", + register_as = "FaceRendererCpu", + deps = [ + ":renderer_calculators", + ], +) diff --git a/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt new file mode 100644 index 000000000..eee7496a1 --- /dev/null +++ b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_cpu.pbtxt @@ -0,0 +1,350 @@ +# MediaPipe face mesh rendering subgraph. + +type: "FaceRendererCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:input_image" +# Collection of detected/predicted faces, each represented as a list of +# landmarks. (std::vector) +input_stream: "LANDMARKS:multi_face_landmarks" +# Regions of interest calculated based on palm detections. +# (std::vector) +input_stream: "NORM_RECTS:rects" +# Detected palms. (std::vector) +input_stream: "DETECTIONS:detections" + +# CPU image with rendered data. (ImageFrame) +output_stream: "IMAGE:output_image" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:input_image" + output_stream: "SIZE:image_size" +} + +# Converts detections to drawing primitives for annotation overlay. +node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTIONS:detections" + output_stream: "RENDER_DATA:detections_render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 0 g: 255 b: 0 } + } + } +} + +# Outputs each element of multi_face_landmarks at a fake timestamp for the rest +# of the graph to process. At the end of the loop, outputs the BATCH_END +# timestamp for downstream calculators to inform them that all elements in the +# vector have been processed. +node { + calculator: "BeginLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITERABLE:multi_face_landmarks" + output_stream: "ITEM:face_landmarks" + output_stream: "BATCH_END:landmark_timestamp" +} + +# Converts landmarks to drawing primitives for annotation overlay. +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks" + output_stream: "RENDER_DATA:landmark_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + # Lips. 
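+      # Each consecutive pair of landmark_connections values below (e.g. 61 -> 146) defines one +      # edge to be drawn between the two face-mesh landmarks with those indices.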
+ landmark_connections: 61 + landmark_connections: 146 + landmark_connections: 146 + landmark_connections: 91 + landmark_connections: 91 + landmark_connections: 181 + landmark_connections: 181 + landmark_connections: 84 + landmark_connections: 84 + landmark_connections: 17 + landmark_connections: 17 + landmark_connections: 314 + landmark_connections: 314 + landmark_connections: 405 + landmark_connections: 405 + landmark_connections: 321 + landmark_connections: 321 + landmark_connections: 375 + landmark_connections: 375 + landmark_connections: 291 + landmark_connections: 61 + landmark_connections: 185 + landmark_connections: 185 + landmark_connections: 40 + landmark_connections: 40 + landmark_connections: 39 + landmark_connections: 39 + landmark_connections: 37 + landmark_connections: 37 + landmark_connections: 0 + landmark_connections: 0 + landmark_connections: 267 + landmark_connections: 267 + landmark_connections: 269 + landmark_connections: 269 + landmark_connections: 270 + landmark_connections: 270 + landmark_connections: 409 + landmark_connections: 409 + landmark_connections: 291 + landmark_connections: 78 + landmark_connections: 95 + landmark_connections: 95 + landmark_connections: 88 + landmark_connections: 88 + landmark_connections: 178 + landmark_connections: 178 + landmark_connections: 87 + landmark_connections: 87 + landmark_connections: 14 + landmark_connections: 14 + landmark_connections: 317 + landmark_connections: 317 + landmark_connections: 402 + landmark_connections: 402 + landmark_connections: 318 + landmark_connections: 318 + landmark_connections: 324 + landmark_connections: 324 + landmark_connections: 308 + landmark_connections: 78 + landmark_connections: 191 + landmark_connections: 191 + landmark_connections: 80 + landmark_connections: 80 + landmark_connections: 81 + landmark_connections: 81 + landmark_connections: 82 + landmark_connections: 82 + landmark_connections: 13 + landmark_connections: 13 + landmark_connections: 312 + landmark_connections: 312 + landmark_connections: 311 + landmark_connections: 311 + landmark_connections: 310 + landmark_connections: 310 + landmark_connections: 415 + landmark_connections: 415 + landmark_connections: 308 + # Left eye. + landmark_connections: 33 + landmark_connections: 7 + landmark_connections: 7 + landmark_connections: 163 + landmark_connections: 163 + landmark_connections: 144 + landmark_connections: 144 + landmark_connections: 145 + landmark_connections: 145 + landmark_connections: 153 + landmark_connections: 153 + landmark_connections: 154 + landmark_connections: 154 + landmark_connections: 155 + landmark_connections: 155 + landmark_connections: 133 + landmark_connections: 33 + landmark_connections: 246 + landmark_connections: 246 + landmark_connections: 161 + landmark_connections: 161 + landmark_connections: 160 + landmark_connections: 160 + landmark_connections: 159 + landmark_connections: 159 + landmark_connections: 158 + landmark_connections: 158 + landmark_connections: 157 + landmark_connections: 157 + landmark_connections: 173 + landmark_connections: 173 + landmark_connections: 133 + # Left eyebrow. 
+ landmark_connections: 46 + landmark_connections: 53 + landmark_connections: 53 + landmark_connections: 52 + landmark_connections: 52 + landmark_connections: 65 + landmark_connections: 65 + landmark_connections: 55 + landmark_connections: 70 + landmark_connections: 63 + landmark_connections: 63 + landmark_connections: 105 + landmark_connections: 105 + landmark_connections: 66 + landmark_connections: 66 + landmark_connections: 107 + # Right eye. + landmark_connections: 263 + landmark_connections: 249 + landmark_connections: 249 + landmark_connections: 390 + landmark_connections: 390 + landmark_connections: 373 + landmark_connections: 373 + landmark_connections: 374 + landmark_connections: 374 + landmark_connections: 380 + landmark_connections: 380 + landmark_connections: 381 + landmark_connections: 381 + landmark_connections: 382 + landmark_connections: 382 + landmark_connections: 362 + landmark_connections: 263 + landmark_connections: 466 + landmark_connections: 466 + landmark_connections: 388 + landmark_connections: 388 + landmark_connections: 387 + landmark_connections: 387 + landmark_connections: 386 + landmark_connections: 386 + landmark_connections: 385 + landmark_connections: 385 + landmark_connections: 384 + landmark_connections: 384 + landmark_connections: 398 + landmark_connections: 398 + landmark_connections: 362 + # Right eyebrow. + landmark_connections: 276 + landmark_connections: 283 + landmark_connections: 283 + landmark_connections: 282 + landmark_connections: 282 + landmark_connections: 295 + landmark_connections: 295 + landmark_connections: 285 + landmark_connections: 300 + landmark_connections: 293 + landmark_connections: 293 + landmark_connections: 334 + landmark_connections: 334 + landmark_connections: 296 + landmark_connections: 296 + landmark_connections: 336 + # Face oval. 
+ landmark_connections: 10 + landmark_connections: 338 + landmark_connections: 338 + landmark_connections: 297 + landmark_connections: 297 + landmark_connections: 332 + landmark_connections: 332 + landmark_connections: 284 + landmark_connections: 284 + landmark_connections: 251 + landmark_connections: 251 + landmark_connections: 389 + landmark_connections: 389 + landmark_connections: 356 + landmark_connections: 356 + landmark_connections: 454 + landmark_connections: 454 + landmark_connections: 323 + landmark_connections: 323 + landmark_connections: 361 + landmark_connections: 361 + landmark_connections: 288 + landmark_connections: 288 + landmark_connections: 397 + landmark_connections: 397 + landmark_connections: 365 + landmark_connections: 365 + landmark_connections: 379 + landmark_connections: 379 + landmark_connections: 378 + landmark_connections: 378 + landmark_connections: 400 + landmark_connections: 400 + landmark_connections: 377 + landmark_connections: 377 + landmark_connections: 152 + landmark_connections: 152 + landmark_connections: 148 + landmark_connections: 148 + landmark_connections: 176 + landmark_connections: 176 + landmark_connections: 149 + landmark_connections: 149 + landmark_connections: 150 + landmark_connections: 150 + landmark_connections: 136 + landmark_connections: 136 + landmark_connections: 172 + landmark_connections: 172 + landmark_connections: 58 + landmark_connections: 58 + landmark_connections: 132 + landmark_connections: 132 + landmark_connections: 93 + landmark_connections: 93 + landmark_connections: 234 + landmark_connections: 234 + landmark_connections: 127 + landmark_connections: 127 + landmark_connections: 162 + landmark_connections: 162 + landmark_connections: 21 + landmark_connections: 21 + landmark_connections: 54 + landmark_connections: 54 + landmark_connections: 103 + landmark_connections: 103 + landmark_connections: 67 + landmark_connections: 67 + landmark_connections: 109 + landmark_connections: 109 + landmark_connections: 10 + landmark_color { r: 255 g: 0 b: 0 } + connection_color { r: 0 g: 255 b: 0 } + thickness: 1.5 + visualize_landmark_depth: false + } + } +} + +# Collects a RenderData object for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END +# timestamp. +node { + calculator: "EndLoopRenderDataCalculator" + input_stream: "ITEM:landmark_render_data" + input_stream: "BATCH_END:landmark_timestamp" + output_stream: "ITERABLE:multi_face_landmarks_render_data" +} + +# Converts normalized rects to drawing primitives for annotation overlay. +node { + calculator: "RectToRenderDataCalculator" + input_stream: "NORM_RECTS:rects" + output_stream: "RENDER_DATA:rects_render_data" + node_options: { + [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] { + filled: false + color { r: 255 g: 0 b: 0 } + thickness: 4.0 + } + } +} + +# Draws annotations and overlays them on top of the input images. 
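+# AnnotationOverlayCalculator treats its untagged input streams as individual RenderData and the +# VECTOR:0 input as a vector of RenderData (one entry per face), so the detection, per-face +# landmark, and rect annotations are all drawn onto a single output image.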
+node { + calculator: "AnnotationOverlayCalculator" + input_stream: "IMAGE:input_image" + input_stream: "detections_render_data" + input_stream: "VECTOR:0:multi_face_landmarks_render_data" + input_stream: "rects_render_data" + output_stream: "IMAGE:output_image" +} diff --git a/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt new file mode 100644 index 000000000..6de89656b --- /dev/null +++ b/mediapipe/graphs/face_mesh/subgraphs/face_renderer_gpu.pbtxt @@ -0,0 +1,350 @@ +# MediaPipe face mesh rendering subgraph. + +type: "FaceRendererGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:input_image" +# Collection of detected/predicted faces, each represented as a list of +# landmarks. (std::vector) +input_stream: "LANDMARKS:multi_face_landmarks" +# Regions of interest calculated based on palm detections. +# (std::vector) +input_stream: "NORM_RECTS:rects" +# Detected palms. (std::vector) +input_stream: "DETECTIONS:detections" + +# GPU image with rendered data. (GpuBuffer) +output_stream: "IMAGE:output_image" + +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:input_image" + output_stream: "SIZE:image_size" +} + +# Converts detections to drawing primitives for annotation overlay. +node { + calculator: "DetectionsToRenderDataCalculator" + input_stream: "DETECTIONS:detections" + output_stream: "RENDER_DATA:detections_render_data" + node_options: { + [type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] { + thickness: 4.0 + color { r: 0 g: 255 b: 0 } + } + } +} + +# Outputs each element of multi_face_landmarks at a fake timestamp for the rest +# of the graph to process. At the end of the loop, outputs the BATCH_END +# timestamp for downstream calculators to inform them that all elements in the +# vector have been processed. +node { + calculator: "BeginLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITERABLE:multi_face_landmarks" + output_stream: "ITEM:face_landmarks" + output_stream: "BATCH_END:end_timestamp" +} + +# Converts landmarks to drawing primitives for annotation overlay. +node { + calculator: "LandmarksToRenderDataCalculator" + input_stream: "NORM_LANDMARKS:face_landmarks" + output_stream: "RENDER_DATA:landmarks_render_data" + node_options: { + [type.googleapis.com/mediapipe.LandmarksToRenderDataCalculatorOptions] { + # Lips. 
+ landmark_connections: 61 + landmark_connections: 146 + landmark_connections: 146 + landmark_connections: 91 + landmark_connections: 91 + landmark_connections: 181 + landmark_connections: 181 + landmark_connections: 84 + landmark_connections: 84 + landmark_connections: 17 + landmark_connections: 17 + landmark_connections: 314 + landmark_connections: 314 + landmark_connections: 405 + landmark_connections: 405 + landmark_connections: 321 + landmark_connections: 321 + landmark_connections: 375 + landmark_connections: 375 + landmark_connections: 291 + landmark_connections: 61 + landmark_connections: 185 + landmark_connections: 185 + landmark_connections: 40 + landmark_connections: 40 + landmark_connections: 39 + landmark_connections: 39 + landmark_connections: 37 + landmark_connections: 37 + landmark_connections: 0 + landmark_connections: 0 + landmark_connections: 267 + landmark_connections: 267 + landmark_connections: 269 + landmark_connections: 269 + landmark_connections: 270 + landmark_connections: 270 + landmark_connections: 409 + landmark_connections: 409 + landmark_connections: 291 + landmark_connections: 78 + landmark_connections: 95 + landmark_connections: 95 + landmark_connections: 88 + landmark_connections: 88 + landmark_connections: 178 + landmark_connections: 178 + landmark_connections: 87 + landmark_connections: 87 + landmark_connections: 14 + landmark_connections: 14 + landmark_connections: 317 + landmark_connections: 317 + landmark_connections: 402 + landmark_connections: 402 + landmark_connections: 318 + landmark_connections: 318 + landmark_connections: 324 + landmark_connections: 324 + landmark_connections: 308 + landmark_connections: 78 + landmark_connections: 191 + landmark_connections: 191 + landmark_connections: 80 + landmark_connections: 80 + landmark_connections: 81 + landmark_connections: 81 + landmark_connections: 82 + landmark_connections: 82 + landmark_connections: 13 + landmark_connections: 13 + landmark_connections: 312 + landmark_connections: 312 + landmark_connections: 311 + landmark_connections: 311 + landmark_connections: 310 + landmark_connections: 310 + landmark_connections: 415 + landmark_connections: 415 + landmark_connections: 308 + # Left eye. + landmark_connections: 33 + landmark_connections: 7 + landmark_connections: 7 + landmark_connections: 163 + landmark_connections: 163 + landmark_connections: 144 + landmark_connections: 144 + landmark_connections: 145 + landmark_connections: 145 + landmark_connections: 153 + landmark_connections: 153 + landmark_connections: 154 + landmark_connections: 154 + landmark_connections: 155 + landmark_connections: 155 + landmark_connections: 133 + landmark_connections: 33 + landmark_connections: 246 + landmark_connections: 246 + landmark_connections: 161 + landmark_connections: 161 + landmark_connections: 160 + landmark_connections: 160 + landmark_connections: 159 + landmark_connections: 159 + landmark_connections: 158 + landmark_connections: 158 + landmark_connections: 157 + landmark_connections: 157 + landmark_connections: 173 + landmark_connections: 173 + landmark_connections: 133 + # Left eyebrow. 
+ landmark_connections: 46 + landmark_connections: 53 + landmark_connections: 53 + landmark_connections: 52 + landmark_connections: 52 + landmark_connections: 65 + landmark_connections: 65 + landmark_connections: 55 + landmark_connections: 70 + landmark_connections: 63 + landmark_connections: 63 + landmark_connections: 105 + landmark_connections: 105 + landmark_connections: 66 + landmark_connections: 66 + landmark_connections: 107 + # Right eye. + landmark_connections: 263 + landmark_connections: 249 + landmark_connections: 249 + landmark_connections: 390 + landmark_connections: 390 + landmark_connections: 373 + landmark_connections: 373 + landmark_connections: 374 + landmark_connections: 374 + landmark_connections: 380 + landmark_connections: 380 + landmark_connections: 381 + landmark_connections: 381 + landmark_connections: 382 + landmark_connections: 382 + landmark_connections: 362 + landmark_connections: 263 + landmark_connections: 466 + landmark_connections: 466 + landmark_connections: 388 + landmark_connections: 388 + landmark_connections: 387 + landmark_connections: 387 + landmark_connections: 386 + landmark_connections: 386 + landmark_connections: 385 + landmark_connections: 385 + landmark_connections: 384 + landmark_connections: 384 + landmark_connections: 398 + landmark_connections: 398 + landmark_connections: 362 + # Right eyebrow. + landmark_connections: 276 + landmark_connections: 283 + landmark_connections: 283 + landmark_connections: 282 + landmark_connections: 282 + landmark_connections: 295 + landmark_connections: 295 + landmark_connections: 285 + landmark_connections: 300 + landmark_connections: 293 + landmark_connections: 293 + landmark_connections: 334 + landmark_connections: 334 + landmark_connections: 296 + landmark_connections: 296 + landmark_connections: 336 + # Face oval. 
+ landmark_connections: 10 + landmark_connections: 338 + landmark_connections: 338 + landmark_connections: 297 + landmark_connections: 297 + landmark_connections: 332 + landmark_connections: 332 + landmark_connections: 284 + landmark_connections: 284 + landmark_connections: 251 + landmark_connections: 251 + landmark_connections: 389 + landmark_connections: 389 + landmark_connections: 356 + landmark_connections: 356 + landmark_connections: 454 + landmark_connections: 454 + landmark_connections: 323 + landmark_connections: 323 + landmark_connections: 361 + landmark_connections: 361 + landmark_connections: 288 + landmark_connections: 288 + landmark_connections: 397 + landmark_connections: 397 + landmark_connections: 365 + landmark_connections: 365 + landmark_connections: 379 + landmark_connections: 379 + landmark_connections: 378 + landmark_connections: 378 + landmark_connections: 400 + landmark_connections: 400 + landmark_connections: 377 + landmark_connections: 377 + landmark_connections: 152 + landmark_connections: 152 + landmark_connections: 148 + landmark_connections: 148 + landmark_connections: 176 + landmark_connections: 176 + landmark_connections: 149 + landmark_connections: 149 + landmark_connections: 150 + landmark_connections: 150 + landmark_connections: 136 + landmark_connections: 136 + landmark_connections: 172 + landmark_connections: 172 + landmark_connections: 58 + landmark_connections: 58 + landmark_connections: 132 + landmark_connections: 132 + landmark_connections: 93 + landmark_connections: 93 + landmark_connections: 234 + landmark_connections: 234 + landmark_connections: 127 + landmark_connections: 127 + landmark_connections: 162 + landmark_connections: 162 + landmark_connections: 21 + landmark_connections: 21 + landmark_connections: 54 + landmark_connections: 54 + landmark_connections: 103 + landmark_connections: 103 + landmark_connections: 67 + landmark_connections: 67 + landmark_connections: 109 + landmark_connections: 109 + landmark_connections: 10 + landmark_color { r: 255 g: 0 b: 0 } + connection_color { r: 0 g: 255 b: 0 } + thickness: 2 + visualize_landmark_depth: false + } + } +} + +# Collects a RenderData object for each hand into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of RenderData at the BATCH_END +# timestamp. +node { + calculator: "EndLoopRenderDataCalculator" + input_stream: "ITEM:landmarks_render_data" + input_stream: "BATCH_END:end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks_render_data" +} + +# Converts normalized rects to drawing primitives for annotation overlay. +node { + calculator: "RectToRenderDataCalculator" + input_stream: "NORM_RECTS:rects" + output_stream: "RENDER_DATA:rects_render_data" + node_options: { + [type.googleapis.com/mediapipe.RectToRenderDataCalculatorOptions] { + filled: false + color { r: 255 g: 0 b: 0 } + thickness: 4.0 + } + } +} + +# Draws annotations and overlays them on top of the input images. +node { + calculator: "AnnotationOverlayCalculator" + input_stream: "IMAGE_GPU:input_image" + input_stream: "detections_render_data" + input_stream: "VECTOR:0:multi_face_landmarks_render_data" + input_stream: "rects_render_data" + output_stream: "IMAGE_GPU:output_image" +} diff --git a/mediapipe/models/README.md b/mediapipe/models/README.md index dfa5e4e4d..6cbcce939 100644 --- a/mediapipe/models/README.md +++ b/mediapipe/models/README.md @@ -1,6 +1,6 @@ ## MediaPipe Models -Here are descriptions of the models used in the [example applications](../docs/examples.md). 
+Here are the descriptions of the models used in the [example applications](../docs/examples.md). ### Object Detection * [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/ssdlite_object_detection.tflite) @@ -8,24 +8,29 @@ Here are descriptions of the models used in the [example applications](../docs/e ### Face Detection * [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite) + * [Model page](https://sites.google.com/corp/view/perception-cv4arvr/blazeface) * Paper: ["BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs"](https://arxiv.org/abs/1907.05047) * [Model card](https://sites.google.com/corp/view/perception-cv4arvr/blazeface#h.p_21ojPZDx3cqq) ### Face Mesh - * [TF.js model](https://tfhub.dev/mediapipe/facemesh/1) + * Face detection: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_detection_front.tflite) (see above) + * 3D face landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/face_landmark.tflite), [TF.js model](https://tfhub.dev/mediapipe/facemesh/1) + * [Model page](https://sites.google.com/corp/view/perception-cv4arvr/facemesh) * Paper: ["Real-time Facial Surface Geometry from Monocular Video on Mobile GPUs"](https://arxiv.org/abs/1907.06724) + * [Google AI Blog post](https://ai.googleblog.com/2019/03/real-time-ar-self-expression-with.html) * [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html) * [Model card](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view) ### Hand Detection and Tracking * Palm detection: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/palm_detection.tflite), [TF.js model](https://tfhub.dev/mediapipe/handdetector/1) - * 2D hand landmark: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite) - * 3D hand landmark: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark_3d.tflite), [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1) + * 2D hand landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark.tflite) + * 3D hand landmarks: [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hand_landmark_3d.tflite), [TF.js model](https://tfhub.dev/mediapipe/handskeleton/1) * [Google AI Blog post](https://mediapipe.page.link/handgoogleaiblog) * [TensorFlow Blog post](https://blog.tensorflow.org/2020/03/face-and-hand-tracking-in-browser-with-mediapipe-and-tensorflowjs.html) * [Model card](https://mediapipe.page.link/handmc) ### Hair Segmentation * [TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/models/hair_segmentation.tflite) + * [Model page](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation) * Paper: ["Real-time Hair segmentation and recoloring on Mobile GPUs"](https://arxiv.org/abs/1907.06740) - * [Model card](https://sites.google.com/corp/view/perception-cv4arvr/hair-segmentation#h.p_NimuO7PgHxlY) + * [Model card](https://drive.google.com/file/d/1lPwJ8BD_-3UUor4LayQ0xpa_RIC_hoRh/view) diff --git a/mediapipe/models/face_landmark.tflite b/mediapipe/models/face_landmark.tflite new file mode 100644 index 000000000..9058eaa33 Binary files /dev/null and b/mediapipe/models/face_landmark.tflite differ diff --git a/mediapipe/modules/README.md 
b/mediapipe/modules/README.md new file mode 100644 index 000000000..7fdbaa297 --- /dev/null +++ b/mediapipe/modules/README.md @@ -0,0 +1,11 @@ +# Modules + +Each module (represented as a subfolder) provides subgraphs and corresponding resources (e.g. tflite models) to perform domain-specific tasks (e.g. detect faces, detect face landmarks). + +*Modules listed below are already used in some of `mediapipe/graphs` and more graphs are being migrated to use existing and upcoming modules.* + +| Module | Description | +| :--- | :--- | +| [`face_detection`](face_detection/README.md) | Subgraphs to detect faces. | +| [`face_landmark`](face_landmark/README.md) | Subgraphs to detect and track face landmarks. | + diff --git a/mediapipe/modules/face_detection/BUILD b/mediapipe/modules/face_detection/BUILD new file mode 100644 index 000000000..2c90c3593 --- /dev/null +++ b/mediapipe/modules/face_detection/BUILD @@ -0,0 +1,58 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "face_detection_front_cpu", + graph = "face_detection_front_cpu.pbtxt", + register_as = "FaceDetectionFrontCpu", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_front_gpu", + graph = "face_detection_front_gpu.pbtxt", + register_as = "FaceDetectionFrontGpu", + deps = [ + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:ssd_anchors_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_detections_calculator", + "//mediapipe/calculators/util:detection_letterbox_removal_calculator", + "//mediapipe/calculators/util:non_max_suppression_calculator", + ], +) + +exports_files( + srcs = [ + "face_detection_front.tflite", + ], +) diff --git a/mediapipe/modules/face_detection/README.md b/mediapipe/modules/face_detection/README.md new file mode 100644 index 000000000..f40625268 --- /dev/null +++ b/mediapipe/modules/face_detection/README.md @@ -0,0 +1,7 @@ +# face_detection + +Subgraphs|Details +:--- | :--- +[`FaceDetectionFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. 
selfie images). (CPU input, and inference is executed on CPU.) +[`FaceDetectionFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt)| Detects faces. Works best for images from front-facing cameras (i.e. selfie images). (GPU input, and inference is executed on GPU.) + diff --git a/mediapipe/modules/face_detection/face_detection_front.tflite b/mediapipe/modules/face_detection/face_detection_front.tflite new file mode 100755 index 000000000..419e1a8a1 Binary files /dev/null and b/mediapipe/modules/face_detection/face_detection_front.tflite differ diff --git a/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt b/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt new file mode 100644 index 000000000..7a52a3c6b --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_front_cpu.pbtxt @@ -0,0 +1,143 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_front.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_front.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionFrontCpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionFrontCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms the input image on CPU to a 128x128 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio +# (what is expected by the corresponding face detection model), resulting in +# potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:image" + output_stream: "IMAGE:transformed_image" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 128 + output_height: 128 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on CPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_image" + output_stream: "TENSORS:input_tensors" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_front.tflite" + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. 
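+# (A rough sanity check of the anchor layout, assuming the standard SSD anchor
+# scheme: the stride-8 layer yields a 16x16 grid and the three stride-16 layers
+# collapse onto a shared 8x8 grid, giving 16*16*2 + 8*8*6 = 512 + 384 = 896
+# anchors, which matches the num_boxes value expected by the detection decoder
+# below.)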
+node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 128 + input_size_width: 128 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 896 + num_coords: 16 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 6 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.75 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt b/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt new file mode 100644 index 000000000..4807cb6d2 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_front_gpu.pbtxt @@ -0,0 +1,143 @@ +# MediaPipe graph to detect faces. (GPU input, and inference is executed on +# GPU.) +# +# It is required that "face_detection_front.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_front.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionFrontGpu" +# input_stream: "IMAGE:image" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionFrontGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Transforms the input image on GPU to a 128x128 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio +# (what is expected by the corresponding face detection model), resulting in +# potential letterboxing in the transformed image. 
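+# (For illustration, assuming a 640x480 camera frame: FIT scales it to 128x96
+# and pads 16 pixels above and below, so LETTERBOX_PADDING would carry the
+# normalized padding [left, top, right, bottom] = [0, 0.125, 0, 0.125], which
+# DetectionLetterboxRemovalCalculator later uses to map detections back onto
+# the unpadded frame.)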
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "IMAGE_GPU:transformed_image" + output_stream: "LETTERBOX_PADDING:letterbox_padding" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 128 + output_height: 128 + scale_mode: FIT + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_image" + output_stream: "TENSORS_GPU:input_tensors" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:input_tensors" + output_stream: "TENSORS_GPU:detection_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_detection/face_detection_front.tflite" + } + } +} + +# Generates a single side packet containing a vector of SSD anchors based on +# the specification in the options. +node { + calculator: "SsdAnchorsCalculator" + output_side_packet: "anchors" + options: { + [mediapipe.SsdAnchorsCalculatorOptions.ext] { + num_layers: 4 + min_scale: 0.1484375 + max_scale: 0.75 + input_size_height: 128 + input_size_width: 128 + anchor_offset_x: 0.5 + anchor_offset_y: 0.5 + strides: 8 + strides: 16 + strides: 16 + strides: 16 + aspect_ratios: 1.0 + fixed_anchor_size: true + } + } +} + +# Decodes the detection tensors generated by the TensorFlow Lite model, based on +# the SSD anchors and the specification in the options, into a vector of +# detections. Each detection describes a detected object. +node { + calculator: "TfLiteTensorsToDetectionsCalculator" + input_stream: "TENSORS_GPU:detection_tensors" + input_side_packet: "ANCHORS:anchors" + output_stream: "DETECTIONS:unfiltered_detections" + options: { + [mediapipe.TfLiteTensorsToDetectionsCalculatorOptions.ext] { + num_classes: 1 + num_boxes: 896 + num_coords: 16 + box_coord_offset: 0 + keypoint_coord_offset: 4 + num_keypoints: 6 + num_values_per_keypoint: 2 + sigmoid_score: true + score_clipping_thresh: 100.0 + reverse_output_order: true + x_scale: 128.0 + y_scale: 128.0 + h_scale: 128.0 + w_scale: 128.0 + min_score_thresh: 0.75 + } + } +} + +# Performs non-max suppression to remove excessive detections. +node { + calculator: "NonMaxSuppressionCalculator" + input_stream: "unfiltered_detections" + output_stream: "filtered_detections" + options: { + [mediapipe.NonMaxSuppressionCalculatorOptions.ext] { + min_suppression_threshold: 0.3 + overlap_type: INTERSECTION_OVER_UNION + algorithm: WEIGHTED + } + } +} + +# Adjusts detection locations (already normalized to [0.f, 1.f]) on the +# letterboxed image (after image transformation with the FIT scale mode) to the +# corresponding locations on the same image with the letterbox removed (the +# input image to the graph before image transformation). +node { + calculator: "DetectionLetterboxRemovalCalculator" + input_stream: "DETECTIONS:filtered_detections" + input_stream: "LETTERBOX_PADDING:letterbox_padding" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD new file mode 100644 index 000000000..3dd41ecb4 --- /dev/null +++ b/mediapipe/modules/face_landmark/BUILD @@ -0,0 +1,127 @@ +# Copyright 2019 The MediaPipe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load( + "//mediapipe/framework/tool:mediapipe_graph.bzl", + "mediapipe_simple_subgraph", +) + +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +mediapipe_simple_subgraph( + name = "face_landmark_cpu", + graph = "face_landmark_cpu.pbtxt", + register_as = "FaceLandmarkCpu", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_cropping_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_gpu", + graph = "face_landmark_gpu.pbtxt", + register_as = "FaceLandmarkGpu", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/image:image_cropping_calculator", + "//mediapipe/calculators/image:image_transformation_calculator", + "//mediapipe/calculators/tflite:tflite_converter_calculator", + "//mediapipe/calculators/tflite:tflite_inference_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_floats_calculator", + "//mediapipe/calculators/tflite:tflite_tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_cpu", + graph = "face_landmark_front_cpu.pbtxt", + register_as = "FaceLandmarkFrontCpu", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_cpu", + ":face_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:merge_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/modules/face_detection:face_detection_front_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_gpu", + graph = "face_landmark_front_gpu.pbtxt", + register_as = "FaceLandmarkFrontGpu", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_gpu", + ":face_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + 
"//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:merge_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/modules/face_detection:face_detection_front_gpu", + ], +) + +exports_files( + srcs = [ + "face_landmark.tflite", + ], +) + +mediapipe_simple_subgraph( + name = "face_detection_front_detection_to_roi", + graph = "face_detection_front_detection_to_roi.pbtxt", + register_as = "FaceDetectionFrontDetectionToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_landmarks_to_roi", + graph = "face_landmark_landmarks_to_roi.pbtxt", + register_as = "FaceLandmarkLandmarksToRoi", + deps = [ + "//mediapipe/calculators/util:detections_to_rects_calculator", + "//mediapipe/calculators/util:landmarks_to_detection_calculator", + "//mediapipe/calculators/util:rect_transformation_calculator", + ], +) diff --git a/mediapipe/modules/face_landmark/README.md b/mediapipe/modules/face_landmark/README.md new file mode 100644 index 000000000..eed21a2d7 --- /dev/null +++ b/mediapipe/modules/face_landmark/README.md @@ -0,0 +1,9 @@ +# face_landmark + +Subgraphs|Details +:--- | :--- +[`FaceLandmarkCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt)| Detects landmarks on a single face. (CPU input, and inference is executed on CPU.) +[`FaceLandmarkGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt)| Detects landmarks on a single face. (GPU input, and inference is executed on GPU) +[`FaceLandmarkFrontCpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt)| Detects and tracks landmarks on multiple faces. (CPU input, and inference is executed on CPU) +[`FaceLandmarkFrontGpu`](https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt)| Detects and tracks landmarks on multiple faces. (GPU input, and inference is executed on GPU.) + diff --git a/mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt b/mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt new file mode 100644 index 000000000..c64c10558 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_detection_front_detection_to_roi.pbtxt @@ -0,0 +1,48 @@ +# MediaPipe graph to calculate face region of interest (ROI) from the very +# first face detection in the vector of detections provided by +# "FaceDetectionFrontCpu" or "FaceDetectionFrontGpu" +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "FaceDetectionFrontDetectionToRoi" + +# Face detection. (Detection) +input_stream: "DETECTION:detection" +# Frame size (width and height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# ROI according to the first detection of input detections. 
(NormalizedRect) +output_stream: "ROI:roi" + +# Converts results of face detection into a rectangle (normalized by image size) +# that encloses the face and is rotated such that the line connecting left eye +# and right eye is aligned with the X-axis of the rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:initial_roi" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 0 # Left eye. + rotation_vector_end_keypoint_index: 1 # Right eye. + rotation_vector_target_angle_degrees: 0 + output_zero_rect_for_empty_detections: true + } + } +} + +# Expands and shifts the rectangle that contains the face so that it's likely +# to cover the entire face. +node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:initial_roi" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} diff --git a/mediapipe/modules/face_landmark/face_landmark.tflite b/mediapipe/modules/face_landmark/face_landmark.tflite new file mode 100644 index 000000000..9058eaa33 Binary files /dev/null and b/mediapipe/modules/face_landmark/face_landmark.tflite differ diff --git a/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt new file mode 100644 index 000000000..8d0311993 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_cpu.pbtxt @@ -0,0 +1,146 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# 468 face landmarks within the given ROI. (NormalizedLandmarkList) +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:face_landmarks" + +# Crops the input image to the region of interest. +node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "IMAGE:face_region" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + border_mode: BORDER_REPLICATE + } + } +} + +# Transforms the input image on CPU to a 192x192 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. 
+node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE:face_region" + output_stream: "IMAGE:transformed_face_region" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 192 + output_height: 192 + } + } +} + +# Converts the transformed input image on CPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE:transformed_face_region" + output_stream: "TENSORS:input_tensor" +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS:input_tensor" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_landmark/face_landmark.tflite" + } + } +} + +# Splits a vector of tensors into multiple vectors. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "face_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the face-flag tensor into a float that represents the confidence +# score of face presence. +node { + calculator: "TfLiteTensorsToFloatsCalculator" + input_stream: "TENSORS:face_flag_tensor" + output_stream: "FLOAT:face_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a face is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:face_presence_score" + output_stream: "FLAG:face_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.1 + } + } +} + +# Drop landmarks tensors if face is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:face_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 468 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt new file mode 100644 index 000000000..cdd8a03f2 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_cpu.pbtxt @@ -0,0 +1,216 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "face_detection_front.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_front.tflite" +# path during execution. 
+# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if FaceLandmarkCpu was able to identify face presence +# in the previous image. Otherwise, passes the incoming image through to trigger +# a new round of face detection in FaceDetectionFrontCpu. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. +node { + calculator: "FaceDetectionFrontCpu" + input_stream: "IMAGE:gated_image" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. 
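+# (See face_detection_front_detection_to_roi.pbtxt: the detection is converted
+# into a square rect aligned with the line between the two eye keypoints and
+# then expanded by 1.5x, so the ROI is likely to cover the whole face.)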
+node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "prev_face_rects_from_landmarks" + input_stream: "face_rects_from_detections" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkCpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. 
+node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt new file mode 100644 index 000000000..d06aff1df --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_gpu.pbtxt @@ -0,0 +1,216 @@ +# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is +# executed on GPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "face_detection_front.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_front.tflite" +# path during execution. +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontGpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if FaceLandmarkGpu was able to identify face presence +# in the previous image. Otherwise, passes the incoming image through to trigger +# a new round of face detection in FaceDetectionFrontGpu. 
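+# (In other words, the image is forwarded only when prev_has_enough_faces is
+# false; empty_packets_as_allow lets the very first frame through, when no
+# previous rects exist yet and the DISALLOW stream carries no packet.)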
+node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. +node { + calculator: "FaceDetectionFrontGpu" + input_stream: "IMAGE:gated_image" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "prev_face_rects_from_landmarks" + input_stream: "face_rects_from_detections" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE_GPU:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. 
+node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkGpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt new file mode 100644 index 000000000..3460d766d --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_gpu.pbtxt @@ -0,0 +1,146 @@ +# MediaPipe graph to detect/predict face landmarks. (GPU input, and inference is +# executed on GPU.) +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkGpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkGpu" + +# GPU image. (GpuBuffer) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# 468 face landmarks within the given ROI. (NormalizedLandmarkList) +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. 
However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:face_landmarks" + +# Crops the input image to the given region of interest. +node { + calculator: "ImageCroppingCalculator" + input_stream: "IMAGE_GPU:image" + input_stream: "NORM_RECT:roi" + output_stream: "IMAGE_GPU:face_region" + options: { + [mediapipe.ImageCroppingCalculatorOptions.ext] { + border_mode: BORDER_REPLICATE + } + } +} + +# Transforms the input image on GPU to a 192x192 image. To scale the input +# image, the scale_mode option is set to FIT to preserve the aspect ratio, +# resulting in potential letterboxing in the transformed image. +node: { + calculator: "ImageTransformationCalculator" + input_stream: "IMAGE_GPU:face_region" + output_stream: "IMAGE_GPU:transformed_face_region" + options: { + [mediapipe.ImageTransformationCalculatorOptions.ext] { + output_width: 192 + output_height: 192 + } + } +} + +# Converts the transformed input image on GPU into an image tensor stored as a +# TfLiteTensor. +node { + calculator: "TfLiteConverterCalculator" + input_stream: "IMAGE_GPU:transformed_face_region" + output_stream: "TENSORS_GPU:input_tensor" +} + +# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a +# vector of GPU tensors representing, for instance, detection boxes/keypoints +# and scores. +node { + calculator: "TfLiteInferenceCalculator" + input_stream: "TENSORS_GPU:input_tensor" + output_stream: "TENSORS:output_tensors" + options: { + [mediapipe.TfLiteInferenceCalculatorOptions.ext] { + model_path: "mediapipe/modules/face_landmark/face_landmark.tflite" + } + } +} + +# Splits a vector of tensors into multiple vectors. +node { + calculator: "SplitTfLiteTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "face_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the face-flag tensor into a float that represents the confidence +# score of face presence. +node { + calculator: "TfLiteTensorsToFloatsCalculator" + input_stream: "TENSORS:face_flag_tensor" + output_stream: "FLOAT:face_presence_score" +} + +# Applies a threshold to the confidence score to determine whether a face is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:face_presence_score" + output_stream: "FLAG:face_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.1 + } + } +} + +# Drop landmarks tensors if face is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:face_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of lanmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. +node { + calculator: "TfLiteTensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TfLiteTensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 468 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). 
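+# (Roughly, ignoring ROI rotation: a landmark at (x, y) in crop coordinates
+# maps to (roi.x_center + (x - 0.5) * roi.width,
+#          roi.y_center + (y - 0.5) * roi.height) in full-image coordinates,
+# e.g. (0.25, 0.5) inside a centered 0.4x0.4 ROI lands at about (0.4, 0.5).)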
+node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt b/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt new file mode 100644 index 000000000..2df053820 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_landmarks_to_roi.pbtxt @@ -0,0 +1,54 @@ +# MediaPipe graph to calculate face region of interest (ROI) from landmarks +# detected by "FaceLandmarkCpu" or "FaceLandmarkGpu". +# +# NOTE: this graph is subject to change and should not be used directly. + +type: "FaceLandmarkLandmarksToRoi" + +# Normalized landmarks. (NormalizedLandmarkList) +input_stream: "LANDMARKS:landmarks" +# Frame size (width & height). (std::pair) +input_stream: "IMAGE_SIZE:image_size" +# ROI according to landmarks. (NormalizedRect) +output_stream: "ROI:roi" + +# Converts face landmarks to a detection that tightly encloses all landmarks. +node { + calculator: "LandmarksToDetectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + output_stream: "DETECTION:face_detection" +} + +# Converts the face detection into a rectangle (normalized by image size) +# that encloses the face and is rotated such that the line connecting left side +# of the left eye and right side of the right eye is aligned with the X-axis of +# the rectangle. +node { + calculator: "DetectionsToRectsCalculator" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "NORM_RECT:face_rect_from_landmarks" + options: { + [mediapipe.DetectionsToRectsCalculatorOptions.ext] { + rotation_vector_start_keypoint_index: 33 # Left side of left eye. + rotation_vector_end_keypoint_index: 133 # Right side of right eye. + rotation_vector_target_angle_degrees: 0 + } + } +} + +# Expands the face rectangle so that in the next video image it's likely to +# still contain the face even with some motion. 
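+# (Both sides are scaled by 1.5x and, with square_long, the rect is made square
+# using its longer side; IMAGE_SIZE is provided so that the longer side can be
+# determined in pixels rather than in normalized units.)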
+node { + calculator: "RectTransformationCalculator" + input_stream: "NORM_RECT:face_rect_from_landmarks" + input_stream: "IMAGE_SIZE:image_size" + output_stream: "roi" + options: { + [mediapipe.RectTransformationCalculatorOptions.ext] { + scale_x: 1.5 + scale_y: 1.5 + square_long: true + } + } +} diff --git a/mediapipe/util/tflite/operations/max_pool_argmax.cc b/mediapipe/util/tflite/operations/max_pool_argmax.cc index b3b9f71a1..478322ca5 100644 --- a/mediapipe/util/tflite/operations/max_pool_argmax.cc +++ b/mediapipe/util/tflite/operations/max_pool_argmax.cc @@ -153,10 +153,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size->data[2] = out_width; output_size->data[3] = channels_out; TfLiteIntArray* indices_size = TfLiteIntArrayCopy(output_size); - if (context->ResizeTensor(context, output, output_size) == kTfLiteError) { + if (context->ResizeTensor(context, output, output_size) != kTfLiteOk) { return kTfLiteError; } - if (context->ResizeTensor(context, indices, indices_size) == kTfLiteError) { + if (context->ResizeTensor(context, indices, indices_size) != kTfLiteOk) { return kTfLiteError; } return kTfLiteOk; diff --git a/mediapipe/util/tracking/region_flow_computation.cc b/mediapipe/util/tracking/region_flow_computation.cc index 58b2750b9..403440f94 100644 --- a/mediapipe/util/tracking/region_flow_computation.cc +++ b/mediapipe/util/tracking/region_flow_computation.cc @@ -2999,7 +2999,7 @@ void RegionFlowComputation::ComputeBlockBasedFlow( if (!feature_list->empty() && options_.median_magnitude_bounds() > 0) { std::vector motion_magnitudes; motion_magnitudes.reserve(feature_list->size()); - for (auto feature : *feature_list) { + for (const auto& feature : *feature_list) { motion_magnitudes.push_back(feature.flow.Norm2()); } auto median_iter = motion_magnitudes.begin() + motion_magnitudes.size() / 2; @@ -3279,7 +3279,7 @@ void RegionFlowComputation::RegionFlowFeatureListToRegionFlow( } // Add feature according smallest block width and height to regions. - for (auto feature : feature_list.feature()) { + for (const auto& feature : feature_list.feature()) { const int x = static_cast(feature.x()); const int y = static_cast(feature.y()); // Guard, in case equation is wrong. diff --git a/mediapipe/util/tracking/streaming_buffer.cc b/mediapipe/util/tracking/streaming_buffer.cc index e089c367f..2e5b0ac2f 100644 --- a/mediapipe/util/tracking/streaming_buffer.cc +++ b/mediapipe/util/tracking/streaming_buffer.cc @@ -105,7 +105,7 @@ bool StreamingBuffer::TruncateBuffer(bool flush) { first_frame_index_ += elems_to_clear; const int remaining_elems = flush ? 0 : overlap_; - for (auto item : data_) { + for (const auto& item : data_) { const auto& buffer = item.second; if (buffer.size() != remaining_elems) { LOG(WARNING) << "After trunctation, for tag " << item.first << "got "